{ "best_global_step": 30650, "best_metric": 0.12099920213222504, "best_model_checkpoint": "saves_multiple/p-tuning/llama-3-8b-instruct/train_multirc_42_1762240404/checkpoint-30650", "epoch": 20.0, "eval_steps": 6130, "global_step": 122600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008156606851549756, "grad_norm": 314.6358337402344, "learning_rate": 3.262642740619902e-07, "loss": 8.3174, "num_input_tokens_seen": 10144, "step": 5 }, { "epoch": 0.0016313213703099511, "grad_norm": 264.6385803222656, "learning_rate": 7.34094616639478e-07, "loss": 7.4675, "num_input_tokens_seen": 20704, "step": 10 }, { "epoch": 0.0024469820554649264, "grad_norm": 224.40530395507812, "learning_rate": 1.1419249592169658e-06, "loss": 6.5126, "num_input_tokens_seen": 32096, "step": 15 }, { "epoch": 0.0032626427406199023, "grad_norm": 172.0708770751953, "learning_rate": 1.5497553017944535e-06, "loss": 5.3189, "num_input_tokens_seen": 42272, "step": 20 }, { "epoch": 0.004078303425774877, "grad_norm": 135.7971649169922, "learning_rate": 1.957585644371941e-06, "loss": 3.7183, "num_input_tokens_seen": 53792, "step": 25 }, { "epoch": 0.004893964110929853, "grad_norm": 92.28217315673828, "learning_rate": 2.365415986949429e-06, "loss": 3.0498, "num_input_tokens_seen": 64864, "step": 30 }, { "epoch": 0.005709624796084829, "grad_norm": 85.29937744140625, "learning_rate": 2.7732463295269165e-06, "loss": 2.1791, "num_input_tokens_seen": 76480, "step": 35 }, { "epoch": 0.0065252854812398045, "grad_norm": 59.62224578857422, "learning_rate": 3.1810766721044044e-06, "loss": 1.7123, "num_input_tokens_seen": 87616, "step": 40 }, { "epoch": 0.00734094616639478, "grad_norm": 110.24623107910156, "learning_rate": 3.5889070146818927e-06, "loss": 1.2501, "num_input_tokens_seen": 98624, "step": 45 }, { "epoch": 0.008156606851549755, "grad_norm": 30.345088958740234, "learning_rate": 3.99673735725938e-06, "loss": 0.7328, "num_input_tokens_seen": 108896, "step": 50 }, { "epoch": 0.00897226753670473, "grad_norm": 88.82992553710938, "learning_rate": 4.404567699836868e-06, "loss": 0.6449, "num_input_tokens_seen": 119936, "step": 55 }, { "epoch": 0.009787928221859706, "grad_norm": 21.83713722229004, "learning_rate": 4.812398042414356e-06, "loss": 0.4664, "num_input_tokens_seen": 131520, "step": 60 }, { "epoch": 0.010603588907014683, "grad_norm": 46.5600471496582, "learning_rate": 5.2202283849918435e-06, "loss": 0.5114, "num_input_tokens_seen": 142912, "step": 65 }, { "epoch": 0.011419249592169658, "grad_norm": 44.04116439819336, "learning_rate": 5.628058727569331e-06, "loss": 0.4, "num_input_tokens_seen": 153600, "step": 70 }, { "epoch": 0.012234910277324634, "grad_norm": 31.065277099609375, "learning_rate": 6.035889070146819e-06, "loss": 0.4806, "num_input_tokens_seen": 164576, "step": 75 }, { "epoch": 0.013050570962479609, "grad_norm": 58.715877532958984, "learning_rate": 6.443719412724307e-06, "loss": 0.4291, "num_input_tokens_seen": 174912, "step": 80 }, { "epoch": 0.013866231647634585, "grad_norm": 36.94560623168945, "learning_rate": 6.851549755301794e-06, "loss": 0.3929, "num_input_tokens_seen": 185312, "step": 85 }, { "epoch": 0.01468189233278956, "grad_norm": 41.26664733886719, "learning_rate": 7.2593800978792825e-06, "loss": 0.3971, "num_input_tokens_seen": 196608, "step": 90 }, { "epoch": 0.015497553017944535, "grad_norm": 52.26409149169922, "learning_rate": 7.66721044045677e-06, "loss": 0.5684, "num_input_tokens_seen": 209408, "step": 95 }, { "epoch": 0.01631321370309951, "grad_norm": 42.45866012573242, "learning_rate": 8.075040783034257e-06, "loss": 0.476, "num_input_tokens_seen": 220192, "step": 100 }, { "epoch": 0.017128874388254486, "grad_norm": 37.38993453979492, "learning_rate": 8.482871125611746e-06, "loss": 0.3937, "num_input_tokens_seen": 230464, "step": 105 }, { "epoch": 0.01794453507340946, "grad_norm": 12.51401424407959, "learning_rate": 8.890701468189234e-06, "loss": 0.3869, "num_input_tokens_seen": 241664, "step": 110 }, { "epoch": 0.018760195758564437, "grad_norm": 51.17809295654297, "learning_rate": 9.298531810766722e-06, "loss": 0.4125, "num_input_tokens_seen": 253248, "step": 115 }, { "epoch": 0.01957585644371941, "grad_norm": 34.78268814086914, "learning_rate": 9.706362153344209e-06, "loss": 0.3685, "num_input_tokens_seen": 263072, "step": 120 }, { "epoch": 0.020391517128874388, "grad_norm": 18.864459991455078, "learning_rate": 1.0114192495921697e-05, "loss": 0.352, "num_input_tokens_seen": 274944, "step": 125 }, { "epoch": 0.021207177814029365, "grad_norm": 17.730932235717773, "learning_rate": 1.0522022838499184e-05, "loss": 0.3605, "num_input_tokens_seen": 285056, "step": 130 }, { "epoch": 0.02202283849918434, "grad_norm": 33.00218200683594, "learning_rate": 1.0929853181076672e-05, "loss": 0.3986, "num_input_tokens_seen": 296064, "step": 135 }, { "epoch": 0.022838499184339316, "grad_norm": 20.735210418701172, "learning_rate": 1.1337683523654159e-05, "loss": 0.4069, "num_input_tokens_seen": 307136, "step": 140 }, { "epoch": 0.02365415986949429, "grad_norm": 21.635116577148438, "learning_rate": 1.1745513866231649e-05, "loss": 0.4322, "num_input_tokens_seen": 317152, "step": 145 }, { "epoch": 0.024469820554649267, "grad_norm": 28.22029685974121, "learning_rate": 1.2153344208809135e-05, "loss": 0.4022, "num_input_tokens_seen": 327520, "step": 150 }, { "epoch": 0.02528548123980424, "grad_norm": 13.397923469543457, "learning_rate": 1.2561174551386624e-05, "loss": 0.5004, "num_input_tokens_seen": 338720, "step": 155 }, { "epoch": 0.026101141924959218, "grad_norm": 16.55824089050293, "learning_rate": 1.296900489396411e-05, "loss": 0.3486, "num_input_tokens_seen": 349824, "step": 160 }, { "epoch": 0.026916802610114192, "grad_norm": 39.6685676574707, "learning_rate": 1.3376835236541599e-05, "loss": 0.3352, "num_input_tokens_seen": 360576, "step": 165 }, { "epoch": 0.02773246329526917, "grad_norm": 57.711448669433594, "learning_rate": 1.3784665579119085e-05, "loss": 0.3404, "num_input_tokens_seen": 371328, "step": 170 }, { "epoch": 0.028548123980424143, "grad_norm": 63.481502532958984, "learning_rate": 1.4192495921696575e-05, "loss": 0.4694, "num_input_tokens_seen": 382624, "step": 175 }, { "epoch": 0.02936378466557912, "grad_norm": 175.08531188964844, "learning_rate": 1.4600326264274062e-05, "loss": 0.4874, "num_input_tokens_seen": 395232, "step": 180 }, { "epoch": 0.030179445350734094, "grad_norm": 53.07209777832031, "learning_rate": 1.500815660685155e-05, "loss": 0.7018, "num_input_tokens_seen": 406784, "step": 185 }, { "epoch": 0.03099510603588907, "grad_norm": 16.1356143951416, "learning_rate": 1.5415986949429037e-05, "loss": 0.416, "num_input_tokens_seen": 417248, "step": 190 }, { "epoch": 0.03181076672104405, "grad_norm": 11.130064010620117, "learning_rate": 1.5823817292006523e-05, "loss": 0.4056, "num_input_tokens_seen": 428224, "step": 195 }, { "epoch": 0.03262642740619902, "grad_norm": 8.214327812194824, "learning_rate": 1.6231647634584013e-05, "loss": 0.3701, "num_input_tokens_seen": 437312, "step": 200 }, { "epoch": 0.033442088091353996, "grad_norm": 19.780445098876953, "learning_rate": 1.66394779771615e-05, "loss": 0.3989, "num_input_tokens_seen": 447776, "step": 205 }, { "epoch": 0.03425774877650897, "grad_norm": 23.08263397216797, "learning_rate": 1.704730831973899e-05, "loss": 0.3959, "num_input_tokens_seen": 459040, "step": 210 }, { "epoch": 0.03507340946166395, "grad_norm": 24.81744384765625, "learning_rate": 1.7455138662316477e-05, "loss": 0.3368, "num_input_tokens_seen": 470656, "step": 215 }, { "epoch": 0.03588907014681892, "grad_norm": 10.550070762634277, "learning_rate": 1.7862969004893963e-05, "loss": 0.3761, "num_input_tokens_seen": 480864, "step": 220 }, { "epoch": 0.0367047308319739, "grad_norm": 13.143187522888184, "learning_rate": 1.8270799347471453e-05, "loss": 0.349, "num_input_tokens_seen": 491680, "step": 225 }, { "epoch": 0.037520391517128875, "grad_norm": 13.331697463989258, "learning_rate": 1.867862969004894e-05, "loss": 0.3603, "num_input_tokens_seen": 502432, "step": 230 }, { "epoch": 0.03833605220228385, "grad_norm": 7.329267501831055, "learning_rate": 1.908646003262643e-05, "loss": 0.3927, "num_input_tokens_seen": 512800, "step": 235 }, { "epoch": 0.03915171288743882, "grad_norm": 15.567852973937988, "learning_rate": 1.9494290375203913e-05, "loss": 0.4135, "num_input_tokens_seen": 523424, "step": 240 }, { "epoch": 0.0399673735725938, "grad_norm": 7.734094142913818, "learning_rate": 1.9902120717781403e-05, "loss": 0.3361, "num_input_tokens_seen": 533600, "step": 245 }, { "epoch": 0.040783034257748776, "grad_norm": 36.39061737060547, "learning_rate": 2.0309951060358893e-05, "loss": 0.411, "num_input_tokens_seen": 544576, "step": 250 }, { "epoch": 0.041598694942903754, "grad_norm": 18.676137924194336, "learning_rate": 2.071778140293638e-05, "loss": 0.41, "num_input_tokens_seen": 554784, "step": 255 }, { "epoch": 0.04241435562805873, "grad_norm": 7.464500427246094, "learning_rate": 2.1125611745513866e-05, "loss": 0.39, "num_input_tokens_seen": 565632, "step": 260 }, { "epoch": 0.0432300163132137, "grad_norm": 9.962908744812012, "learning_rate": 2.1533442088091353e-05, "loss": 0.3545, "num_input_tokens_seen": 576000, "step": 265 }, { "epoch": 0.04404567699836868, "grad_norm": 9.480332374572754, "learning_rate": 2.1941272430668843e-05, "loss": 0.3468, "num_input_tokens_seen": 586816, "step": 270 }, { "epoch": 0.044861337683523655, "grad_norm": 13.83477783203125, "learning_rate": 2.234910277324633e-05, "loss": 0.3342, "num_input_tokens_seen": 597568, "step": 275 }, { "epoch": 0.04567699836867863, "grad_norm": 7.380682468414307, "learning_rate": 2.2756933115823816e-05, "loss": 0.36, "num_input_tokens_seen": 608064, "step": 280 }, { "epoch": 0.0464926590538336, "grad_norm": 14.0297269821167, "learning_rate": 2.3164763458401306e-05, "loss": 0.3185, "num_input_tokens_seen": 619904, "step": 285 }, { "epoch": 0.04730831973898858, "grad_norm": 43.77539825439453, "learning_rate": 2.3572593800978793e-05, "loss": 0.484, "num_input_tokens_seen": 630560, "step": 290 }, { "epoch": 0.04812398042414356, "grad_norm": 30.18943977355957, "learning_rate": 2.3980424143556283e-05, "loss": 0.3715, "num_input_tokens_seen": 641184, "step": 295 }, { "epoch": 0.048939641109298535, "grad_norm": 12.309725761413574, "learning_rate": 2.4388254486133766e-05, "loss": 0.4153, "num_input_tokens_seen": 651424, "step": 300 }, { "epoch": 0.049755301794453505, "grad_norm": 14.924318313598633, "learning_rate": 2.4796084828711256e-05, "loss": 0.3906, "num_input_tokens_seen": 663072, "step": 305 }, { "epoch": 0.05057096247960848, "grad_norm": 9.72663688659668, "learning_rate": 2.5203915171288743e-05, "loss": 0.396, "num_input_tokens_seen": 672416, "step": 310 }, { "epoch": 0.05138662316476346, "grad_norm": 11.389423370361328, "learning_rate": 2.5611745513866233e-05, "loss": 0.4705, "num_input_tokens_seen": 683008, "step": 315 }, { "epoch": 0.052202283849918436, "grad_norm": 52.0793571472168, "learning_rate": 2.6019575856443723e-05, "loss": 0.4732, "num_input_tokens_seen": 694976, "step": 320 }, { "epoch": 0.05301794453507341, "grad_norm": 10.719328880310059, "learning_rate": 2.6427406199021206e-05, "loss": 0.4089, "num_input_tokens_seen": 705824, "step": 325 }, { "epoch": 0.053833605220228384, "grad_norm": 6.3341474533081055, "learning_rate": 2.6835236541598696e-05, "loss": 0.4339, "num_input_tokens_seen": 717088, "step": 330 }, { "epoch": 0.05464926590538336, "grad_norm": 5.4407782554626465, "learning_rate": 2.7243066884176183e-05, "loss": 0.4164, "num_input_tokens_seen": 728768, "step": 335 }, { "epoch": 0.05546492659053834, "grad_norm": 4.411542892456055, "learning_rate": 2.7650897226753673e-05, "loss": 0.4142, "num_input_tokens_seen": 738528, "step": 340 }, { "epoch": 0.05628058727569331, "grad_norm": 10.264647483825684, "learning_rate": 2.805872756933116e-05, "loss": 0.3912, "num_input_tokens_seen": 749696, "step": 345 }, { "epoch": 0.057096247960848286, "grad_norm": 6.780503273010254, "learning_rate": 2.8466557911908646e-05, "loss": 0.3433, "num_input_tokens_seen": 760672, "step": 350 }, { "epoch": 0.05791190864600326, "grad_norm": 2.577894449234009, "learning_rate": 2.8874388254486136e-05, "loss": 0.3593, "num_input_tokens_seen": 771712, "step": 355 }, { "epoch": 0.05872756933115824, "grad_norm": 5.244052410125732, "learning_rate": 2.9282218597063623e-05, "loss": 0.3646, "num_input_tokens_seen": 782912, "step": 360 }, { "epoch": 0.05954323001631321, "grad_norm": 9.439083099365234, "learning_rate": 2.969004893964111e-05, "loss": 0.3592, "num_input_tokens_seen": 794368, "step": 365 }, { "epoch": 0.06035889070146819, "grad_norm": 2.9597246646881104, "learning_rate": 3.0097879282218596e-05, "loss": 0.3586, "num_input_tokens_seen": 804704, "step": 370 }, { "epoch": 0.061174551386623165, "grad_norm": 4.3669633865356445, "learning_rate": 3.0505709624796086e-05, "loss": 0.3525, "num_input_tokens_seen": 815872, "step": 375 }, { "epoch": 0.06199021207177814, "grad_norm": 4.579859733581543, "learning_rate": 3.0913539967373576e-05, "loss": 0.3348, "num_input_tokens_seen": 826912, "step": 380 }, { "epoch": 0.06280587275693311, "grad_norm": 6.14414119720459, "learning_rate": 3.132137030995106e-05, "loss": 0.4138, "num_input_tokens_seen": 836864, "step": 385 }, { "epoch": 0.0636215334420881, "grad_norm": 14.99302864074707, "learning_rate": 3.172920065252855e-05, "loss": 0.4467, "num_input_tokens_seen": 848448, "step": 390 }, { "epoch": 0.06443719412724307, "grad_norm": 8.607397079467773, "learning_rate": 3.213703099510604e-05, "loss": 0.3861, "num_input_tokens_seen": 858976, "step": 395 }, { "epoch": 0.06525285481239804, "grad_norm": 12.14357852935791, "learning_rate": 3.254486133768352e-05, "loss": 0.4364, "num_input_tokens_seen": 869664, "step": 400 }, { "epoch": 0.06606851549755302, "grad_norm": 7.592681884765625, "learning_rate": 3.295269168026101e-05, "loss": 0.3747, "num_input_tokens_seen": 881696, "step": 405 }, { "epoch": 0.06688417618270799, "grad_norm": 9.035486221313477, "learning_rate": 3.33605220228385e-05, "loss": 0.5381, "num_input_tokens_seen": 893440, "step": 410 }, { "epoch": 0.06769983686786298, "grad_norm": 4.473764896392822, "learning_rate": 3.3768352365415986e-05, "loss": 0.3439, "num_input_tokens_seen": 904224, "step": 415 }, { "epoch": 0.06851549755301795, "grad_norm": 4.291823863983154, "learning_rate": 3.4176182707993476e-05, "loss": 0.4344, "num_input_tokens_seen": 915776, "step": 420 }, { "epoch": 0.06933115823817292, "grad_norm": 24.67149543762207, "learning_rate": 3.458401305057096e-05, "loss": 0.383, "num_input_tokens_seen": 926656, "step": 425 }, { "epoch": 0.0701468189233279, "grad_norm": 27.043270111083984, "learning_rate": 3.4991843393148456e-05, "loss": 0.4272, "num_input_tokens_seen": 937728, "step": 430 }, { "epoch": 0.07096247960848287, "grad_norm": 13.670089721679688, "learning_rate": 3.539967373572594e-05, "loss": 0.4359, "num_input_tokens_seen": 948800, "step": 435 }, { "epoch": 0.07177814029363784, "grad_norm": 17.373754501342773, "learning_rate": 3.580750407830342e-05, "loss": 0.3757, "num_input_tokens_seen": 959712, "step": 440 }, { "epoch": 0.07259380097879282, "grad_norm": 7.593050479888916, "learning_rate": 3.621533442088092e-05, "loss": 0.2887, "num_input_tokens_seen": 970592, "step": 445 }, { "epoch": 0.0734094616639478, "grad_norm": 19.885852813720703, "learning_rate": 3.66231647634584e-05, "loss": 0.3668, "num_input_tokens_seen": 981632, "step": 450 }, { "epoch": 0.07422512234910278, "grad_norm": 3.8135032653808594, "learning_rate": 3.703099510603589e-05, "loss": 0.3811, "num_input_tokens_seen": 991680, "step": 455 }, { "epoch": 0.07504078303425775, "grad_norm": 6.083225250244141, "learning_rate": 3.7438825448613375e-05, "loss": 0.3688, "num_input_tokens_seen": 1002880, "step": 460 }, { "epoch": 0.07585644371941272, "grad_norm": 4.485177993774414, "learning_rate": 3.7846655791190865e-05, "loss": 0.3793, "num_input_tokens_seen": 1015072, "step": 465 }, { "epoch": 0.0766721044045677, "grad_norm": 2.079634666442871, "learning_rate": 3.8254486133768355e-05, "loss": 0.3835, "num_input_tokens_seen": 1026176, "step": 470 }, { "epoch": 0.07748776508972267, "grad_norm": 5.577203750610352, "learning_rate": 3.866231647634584e-05, "loss": 0.3721, "num_input_tokens_seen": 1037632, "step": 475 }, { "epoch": 0.07830342577487764, "grad_norm": 6.679996967315674, "learning_rate": 3.907014681892333e-05, "loss": 0.3572, "num_input_tokens_seen": 1047040, "step": 480 }, { "epoch": 0.07911908646003263, "grad_norm": 75.6471176147461, "learning_rate": 3.947797716150082e-05, "loss": 0.332, "num_input_tokens_seen": 1057344, "step": 485 }, { "epoch": 0.0799347471451876, "grad_norm": 1.4969229698181152, "learning_rate": 3.98858075040783e-05, "loss": 0.3001, "num_input_tokens_seen": 1068064, "step": 490 }, { "epoch": 0.08075040783034258, "grad_norm": 79.28744506835938, "learning_rate": 4.029363784665579e-05, "loss": 0.5147, "num_input_tokens_seen": 1078560, "step": 495 }, { "epoch": 0.08156606851549755, "grad_norm": 54.21046829223633, "learning_rate": 4.070146818923328e-05, "loss": 0.2397, "num_input_tokens_seen": 1089824, "step": 500 }, { "epoch": 0.08238172920065252, "grad_norm": 14.4324369430542, "learning_rate": 4.1109298531810765e-05, "loss": 0.6049, "num_input_tokens_seen": 1101120, "step": 505 }, { "epoch": 0.08319738988580751, "grad_norm": 21.910572052001953, "learning_rate": 4.1517128874388255e-05, "loss": 0.6272, "num_input_tokens_seen": 1111840, "step": 510 }, { "epoch": 0.08401305057096248, "grad_norm": 12.105114936828613, "learning_rate": 4.1924959216965745e-05, "loss": 0.5606, "num_input_tokens_seen": 1122720, "step": 515 }, { "epoch": 0.08482871125611746, "grad_norm": 0.32360613346099854, "learning_rate": 4.233278955954323e-05, "loss": 0.1157, "num_input_tokens_seen": 1131808, "step": 520 }, { "epoch": 0.08564437194127243, "grad_norm": 1.4238195419311523, "learning_rate": 4.274061990212072e-05, "loss": 0.2238, "num_input_tokens_seen": 1142624, "step": 525 }, { "epoch": 0.0864600326264274, "grad_norm": 10.996274948120117, "learning_rate": 4.314845024469821e-05, "loss": 0.2579, "num_input_tokens_seen": 1153536, "step": 530 }, { "epoch": 0.08727569331158239, "grad_norm": 0.11020765453577042, "learning_rate": 4.35562805872757e-05, "loss": 0.1138, "num_input_tokens_seen": 1164960, "step": 535 }, { "epoch": 0.08809135399673736, "grad_norm": 4.610954761505127, "learning_rate": 4.396411092985318e-05, "loss": 0.102, "num_input_tokens_seen": 1175552, "step": 540 }, { "epoch": 0.08890701468189233, "grad_norm": 25.28290367126465, "learning_rate": 4.4371941272430665e-05, "loss": 0.3639, "num_input_tokens_seen": 1186624, "step": 545 }, { "epoch": 0.08972267536704731, "grad_norm": 10.82431411743164, "learning_rate": 4.477977161500816e-05, "loss": 1.4397, "num_input_tokens_seen": 1198336, "step": 550 }, { "epoch": 0.09053833605220228, "grad_norm": 6.7620134353637695, "learning_rate": 4.5187601957585645e-05, "loss": 0.3143, "num_input_tokens_seen": 1209760, "step": 555 }, { "epoch": 0.09135399673735727, "grad_norm": 5.035055160522461, "learning_rate": 4.559543230016313e-05, "loss": 0.3098, "num_input_tokens_seen": 1219936, "step": 560 }, { "epoch": 0.09216965742251224, "grad_norm": 0.12749716639518738, "learning_rate": 4.6003262642740625e-05, "loss": 0.194, "num_input_tokens_seen": 1231456, "step": 565 }, { "epoch": 0.0929853181076672, "grad_norm": 7.260269641876221, "learning_rate": 4.641109298531811e-05, "loss": 0.1057, "num_input_tokens_seen": 1241952, "step": 570 }, { "epoch": 0.09380097879282219, "grad_norm": 0.058566510677337646, "learning_rate": 4.68189233278956e-05, "loss": 0.3255, "num_input_tokens_seen": 1253920, "step": 575 }, { "epoch": 0.09461663947797716, "grad_norm": 14.459625244140625, "learning_rate": 4.722675367047308e-05, "loss": 0.1501, "num_input_tokens_seen": 1265632, "step": 580 }, { "epoch": 0.09543230016313213, "grad_norm": 22.226320266723633, "learning_rate": 4.763458401305057e-05, "loss": 0.692, "num_input_tokens_seen": 1276256, "step": 585 }, { "epoch": 0.09624796084828711, "grad_norm": 1.782360553741455, "learning_rate": 4.804241435562806e-05, "loss": 0.2242, "num_input_tokens_seen": 1287424, "step": 590 }, { "epoch": 0.09706362153344208, "grad_norm": 0.1359458863735199, "learning_rate": 4.8450244698205544e-05, "loss": 0.0625, "num_input_tokens_seen": 1298272, "step": 595 }, { "epoch": 0.09787928221859707, "grad_norm": 7.373677730560303, "learning_rate": 4.885807504078304e-05, "loss": 0.2317, "num_input_tokens_seen": 1309632, "step": 600 }, { "epoch": 0.09869494290375204, "grad_norm": 0.5091097950935364, "learning_rate": 4.9265905383360524e-05, "loss": 0.4314, "num_input_tokens_seen": 1319264, "step": 605 }, { "epoch": 0.09951060358890701, "grad_norm": 11.11815071105957, "learning_rate": 4.967373572593801e-05, "loss": 0.3954, "num_input_tokens_seen": 1330144, "step": 610 }, { "epoch": 0.100326264274062, "grad_norm": 6.977790832519531, "learning_rate": 5.00815660685155e-05, "loss": 0.2213, "num_input_tokens_seen": 1340640, "step": 615 }, { "epoch": 0.10114192495921696, "grad_norm": 9.725613594055176, "learning_rate": 5.048939641109299e-05, "loss": 0.4465, "num_input_tokens_seen": 1351136, "step": 620 }, { "epoch": 0.10195758564437195, "grad_norm": 3.7128005027770996, "learning_rate": 5.089722675367047e-05, "loss": 0.2263, "num_input_tokens_seen": 1363104, "step": 625 }, { "epoch": 0.10277324632952692, "grad_norm": 3.203340530395508, "learning_rate": 5.130505709624796e-05, "loss": 0.235, "num_input_tokens_seen": 1373856, "step": 630 }, { "epoch": 0.10358890701468189, "grad_norm": 1.902295708656311, "learning_rate": 5.171288743882545e-05, "loss": 0.1787, "num_input_tokens_seen": 1385984, "step": 635 }, { "epoch": 0.10440456769983687, "grad_norm": 4.927067279815674, "learning_rate": 5.212071778140294e-05, "loss": 0.1436, "num_input_tokens_seen": 1397472, "step": 640 }, { "epoch": 0.10522022838499184, "grad_norm": 2.7606987953186035, "learning_rate": 5.2528548123980424e-05, "loss": 0.184, "num_input_tokens_seen": 1407744, "step": 645 }, { "epoch": 0.10603588907014681, "grad_norm": 7.337327480316162, "learning_rate": 5.293637846655791e-05, "loss": 0.4137, "num_input_tokens_seen": 1418688, "step": 650 }, { "epoch": 0.1068515497553018, "grad_norm": 0.5223647952079773, "learning_rate": 5.3344208809135404e-05, "loss": 0.2377, "num_input_tokens_seen": 1429056, "step": 655 }, { "epoch": 0.10766721044045677, "grad_norm": 20.811302185058594, "learning_rate": 5.375203915171289e-05, "loss": 0.3604, "num_input_tokens_seen": 1439264, "step": 660 }, { "epoch": 0.10848287112561175, "grad_norm": 12.334787368774414, "learning_rate": 5.415986949429037e-05, "loss": 0.4648, "num_input_tokens_seen": 1451072, "step": 665 }, { "epoch": 0.10929853181076672, "grad_norm": 0.2776586413383484, "learning_rate": 5.456769983686787e-05, "loss": 0.2371, "num_input_tokens_seen": 1460640, "step": 670 }, { "epoch": 0.11011419249592169, "grad_norm": 5.741067886352539, "learning_rate": 5.497553017944535e-05, "loss": 0.2627, "num_input_tokens_seen": 1472224, "step": 675 }, { "epoch": 0.11092985318107668, "grad_norm": 4.412654876708984, "learning_rate": 5.538336052202284e-05, "loss": 0.4448, "num_input_tokens_seen": 1483744, "step": 680 }, { "epoch": 0.11174551386623165, "grad_norm": 5.101598739624023, "learning_rate": 5.579119086460033e-05, "loss": 0.2309, "num_input_tokens_seen": 1495552, "step": 685 }, { "epoch": 0.11256117455138662, "grad_norm": 4.5909271240234375, "learning_rate": 5.6199021207177814e-05, "loss": 0.1059, "num_input_tokens_seen": 1506880, "step": 690 }, { "epoch": 0.1133768352365416, "grad_norm": 9.4280366897583, "learning_rate": 5.6606851549755304e-05, "loss": 0.4037, "num_input_tokens_seen": 1518336, "step": 695 }, { "epoch": 0.11419249592169657, "grad_norm": 2.6467490196228027, "learning_rate": 5.701468189233279e-05, "loss": 0.169, "num_input_tokens_seen": 1528704, "step": 700 }, { "epoch": 0.11500815660685156, "grad_norm": 8.759927749633789, "learning_rate": 5.7422512234910284e-05, "loss": 0.2088, "num_input_tokens_seen": 1539552, "step": 705 }, { "epoch": 0.11582381729200653, "grad_norm": 1.059167742729187, "learning_rate": 5.783034257748777e-05, "loss": 0.1942, "num_input_tokens_seen": 1549696, "step": 710 }, { "epoch": 0.1166394779771615, "grad_norm": 4.634883403778076, "learning_rate": 5.823817292006525e-05, "loss": 0.1503, "num_input_tokens_seen": 1559040, "step": 715 }, { "epoch": 0.11745513866231648, "grad_norm": 1.964306116104126, "learning_rate": 5.864600326264275e-05, "loss": 0.3159, "num_input_tokens_seen": 1570144, "step": 720 }, { "epoch": 0.11827079934747145, "grad_norm": 3.4446094036102295, "learning_rate": 5.905383360522023e-05, "loss": 0.2283, "num_input_tokens_seen": 1579776, "step": 725 }, { "epoch": 0.11908646003262642, "grad_norm": 1069.54541015625, "learning_rate": 5.9461663947797714e-05, "loss": 3.0837, "num_input_tokens_seen": 1589120, "step": 730 }, { "epoch": 0.1199021207177814, "grad_norm": 227.7669219970703, "learning_rate": 5.9869494290375204e-05, "loss": 2.2193, "num_input_tokens_seen": 1600512, "step": 735 }, { "epoch": 0.12071778140293637, "grad_norm": 2.590147018432617, "learning_rate": 6.0277324632952694e-05, "loss": 0.7939, "num_input_tokens_seen": 1610880, "step": 740 }, { "epoch": 0.12153344208809136, "grad_norm": 16.200777053833008, "learning_rate": 6.0685154975530184e-05, "loss": 0.2326, "num_input_tokens_seen": 1620896, "step": 745 }, { "epoch": 0.12234910277324633, "grad_norm": 6.109814643859863, "learning_rate": 6.109298531810767e-05, "loss": 0.131, "num_input_tokens_seen": 1631232, "step": 750 }, { "epoch": 0.1231647634584013, "grad_norm": 1.4488592147827148, "learning_rate": 6.150081566068516e-05, "loss": 0.1412, "num_input_tokens_seen": 1642688, "step": 755 }, { "epoch": 0.12398042414355628, "grad_norm": 12.674674034118652, "learning_rate": 6.190864600326265e-05, "loss": 0.4573, "num_input_tokens_seen": 1653792, "step": 760 }, { "epoch": 0.12479608482871125, "grad_norm": 75.09253692626953, "learning_rate": 6.231647634584014e-05, "loss": 2.002, "num_input_tokens_seen": 1664160, "step": 765 }, { "epoch": 0.12561174551386622, "grad_norm": 77.74810028076172, "learning_rate": 6.272430668841763e-05, "loss": 6.3921, "num_input_tokens_seen": 1675232, "step": 770 }, { "epoch": 0.1264274061990212, "grad_norm": 37.449378967285156, "learning_rate": 6.31321370309951e-05, "loss": 4.1665, "num_input_tokens_seen": 1685280, "step": 775 }, { "epoch": 0.1272430668841762, "grad_norm": 11.413688659667969, "learning_rate": 6.35399673735726e-05, "loss": 1.2492, "num_input_tokens_seen": 1694848, "step": 780 }, { "epoch": 0.12805872756933115, "grad_norm": 16.399850845336914, "learning_rate": 6.394779771615008e-05, "loss": 0.4529, "num_input_tokens_seen": 1705600, "step": 785 }, { "epoch": 0.12887438825448613, "grad_norm": 8.579042434692383, "learning_rate": 6.435562805872756e-05, "loss": 0.5815, "num_input_tokens_seen": 1717152, "step": 790 }, { "epoch": 0.12969004893964112, "grad_norm": 293.99151611328125, "learning_rate": 6.476345840130505e-05, "loss": 0.5153, "num_input_tokens_seen": 1728288, "step": 795 }, { "epoch": 0.13050570962479607, "grad_norm": 9.329829216003418, "learning_rate": 6.517128874388255e-05, "loss": 0.2736, "num_input_tokens_seen": 1739936, "step": 800 }, { "epoch": 0.13132137030995106, "grad_norm": 7.285165309906006, "learning_rate": 6.557911908646004e-05, "loss": 0.1994, "num_input_tokens_seen": 1750656, "step": 805 }, { "epoch": 0.13213703099510604, "grad_norm": 12.667640686035156, "learning_rate": 6.598694942903752e-05, "loss": 0.3218, "num_input_tokens_seen": 1759392, "step": 810 }, { "epoch": 0.132952691680261, "grad_norm": 5.781430244445801, "learning_rate": 6.639477977161501e-05, "loss": 0.2163, "num_input_tokens_seen": 1770816, "step": 815 }, { "epoch": 0.13376835236541598, "grad_norm": 1.2714248895645142, "learning_rate": 6.68026101141925e-05, "loss": 0.155, "num_input_tokens_seen": 1781088, "step": 820 }, { "epoch": 0.13458401305057097, "grad_norm": 25.848276138305664, "learning_rate": 6.721044045676998e-05, "loss": 0.2787, "num_input_tokens_seen": 1790912, "step": 825 }, { "epoch": 0.13539967373572595, "grad_norm": 0.10717128962278366, "learning_rate": 6.761827079934747e-05, "loss": 0.0285, "num_input_tokens_seen": 1800736, "step": 830 }, { "epoch": 0.1362153344208809, "grad_norm": 5.4765625, "learning_rate": 6.802610114192497e-05, "loss": 0.2213, "num_input_tokens_seen": 1810016, "step": 835 }, { "epoch": 0.1370309951060359, "grad_norm": 5.046440601348877, "learning_rate": 6.843393148450245e-05, "loss": 0.3239, "num_input_tokens_seen": 1820032, "step": 840 }, { "epoch": 0.13784665579119088, "grad_norm": 0.4740993082523346, "learning_rate": 6.884176182707994e-05, "loss": 0.2064, "num_input_tokens_seen": 1830080, "step": 845 }, { "epoch": 0.13866231647634583, "grad_norm": 6.393681049346924, "learning_rate": 6.924959216965743e-05, "loss": 0.3382, "num_input_tokens_seen": 1840832, "step": 850 }, { "epoch": 0.13947797716150082, "grad_norm": 4.807803630828857, "learning_rate": 6.96574225122349e-05, "loss": 0.4432, "num_input_tokens_seen": 1852000, "step": 855 }, { "epoch": 0.1402936378466558, "grad_norm": 0.19360409677028656, "learning_rate": 7.006525285481239e-05, "loss": 0.2847, "num_input_tokens_seen": 1863264, "step": 860 }, { "epoch": 0.14110929853181076, "grad_norm": 3.193110227584839, "learning_rate": 7.047308319738988e-05, "loss": 0.2202, "num_input_tokens_seen": 1875104, "step": 865 }, { "epoch": 0.14192495921696574, "grad_norm": 1.3773865699768066, "learning_rate": 7.088091353996739e-05, "loss": 0.143, "num_input_tokens_seen": 1885088, "step": 870 }, { "epoch": 0.14274061990212072, "grad_norm": 2.0674312114715576, "learning_rate": 7.128874388254486e-05, "loss": 0.2554, "num_input_tokens_seen": 1895424, "step": 875 }, { "epoch": 0.14355628058727568, "grad_norm": 3.5503621101379395, "learning_rate": 7.169657422512235e-05, "loss": 0.0546, "num_input_tokens_seen": 1906048, "step": 880 }, { "epoch": 0.14437194127243066, "grad_norm": 1.0717246532440186, "learning_rate": 7.210440456769984e-05, "loss": 0.017, "num_input_tokens_seen": 1916320, "step": 885 }, { "epoch": 0.14518760195758565, "grad_norm": 8.777630805969238, "learning_rate": 7.251223491027732e-05, "loss": 0.1349, "num_input_tokens_seen": 1926880, "step": 890 }, { "epoch": 0.14600326264274063, "grad_norm": 0.08770612627267838, "learning_rate": 7.292006525285481e-05, "loss": 0.1599, "num_input_tokens_seen": 1937792, "step": 895 }, { "epoch": 0.1468189233278956, "grad_norm": 1.7737507820129395, "learning_rate": 7.332789559543231e-05, "loss": 0.4864, "num_input_tokens_seen": 1950048, "step": 900 }, { "epoch": 0.14763458401305057, "grad_norm": 14.238997459411621, "learning_rate": 7.373572593800979e-05, "loss": 0.5282, "num_input_tokens_seen": 1960352, "step": 905 }, { "epoch": 0.14845024469820556, "grad_norm": 7.535554885864258, "learning_rate": 7.414355628058728e-05, "loss": 0.2474, "num_input_tokens_seen": 1970784, "step": 910 }, { "epoch": 0.14926590538336051, "grad_norm": 2.2550277709960938, "learning_rate": 7.455138662316477e-05, "loss": 0.3021, "num_input_tokens_seen": 1982528, "step": 915 }, { "epoch": 0.1500815660685155, "grad_norm": 0.23242764174938202, "learning_rate": 7.495921696574225e-05, "loss": 0.23, "num_input_tokens_seen": 1992704, "step": 920 }, { "epoch": 0.15089722675367048, "grad_norm": 4.0261101722717285, "learning_rate": 7.536704730831974e-05, "loss": 0.2117, "num_input_tokens_seen": 2003328, "step": 925 }, { "epoch": 0.15171288743882544, "grad_norm": 1.9249851703643799, "learning_rate": 7.577487765089723e-05, "loss": 0.1221, "num_input_tokens_seen": 2013696, "step": 930 }, { "epoch": 0.15252854812398042, "grad_norm": 0.0670514702796936, "learning_rate": 7.618270799347473e-05, "loss": 0.1289, "num_input_tokens_seen": 2024288, "step": 935 }, { "epoch": 0.1533442088091354, "grad_norm": 4.352898597717285, "learning_rate": 7.65905383360522e-05, "loss": 0.1985, "num_input_tokens_seen": 2035296, "step": 940 }, { "epoch": 0.15415986949429036, "grad_norm": 3.5093870162963867, "learning_rate": 7.69983686786297e-05, "loss": 0.1994, "num_input_tokens_seen": 2047680, "step": 945 }, { "epoch": 0.15497553017944535, "grad_norm": 2.5943655967712402, "learning_rate": 7.740619902120719e-05, "loss": 0.3784, "num_input_tokens_seen": 2058208, "step": 950 }, { "epoch": 0.15579119086460033, "grad_norm": 0.6778724789619446, "learning_rate": 7.781402936378466e-05, "loss": 0.1371, "num_input_tokens_seen": 2069504, "step": 955 }, { "epoch": 0.1566068515497553, "grad_norm": 2.2445712089538574, "learning_rate": 7.822185970636215e-05, "loss": 0.1568, "num_input_tokens_seen": 2080416, "step": 960 }, { "epoch": 0.15742251223491027, "grad_norm": 5.930630207061768, "learning_rate": 7.862969004893964e-05, "loss": 0.1437, "num_input_tokens_seen": 2090880, "step": 965 }, { "epoch": 0.15823817292006526, "grad_norm": 2.420844793319702, "learning_rate": 7.903752039151713e-05, "loss": 0.2885, "num_input_tokens_seen": 2101440, "step": 970 }, { "epoch": 0.15905383360522024, "grad_norm": 3.8338279724121094, "learning_rate": 7.944535073409462e-05, "loss": 0.2561, "num_input_tokens_seen": 2111488, "step": 975 }, { "epoch": 0.1598694942903752, "grad_norm": 0.34937554597854614, "learning_rate": 7.985318107667211e-05, "loss": 0.08, "num_input_tokens_seen": 2123392, "step": 980 }, { "epoch": 0.16068515497553018, "grad_norm": 8.5946683883667, "learning_rate": 8.026101141924959e-05, "loss": 0.0638, "num_input_tokens_seen": 2134016, "step": 985 }, { "epoch": 0.16150081566068517, "grad_norm": 6.2617669105529785, "learning_rate": 8.066884176182708e-05, "loss": 0.2836, "num_input_tokens_seen": 2144736, "step": 990 }, { "epoch": 0.16231647634584012, "grad_norm": 1.5171515941619873, "learning_rate": 8.107667210440457e-05, "loss": 0.1919, "num_input_tokens_seen": 2155520, "step": 995 }, { "epoch": 0.1631321370309951, "grad_norm": 4.9571428298950195, "learning_rate": 8.148450244698205e-05, "loss": 0.276, "num_input_tokens_seen": 2165312, "step": 1000 }, { "epoch": 0.1639477977161501, "grad_norm": 5.026040077209473, "learning_rate": 8.189233278955955e-05, "loss": 0.2269, "num_input_tokens_seen": 2175040, "step": 1005 }, { "epoch": 0.16476345840130505, "grad_norm": 1.8221598863601685, "learning_rate": 8.230016313213704e-05, "loss": 0.2278, "num_input_tokens_seen": 2186176, "step": 1010 }, { "epoch": 0.16557911908646003, "grad_norm": 3.4954538345336914, "learning_rate": 8.270799347471453e-05, "loss": 0.1486, "num_input_tokens_seen": 2195808, "step": 1015 }, { "epoch": 0.16639477977161501, "grad_norm": 0.697935938835144, "learning_rate": 8.3115823817292e-05, "loss": 0.0983, "num_input_tokens_seen": 2207232, "step": 1020 }, { "epoch": 0.16721044045676997, "grad_norm": 0.5946976542472839, "learning_rate": 8.35236541598695e-05, "loss": 0.1653, "num_input_tokens_seen": 2218912, "step": 1025 }, { "epoch": 0.16802610114192496, "grad_norm": 5.24078893661499, "learning_rate": 8.393148450244699e-05, "loss": 0.1919, "num_input_tokens_seen": 2229536, "step": 1030 }, { "epoch": 0.16884176182707994, "grad_norm": 4.404401779174805, "learning_rate": 8.433931484502446e-05, "loss": 0.1526, "num_input_tokens_seen": 2240608, "step": 1035 }, { "epoch": 0.16965742251223492, "grad_norm": 7.255613803863525, "learning_rate": 8.474714518760197e-05, "loss": 0.3607, "num_input_tokens_seen": 2250176, "step": 1040 }, { "epoch": 0.17047308319738988, "grad_norm": 2.6354668140411377, "learning_rate": 8.515497553017946e-05, "loss": 0.1367, "num_input_tokens_seen": 2262080, "step": 1045 }, { "epoch": 0.17128874388254486, "grad_norm": 0.04987457022070885, "learning_rate": 8.556280587275693e-05, "loss": 0.0759, "num_input_tokens_seen": 2271968, "step": 1050 }, { "epoch": 0.17210440456769985, "grad_norm": 3.4202687740325928, "learning_rate": 8.597063621533442e-05, "loss": 0.1543, "num_input_tokens_seen": 2282080, "step": 1055 }, { "epoch": 0.1729200652528548, "grad_norm": 6.735346794128418, "learning_rate": 8.637846655791191e-05, "loss": 0.1822, "num_input_tokens_seen": 2291744, "step": 1060 }, { "epoch": 0.1737357259380098, "grad_norm": 2.105976104736328, "learning_rate": 8.678629690048939e-05, "loss": 0.0599, "num_input_tokens_seen": 2302368, "step": 1065 }, { "epoch": 0.17455138662316477, "grad_norm": 18.188085556030273, "learning_rate": 8.719412724306688e-05, "loss": 0.1898, "num_input_tokens_seen": 2313664, "step": 1070 }, { "epoch": 0.17536704730831973, "grad_norm": 3.8808937072753906, "learning_rate": 8.760195758564438e-05, "loss": 0.0623, "num_input_tokens_seen": 2325184, "step": 1075 }, { "epoch": 0.1761827079934747, "grad_norm": 7.137500286102295, "learning_rate": 8.800978792822187e-05, "loss": 0.2835, "num_input_tokens_seen": 2336352, "step": 1080 }, { "epoch": 0.1769983686786297, "grad_norm": 1.505784034729004, "learning_rate": 8.841761827079935e-05, "loss": 0.0245, "num_input_tokens_seen": 2346816, "step": 1085 }, { "epoch": 0.17781402936378465, "grad_norm": 0.14202602207660675, "learning_rate": 8.882544861337684e-05, "loss": 0.0903, "num_input_tokens_seen": 2356672, "step": 1090 }, { "epoch": 0.17862969004893964, "grad_norm": 5.891081809997559, "learning_rate": 8.923327895595433e-05, "loss": 0.2582, "num_input_tokens_seen": 2367168, "step": 1095 }, { "epoch": 0.17944535073409462, "grad_norm": 5.1646904945373535, "learning_rate": 8.96411092985318e-05, "loss": 0.2548, "num_input_tokens_seen": 2378912, "step": 1100 }, { "epoch": 0.1802610114192496, "grad_norm": 1.1886264085769653, "learning_rate": 9.00489396411093e-05, "loss": 0.3341, "num_input_tokens_seen": 2390848, "step": 1105 }, { "epoch": 0.18107667210440456, "grad_norm": 1.5492854118347168, "learning_rate": 9.04567699836868e-05, "loss": 0.199, "num_input_tokens_seen": 2401344, "step": 1110 }, { "epoch": 0.18189233278955955, "grad_norm": 0.5389428734779358, "learning_rate": 9.086460032626427e-05, "loss": 0.1403, "num_input_tokens_seen": 2412224, "step": 1115 }, { "epoch": 0.18270799347471453, "grad_norm": 1.1149766445159912, "learning_rate": 9.127243066884176e-05, "loss": 0.0222, "num_input_tokens_seen": 2423072, "step": 1120 }, { "epoch": 0.1835236541598695, "grad_norm": 6.989159107208252, "learning_rate": 9.168026101141925e-05, "loss": 0.1462, "num_input_tokens_seen": 2434368, "step": 1125 }, { "epoch": 0.18433931484502447, "grad_norm": 0.5632582902908325, "learning_rate": 9.208809135399673e-05, "loss": 0.3705, "num_input_tokens_seen": 2444544, "step": 1130 }, { "epoch": 0.18515497553017946, "grad_norm": 4.612658977508545, "learning_rate": 9.249592169657422e-05, "loss": 0.1005, "num_input_tokens_seen": 2454048, "step": 1135 }, { "epoch": 0.1859706362153344, "grad_norm": 13.68948745727539, "learning_rate": 9.290375203915171e-05, "loss": 0.2374, "num_input_tokens_seen": 2464704, "step": 1140 }, { "epoch": 0.1867862969004894, "grad_norm": 4.448404788970947, "learning_rate": 9.33115823817292e-05, "loss": 0.359, "num_input_tokens_seen": 2475712, "step": 1145 }, { "epoch": 0.18760195758564438, "grad_norm": 0.15590998530387878, "learning_rate": 9.371941272430669e-05, "loss": 0.3164, "num_input_tokens_seen": 2488352, "step": 1150 }, { "epoch": 0.18841761827079934, "grad_norm": 2.397275924682617, "learning_rate": 9.412724306688418e-05, "loss": 0.206, "num_input_tokens_seen": 2499232, "step": 1155 }, { "epoch": 0.18923327895595432, "grad_norm": 0.36472228169441223, "learning_rate": 9.453507340946167e-05, "loss": 0.0639, "num_input_tokens_seen": 2509376, "step": 1160 }, { "epoch": 0.1900489396411093, "grad_norm": 2.360445022583008, "learning_rate": 9.494290375203915e-05, "loss": 0.178, "num_input_tokens_seen": 2520352, "step": 1165 }, { "epoch": 0.19086460032626426, "grad_norm": 0.22225035727024078, "learning_rate": 9.535073409461664e-05, "loss": 0.1206, "num_input_tokens_seen": 2530752, "step": 1170 }, { "epoch": 0.19168026101141925, "grad_norm": 0.07478147000074387, "learning_rate": 9.575856443719413e-05, "loss": 0.4729, "num_input_tokens_seen": 2539872, "step": 1175 }, { "epoch": 0.19249592169657423, "grad_norm": 0.3705507516860962, "learning_rate": 9.616639477977162e-05, "loss": 0.2679, "num_input_tokens_seen": 2551456, "step": 1180 }, { "epoch": 0.1933115823817292, "grad_norm": 0.7973094582557678, "learning_rate": 9.657422512234911e-05, "loss": 0.119, "num_input_tokens_seen": 2563136, "step": 1185 }, { "epoch": 0.19412724306688417, "grad_norm": 0.2963646650314331, "learning_rate": 9.69820554649266e-05, "loss": 0.0195, "num_input_tokens_seen": 2573984, "step": 1190 }, { "epoch": 0.19494290375203915, "grad_norm": 0.3415137529373169, "learning_rate": 9.738988580750407e-05, "loss": 0.2677, "num_input_tokens_seen": 2585344, "step": 1195 }, { "epoch": 0.19575856443719414, "grad_norm": 0.12055652588605881, "learning_rate": 9.779771615008156e-05, "loss": 0.2518, "num_input_tokens_seen": 2596448, "step": 1200 }, { "epoch": 0.1965742251223491, "grad_norm": 8.142424583435059, "learning_rate": 9.820554649265905e-05, "loss": 0.5066, "num_input_tokens_seen": 2606848, "step": 1205 }, { "epoch": 0.19738988580750408, "grad_norm": 1.2064582109451294, "learning_rate": 9.861337683523653e-05, "loss": 0.1164, "num_input_tokens_seen": 2617952, "step": 1210 }, { "epoch": 0.19820554649265906, "grad_norm": 0.07324664294719696, "learning_rate": 9.902120717781403e-05, "loss": 0.0511, "num_input_tokens_seen": 2628448, "step": 1215 }, { "epoch": 0.19902120717781402, "grad_norm": 3.5778586864471436, "learning_rate": 9.942903752039152e-05, "loss": 0.2794, "num_input_tokens_seen": 2639648, "step": 1220 }, { "epoch": 0.199836867862969, "grad_norm": 3.4566075801849365, "learning_rate": 9.983686786296901e-05, "loss": 0.0933, "num_input_tokens_seen": 2650112, "step": 1225 }, { "epoch": 0.200652528548124, "grad_norm": 3.4942331314086914, "learning_rate": 0.00010024469820554649, "loss": 0.5332, "num_input_tokens_seen": 2660576, "step": 1230 }, { "epoch": 0.20146818923327894, "grad_norm": 0.2953527271747589, "learning_rate": 0.00010065252854812398, "loss": 0.0976, "num_input_tokens_seen": 2669440, "step": 1235 }, { "epoch": 0.20228384991843393, "grad_norm": 1.5927826166152954, "learning_rate": 0.00010106035889070147, "loss": 0.2455, "num_input_tokens_seen": 2680544, "step": 1240 }, { "epoch": 0.2030995106035889, "grad_norm": 2.855224609375, "learning_rate": 0.00010146818923327896, "loss": 0.2392, "num_input_tokens_seen": 2692608, "step": 1245 }, { "epoch": 0.2039151712887439, "grad_norm": 2.0778863430023193, "learning_rate": 0.00010187601957585645, "loss": 0.1251, "num_input_tokens_seen": 2701760, "step": 1250 }, { "epoch": 0.20473083197389885, "grad_norm": 3.7085254192352295, "learning_rate": 0.00010228384991843394, "loss": 0.2331, "num_input_tokens_seen": 2712288, "step": 1255 }, { "epoch": 0.20554649265905384, "grad_norm": 2.640923500061035, "learning_rate": 0.00010269168026101142, "loss": 0.1405, "num_input_tokens_seen": 2723488, "step": 1260 }, { "epoch": 0.20636215334420882, "grad_norm": 0.7595841884613037, "learning_rate": 0.00010309951060358891, "loss": 0.1611, "num_input_tokens_seen": 2734400, "step": 1265 }, { "epoch": 0.20717781402936378, "grad_norm": 0.3323002755641937, "learning_rate": 0.0001035073409461664, "loss": 0.0854, "num_input_tokens_seen": 2746208, "step": 1270 }, { "epoch": 0.20799347471451876, "grad_norm": 6.0016303062438965, "learning_rate": 0.00010391517128874387, "loss": 0.1824, "num_input_tokens_seen": 2757216, "step": 1275 }, { "epoch": 0.20880913539967375, "grad_norm": 0.13172432780265808, "learning_rate": 0.00010432300163132138, "loss": 0.1912, "num_input_tokens_seen": 2768192, "step": 1280 }, { "epoch": 0.2096247960848287, "grad_norm": 3.026542901992798, "learning_rate": 0.00010473083197389887, "loss": 0.0904, "num_input_tokens_seen": 2778400, "step": 1285 }, { "epoch": 0.21044045676998369, "grad_norm": 4.948746681213379, "learning_rate": 0.00010513866231647634, "loss": 0.3657, "num_input_tokens_seen": 2788416, "step": 1290 }, { "epoch": 0.21125611745513867, "grad_norm": 0.26640602946281433, "learning_rate": 0.00010554649265905383, "loss": 0.3439, "num_input_tokens_seen": 2799360, "step": 1295 }, { "epoch": 0.21207177814029363, "grad_norm": 0.36822858452796936, "learning_rate": 0.00010595432300163132, "loss": 0.199, "num_input_tokens_seen": 2809728, "step": 1300 }, { "epoch": 0.2128874388254486, "grad_norm": 2.3697075843811035, "learning_rate": 0.00010636215334420881, "loss": 0.2865, "num_input_tokens_seen": 2819776, "step": 1305 }, { "epoch": 0.2137030995106036, "grad_norm": 0.7037314772605896, "learning_rate": 0.00010676998368678629, "loss": 0.1692, "num_input_tokens_seen": 2831360, "step": 1310 }, { "epoch": 0.21451876019575855, "grad_norm": 1.2460227012634277, "learning_rate": 0.0001071778140293638, "loss": 0.1563, "num_input_tokens_seen": 2842432, "step": 1315 }, { "epoch": 0.21533442088091354, "grad_norm": 0.42761459946632385, "learning_rate": 0.00010758564437194128, "loss": 0.122, "num_input_tokens_seen": 2852448, "step": 1320 }, { "epoch": 0.21615008156606852, "grad_norm": 0.9030336737632751, "learning_rate": 0.00010799347471451876, "loss": 0.088, "num_input_tokens_seen": 2862912, "step": 1325 }, { "epoch": 0.2169657422512235, "grad_norm": 1.067244052886963, "learning_rate": 0.00010840130505709625, "loss": 0.2082, "num_input_tokens_seen": 2873888, "step": 1330 }, { "epoch": 0.21778140293637846, "grad_norm": 0.8108012676239014, "learning_rate": 0.00010880913539967374, "loss": 0.1894, "num_input_tokens_seen": 2885664, "step": 1335 }, { "epoch": 0.21859706362153344, "grad_norm": 0.14770320057868958, "learning_rate": 0.00010921696574225122, "loss": 0.367, "num_input_tokens_seen": 2896288, "step": 1340 }, { "epoch": 0.21941272430668843, "grad_norm": 2.9352149963378906, "learning_rate": 0.0001096247960848287, "loss": 0.1078, "num_input_tokens_seen": 2907264, "step": 1345 }, { "epoch": 0.22022838499184338, "grad_norm": 0.31926628947257996, "learning_rate": 0.00011003262642740621, "loss": 0.3341, "num_input_tokens_seen": 2918464, "step": 1350 }, { "epoch": 0.22104404567699837, "grad_norm": 7.357073783874512, "learning_rate": 0.00011044045676998369, "loss": 0.4854, "num_input_tokens_seen": 2929792, "step": 1355 }, { "epoch": 0.22185970636215335, "grad_norm": 4.365267753601074, "learning_rate": 0.00011084828711256118, "loss": 0.1755, "num_input_tokens_seen": 2940064, "step": 1360 }, { "epoch": 0.2226753670473083, "grad_norm": 0.6887586116790771, "learning_rate": 0.00011125611745513867, "loss": 0.0783, "num_input_tokens_seen": 2950368, "step": 1365 }, { "epoch": 0.2234910277324633, "grad_norm": 4.315596580505371, "learning_rate": 0.00011166394779771616, "loss": 0.3597, "num_input_tokens_seen": 2960448, "step": 1370 }, { "epoch": 0.22430668841761828, "grad_norm": 0.5510193109512329, "learning_rate": 0.00011207177814029363, "loss": 0.1461, "num_input_tokens_seen": 2972032, "step": 1375 }, { "epoch": 0.22512234910277323, "grad_norm": 0.8004043102264404, "learning_rate": 0.00011247960848287112, "loss": 0.1338, "num_input_tokens_seen": 2984160, "step": 1380 }, { "epoch": 0.22593800978792822, "grad_norm": 0.13799701631069183, "learning_rate": 0.00011288743882544863, "loss": 0.0982, "num_input_tokens_seen": 2994752, "step": 1385 }, { "epoch": 0.2267536704730832, "grad_norm": 1.3625370264053345, "learning_rate": 0.0001132952691680261, "loss": 0.1263, "num_input_tokens_seen": 3005376, "step": 1390 }, { "epoch": 0.2275693311582382, "grad_norm": 7.915112018585205, "learning_rate": 0.00011370309951060359, "loss": 0.2681, "num_input_tokens_seen": 3016576, "step": 1395 }, { "epoch": 0.22838499184339314, "grad_norm": 35.910255432128906, "learning_rate": 0.00011411092985318108, "loss": 0.3882, "num_input_tokens_seen": 3028704, "step": 1400 }, { "epoch": 0.22920065252854813, "grad_norm": 3.048297166824341, "learning_rate": 0.00011451876019575856, "loss": 0.3004, "num_input_tokens_seen": 3039456, "step": 1405 }, { "epoch": 0.2300163132137031, "grad_norm": 3.245952606201172, "learning_rate": 0.00011492659053833605, "loss": 0.2714, "num_input_tokens_seen": 3050816, "step": 1410 }, { "epoch": 0.23083197389885807, "grad_norm": 0.7663965821266174, "learning_rate": 0.00011533442088091354, "loss": 0.1503, "num_input_tokens_seen": 3060064, "step": 1415 }, { "epoch": 0.23164763458401305, "grad_norm": 1.7909114360809326, "learning_rate": 0.00011574225122349103, "loss": 0.2217, "num_input_tokens_seen": 3071936, "step": 1420 }, { "epoch": 0.23246329526916804, "grad_norm": 1.2014027833938599, "learning_rate": 0.00011615008156606852, "loss": 0.0531, "num_input_tokens_seen": 3082752, "step": 1425 }, { "epoch": 0.233278955954323, "grad_norm": 0.3540492653846741, "learning_rate": 0.00011655791190864601, "loss": 0.0955, "num_input_tokens_seen": 3094592, "step": 1430 }, { "epoch": 0.23409461663947798, "grad_norm": 1.3756060600280762, "learning_rate": 0.0001169657422512235, "loss": 0.2884, "num_input_tokens_seen": 3105312, "step": 1435 }, { "epoch": 0.23491027732463296, "grad_norm": 0.6397413611412048, "learning_rate": 0.00011737357259380098, "loss": 0.0951, "num_input_tokens_seen": 3115776, "step": 1440 }, { "epoch": 0.23572593800978792, "grad_norm": 0.5612933039665222, "learning_rate": 0.00011778140293637847, "loss": 0.0828, "num_input_tokens_seen": 3127392, "step": 1445 }, { "epoch": 0.2365415986949429, "grad_norm": 2.8829073905944824, "learning_rate": 0.00011818923327895596, "loss": 0.1222, "num_input_tokens_seen": 3137760, "step": 1450 }, { "epoch": 0.23735725938009788, "grad_norm": 6.076878547668457, "learning_rate": 0.00011859706362153345, "loss": 0.2851, "num_input_tokens_seen": 3148192, "step": 1455 }, { "epoch": 0.23817292006525284, "grad_norm": 0.2625199854373932, "learning_rate": 0.00011900489396411094, "loss": 0.1418, "num_input_tokens_seen": 3159008, "step": 1460 }, { "epoch": 0.23898858075040783, "grad_norm": 0.1276540756225586, "learning_rate": 0.00011941272430668843, "loss": 0.0804, "num_input_tokens_seen": 3170784, "step": 1465 }, { "epoch": 0.2398042414355628, "grad_norm": 2.398977041244507, "learning_rate": 0.0001198205546492659, "loss": 0.1997, "num_input_tokens_seen": 3182016, "step": 1470 }, { "epoch": 0.2406199021207178, "grad_norm": 0.0415484681725502, "learning_rate": 0.00012022838499184339, "loss": 0.2176, "num_input_tokens_seen": 3192288, "step": 1475 }, { "epoch": 0.24143556280587275, "grad_norm": 0.10787076503038406, "learning_rate": 0.00012063621533442088, "loss": 0.1662, "num_input_tokens_seen": 3202560, "step": 1480 }, { "epoch": 0.24225122349102773, "grad_norm": 2.47776460647583, "learning_rate": 0.00012104404567699836, "loss": 0.2372, "num_input_tokens_seen": 3213888, "step": 1485 }, { "epoch": 0.24306688417618272, "grad_norm": 0.6052817702293396, "learning_rate": 0.00012145187601957586, "loss": 0.2751, "num_input_tokens_seen": 3224032, "step": 1490 }, { "epoch": 0.24388254486133767, "grad_norm": 0.7725127339363098, "learning_rate": 0.00012185970636215335, "loss": 0.0815, "num_input_tokens_seen": 3234720, "step": 1495 }, { "epoch": 0.24469820554649266, "grad_norm": 2.9930591583251953, "learning_rate": 0.00012226753670473083, "loss": 0.2188, "num_input_tokens_seen": 3244928, "step": 1500 }, { "epoch": 0.24551386623164764, "grad_norm": 3.0129828453063965, "learning_rate": 0.00012267536704730833, "loss": 0.3815, "num_input_tokens_seen": 3254528, "step": 1505 }, { "epoch": 0.2463295269168026, "grad_norm": 0.08210022002458572, "learning_rate": 0.0001230831973898858, "loss": 0.0437, "num_input_tokens_seen": 3264768, "step": 1510 }, { "epoch": 0.24714518760195758, "grad_norm": 1.515202522277832, "learning_rate": 0.0001234910277324633, "loss": 0.0471, "num_input_tokens_seen": 3276736, "step": 1515 }, { "epoch": 0.24796084828711257, "grad_norm": 2.027247667312622, "learning_rate": 0.0001238988580750408, "loss": 0.5108, "num_input_tokens_seen": 3287264, "step": 1520 }, { "epoch": 0.24877650897226752, "grad_norm": 2.5827109813690186, "learning_rate": 0.00012430668841761827, "loss": 0.3454, "num_input_tokens_seen": 3298048, "step": 1525 }, { "epoch": 0.2495921696574225, "grad_norm": 1.8741847276687622, "learning_rate": 0.00012471451876019577, "loss": 0.2622, "num_input_tokens_seen": 3308640, "step": 1530 }, { "epoch": 0.25040783034257746, "grad_norm": 0.7166083455085754, "learning_rate": 0.00012512234910277325, "loss": 0.1249, "num_input_tokens_seen": 3319040, "step": 1535 }, { "epoch": 0.25122349102773245, "grad_norm": 0.20941627025604248, "learning_rate": 0.00012553017944535072, "loss": 0.0722, "num_input_tokens_seen": 3330720, "step": 1540 }, { "epoch": 0.25203915171288743, "grad_norm": 8.934325218200684, "learning_rate": 0.00012593800978792823, "loss": 0.3257, "num_input_tokens_seen": 3340352, "step": 1545 }, { "epoch": 0.2528548123980424, "grad_norm": 0.5297455787658691, "learning_rate": 0.0001263458401305057, "loss": 0.2996, "num_input_tokens_seen": 3350272, "step": 1550 }, { "epoch": 0.2536704730831974, "grad_norm": 2.1738781929016113, "learning_rate": 0.0001267536704730832, "loss": 0.2076, "num_input_tokens_seen": 3361440, "step": 1555 }, { "epoch": 0.2544861337683524, "grad_norm": 6.6399455070495605, "learning_rate": 0.00012716150081566068, "loss": 0.2664, "num_input_tokens_seen": 3373600, "step": 1560 }, { "epoch": 0.2553017944535073, "grad_norm": 5.0540385246276855, "learning_rate": 0.00012756933115823819, "loss": 0.2678, "num_input_tokens_seen": 3384160, "step": 1565 }, { "epoch": 0.2561174551386623, "grad_norm": 18.535308837890625, "learning_rate": 0.00012797716150081566, "loss": 0.6454, "num_input_tokens_seen": 3393792, "step": 1570 }, { "epoch": 0.2569331158238173, "grad_norm": 3.7608978748321533, "learning_rate": 0.00012838499184339314, "loss": 0.4939, "num_input_tokens_seen": 3404576, "step": 1575 }, { "epoch": 0.25774877650897227, "grad_norm": 3.712184190750122, "learning_rate": 0.00012879282218597064, "loss": 0.2159, "num_input_tokens_seen": 3414816, "step": 1580 }, { "epoch": 0.25856443719412725, "grad_norm": 3.3276753425598145, "learning_rate": 0.00012920065252854812, "loss": 0.1312, "num_input_tokens_seen": 3425248, "step": 1585 }, { "epoch": 0.25938009787928223, "grad_norm": 0.4005642831325531, "learning_rate": 0.00012960848287112562, "loss": 0.069, "num_input_tokens_seen": 3435680, "step": 1590 }, { "epoch": 0.2601957585644372, "grad_norm": 1.2974168062210083, "learning_rate": 0.0001300163132137031, "loss": 0.0704, "num_input_tokens_seen": 3447488, "step": 1595 }, { "epoch": 0.26101141924959215, "grad_norm": 0.48080500960350037, "learning_rate": 0.0001304241435562806, "loss": 0.1689, "num_input_tokens_seen": 3458336, "step": 1600 }, { "epoch": 0.26182707993474713, "grad_norm": 2.298285961151123, "learning_rate": 0.00013083197389885805, "loss": 0.2302, "num_input_tokens_seen": 3467680, "step": 1605 }, { "epoch": 0.2626427406199021, "grad_norm": 0.040536798536777496, "learning_rate": 0.00013123980424143555, "loss": 0.3058, "num_input_tokens_seen": 3478400, "step": 1610 }, { "epoch": 0.2634584013050571, "grad_norm": 0.12443608790636063, "learning_rate": 0.00013164763458401306, "loss": 0.0356, "num_input_tokens_seen": 3489280, "step": 1615 }, { "epoch": 0.2642740619902121, "grad_norm": 3.2961642742156982, "learning_rate": 0.00013205546492659053, "loss": 0.3348, "num_input_tokens_seen": 3500800, "step": 1620 }, { "epoch": 0.26508972267536707, "grad_norm": 0.34571152925491333, "learning_rate": 0.00013246329526916804, "loss": 0.1261, "num_input_tokens_seen": 3510656, "step": 1625 }, { "epoch": 0.265905383360522, "grad_norm": 0.5007117986679077, "learning_rate": 0.00013287112561174552, "loss": 0.1386, "num_input_tokens_seen": 3521696, "step": 1630 }, { "epoch": 0.266721044045677, "grad_norm": 0.8755322098731995, "learning_rate": 0.00013327895595432302, "loss": 0.0952, "num_input_tokens_seen": 3534304, "step": 1635 }, { "epoch": 0.26753670473083196, "grad_norm": 2.3326218128204346, "learning_rate": 0.00013368678629690047, "loss": 0.2073, "num_input_tokens_seen": 3544416, "step": 1640 }, { "epoch": 0.26835236541598695, "grad_norm": 0.17495253682136536, "learning_rate": 0.00013409461663947797, "loss": 0.1812, "num_input_tokens_seen": 3553760, "step": 1645 }, { "epoch": 0.26916802610114193, "grad_norm": 0.30354925990104675, "learning_rate": 0.00013450244698205548, "loss": 0.1423, "num_input_tokens_seen": 3563776, "step": 1650 }, { "epoch": 0.2699836867862969, "grad_norm": 0.07377764582633972, "learning_rate": 0.00013491027732463295, "loss": 0.1907, "num_input_tokens_seen": 3575200, "step": 1655 }, { "epoch": 0.2707993474714519, "grad_norm": 0.5978612899780273, "learning_rate": 0.00013531810766721046, "loss": 0.1143, "num_input_tokens_seen": 3587328, "step": 1660 }, { "epoch": 0.27161500815660683, "grad_norm": 3.454429864883423, "learning_rate": 0.00013572593800978793, "loss": 0.15, "num_input_tokens_seen": 3597056, "step": 1665 }, { "epoch": 0.2724306688417618, "grad_norm": 6.486354827880859, "learning_rate": 0.0001361337683523654, "loss": 0.0857, "num_input_tokens_seen": 3608064, "step": 1670 }, { "epoch": 0.2732463295269168, "grad_norm": 6.622701168060303, "learning_rate": 0.00013654159869494288, "loss": 0.2567, "num_input_tokens_seen": 3618208, "step": 1675 }, { "epoch": 0.2740619902120718, "grad_norm": 3.2912650108337402, "learning_rate": 0.0001369494290375204, "loss": 0.4063, "num_input_tokens_seen": 3629600, "step": 1680 }, { "epoch": 0.27487765089722677, "grad_norm": 1.7371718883514404, "learning_rate": 0.0001373572593800979, "loss": 0.1756, "num_input_tokens_seen": 3640160, "step": 1685 }, { "epoch": 0.27569331158238175, "grad_norm": 1.5828455686569214, "learning_rate": 0.00013776508972267537, "loss": 0.1398, "num_input_tokens_seen": 3649984, "step": 1690 }, { "epoch": 0.2765089722675367, "grad_norm": 0.8824554681777954, "learning_rate": 0.00013817292006525287, "loss": 0.041, "num_input_tokens_seen": 3660544, "step": 1695 }, { "epoch": 0.27732463295269166, "grad_norm": 0.31346526741981506, "learning_rate": 0.00013858075040783035, "loss": 0.2259, "num_input_tokens_seen": 3670688, "step": 1700 }, { "epoch": 0.27814029363784665, "grad_norm": 0.8705945014953613, "learning_rate": 0.00013898858075040782, "loss": 0.1137, "num_input_tokens_seen": 3680896, "step": 1705 }, { "epoch": 0.27895595432300163, "grad_norm": 0.12432600557804108, "learning_rate": 0.00013939641109298533, "loss": 0.1308, "num_input_tokens_seen": 3691648, "step": 1710 }, { "epoch": 0.2797716150081566, "grad_norm": 0.15383680164813995, "learning_rate": 0.0001398042414355628, "loss": 0.2624, "num_input_tokens_seen": 3701920, "step": 1715 }, { "epoch": 0.2805872756933116, "grad_norm": 0.3748222291469574, "learning_rate": 0.0001402120717781403, "loss": 0.3476, "num_input_tokens_seen": 3711648, "step": 1720 }, { "epoch": 0.2814029363784666, "grad_norm": 1.4904766082763672, "learning_rate": 0.00014061990212071778, "loss": 0.1822, "num_input_tokens_seen": 3721312, "step": 1725 }, { "epoch": 0.2822185970636215, "grad_norm": 0.6280257105827332, "learning_rate": 0.0001410277324632953, "loss": 0.1454, "num_input_tokens_seen": 3732192, "step": 1730 }, { "epoch": 0.2830342577487765, "grad_norm": 1.832024097442627, "learning_rate": 0.00014143556280587274, "loss": 0.2084, "num_input_tokens_seen": 3742368, "step": 1735 }, { "epoch": 0.2838499184339315, "grad_norm": 1.173895001411438, "learning_rate": 0.00014184339314845024, "loss": 0.2289, "num_input_tokens_seen": 3753536, "step": 1740 }, { "epoch": 0.28466557911908646, "grad_norm": 0.7341709733009338, "learning_rate": 0.00014225122349102774, "loss": 0.0779, "num_input_tokens_seen": 3764960, "step": 1745 }, { "epoch": 0.28548123980424145, "grad_norm": 0.40611112117767334, "learning_rate": 0.00014265905383360522, "loss": 0.0627, "num_input_tokens_seen": 3776224, "step": 1750 }, { "epoch": 0.28629690048939643, "grad_norm": 0.38497471809387207, "learning_rate": 0.00014306688417618272, "loss": 0.3102, "num_input_tokens_seen": 3786208, "step": 1755 }, { "epoch": 0.28711256117455136, "grad_norm": 1.9566757678985596, "learning_rate": 0.0001434747145187602, "loss": 0.1619, "num_input_tokens_seen": 3796768, "step": 1760 }, { "epoch": 0.28792822185970635, "grad_norm": 0.3571220338344574, "learning_rate": 0.0001438825448613377, "loss": 0.1974, "num_input_tokens_seen": 3808928, "step": 1765 }, { "epoch": 0.28874388254486133, "grad_norm": 1.0718194246292114, "learning_rate": 0.00014429037520391515, "loss": 0.2318, "num_input_tokens_seen": 3820480, "step": 1770 }, { "epoch": 0.2895595432300163, "grad_norm": 0.08225858211517334, "learning_rate": 0.00014469820554649266, "loss": 0.1204, "num_input_tokens_seen": 3832352, "step": 1775 }, { "epoch": 0.2903752039151713, "grad_norm": 1.1732842922210693, "learning_rate": 0.00014510603588907016, "loss": 0.1203, "num_input_tokens_seen": 3842560, "step": 1780 }, { "epoch": 0.2911908646003263, "grad_norm": 1.7823255062103271, "learning_rate": 0.00014551386623164764, "loss": 0.3127, "num_input_tokens_seen": 3853824, "step": 1785 }, { "epoch": 0.29200652528548127, "grad_norm": 0.37060829997062683, "learning_rate": 0.00014592169657422514, "loss": 0.1092, "num_input_tokens_seen": 3865280, "step": 1790 }, { "epoch": 0.2928221859706362, "grad_norm": 2.0758445262908936, "learning_rate": 0.00014632952691680262, "loss": 0.1014, "num_input_tokens_seen": 3874944, "step": 1795 }, { "epoch": 0.2936378466557912, "grad_norm": 0.1883598268032074, "learning_rate": 0.0001467373572593801, "loss": 0.125, "num_input_tokens_seen": 3885824, "step": 1800 }, { "epoch": 0.29445350734094616, "grad_norm": 3.2518420219421387, "learning_rate": 0.00014714518760195757, "loss": 0.2931, "num_input_tokens_seen": 3896480, "step": 1805 }, { "epoch": 0.29526916802610115, "grad_norm": 2.0988779067993164, "learning_rate": 0.00014755301794453507, "loss": 0.2474, "num_input_tokens_seen": 3908256, "step": 1810 }, { "epoch": 0.29608482871125613, "grad_norm": 0.1773896962404251, "learning_rate": 0.00014796084828711258, "loss": 0.0975, "num_input_tokens_seen": 3919584, "step": 1815 }, { "epoch": 0.2969004893964111, "grad_norm": 1.0337015390396118, "learning_rate": 0.00014836867862969005, "loss": 0.1539, "num_input_tokens_seen": 3930016, "step": 1820 }, { "epoch": 0.29771615008156604, "grad_norm": 0.4843730032444, "learning_rate": 0.00014877650897226756, "loss": 0.0723, "num_input_tokens_seen": 3940800, "step": 1825 }, { "epoch": 0.29853181076672103, "grad_norm": 0.14410769939422607, "learning_rate": 0.00014918433931484503, "loss": 0.0975, "num_input_tokens_seen": 3952192, "step": 1830 }, { "epoch": 0.299347471451876, "grad_norm": 0.07089443504810333, "learning_rate": 0.0001495921696574225, "loss": 0.0374, "num_input_tokens_seen": 3963936, "step": 1835 }, { "epoch": 0.300163132137031, "grad_norm": 2.6121270656585693, "learning_rate": 0.00015, "loss": 0.145, "num_input_tokens_seen": 3973824, "step": 1840 }, { "epoch": 0.300978792822186, "grad_norm": 0.0231600571423769, "learning_rate": 0.0001504078303425775, "loss": 0.0033, "num_input_tokens_seen": 3984576, "step": 1845 }, { "epoch": 0.30179445350734097, "grad_norm": 0.015244451351463795, "learning_rate": 0.000150815660685155, "loss": 0.0119, "num_input_tokens_seen": 3997024, "step": 1850 }, { "epoch": 0.30261011419249595, "grad_norm": 0.007247697561979294, "learning_rate": 0.00015122349102773247, "loss": 0.1895, "num_input_tokens_seen": 4007808, "step": 1855 }, { "epoch": 0.3034257748776509, "grad_norm": 0.05449846014380455, "learning_rate": 0.00015163132137030997, "loss": 0.188, "num_input_tokens_seen": 4018944, "step": 1860 }, { "epoch": 0.30424143556280586, "grad_norm": 2.1363980770111084, "learning_rate": 0.00015203915171288742, "loss": 0.6462, "num_input_tokens_seen": 4029824, "step": 1865 }, { "epoch": 0.30505709624796085, "grad_norm": 1.8556227684020996, "learning_rate": 0.00015244698205546493, "loss": 0.3281, "num_input_tokens_seen": 4040800, "step": 1870 }, { "epoch": 0.30587275693311583, "grad_norm": 1.4034215211868286, "learning_rate": 0.0001528548123980424, "loss": 0.2342, "num_input_tokens_seen": 4050912, "step": 1875 }, { "epoch": 0.3066884176182708, "grad_norm": 1.7816450595855713, "learning_rate": 0.0001532626427406199, "loss": 0.1647, "num_input_tokens_seen": 4061696, "step": 1880 }, { "epoch": 0.3075040783034258, "grad_norm": 2.1075515747070312, "learning_rate": 0.0001536704730831974, "loss": 0.2302, "num_input_tokens_seen": 4071264, "step": 1885 }, { "epoch": 0.3083197389885807, "grad_norm": 1.625557780265808, "learning_rate": 0.0001540783034257749, "loss": 0.1746, "num_input_tokens_seen": 4081152, "step": 1890 }, { "epoch": 0.3091353996737357, "grad_norm": 0.2660638988018036, "learning_rate": 0.00015448613376835236, "loss": 0.1498, "num_input_tokens_seen": 4091584, "step": 1895 }, { "epoch": 0.3099510603588907, "grad_norm": 0.2802708148956299, "learning_rate": 0.00015489396411092984, "loss": 0.0782, "num_input_tokens_seen": 4102560, "step": 1900 }, { "epoch": 0.3107667210440457, "grad_norm": 0.36759790778160095, "learning_rate": 0.00015530179445350734, "loss": 0.1151, "num_input_tokens_seen": 4113920, "step": 1905 }, { "epoch": 0.31158238172920066, "grad_norm": 1.4215971231460571, "learning_rate": 0.00015570962479608482, "loss": 0.1237, "num_input_tokens_seen": 4125472, "step": 1910 }, { "epoch": 0.31239804241435565, "grad_norm": 0.3487188220024109, "learning_rate": 0.00015611745513866232, "loss": 0.2935, "num_input_tokens_seen": 4136032, "step": 1915 }, { "epoch": 0.3132137030995106, "grad_norm": 0.13971523940563202, "learning_rate": 0.00015652528548123983, "loss": 0.0692, "num_input_tokens_seen": 4145792, "step": 1920 }, { "epoch": 0.31402936378466556, "grad_norm": 0.19864986836910248, "learning_rate": 0.0001569331158238173, "loss": 0.2522, "num_input_tokens_seen": 4156832, "step": 1925 }, { "epoch": 0.31484502446982054, "grad_norm": 2.3408796787261963, "learning_rate": 0.00015734094616639478, "loss": 0.1961, "num_input_tokens_seen": 4167424, "step": 1930 }, { "epoch": 0.31566068515497553, "grad_norm": 1.4813441038131714, "learning_rate": 0.00015774877650897226, "loss": 0.0822, "num_input_tokens_seen": 4177248, "step": 1935 }, { "epoch": 0.3164763458401305, "grad_norm": 0.1526590883731842, "learning_rate": 0.00015815660685154976, "loss": 0.1321, "num_input_tokens_seen": 4188064, "step": 1940 }, { "epoch": 0.3172920065252855, "grad_norm": 0.12207305431365967, "learning_rate": 0.00015856443719412724, "loss": 0.0615, "num_input_tokens_seen": 4198144, "step": 1945 }, { "epoch": 0.3181076672104405, "grad_norm": 0.2033795267343521, "learning_rate": 0.00015897226753670474, "loss": 0.1123, "num_input_tokens_seen": 4209952, "step": 1950 }, { "epoch": 0.3189233278955954, "grad_norm": 1.4362683296203613, "learning_rate": 0.00015938009787928224, "loss": 0.025, "num_input_tokens_seen": 4219360, "step": 1955 }, { "epoch": 0.3197389885807504, "grad_norm": 0.9837049245834351, "learning_rate": 0.0001597879282218597, "loss": 0.2129, "num_input_tokens_seen": 4230080, "step": 1960 }, { "epoch": 0.3205546492659054, "grad_norm": 0.08906455338001251, "learning_rate": 0.0001601957585644372, "loss": 0.1465, "num_input_tokens_seen": 4240768, "step": 1965 }, { "epoch": 0.32137030995106036, "grad_norm": 1.3643310070037842, "learning_rate": 0.00016060358890701467, "loss": 0.3903, "num_input_tokens_seen": 4252224, "step": 1970 }, { "epoch": 0.32218597063621535, "grad_norm": 0.5359205603599548, "learning_rate": 0.00016101141924959218, "loss": 0.1853, "num_input_tokens_seen": 4264160, "step": 1975 }, { "epoch": 0.32300163132137033, "grad_norm": 1.2164024114608765, "learning_rate": 0.00016141924959216965, "loss": 0.2022, "num_input_tokens_seen": 4274368, "step": 1980 }, { "epoch": 0.32381729200652526, "grad_norm": 0.5487558841705322, "learning_rate": 0.00016182707993474716, "loss": 0.1015, "num_input_tokens_seen": 4283936, "step": 1985 }, { "epoch": 0.32463295269168024, "grad_norm": 0.5527929663658142, "learning_rate": 0.00016223491027732466, "loss": 0.1015, "num_input_tokens_seen": 4294368, "step": 1990 }, { "epoch": 0.3254486133768352, "grad_norm": 1.1117502450942993, "learning_rate": 0.0001626427406199021, "loss": 0.0639, "num_input_tokens_seen": 4303328, "step": 1995 }, { "epoch": 0.3262642740619902, "grad_norm": 2.3114776611328125, "learning_rate": 0.0001630505709624796, "loss": 0.1864, "num_input_tokens_seen": 4315072, "step": 2000 }, { "epoch": 0.3270799347471452, "grad_norm": 0.029865602031350136, "learning_rate": 0.0001634584013050571, "loss": 0.0466, "num_input_tokens_seen": 4326816, "step": 2005 }, { "epoch": 0.3278955954323002, "grad_norm": 2.729114055633545, "learning_rate": 0.0001638662316476346, "loss": 0.0934, "num_input_tokens_seen": 4336896, "step": 2010 }, { "epoch": 0.32871125611745516, "grad_norm": 1.5304375886917114, "learning_rate": 0.00016427406199021207, "loss": 0.0852, "num_input_tokens_seen": 4347328, "step": 2015 }, { "epoch": 0.3295269168026101, "grad_norm": 0.3901952803134918, "learning_rate": 0.00016468189233278957, "loss": 0.1983, "num_input_tokens_seen": 4358208, "step": 2020 }, { "epoch": 0.3303425774877651, "grad_norm": 2.25579571723938, "learning_rate": 0.00016508972267536705, "loss": 0.1892, "num_input_tokens_seen": 4368672, "step": 2025 }, { "epoch": 0.33115823817292006, "grad_norm": 3.3735718727111816, "learning_rate": 0.00016549755301794453, "loss": 0.1723, "num_input_tokens_seen": 4379808, "step": 2030 }, { "epoch": 0.33197389885807504, "grad_norm": 1.8546075820922852, "learning_rate": 0.00016590538336052203, "loss": 0.1136, "num_input_tokens_seen": 4389728, "step": 2035 }, { "epoch": 0.33278955954323003, "grad_norm": 0.03245149180293083, "learning_rate": 0.0001663132137030995, "loss": 0.0093, "num_input_tokens_seen": 4400704, "step": 2040 }, { "epoch": 0.333605220228385, "grad_norm": 2.443859577178955, "learning_rate": 0.000166721044045677, "loss": 0.2122, "num_input_tokens_seen": 4411648, "step": 2045 }, { "epoch": 0.33442088091353994, "grad_norm": 0.18161454796791077, "learning_rate": 0.00016712887438825449, "loss": 0.1275, "num_input_tokens_seen": 4423328, "step": 2050 }, { "epoch": 0.3352365415986949, "grad_norm": 0.296201229095459, "learning_rate": 0.000167536704730832, "loss": 0.3971, "num_input_tokens_seen": 4434336, "step": 2055 }, { "epoch": 0.3360522022838499, "grad_norm": 0.04536137357354164, "learning_rate": 0.00016794453507340947, "loss": 0.1908, "num_input_tokens_seen": 4444672, "step": 2060 }, { "epoch": 0.3368678629690049, "grad_norm": 0.1336517184972763, "learning_rate": 0.00016835236541598694, "loss": 0.0652, "num_input_tokens_seen": 4455872, "step": 2065 }, { "epoch": 0.3376835236541599, "grad_norm": 0.3583919405937195, "learning_rate": 0.00016876019575856445, "loss": 0.129, "num_input_tokens_seen": 4467616, "step": 2070 }, { "epoch": 0.33849918433931486, "grad_norm": 1.1324158906936646, "learning_rate": 0.00016916802610114192, "loss": 0.143, "num_input_tokens_seen": 4478656, "step": 2075 }, { "epoch": 0.33931484502446985, "grad_norm": 0.07469242811203003, "learning_rate": 0.00016957585644371943, "loss": 0.0389, "num_input_tokens_seen": 4488800, "step": 2080 }, { "epoch": 0.3401305057096248, "grad_norm": 1.5802353620529175, "learning_rate": 0.0001699836867862969, "loss": 0.0959, "num_input_tokens_seen": 4498496, "step": 2085 }, { "epoch": 0.34094616639477976, "grad_norm": 1.5060569047927856, "learning_rate": 0.00017039151712887438, "loss": 0.2579, "num_input_tokens_seen": 4509152, "step": 2090 }, { "epoch": 0.34176182707993474, "grad_norm": 0.15717989206314087, "learning_rate": 0.00017079934747145188, "loss": 0.0914, "num_input_tokens_seen": 4521120, "step": 2095 }, { "epoch": 0.3425774877650897, "grad_norm": 2.0099587440490723, "learning_rate": 0.00017120717781402936, "loss": 0.1256, "num_input_tokens_seen": 4531744, "step": 2100 }, { "epoch": 0.3433931484502447, "grad_norm": 0.8347704410552979, "learning_rate": 0.00017161500815660686, "loss": 0.2788, "num_input_tokens_seen": 4540704, "step": 2105 }, { "epoch": 0.3442088091353997, "grad_norm": 1.3534941673278809, "learning_rate": 0.00017202283849918434, "loss": 0.1521, "num_input_tokens_seen": 4551552, "step": 2110 }, { "epoch": 0.3450244698205546, "grad_norm": 0.24349333345890045, "learning_rate": 0.00017243066884176184, "loss": 0.2539, "num_input_tokens_seen": 4562976, "step": 2115 }, { "epoch": 0.3458401305057096, "grad_norm": 0.4490703046321869, "learning_rate": 0.00017283849918433932, "loss": 0.1712, "num_input_tokens_seen": 4573920, "step": 2120 }, { "epoch": 0.3466557911908646, "grad_norm": 1.9330402612686157, "learning_rate": 0.0001732463295269168, "loss": 0.2538, "num_input_tokens_seen": 4585088, "step": 2125 }, { "epoch": 0.3474714518760196, "grad_norm": 0.46054011583328247, "learning_rate": 0.0001736541598694943, "loss": 0.1483, "num_input_tokens_seen": 4594752, "step": 2130 }, { "epoch": 0.34828711256117456, "grad_norm": 0.2638119161128998, "learning_rate": 0.00017406199021207178, "loss": 0.1508, "num_input_tokens_seen": 4604512, "step": 2135 }, { "epoch": 0.34910277324632955, "grad_norm": 0.5201454758644104, "learning_rate": 0.00017446982055464928, "loss": 0.0691, "num_input_tokens_seen": 4615616, "step": 2140 }, { "epoch": 0.34991843393148453, "grad_norm": 12.082786560058594, "learning_rate": 0.00017487765089722676, "loss": 0.2726, "num_input_tokens_seen": 4625984, "step": 2145 }, { "epoch": 0.35073409461663946, "grad_norm": 0.30344992876052856, "learning_rate": 0.00017528548123980426, "loss": 0.1277, "num_input_tokens_seen": 4636992, "step": 2150 }, { "epoch": 0.35154975530179444, "grad_norm": 0.7207813858985901, "learning_rate": 0.0001756933115823817, "loss": 0.0274, "num_input_tokens_seen": 4647424, "step": 2155 }, { "epoch": 0.3523654159869494, "grad_norm": 0.2960370182991028, "learning_rate": 0.0001761011419249592, "loss": 0.1646, "num_input_tokens_seen": 4659360, "step": 2160 }, { "epoch": 0.3531810766721044, "grad_norm": 0.4028482139110565, "learning_rate": 0.00017650897226753672, "loss": 0.1761, "num_input_tokens_seen": 4669248, "step": 2165 }, { "epoch": 0.3539967373572594, "grad_norm": 0.2826700210571289, "learning_rate": 0.0001769168026101142, "loss": 0.0855, "num_input_tokens_seen": 4679360, "step": 2170 }, { "epoch": 0.3548123980424144, "grad_norm": 1.6123909950256348, "learning_rate": 0.0001773246329526917, "loss": 0.3686, "num_input_tokens_seen": 4691104, "step": 2175 }, { "epoch": 0.3556280587275693, "grad_norm": 2.6291587352752686, "learning_rate": 0.00017773246329526917, "loss": 0.1182, "num_input_tokens_seen": 4700416, "step": 2180 }, { "epoch": 0.3564437194127243, "grad_norm": 0.5103549957275391, "learning_rate": 0.00017814029363784668, "loss": 0.3337, "num_input_tokens_seen": 4711072, "step": 2185 }, { "epoch": 0.3572593800978793, "grad_norm": 0.25698915123939514, "learning_rate": 0.00017854812398042412, "loss": 0.2498, "num_input_tokens_seen": 4722496, "step": 2190 }, { "epoch": 0.35807504078303426, "grad_norm": 0.3679829239845276, "learning_rate": 0.00017895595432300163, "loss": 0.1047, "num_input_tokens_seen": 4730784, "step": 2195 }, { "epoch": 0.35889070146818924, "grad_norm": 0.2273644059896469, "learning_rate": 0.00017936378466557913, "loss": 0.1373, "num_input_tokens_seen": 4740896, "step": 2200 }, { "epoch": 0.35970636215334423, "grad_norm": 0.28911730647087097, "learning_rate": 0.0001797716150081566, "loss": 0.1279, "num_input_tokens_seen": 4752448, "step": 2205 }, { "epoch": 0.3605220228384992, "grad_norm": 0.657588005065918, "learning_rate": 0.0001801794453507341, "loss": 0.1135, "num_input_tokens_seen": 4763904, "step": 2210 }, { "epoch": 0.36133768352365414, "grad_norm": 0.7489591836929321, "learning_rate": 0.0001805872756933116, "loss": 0.092, "num_input_tokens_seen": 4773952, "step": 2215 }, { "epoch": 0.3621533442088091, "grad_norm": 0.6419491171836853, "learning_rate": 0.00018099510603588906, "loss": 0.1956, "num_input_tokens_seen": 4784672, "step": 2220 }, { "epoch": 0.3629690048939641, "grad_norm": 1.2978434562683105, "learning_rate": 0.00018140293637846654, "loss": 0.1935, "num_input_tokens_seen": 4796480, "step": 2225 }, { "epoch": 0.3637846655791191, "grad_norm": 0.9570780396461487, "learning_rate": 0.00018181076672104404, "loss": 0.2541, "num_input_tokens_seen": 4807328, "step": 2230 }, { "epoch": 0.3646003262642741, "grad_norm": 0.6393945813179016, "learning_rate": 0.00018221859706362155, "loss": 0.1581, "num_input_tokens_seen": 4818848, "step": 2235 }, { "epoch": 0.36541598694942906, "grad_norm": 0.6659722924232483, "learning_rate": 0.00018262642740619902, "loss": 0.1676, "num_input_tokens_seen": 4829024, "step": 2240 }, { "epoch": 0.366231647634584, "grad_norm": 0.46172747015953064, "learning_rate": 0.00018303425774877653, "loss": 0.1468, "num_input_tokens_seen": 4839936, "step": 2245 }, { "epoch": 0.367047308319739, "grad_norm": 0.5429582595825195, "learning_rate": 0.00018344208809135398, "loss": 0.2178, "num_input_tokens_seen": 4851424, "step": 2250 }, { "epoch": 0.36786296900489396, "grad_norm": 0.18980517983436584, "learning_rate": 0.00018384991843393148, "loss": 0.1108, "num_input_tokens_seen": 4861888, "step": 2255 }, { "epoch": 0.36867862969004894, "grad_norm": 0.52447509765625, "learning_rate": 0.00018425774877650896, "loss": 0.0889, "num_input_tokens_seen": 4871136, "step": 2260 }, { "epoch": 0.3694942903752039, "grad_norm": 0.4742538034915924, "learning_rate": 0.00018466557911908646, "loss": 0.0765, "num_input_tokens_seen": 4882976, "step": 2265 }, { "epoch": 0.3703099510603589, "grad_norm": 0.05093076080083847, "learning_rate": 0.00018507340946166396, "loss": 0.0865, "num_input_tokens_seen": 4893536, "step": 2270 }, { "epoch": 0.37112561174551384, "grad_norm": 0.5098185539245605, "learning_rate": 0.00018548123980424144, "loss": 0.2369, "num_input_tokens_seen": 4904384, "step": 2275 }, { "epoch": 0.3719412724306688, "grad_norm": 0.00874658115208149, "learning_rate": 0.00018588907014681894, "loss": 0.2179, "num_input_tokens_seen": 4915008, "step": 2280 }, { "epoch": 0.3727569331158238, "grad_norm": 0.49980396032333374, "learning_rate": 0.0001862969004893964, "loss": 0.081, "num_input_tokens_seen": 4925632, "step": 2285 }, { "epoch": 0.3735725938009788, "grad_norm": 0.029526453465223312, "learning_rate": 0.0001867047308319739, "loss": 0.0769, "num_input_tokens_seen": 4936224, "step": 2290 }, { "epoch": 0.3743882544861338, "grad_norm": 0.04340076446533203, "learning_rate": 0.0001871125611745514, "loss": 0.1736, "num_input_tokens_seen": 4947072, "step": 2295 }, { "epoch": 0.37520391517128876, "grad_norm": 2.040459156036377, "learning_rate": 0.00018752039151712888, "loss": 0.1318, "num_input_tokens_seen": 4957024, "step": 2300 }, { "epoch": 0.37601957585644374, "grad_norm": 0.05700768902897835, "learning_rate": 0.00018792822185970638, "loss": 0.0439, "num_input_tokens_seen": 4966528, "step": 2305 }, { "epoch": 0.3768352365415987, "grad_norm": 0.07470440119504929, "learning_rate": 0.00018833605220228386, "loss": 0.1136, "num_input_tokens_seen": 4976928, "step": 2310 }, { "epoch": 0.37765089722675366, "grad_norm": 1.5700215101242065, "learning_rate": 0.00018874388254486133, "loss": 0.2085, "num_input_tokens_seen": 4987296, "step": 2315 }, { "epoch": 0.37846655791190864, "grad_norm": 0.786880373954773, "learning_rate": 0.0001891517128874388, "loss": 0.0911, "num_input_tokens_seen": 4998336, "step": 2320 }, { "epoch": 0.3792822185970636, "grad_norm": 0.19122718274593353, "learning_rate": 0.00018955954323001631, "loss": 0.1669, "num_input_tokens_seen": 5008256, "step": 2325 }, { "epoch": 0.3800978792822186, "grad_norm": 1.7962323427200317, "learning_rate": 0.00018996737357259382, "loss": 0.195, "num_input_tokens_seen": 5018688, "step": 2330 }, { "epoch": 0.3809135399673736, "grad_norm": 0.29662618041038513, "learning_rate": 0.0001903752039151713, "loss": 0.1354, "num_input_tokens_seen": 5029984, "step": 2335 }, { "epoch": 0.3817292006525285, "grad_norm": 0.5231503248214722, "learning_rate": 0.0001907830342577488, "loss": 0.0567, "num_input_tokens_seen": 5040160, "step": 2340 }, { "epoch": 0.3825448613376835, "grad_norm": 1.3572986125946045, "learning_rate": 0.00019119086460032627, "loss": 0.1691, "num_input_tokens_seen": 5052640, "step": 2345 }, { "epoch": 0.3833605220228385, "grad_norm": 0.11215253174304962, "learning_rate": 0.00019159869494290375, "loss": 0.1856, "num_input_tokens_seen": 5063616, "step": 2350 }, { "epoch": 0.3841761827079935, "grad_norm": 1.3003672361373901, "learning_rate": 0.00019200652528548123, "loss": 0.1788, "num_input_tokens_seen": 5075584, "step": 2355 }, { "epoch": 0.38499184339314846, "grad_norm": 0.8807132840156555, "learning_rate": 0.00019241435562805873, "loss": 0.2466, "num_input_tokens_seen": 5086272, "step": 2360 }, { "epoch": 0.38580750407830344, "grad_norm": 0.5251150131225586, "learning_rate": 0.00019282218597063623, "loss": 0.2486, "num_input_tokens_seen": 5098560, "step": 2365 }, { "epoch": 0.3866231647634584, "grad_norm": 0.2054545283317566, "learning_rate": 0.0001932300163132137, "loss": 0.1211, "num_input_tokens_seen": 5109792, "step": 2370 }, { "epoch": 0.38743882544861336, "grad_norm": 1.0482416152954102, "learning_rate": 0.00019363784665579121, "loss": 0.2811, "num_input_tokens_seen": 5119328, "step": 2375 }, { "epoch": 0.38825448613376834, "grad_norm": 0.46562135219573975, "learning_rate": 0.00019404567699836866, "loss": 0.1733, "num_input_tokens_seen": 5129824, "step": 2380 }, { "epoch": 0.3890701468189233, "grad_norm": 1.1354540586471558, "learning_rate": 0.00019445350734094617, "loss": 0.2479, "num_input_tokens_seen": 5140800, "step": 2385 }, { "epoch": 0.3898858075040783, "grad_norm": 0.5768679976463318, "learning_rate": 0.00019486133768352364, "loss": 0.1671, "num_input_tokens_seen": 5152064, "step": 2390 }, { "epoch": 0.3907014681892333, "grad_norm": 0.4160184860229492, "learning_rate": 0.00019526916802610115, "loss": 0.1038, "num_input_tokens_seen": 5164288, "step": 2395 }, { "epoch": 0.3915171288743883, "grad_norm": 0.5931136012077332, "learning_rate": 0.00019567699836867865, "loss": 0.0697, "num_input_tokens_seen": 5174784, "step": 2400 }, { "epoch": 0.3923327895595432, "grad_norm": 0.5052030086517334, "learning_rate": 0.00019608482871125613, "loss": 0.1027, "num_input_tokens_seen": 5185792, "step": 2405 }, { "epoch": 0.3931484502446982, "grad_norm": 0.699626088142395, "learning_rate": 0.00019649265905383363, "loss": 0.2045, "num_input_tokens_seen": 5196832, "step": 2410 }, { "epoch": 0.3939641109298532, "grad_norm": 0.30739665031433105, "learning_rate": 0.00019690048939641108, "loss": 0.3627, "num_input_tokens_seen": 5208288, "step": 2415 }, { "epoch": 0.39477977161500816, "grad_norm": 0.7836719155311584, "learning_rate": 0.00019730831973898858, "loss": 0.0812, "num_input_tokens_seen": 5219008, "step": 2420 }, { "epoch": 0.39559543230016314, "grad_norm": 1.719211220741272, "learning_rate": 0.00019771615008156606, "loss": 0.1785, "num_input_tokens_seen": 5230400, "step": 2425 }, { "epoch": 0.3964110929853181, "grad_norm": 3.8855228424072266, "learning_rate": 0.00019812398042414356, "loss": 0.1409, "num_input_tokens_seen": 5241920, "step": 2430 }, { "epoch": 0.3972267536704731, "grad_norm": 0.14320261776447296, "learning_rate": 0.00019853181076672107, "loss": 0.1043, "num_input_tokens_seen": 5252768, "step": 2435 }, { "epoch": 0.39804241435562804, "grad_norm": 0.2717430591583252, "learning_rate": 0.00019893964110929854, "loss": 0.0898, "num_input_tokens_seen": 5263840, "step": 2440 }, { "epoch": 0.398858075040783, "grad_norm": 0.1439686268568039, "learning_rate": 0.00019934747145187602, "loss": 0.2313, "num_input_tokens_seen": 5274656, "step": 2445 }, { "epoch": 0.399673735725938, "grad_norm": 0.17499934136867523, "learning_rate": 0.0001997553017944535, "loss": 0.0766, "num_input_tokens_seen": 5285504, "step": 2450 }, { "epoch": 0.400489396411093, "grad_norm": 0.2679937779903412, "learning_rate": 0.000200163132137031, "loss": 0.2409, "num_input_tokens_seen": 5297024, "step": 2455 }, { "epoch": 0.401305057096248, "grad_norm": 0.47787871956825256, "learning_rate": 0.00020057096247960848, "loss": 0.2762, "num_input_tokens_seen": 5308064, "step": 2460 }, { "epoch": 0.40212071778140296, "grad_norm": 0.14121407270431519, "learning_rate": 0.00020097879282218598, "loss": 0.1081, "num_input_tokens_seen": 5318880, "step": 2465 }, { "epoch": 0.4029363784665579, "grad_norm": 0.4858434200286865, "learning_rate": 0.00020138662316476348, "loss": 0.1588, "num_input_tokens_seen": 5329440, "step": 2470 }, { "epoch": 0.40375203915171287, "grad_norm": 0.30492958426475525, "learning_rate": 0.00020179445350734096, "loss": 0.0775, "num_input_tokens_seen": 5340000, "step": 2475 }, { "epoch": 0.40456769983686786, "grad_norm": 5.706164836883545, "learning_rate": 0.00020220228384991844, "loss": 0.1216, "num_input_tokens_seen": 5350944, "step": 2480 }, { "epoch": 0.40538336052202284, "grad_norm": 0.20894655585289001, "learning_rate": 0.0002026101141924959, "loss": 0.257, "num_input_tokens_seen": 5362016, "step": 2485 }, { "epoch": 0.4061990212071778, "grad_norm": 0.1500648409128189, "learning_rate": 0.00020301794453507342, "loss": 0.1702, "num_input_tokens_seen": 5373024, "step": 2490 }, { "epoch": 0.4070146818923328, "grad_norm": 0.25598615407943726, "learning_rate": 0.0002034257748776509, "loss": 0.1976, "num_input_tokens_seen": 5383840, "step": 2495 }, { "epoch": 0.4078303425774878, "grad_norm": 0.2832091748714447, "learning_rate": 0.0002038336052202284, "loss": 0.0945, "num_input_tokens_seen": 5394688, "step": 2500 }, { "epoch": 0.4086460032626427, "grad_norm": 0.49175241589546204, "learning_rate": 0.0002042414355628059, "loss": 0.0589, "num_input_tokens_seen": 5405760, "step": 2505 }, { "epoch": 0.4094616639477977, "grad_norm": 2.1446218490600586, "learning_rate": 0.00020464926590538335, "loss": 0.1422, "num_input_tokens_seen": 5416736, "step": 2510 }, { "epoch": 0.4102773246329527, "grad_norm": 1.993091106414795, "learning_rate": 0.00020505709624796085, "loss": 0.1631, "num_input_tokens_seen": 5428160, "step": 2515 }, { "epoch": 0.4110929853181077, "grad_norm": 0.05304631590843201, "learning_rate": 0.00020546492659053833, "loss": 0.0302, "num_input_tokens_seen": 5439616, "step": 2520 }, { "epoch": 0.41190864600326266, "grad_norm": 2.1700851917266846, "learning_rate": 0.00020587275693311583, "loss": 0.1358, "num_input_tokens_seen": 5450048, "step": 2525 }, { "epoch": 0.41272430668841764, "grad_norm": 2.9487311840057373, "learning_rate": 0.0002062805872756933, "loss": 0.1655, "num_input_tokens_seen": 5460512, "step": 2530 }, { "epoch": 0.41353996737357257, "grad_norm": 1.1737804412841797, "learning_rate": 0.0002066884176182708, "loss": 0.1076, "num_input_tokens_seen": 5470496, "step": 2535 }, { "epoch": 0.41435562805872755, "grad_norm": 8.738869667053223, "learning_rate": 0.00020709624796084832, "loss": 0.4035, "num_input_tokens_seen": 5482112, "step": 2540 }, { "epoch": 0.41517128874388254, "grad_norm": 2.1070964336395264, "learning_rate": 0.00020750407830342577, "loss": 0.0887, "num_input_tokens_seen": 5493984, "step": 2545 }, { "epoch": 0.4159869494290375, "grad_norm": 0.5695657134056091, "learning_rate": 0.00020791190864600327, "loss": 0.0843, "num_input_tokens_seen": 5505440, "step": 2550 }, { "epoch": 0.4168026101141925, "grad_norm": 0.01275833323597908, "learning_rate": 0.00020831973898858075, "loss": 0.1324, "num_input_tokens_seen": 5516800, "step": 2555 }, { "epoch": 0.4176182707993475, "grad_norm": 0.055172014981508255, "learning_rate": 0.00020872756933115825, "loss": 0.0315, "num_input_tokens_seen": 5527360, "step": 2560 }, { "epoch": 0.4184339314845024, "grad_norm": 1.583932638168335, "learning_rate": 0.00020913539967373573, "loss": 0.1176, "num_input_tokens_seen": 5537792, "step": 2565 }, { "epoch": 0.4192495921696574, "grad_norm": 0.4243050217628479, "learning_rate": 0.00020954323001631323, "loss": 0.269, "num_input_tokens_seen": 5548864, "step": 2570 }, { "epoch": 0.4200652528548124, "grad_norm": 2.5984838008880615, "learning_rate": 0.0002099510603588907, "loss": 0.0807, "num_input_tokens_seen": 5559072, "step": 2575 }, { "epoch": 0.42088091353996737, "grad_norm": 0.3416847288608551, "learning_rate": 0.00021035889070146818, "loss": 0.2578, "num_input_tokens_seen": 5568864, "step": 2580 }, { "epoch": 0.42169657422512236, "grad_norm": 0.1450396478176117, "learning_rate": 0.00021076672104404569, "loss": 0.1429, "num_input_tokens_seen": 5579744, "step": 2585 }, { "epoch": 0.42251223491027734, "grad_norm": 0.7415868639945984, "learning_rate": 0.00021117455138662316, "loss": 0.2167, "num_input_tokens_seen": 5589824, "step": 2590 }, { "epoch": 0.4233278955954323, "grad_norm": 0.3233489990234375, "learning_rate": 0.00021158238172920067, "loss": 0.143, "num_input_tokens_seen": 5601056, "step": 2595 }, { "epoch": 0.42414355628058725, "grad_norm": 0.10952405631542206, "learning_rate": 0.00021199021207177814, "loss": 0.2179, "num_input_tokens_seen": 5611744, "step": 2600 }, { "epoch": 0.42495921696574224, "grad_norm": 0.619699239730835, "learning_rate": 0.00021239804241435562, "loss": 0.2343, "num_input_tokens_seen": 5621600, "step": 2605 }, { "epoch": 0.4257748776508972, "grad_norm": 0.3699929118156433, "learning_rate": 0.00021280587275693312, "loss": 0.143, "num_input_tokens_seen": 5633088, "step": 2610 }, { "epoch": 0.4265905383360522, "grad_norm": 1.0430079698562622, "learning_rate": 0.0002132137030995106, "loss": 0.1942, "num_input_tokens_seen": 5644352, "step": 2615 }, { "epoch": 0.4274061990212072, "grad_norm": 0.596524178981781, "learning_rate": 0.0002136215334420881, "loss": 0.1614, "num_input_tokens_seen": 5653600, "step": 2620 }, { "epoch": 0.4282218597063622, "grad_norm": 0.615421712398529, "learning_rate": 0.00021402936378466558, "loss": 0.1703, "num_input_tokens_seen": 5665152, "step": 2625 }, { "epoch": 0.4290375203915171, "grad_norm": 1.0820027589797974, "learning_rate": 0.00021443719412724308, "loss": 0.1177, "num_input_tokens_seen": 5676640, "step": 2630 }, { "epoch": 0.4298531810766721, "grad_norm": 0.33138588070869446, "learning_rate": 0.00021484502446982056, "loss": 0.0626, "num_input_tokens_seen": 5687360, "step": 2635 }, { "epoch": 0.43066884176182707, "grad_norm": 1.8373628854751587, "learning_rate": 0.00021525285481239804, "loss": 0.1724, "num_input_tokens_seen": 5698112, "step": 2640 }, { "epoch": 0.43148450244698205, "grad_norm": 0.3439456820487976, "learning_rate": 0.00021566068515497554, "loss": 0.2369, "num_input_tokens_seen": 5708480, "step": 2645 }, { "epoch": 0.43230016313213704, "grad_norm": 1.2714189291000366, "learning_rate": 0.00021606851549755302, "loss": 0.1843, "num_input_tokens_seen": 5718144, "step": 2650 }, { "epoch": 0.433115823817292, "grad_norm": 0.7776082754135132, "learning_rate": 0.00021647634584013052, "loss": 0.2882, "num_input_tokens_seen": 5728832, "step": 2655 }, { "epoch": 0.433931484502447, "grad_norm": 0.16278807818889618, "learning_rate": 0.000216884176182708, "loss": 0.1196, "num_input_tokens_seen": 5739584, "step": 2660 }, { "epoch": 0.43474714518760194, "grad_norm": 0.8418310284614563, "learning_rate": 0.0002172920065252855, "loss": 0.145, "num_input_tokens_seen": 5749632, "step": 2665 }, { "epoch": 0.4355628058727569, "grad_norm": 0.3150191903114319, "learning_rate": 0.00021769983686786295, "loss": 0.0556, "num_input_tokens_seen": 5760480, "step": 2670 }, { "epoch": 0.4363784665579119, "grad_norm": 0.9902245998382568, "learning_rate": 0.00021810766721044045, "loss": 0.1921, "num_input_tokens_seen": 5770560, "step": 2675 }, { "epoch": 0.4371941272430669, "grad_norm": 1.708687424659729, "learning_rate": 0.00021851549755301796, "loss": 0.1473, "num_input_tokens_seen": 5780416, "step": 2680 }, { "epoch": 0.43800978792822187, "grad_norm": 0.952900767326355, "learning_rate": 0.00021892332789559543, "loss": 0.2966, "num_input_tokens_seen": 5791136, "step": 2685 }, { "epoch": 0.43882544861337686, "grad_norm": 1.0747466087341309, "learning_rate": 0.00021933115823817294, "loss": 0.1756, "num_input_tokens_seen": 5801600, "step": 2690 }, { "epoch": 0.4396411092985318, "grad_norm": 0.06641166657209396, "learning_rate": 0.0002197389885807504, "loss": 0.19, "num_input_tokens_seen": 5813632, "step": 2695 }, { "epoch": 0.44045676998368677, "grad_norm": 0.29006800055503845, "learning_rate": 0.00022014681892332792, "loss": 0.0424, "num_input_tokens_seen": 5823680, "step": 2700 }, { "epoch": 0.44127243066884175, "grad_norm": 0.15687257051467896, "learning_rate": 0.00022055464926590536, "loss": 0.1069, "num_input_tokens_seen": 5833760, "step": 2705 }, { "epoch": 0.44208809135399674, "grad_norm": 0.32435062527656555, "learning_rate": 0.00022096247960848287, "loss": 0.184, "num_input_tokens_seen": 5844352, "step": 2710 }, { "epoch": 0.4429037520391517, "grad_norm": 0.9220103621482849, "learning_rate": 0.00022137030995106037, "loss": 0.1101, "num_input_tokens_seen": 5854624, "step": 2715 }, { "epoch": 0.4437194127243067, "grad_norm": 0.1260593682527542, "learning_rate": 0.00022177814029363785, "loss": 0.1279, "num_input_tokens_seen": 5865280, "step": 2720 }, { "epoch": 0.4445350734094617, "grad_norm": 0.10509152710437775, "learning_rate": 0.00022218597063621535, "loss": 0.128, "num_input_tokens_seen": 5875552, "step": 2725 }, { "epoch": 0.4453507340946166, "grad_norm": 0.3267362117767334, "learning_rate": 0.00022259380097879283, "loss": 0.2118, "num_input_tokens_seen": 5886592, "step": 2730 }, { "epoch": 0.4461663947797716, "grad_norm": 0.5570999383926392, "learning_rate": 0.0002230016313213703, "loss": 0.1433, "num_input_tokens_seen": 5896864, "step": 2735 }, { "epoch": 0.4469820554649266, "grad_norm": 0.09635140746831894, "learning_rate": 0.00022340946166394778, "loss": 0.1857, "num_input_tokens_seen": 5907360, "step": 2740 }, { "epoch": 0.44779771615008157, "grad_norm": 0.42166176438331604, "learning_rate": 0.00022381729200652529, "loss": 0.1385, "num_input_tokens_seen": 5917344, "step": 2745 }, { "epoch": 0.44861337683523655, "grad_norm": 0.3304622173309326, "learning_rate": 0.0002242251223491028, "loss": 0.2181, "num_input_tokens_seen": 5929536, "step": 2750 }, { "epoch": 0.44942903752039154, "grad_norm": 0.3093664050102234, "learning_rate": 0.00022463295269168027, "loss": 0.155, "num_input_tokens_seen": 5940992, "step": 2755 }, { "epoch": 0.45024469820554647, "grad_norm": 0.12017809599637985, "learning_rate": 0.00022504078303425777, "loss": 0.1618, "num_input_tokens_seen": 5951520, "step": 2760 }, { "epoch": 0.45106035889070145, "grad_norm": 0.5987087488174438, "learning_rate": 0.00022544861337683525, "loss": 0.1302, "num_input_tokens_seen": 5961952, "step": 2765 }, { "epoch": 0.45187601957585644, "grad_norm": 1.5060359239578247, "learning_rate": 0.00022585644371941272, "loss": 0.2578, "num_input_tokens_seen": 5972000, "step": 2770 }, { "epoch": 0.4526916802610114, "grad_norm": 0.19394594430923462, "learning_rate": 0.0002262642740619902, "loss": 0.1126, "num_input_tokens_seen": 5981984, "step": 2775 }, { "epoch": 0.4535073409461664, "grad_norm": 0.22451826930046082, "learning_rate": 0.0002266721044045677, "loss": 0.0887, "num_input_tokens_seen": 5992800, "step": 2780 }, { "epoch": 0.4543230016313214, "grad_norm": 0.9931198954582214, "learning_rate": 0.0002270799347471452, "loss": 0.0777, "num_input_tokens_seen": 6003808, "step": 2785 }, { "epoch": 0.4551386623164764, "grad_norm": 2.576740026473999, "learning_rate": 0.00022748776508972268, "loss": 0.1955, "num_input_tokens_seen": 6014336, "step": 2790 }, { "epoch": 0.4559543230016313, "grad_norm": 0.04945773258805275, "learning_rate": 0.00022789559543230019, "loss": 0.2142, "num_input_tokens_seen": 6025792, "step": 2795 }, { "epoch": 0.4567699836867863, "grad_norm": 0.8532063364982605, "learning_rate": 0.00022830342577487763, "loss": 0.1424, "num_input_tokens_seen": 6035968, "step": 2800 }, { "epoch": 0.45758564437194127, "grad_norm": 0.15329685807228088, "learning_rate": 0.00022871125611745514, "loss": 0.1321, "num_input_tokens_seen": 6047040, "step": 2805 }, { "epoch": 0.45840130505709625, "grad_norm": 0.15588818490505219, "learning_rate": 0.00022911908646003261, "loss": 0.0808, "num_input_tokens_seen": 6057728, "step": 2810 }, { "epoch": 0.45921696574225124, "grad_norm": 0.7118588089942932, "learning_rate": 0.00022952691680261012, "loss": 0.1649, "num_input_tokens_seen": 6070208, "step": 2815 }, { "epoch": 0.4600326264274062, "grad_norm": 0.23197035491466522, "learning_rate": 0.00022993474714518762, "loss": 0.1928, "num_input_tokens_seen": 6082144, "step": 2820 }, { "epoch": 0.46084828711256115, "grad_norm": 0.136517733335495, "learning_rate": 0.0002303425774877651, "loss": 0.1365, "num_input_tokens_seen": 6094112, "step": 2825 }, { "epoch": 0.46166394779771613, "grad_norm": 0.056639283895492554, "learning_rate": 0.0002307504078303426, "loss": 0.1038, "num_input_tokens_seen": 6104608, "step": 2830 }, { "epoch": 0.4624796084828711, "grad_norm": 0.2406209260225296, "learning_rate": 0.00023115823817292005, "loss": 0.0966, "num_input_tokens_seen": 6114816, "step": 2835 }, { "epoch": 0.4632952691680261, "grad_norm": 0.0827520340681076, "learning_rate": 0.00023156606851549755, "loss": 0.1219, "num_input_tokens_seen": 6125952, "step": 2840 }, { "epoch": 0.4641109298531811, "grad_norm": 0.08483751118183136, "learning_rate": 0.00023197389885807503, "loss": 0.2323, "num_input_tokens_seen": 6137056, "step": 2845 }, { "epoch": 0.46492659053833607, "grad_norm": 1.0944316387176514, "learning_rate": 0.00023238172920065253, "loss": 0.0779, "num_input_tokens_seen": 6147840, "step": 2850 }, { "epoch": 0.46574225122349105, "grad_norm": 0.10864396393299103, "learning_rate": 0.00023278955954323004, "loss": 0.222, "num_input_tokens_seen": 6159808, "step": 2855 }, { "epoch": 0.466557911908646, "grad_norm": 0.804469108581543, "learning_rate": 0.00023319738988580751, "loss": 0.1266, "num_input_tokens_seen": 6171520, "step": 2860 }, { "epoch": 0.46737357259380097, "grad_norm": 0.7416703701019287, "learning_rate": 0.000233605220228385, "loss": 0.1842, "num_input_tokens_seen": 6182656, "step": 2865 }, { "epoch": 0.46818923327895595, "grad_norm": 0.2062879502773285, "learning_rate": 0.00023401305057096247, "loss": 0.2398, "num_input_tokens_seen": 6193696, "step": 2870 }, { "epoch": 0.46900489396411094, "grad_norm": 0.3211911618709564, "learning_rate": 0.00023442088091353997, "loss": 0.1797, "num_input_tokens_seen": 6204192, "step": 2875 }, { "epoch": 0.4698205546492659, "grad_norm": 0.5380843877792358, "learning_rate": 0.00023482871125611747, "loss": 0.1488, "num_input_tokens_seen": 6215136, "step": 2880 }, { "epoch": 0.4706362153344209, "grad_norm": 0.6130079627037048, "learning_rate": 0.00023523654159869495, "loss": 0.0896, "num_input_tokens_seen": 6225952, "step": 2885 }, { "epoch": 0.47145187601957583, "grad_norm": 1.6829217672348022, "learning_rate": 0.00023564437194127245, "loss": 0.2225, "num_input_tokens_seen": 6237152, "step": 2890 }, { "epoch": 0.4722675367047308, "grad_norm": 0.5651580691337585, "learning_rate": 0.00023605220228384993, "loss": 0.1513, "num_input_tokens_seen": 6248416, "step": 2895 }, { "epoch": 0.4730831973898858, "grad_norm": 1.282302737236023, "learning_rate": 0.0002364600326264274, "loss": 0.1621, "num_input_tokens_seen": 6259840, "step": 2900 }, { "epoch": 0.4738988580750408, "grad_norm": 0.22257353365421295, "learning_rate": 0.00023686786296900488, "loss": 0.1043, "num_input_tokens_seen": 6271104, "step": 2905 }, { "epoch": 0.47471451876019577, "grad_norm": 0.780252993106842, "learning_rate": 0.0002372756933115824, "loss": 0.1725, "num_input_tokens_seen": 6281696, "step": 2910 }, { "epoch": 0.47553017944535075, "grad_norm": 1.1723055839538574, "learning_rate": 0.0002376835236541599, "loss": 0.1408, "num_input_tokens_seen": 6293760, "step": 2915 }, { "epoch": 0.4763458401305057, "grad_norm": 0.23256178200244904, "learning_rate": 0.00023809135399673737, "loss": 0.091, "num_input_tokens_seen": 6305376, "step": 2920 }, { "epoch": 0.47716150081566067, "grad_norm": 0.22261440753936768, "learning_rate": 0.00023849918433931487, "loss": 0.1022, "num_input_tokens_seen": 6316032, "step": 2925 }, { "epoch": 0.47797716150081565, "grad_norm": 1.2297919988632202, "learning_rate": 0.00023890701468189232, "loss": 0.2899, "num_input_tokens_seen": 6327552, "step": 2930 }, { "epoch": 0.47879282218597063, "grad_norm": 0.4029012620449066, "learning_rate": 0.00023931484502446982, "loss": 0.1099, "num_input_tokens_seen": 6337344, "step": 2935 }, { "epoch": 0.4796084828711256, "grad_norm": 0.3078548312187195, "learning_rate": 0.0002397226753670473, "loss": 0.1346, "num_input_tokens_seen": 6349120, "step": 2940 }, { "epoch": 0.4804241435562806, "grad_norm": 0.18580852448940277, "learning_rate": 0.0002401305057096248, "loss": 0.1392, "num_input_tokens_seen": 6359584, "step": 2945 }, { "epoch": 0.4812398042414356, "grad_norm": 0.7232683897018433, "learning_rate": 0.0002405383360522023, "loss": 0.1193, "num_input_tokens_seen": 6370112, "step": 2950 }, { "epoch": 0.4820554649265905, "grad_norm": 0.07306995987892151, "learning_rate": 0.00024094616639477978, "loss": 0.0465, "num_input_tokens_seen": 6381248, "step": 2955 }, { "epoch": 0.4828711256117455, "grad_norm": 1.1193236112594604, "learning_rate": 0.00024135399673735726, "loss": 0.1825, "num_input_tokens_seen": 6392224, "step": 2960 }, { "epoch": 0.4836867862969005, "grad_norm": 0.22385838627815247, "learning_rate": 0.00024176182707993474, "loss": 0.2347, "num_input_tokens_seen": 6401888, "step": 2965 }, { "epoch": 0.48450244698205547, "grad_norm": 0.5825753808021545, "learning_rate": 0.00024216965742251224, "loss": 0.1298, "num_input_tokens_seen": 6412352, "step": 2970 }, { "epoch": 0.48531810766721045, "grad_norm": 1.0909613370895386, "learning_rate": 0.00024257748776508972, "loss": 0.115, "num_input_tokens_seen": 6422880, "step": 2975 }, { "epoch": 0.48613376835236544, "grad_norm": 0.23585692048072815, "learning_rate": 0.00024298531810766722, "loss": 0.1675, "num_input_tokens_seen": 6433696, "step": 2980 }, { "epoch": 0.48694942903752036, "grad_norm": 0.722490131855011, "learning_rate": 0.00024339314845024472, "loss": 0.2255, "num_input_tokens_seen": 6444576, "step": 2985 }, { "epoch": 0.48776508972267535, "grad_norm": 0.2733224630355835, "learning_rate": 0.0002438009787928222, "loss": 0.0739, "num_input_tokens_seen": 6455616, "step": 2990 }, { "epoch": 0.48858075040783033, "grad_norm": 0.12696190178394318, "learning_rate": 0.0002442088091353997, "loss": 0.1702, "num_input_tokens_seen": 6465632, "step": 2995 }, { "epoch": 0.4893964110929853, "grad_norm": 1.2236684560775757, "learning_rate": 0.00024461663947797715, "loss": 0.1988, "num_input_tokens_seen": 6476320, "step": 3000 }, { "epoch": 0.4902120717781403, "grad_norm": 0.11306619644165039, "learning_rate": 0.00024502446982055463, "loss": 0.1749, "num_input_tokens_seen": 6486560, "step": 3005 }, { "epoch": 0.4910277324632953, "grad_norm": 0.06194991618394852, "learning_rate": 0.00024543230016313216, "loss": 0.06, "num_input_tokens_seen": 6496448, "step": 3010 }, { "epoch": 0.49184339314845027, "grad_norm": 0.1334661990404129, "learning_rate": 0.00024584013050570964, "loss": 0.1295, "num_input_tokens_seen": 6506624, "step": 3015 }, { "epoch": 0.4926590538336052, "grad_norm": 0.09926887601613998, "learning_rate": 0.0002462479608482871, "loss": 0.1661, "num_input_tokens_seen": 6516960, "step": 3020 }, { "epoch": 0.4934747145187602, "grad_norm": 1.0292459726333618, "learning_rate": 0.0002466557911908646, "loss": 0.1348, "num_input_tokens_seen": 6528896, "step": 3025 }, { "epoch": 0.49429037520391517, "grad_norm": 0.5590057969093323, "learning_rate": 0.00024706362153344207, "loss": 0.0731, "num_input_tokens_seen": 6540576, "step": 3030 }, { "epoch": 0.49510603588907015, "grad_norm": 0.3860446512699127, "learning_rate": 0.0002474714518760196, "loss": 0.0626, "num_input_tokens_seen": 6551424, "step": 3035 }, { "epoch": 0.49592169657422513, "grad_norm": 0.12069137394428253, "learning_rate": 0.0002478792822185971, "loss": 0.0862, "num_input_tokens_seen": 6562176, "step": 3040 }, { "epoch": 0.4967373572593801, "grad_norm": 0.0766163021326065, "learning_rate": 0.00024828711256117455, "loss": 0.0904, "num_input_tokens_seen": 6572384, "step": 3045 }, { "epoch": 0.49755301794453505, "grad_norm": 0.1165001317858696, "learning_rate": 0.000248694942903752, "loss": 0.2203, "num_input_tokens_seen": 6583424, "step": 3050 }, { "epoch": 0.49836867862969003, "grad_norm": 0.015077603980898857, "learning_rate": 0.00024910277324632956, "loss": 0.0513, "num_input_tokens_seen": 6594144, "step": 3055 }, { "epoch": 0.499184339314845, "grad_norm": 0.4812507629394531, "learning_rate": 0.00024951060358890703, "loss": 0.1248, "num_input_tokens_seen": 6605760, "step": 3060 }, { "epoch": 0.5, "grad_norm": 0.2543140649795532, "learning_rate": 0.0002499184339314845, "loss": 0.1945, "num_input_tokens_seen": 6616832, "step": 3065 }, { "epoch": 0.5008156606851549, "grad_norm": 0.787386417388916, "learning_rate": 0.00025032626427406204, "loss": 0.159, "num_input_tokens_seen": 6628384, "step": 3070 }, { "epoch": 0.50163132137031, "grad_norm": 0.19998139142990112, "learning_rate": 0.00025073409461663946, "loss": 0.0486, "num_input_tokens_seen": 6639456, "step": 3075 }, { "epoch": 0.5024469820554649, "grad_norm": 0.042470287531614304, "learning_rate": 0.00025114192495921694, "loss": 0.0311, "num_input_tokens_seen": 6651520, "step": 3080 }, { "epoch": 0.5032626427406199, "grad_norm": 0.04640533775091171, "learning_rate": 0.00025154975530179447, "loss": 0.1245, "num_input_tokens_seen": 6661664, "step": 3085 }, { "epoch": 0.5040783034257749, "grad_norm": 1.1446317434310913, "learning_rate": 0.00025195758564437195, "loss": 0.2796, "num_input_tokens_seen": 6673024, "step": 3090 }, { "epoch": 0.5048939641109299, "grad_norm": 0.34324145317077637, "learning_rate": 0.0002523654159869495, "loss": 0.1932, "num_input_tokens_seen": 6685216, "step": 3095 }, { "epoch": 0.5057096247960848, "grad_norm": 1.195542812347412, "learning_rate": 0.0002527732463295269, "loss": 0.1647, "num_input_tokens_seen": 6696096, "step": 3100 }, { "epoch": 0.5065252854812398, "grad_norm": 0.16621847450733185, "learning_rate": 0.0002531810766721044, "loss": 0.2937, "num_input_tokens_seen": 6706656, "step": 3105 }, { "epoch": 0.5073409461663948, "grad_norm": 0.3265911042690277, "learning_rate": 0.0002535889070146819, "loss": 0.1488, "num_input_tokens_seen": 6716704, "step": 3110 }, { "epoch": 0.5081566068515497, "grad_norm": 0.47061917185783386, "learning_rate": 0.0002539967373572594, "loss": 0.1496, "num_input_tokens_seen": 6728352, "step": 3115 }, { "epoch": 0.5089722675367048, "grad_norm": 0.8664241433143616, "learning_rate": 0.00025440456769983686, "loss": 0.1336, "num_input_tokens_seen": 6739296, "step": 3120 }, { "epoch": 0.5097879282218597, "grad_norm": 0.4536451995372772, "learning_rate": 0.00025481239804241434, "loss": 0.1314, "num_input_tokens_seen": 6750176, "step": 3125 }, { "epoch": 0.5106035889070146, "grad_norm": 0.8622775673866272, "learning_rate": 0.00025522022838499187, "loss": 0.1824, "num_input_tokens_seen": 6760288, "step": 3130 }, { "epoch": 0.5114192495921697, "grad_norm": 0.4697278141975403, "learning_rate": 0.00025562805872756934, "loss": 0.1319, "num_input_tokens_seen": 6769792, "step": 3135 }, { "epoch": 0.5122349102773246, "grad_norm": 0.8493194580078125, "learning_rate": 0.0002560358890701468, "loss": 0.169, "num_input_tokens_seen": 6779712, "step": 3140 }, { "epoch": 0.5130505709624796, "grad_norm": 0.7845749258995056, "learning_rate": 0.0002564437194127243, "loss": 0.1126, "num_input_tokens_seen": 6791232, "step": 3145 }, { "epoch": 0.5138662316476346, "grad_norm": 0.11098422855138779, "learning_rate": 0.00025685154975530177, "loss": 0.0951, "num_input_tokens_seen": 6801696, "step": 3150 }, { "epoch": 0.5146818923327896, "grad_norm": 0.016223762184381485, "learning_rate": 0.0002572593800978793, "loss": 0.0867, "num_input_tokens_seen": 6812672, "step": 3155 }, { "epoch": 0.5154975530179445, "grad_norm": 1.0923768281936646, "learning_rate": 0.0002576672104404568, "loss": 0.2778, "num_input_tokens_seen": 6822976, "step": 3160 }, { "epoch": 0.5163132137030995, "grad_norm": 0.14902295172214508, "learning_rate": 0.0002580750407830343, "loss": 0.179, "num_input_tokens_seen": 6833888, "step": 3165 }, { "epoch": 0.5171288743882545, "grad_norm": 0.7654731869697571, "learning_rate": 0.00025848287112561173, "loss": 0.3424, "num_input_tokens_seen": 6845632, "step": 3170 }, { "epoch": 0.5179445350734094, "grad_norm": 0.13922156393527985, "learning_rate": 0.0002588907014681892, "loss": 0.1384, "num_input_tokens_seen": 6856128, "step": 3175 }, { "epoch": 0.5187601957585645, "grad_norm": 0.2902519404888153, "learning_rate": 0.00025929853181076674, "loss": 0.0676, "num_input_tokens_seen": 6867840, "step": 3180 }, { "epoch": 0.5195758564437194, "grad_norm": 0.7630822658538818, "learning_rate": 0.0002597063621533442, "loss": 0.1326, "num_input_tokens_seen": 6878112, "step": 3185 }, { "epoch": 0.5203915171288744, "grad_norm": 0.21264766156673431, "learning_rate": 0.00026011419249592175, "loss": 0.1368, "num_input_tokens_seen": 6889312, "step": 3190 }, { "epoch": 0.5212071778140294, "grad_norm": 0.3382579982280731, "learning_rate": 0.00026052202283849917, "loss": 0.2752, "num_input_tokens_seen": 6899968, "step": 3195 }, { "epoch": 0.5220228384991843, "grad_norm": 0.5595989227294922, "learning_rate": 0.0002609298531810767, "loss": 0.106, "num_input_tokens_seen": 6909568, "step": 3200 }, { "epoch": 0.5228384991843393, "grad_norm": 0.2680160403251648, "learning_rate": 0.0002613376835236542, "loss": 0.0982, "num_input_tokens_seen": 6920928, "step": 3205 }, { "epoch": 0.5236541598694943, "grad_norm": 0.43840864300727844, "learning_rate": 0.00026174551386623165, "loss": 0.059, "num_input_tokens_seen": 6930144, "step": 3210 }, { "epoch": 0.5244698205546493, "grad_norm": 0.0211274903267622, "learning_rate": 0.00026215334420880913, "loss": 0.1161, "num_input_tokens_seen": 6940320, "step": 3215 }, { "epoch": 0.5252854812398042, "grad_norm": 0.3852957487106323, "learning_rate": 0.0002625611745513866, "loss": 0.2524, "num_input_tokens_seen": 6950656, "step": 3220 }, { "epoch": 0.5261011419249593, "grad_norm": 0.7833412885665894, "learning_rate": 0.00026296900489396414, "loss": 0.3218, "num_input_tokens_seen": 6961216, "step": 3225 }, { "epoch": 0.5269168026101142, "grad_norm": 0.1404338926076889, "learning_rate": 0.0002633768352365416, "loss": 0.1797, "num_input_tokens_seen": 6971808, "step": 3230 }, { "epoch": 0.5277324632952691, "grad_norm": 0.2573801577091217, "learning_rate": 0.0002637846655791191, "loss": 0.1472, "num_input_tokens_seen": 6984000, "step": 3235 }, { "epoch": 0.5285481239804242, "grad_norm": 0.347644180059433, "learning_rate": 0.00026419249592169657, "loss": 0.1489, "num_input_tokens_seen": 6994112, "step": 3240 }, { "epoch": 0.5293637846655791, "grad_norm": 0.2416415959596634, "learning_rate": 0.00026460032626427404, "loss": 0.1576, "num_input_tokens_seen": 7005184, "step": 3245 }, { "epoch": 0.5301794453507341, "grad_norm": 0.15647706389427185, "learning_rate": 0.00026500815660685157, "loss": 0.1407, "num_input_tokens_seen": 7015968, "step": 3250 }, { "epoch": 0.5309951060358891, "grad_norm": 0.6454426646232605, "learning_rate": 0.00026541598694942905, "loss": 0.1001, "num_input_tokens_seen": 7027584, "step": 3255 }, { "epoch": 0.531810766721044, "grad_norm": 0.3946031630039215, "learning_rate": 0.0002658238172920066, "loss": 0.1434, "num_input_tokens_seen": 7039424, "step": 3260 }, { "epoch": 0.532626427406199, "grad_norm": 0.1988263875246048, "learning_rate": 0.000266231647634584, "loss": 0.2385, "num_input_tokens_seen": 7049824, "step": 3265 }, { "epoch": 0.533442088091354, "grad_norm": 0.3768634498119354, "learning_rate": 0.0002666394779771615, "loss": 0.2721, "num_input_tokens_seen": 7061472, "step": 3270 }, { "epoch": 0.534257748776509, "grad_norm": 0.9409236311912537, "learning_rate": 0.000267047308319739, "loss": 0.1761, "num_input_tokens_seen": 7073152, "step": 3275 }, { "epoch": 0.5350734094616639, "grad_norm": 0.7082731127738953, "learning_rate": 0.0002674551386623165, "loss": 0.192, "num_input_tokens_seen": 7084640, "step": 3280 }, { "epoch": 0.535889070146819, "grad_norm": 0.22120733559131622, "learning_rate": 0.00026786296900489396, "loss": 0.0733, "num_input_tokens_seen": 7093088, "step": 3285 }, { "epoch": 0.5367047308319739, "grad_norm": 0.10793591290712357, "learning_rate": 0.00026827079934747144, "loss": 0.0965, "num_input_tokens_seen": 7103072, "step": 3290 }, { "epoch": 0.5375203915171288, "grad_norm": 0.1707492172718048, "learning_rate": 0.00026867862969004897, "loss": 0.0664, "num_input_tokens_seen": 7113792, "step": 3295 }, { "epoch": 0.5383360522022839, "grad_norm": 0.06590881943702698, "learning_rate": 0.00026908646003262645, "loss": 0.109, "num_input_tokens_seen": 7124800, "step": 3300 }, { "epoch": 0.5391517128874388, "grad_norm": 0.35956960916519165, "learning_rate": 0.0002694942903752039, "loss": 0.0801, "num_input_tokens_seen": 7137280, "step": 3305 }, { "epoch": 0.5399673735725938, "grad_norm": 0.5606528520584106, "learning_rate": 0.0002699021207177814, "loss": 0.0672, "num_input_tokens_seen": 7147264, "step": 3310 }, { "epoch": 0.5407830342577488, "grad_norm": 0.4383194148540497, "learning_rate": 0.0002703099510603589, "loss": 0.1905, "num_input_tokens_seen": 7157408, "step": 3315 }, { "epoch": 0.5415986949429038, "grad_norm": 0.6812806129455566, "learning_rate": 0.0002707177814029364, "loss": 0.0451, "num_input_tokens_seen": 7168224, "step": 3320 }, { "epoch": 0.5424143556280587, "grad_norm": 1.3847594261169434, "learning_rate": 0.0002711256117455139, "loss": 0.1935, "num_input_tokens_seen": 7179680, "step": 3325 }, { "epoch": 0.5432300163132137, "grad_norm": 0.5613686442375183, "learning_rate": 0.0002715334420880914, "loss": 0.0687, "num_input_tokens_seen": 7190944, "step": 3330 }, { "epoch": 0.5440456769983687, "grad_norm": 0.03551279008388519, "learning_rate": 0.00027194127243066883, "loss": 0.2025, "num_input_tokens_seen": 7200928, "step": 3335 }, { "epoch": 0.5448613376835236, "grad_norm": 0.08977734297513962, "learning_rate": 0.0002723491027732463, "loss": 0.0574, "num_input_tokens_seen": 7210400, "step": 3340 }, { "epoch": 0.5456769983686787, "grad_norm": 0.0749269425868988, "learning_rate": 0.00027275693311582384, "loss": 0.0494, "num_input_tokens_seen": 7221696, "step": 3345 }, { "epoch": 0.5464926590538336, "grad_norm": 1.869079828262329, "learning_rate": 0.0002731647634584013, "loss": 0.3457, "num_input_tokens_seen": 7232288, "step": 3350 }, { "epoch": 0.5473083197389886, "grad_norm": 0.035522375255823135, "learning_rate": 0.0002735725938009788, "loss": 0.0976, "num_input_tokens_seen": 7243808, "step": 3355 }, { "epoch": 0.5481239804241436, "grad_norm": 0.18078352510929108, "learning_rate": 0.00027398042414355627, "loss": 0.1636, "num_input_tokens_seen": 7254880, "step": 3360 }, { "epoch": 0.5489396411092985, "grad_norm": 0.5510651469230652, "learning_rate": 0.00027438825448613375, "loss": 0.0648, "num_input_tokens_seen": 7265664, "step": 3365 }, { "epoch": 0.5497553017944535, "grad_norm": 0.2560504972934723, "learning_rate": 0.0002747960848287113, "loss": 0.2246, "num_input_tokens_seen": 7275424, "step": 3370 }, { "epoch": 0.5505709624796085, "grad_norm": 0.26291438937187195, "learning_rate": 0.00027520391517128875, "loss": 0.1724, "num_input_tokens_seen": 7286496, "step": 3375 }, { "epoch": 0.5513866231647635, "grad_norm": 0.4947168529033661, "learning_rate": 0.00027561174551386623, "loss": 0.2398, "num_input_tokens_seen": 7297152, "step": 3380 }, { "epoch": 0.5522022838499184, "grad_norm": 0.21689368784427643, "learning_rate": 0.0002760195758564437, "loss": 0.1214, "num_input_tokens_seen": 7306816, "step": 3385 }, { "epoch": 0.5530179445350734, "grad_norm": 0.08921483159065247, "learning_rate": 0.00027642740619902124, "loss": 0.0529, "num_input_tokens_seen": 7317888, "step": 3390 }, { "epoch": 0.5538336052202284, "grad_norm": 0.8110567927360535, "learning_rate": 0.0002768352365415987, "loss": 0.151, "num_input_tokens_seen": 7329056, "step": 3395 }, { "epoch": 0.5546492659053833, "grad_norm": 0.1971195936203003, "learning_rate": 0.0002772430668841762, "loss": 0.0376, "num_input_tokens_seen": 7340192, "step": 3400 }, { "epoch": 0.5554649265905384, "grad_norm": 0.5013919472694397, "learning_rate": 0.00027765089722675367, "loss": 0.0647, "num_input_tokens_seen": 7351584, "step": 3405 }, { "epoch": 0.5562805872756933, "grad_norm": 0.2764725387096405, "learning_rate": 0.00027805872756933114, "loss": 0.2417, "num_input_tokens_seen": 7361632, "step": 3410 }, { "epoch": 0.5570962479608483, "grad_norm": 0.15180736780166626, "learning_rate": 0.0002784665579119087, "loss": 0.0949, "num_input_tokens_seen": 7371616, "step": 3415 }, { "epoch": 0.5579119086460033, "grad_norm": 0.03513738512992859, "learning_rate": 0.00027887438825448615, "loss": 0.1879, "num_input_tokens_seen": 7383488, "step": 3420 }, { "epoch": 0.5587275693311582, "grad_norm": 0.05455316975712776, "learning_rate": 0.00027928221859706363, "loss": 0.1062, "num_input_tokens_seen": 7394720, "step": 3425 }, { "epoch": 0.5595432300163132, "grad_norm": 0.369393527507782, "learning_rate": 0.0002796900489396411, "loss": 0.0697, "num_input_tokens_seen": 7405312, "step": 3430 }, { "epoch": 0.5603588907014682, "grad_norm": 0.5221443176269531, "learning_rate": 0.0002800978792822186, "loss": 0.0896, "num_input_tokens_seen": 7415040, "step": 3435 }, { "epoch": 0.5611745513866232, "grad_norm": 0.04115762189030647, "learning_rate": 0.0002805057096247961, "loss": 0.0852, "num_input_tokens_seen": 7426144, "step": 3440 }, { "epoch": 0.5619902120717781, "grad_norm": 0.5287455320358276, "learning_rate": 0.0002809135399673736, "loss": 0.1836, "num_input_tokens_seen": 7437024, "step": 3445 }, { "epoch": 0.5628058727569332, "grad_norm": 0.0422709584236145, "learning_rate": 0.00028132137030995106, "loss": 0.0583, "num_input_tokens_seen": 7447552, "step": 3450 }, { "epoch": 0.5636215334420881, "grad_norm": 1.0483263731002808, "learning_rate": 0.00028172920065252854, "loss": 0.2494, "num_input_tokens_seen": 7459040, "step": 3455 }, { "epoch": 0.564437194127243, "grad_norm": 0.06323757022619247, "learning_rate": 0.000282137030995106, "loss": 0.3266, "num_input_tokens_seen": 7469248, "step": 3460 }, { "epoch": 0.5652528548123981, "grad_norm": 0.149562269449234, "learning_rate": 0.00028254486133768355, "loss": 0.1012, "num_input_tokens_seen": 7480832, "step": 3465 }, { "epoch": 0.566068515497553, "grad_norm": 0.4498364329338074, "learning_rate": 0.000282952691680261, "loss": 0.1335, "num_input_tokens_seen": 7490016, "step": 3470 }, { "epoch": 0.566884176182708, "grad_norm": 0.18582139909267426, "learning_rate": 0.0002833605220228385, "loss": 0.0739, "num_input_tokens_seen": 7501184, "step": 3475 }, { "epoch": 0.567699836867863, "grad_norm": 0.42618072032928467, "learning_rate": 0.000283768352365416, "loss": 0.1605, "num_input_tokens_seen": 7511648, "step": 3480 }, { "epoch": 0.5685154975530179, "grad_norm": 0.24001431465148926, "learning_rate": 0.0002841761827079935, "loss": 0.0847, "num_input_tokens_seen": 7523104, "step": 3485 }, { "epoch": 0.5693311582381729, "grad_norm": 0.15262551605701447, "learning_rate": 0.000284584013050571, "loss": 0.2082, "num_input_tokens_seen": 7534496, "step": 3490 }, { "epoch": 0.5701468189233279, "grad_norm": 1.1047290563583374, "learning_rate": 0.0002849918433931484, "loss": 0.2414, "num_input_tokens_seen": 7545568, "step": 3495 }, { "epoch": 0.5709624796084829, "grad_norm": 0.1071564257144928, "learning_rate": 0.00028539967373572594, "loss": 0.0974, "num_input_tokens_seen": 7555808, "step": 3500 }, { "epoch": 0.5717781402936378, "grad_norm": 0.6726254224777222, "learning_rate": 0.0002858075040783034, "loss": 0.1989, "num_input_tokens_seen": 7567296, "step": 3505 }, { "epoch": 0.5725938009787929, "grad_norm": 0.18670007586479187, "learning_rate": 0.00028621533442088094, "loss": 0.1782, "num_input_tokens_seen": 7577824, "step": 3510 }, { "epoch": 0.5734094616639478, "grad_norm": 0.7367821931838989, "learning_rate": 0.0002866231647634584, "loss": 0.1201, "num_input_tokens_seen": 7588448, "step": 3515 }, { "epoch": 0.5742251223491027, "grad_norm": 0.11420662701129913, "learning_rate": 0.0002870309951060359, "loss": 0.1526, "num_input_tokens_seen": 7599264, "step": 3520 }, { "epoch": 0.5750407830342578, "grad_norm": 1.1549158096313477, "learning_rate": 0.0002874388254486134, "loss": 0.2732, "num_input_tokens_seen": 7611264, "step": 3525 }, { "epoch": 0.5758564437194127, "grad_norm": 1.2724305391311646, "learning_rate": 0.00028784665579119085, "loss": 0.1366, "num_input_tokens_seen": 7621312, "step": 3530 }, { "epoch": 0.5766721044045677, "grad_norm": 0.13163702189922333, "learning_rate": 0.0002882544861337684, "loss": 0.2843, "num_input_tokens_seen": 7632736, "step": 3535 }, { "epoch": 0.5774877650897227, "grad_norm": 0.22550056874752045, "learning_rate": 0.00028866231647634586, "loss": 0.2393, "num_input_tokens_seen": 7643296, "step": 3540 }, { "epoch": 0.5783034257748777, "grad_norm": 0.5635867714881897, "learning_rate": 0.00028907014681892333, "loss": 0.1396, "num_input_tokens_seen": 7653888, "step": 3545 }, { "epoch": 0.5791190864600326, "grad_norm": 0.1705874353647232, "learning_rate": 0.0002894779771615008, "loss": 0.089, "num_input_tokens_seen": 7664448, "step": 3550 }, { "epoch": 0.5799347471451876, "grad_norm": 0.32659459114074707, "learning_rate": 0.00028988580750407834, "loss": 0.0754, "num_input_tokens_seen": 7674176, "step": 3555 }, { "epoch": 0.5807504078303426, "grad_norm": 0.4205467998981476, "learning_rate": 0.0002902936378466558, "loss": 0.1165, "num_input_tokens_seen": 7685184, "step": 3560 }, { "epoch": 0.5815660685154975, "grad_norm": 1.7951291799545288, "learning_rate": 0.00029070146818923324, "loss": 0.2567, "num_input_tokens_seen": 7696064, "step": 3565 }, { "epoch": 0.5823817292006526, "grad_norm": 0.15837207436561584, "learning_rate": 0.00029110929853181077, "loss": 0.2791, "num_input_tokens_seen": 7707328, "step": 3570 }, { "epoch": 0.5831973898858075, "grad_norm": 0.20898975431919098, "learning_rate": 0.00029151712887438825, "loss": 0.0736, "num_input_tokens_seen": 7718368, "step": 3575 }, { "epoch": 0.5840130505709625, "grad_norm": 1.0188244581222534, "learning_rate": 0.0002919249592169658, "loss": 0.1135, "num_input_tokens_seen": 7729888, "step": 3580 }, { "epoch": 0.5848287112561175, "grad_norm": 1.0055124759674072, "learning_rate": 0.00029233278955954325, "loss": 0.2485, "num_input_tokens_seen": 7739424, "step": 3585 }, { "epoch": 0.5856443719412724, "grad_norm": 1.2235937118530273, "learning_rate": 0.0002927406199021207, "loss": 0.1966, "num_input_tokens_seen": 7748832, "step": 3590 }, { "epoch": 0.5864600326264274, "grad_norm": 0.947248637676239, "learning_rate": 0.0002931484502446982, "loss": 0.0973, "num_input_tokens_seen": 7760128, "step": 3595 }, { "epoch": 0.5872756933115824, "grad_norm": 0.5701817870140076, "learning_rate": 0.0002935562805872757, "loss": 0.1069, "num_input_tokens_seen": 7770688, "step": 3600 }, { "epoch": 0.5880913539967374, "grad_norm": 0.41673779487609863, "learning_rate": 0.0002939641109298532, "loss": 0.1512, "num_input_tokens_seen": 7782304, "step": 3605 }, { "epoch": 0.5889070146818923, "grad_norm": 0.6106691360473633, "learning_rate": 0.0002943719412724307, "loss": 0.1987, "num_input_tokens_seen": 7793184, "step": 3610 }, { "epoch": 0.5897226753670473, "grad_norm": 0.19900889694690704, "learning_rate": 0.00029477977161500817, "loss": 0.1869, "num_input_tokens_seen": 7803648, "step": 3615 }, { "epoch": 0.5905383360522023, "grad_norm": 0.19333691895008087, "learning_rate": 0.00029518760195758564, "loss": 0.1128, "num_input_tokens_seen": 7814272, "step": 3620 }, { "epoch": 0.5913539967373572, "grad_norm": 0.2754856050014496, "learning_rate": 0.0002955954323001631, "loss": 0.1743, "num_input_tokens_seen": 7825120, "step": 3625 }, { "epoch": 0.5921696574225123, "grad_norm": 0.9911066889762878, "learning_rate": 0.00029600326264274065, "loss": 0.135, "num_input_tokens_seen": 7837440, "step": 3630 }, { "epoch": 0.5929853181076672, "grad_norm": 0.45625540614128113, "learning_rate": 0.00029641109298531807, "loss": 0.0608, "num_input_tokens_seen": 7848064, "step": 3635 }, { "epoch": 0.5938009787928222, "grad_norm": 0.22430795431137085, "learning_rate": 0.0002968189233278956, "loss": 0.1347, "num_input_tokens_seen": 7858816, "step": 3640 }, { "epoch": 0.5946166394779772, "grad_norm": 1.5107712745666504, "learning_rate": 0.0002972267536704731, "loss": 0.2535, "num_input_tokens_seen": 7869504, "step": 3645 }, { "epoch": 0.5954323001631321, "grad_norm": 0.162008136510849, "learning_rate": 0.0002976345840130506, "loss": 0.0354, "num_input_tokens_seen": 7879712, "step": 3650 }, { "epoch": 0.5962479608482871, "grad_norm": 0.5064948201179504, "learning_rate": 0.0002980424143556281, "loss": 0.0763, "num_input_tokens_seen": 7890368, "step": 3655 }, { "epoch": 0.5970636215334421, "grad_norm": 0.48915374279022217, "learning_rate": 0.0002984502446982055, "loss": 0.0591, "num_input_tokens_seen": 7900704, "step": 3660 }, { "epoch": 0.5978792822185971, "grad_norm": 1.123414158821106, "learning_rate": 0.00029885807504078304, "loss": 0.2029, "num_input_tokens_seen": 7911776, "step": 3665 }, { "epoch": 0.598694942903752, "grad_norm": 0.43261805176734924, "learning_rate": 0.0002992659053833605, "loss": 0.2295, "num_input_tokens_seen": 7921984, "step": 3670 }, { "epoch": 0.5995106035889071, "grad_norm": 0.5339052677154541, "learning_rate": 0.00029967373572593805, "loss": 0.2408, "num_input_tokens_seen": 7934080, "step": 3675 }, { "epoch": 0.600326264274062, "grad_norm": 0.15146224200725555, "learning_rate": 0.0003000815660685155, "loss": 0.1547, "num_input_tokens_seen": 7944736, "step": 3680 }, { "epoch": 0.6011419249592169, "grad_norm": 0.11091198772192001, "learning_rate": 0.000300489396411093, "loss": 0.1293, "num_input_tokens_seen": 7956256, "step": 3685 }, { "epoch": 0.601957585644372, "grad_norm": 0.07787430286407471, "learning_rate": 0.0003008972267536705, "loss": 0.1261, "num_input_tokens_seen": 7967808, "step": 3690 }, { "epoch": 0.6027732463295269, "grad_norm": 0.16613641381263733, "learning_rate": 0.00030130505709624795, "loss": 0.0563, "num_input_tokens_seen": 7979648, "step": 3695 }, { "epoch": 0.6035889070146819, "grad_norm": 0.32812386751174927, "learning_rate": 0.0003017128874388255, "loss": 0.0928, "num_input_tokens_seen": 7990944, "step": 3700 }, { "epoch": 0.6044045676998369, "grad_norm": 0.3859018385410309, "learning_rate": 0.0003021207177814029, "loss": 0.1239, "num_input_tokens_seen": 8001568, "step": 3705 }, { "epoch": 0.6052202283849919, "grad_norm": 0.14347811043262482, "learning_rate": 0.00030252854812398044, "loss": 0.2942, "num_input_tokens_seen": 8013376, "step": 3710 }, { "epoch": 0.6060358890701468, "grad_norm": 0.922331690788269, "learning_rate": 0.0003029363784665579, "loss": 0.2152, "num_input_tokens_seen": 8024512, "step": 3715 }, { "epoch": 0.6068515497553018, "grad_norm": 0.7256356477737427, "learning_rate": 0.0003033442088091354, "loss": 0.1756, "num_input_tokens_seen": 8035520, "step": 3720 }, { "epoch": 0.6076672104404568, "grad_norm": 0.32077616453170776, "learning_rate": 0.0003037520391517129, "loss": 0.1294, "num_input_tokens_seen": 8046912, "step": 3725 }, { "epoch": 0.6084828711256117, "grad_norm": 0.3042055666446686, "learning_rate": 0.00030415986949429034, "loss": 0.1097, "num_input_tokens_seen": 8057472, "step": 3730 }, { "epoch": 0.6092985318107668, "grad_norm": 1.0995193719863892, "learning_rate": 0.00030456769983686787, "loss": 0.3589, "num_input_tokens_seen": 8068576, "step": 3735 }, { "epoch": 0.6101141924959217, "grad_norm": 0.34954649209976196, "learning_rate": 0.00030497553017944535, "loss": 0.1123, "num_input_tokens_seen": 8080320, "step": 3740 }, { "epoch": 0.6109298531810766, "grad_norm": 0.32164424657821655, "learning_rate": 0.0003053833605220229, "loss": 0.2047, "num_input_tokens_seen": 8090144, "step": 3745 }, { "epoch": 0.6117455138662317, "grad_norm": 0.20417018234729767, "learning_rate": 0.00030579119086460036, "loss": 0.0666, "num_input_tokens_seen": 8101472, "step": 3750 }, { "epoch": 0.6125611745513866, "grad_norm": 0.8525700569152832, "learning_rate": 0.0003061990212071778, "loss": 0.1242, "num_input_tokens_seen": 8112736, "step": 3755 }, { "epoch": 0.6133768352365416, "grad_norm": 0.8774811625480652, "learning_rate": 0.0003066068515497553, "loss": 0.1184, "num_input_tokens_seen": 8123296, "step": 3760 }, { "epoch": 0.6141924959216966, "grad_norm": 0.4467347264289856, "learning_rate": 0.0003070146818923328, "loss": 0.1969, "num_input_tokens_seen": 8133248, "step": 3765 }, { "epoch": 0.6150081566068516, "grad_norm": 0.09053094685077667, "learning_rate": 0.0003074225122349103, "loss": 0.1198, "num_input_tokens_seen": 8144448, "step": 3770 }, { "epoch": 0.6158238172920065, "grad_norm": 0.6725847721099854, "learning_rate": 0.00030783034257748774, "loss": 0.1982, "num_input_tokens_seen": 8154656, "step": 3775 }, { "epoch": 0.6166394779771615, "grad_norm": 0.3370138108730316, "learning_rate": 0.00030823817292006527, "loss": 0.0528, "num_input_tokens_seen": 8165888, "step": 3780 }, { "epoch": 0.6174551386623165, "grad_norm": 1.0672154426574707, "learning_rate": 0.00030864600326264275, "loss": 0.0759, "num_input_tokens_seen": 8175744, "step": 3785 }, { "epoch": 0.6182707993474714, "grad_norm": 0.0875239148736, "learning_rate": 0.0003090538336052202, "loss": 0.1605, "num_input_tokens_seen": 8186880, "step": 3790 }, { "epoch": 0.6190864600326265, "grad_norm": 0.2635380029678345, "learning_rate": 0.00030946166394779775, "loss": 0.1947, "num_input_tokens_seen": 8196192, "step": 3795 }, { "epoch": 0.6199021207177814, "grad_norm": 0.2070256769657135, "learning_rate": 0.0003098694942903752, "loss": 0.1919, "num_input_tokens_seen": 8205632, "step": 3800 }, { "epoch": 0.6207177814029364, "grad_norm": 0.3485415577888489, "learning_rate": 0.0003102773246329527, "loss": 0.1056, "num_input_tokens_seen": 8216032, "step": 3805 }, { "epoch": 0.6215334420880914, "grad_norm": 0.5896238684654236, "learning_rate": 0.0003106851549755302, "loss": 0.1347, "num_input_tokens_seen": 8228192, "step": 3810 }, { "epoch": 0.6223491027732463, "grad_norm": 0.1791100800037384, "learning_rate": 0.00031109298531810766, "loss": 0.143, "num_input_tokens_seen": 8239136, "step": 3815 }, { "epoch": 0.6231647634584013, "grad_norm": 0.2757539451122284, "learning_rate": 0.0003115008156606852, "loss": 0.0687, "num_input_tokens_seen": 8249184, "step": 3820 }, { "epoch": 0.6239804241435563, "grad_norm": 0.47659072279930115, "learning_rate": 0.0003119086460032626, "loss": 0.1604, "num_input_tokens_seen": 8260288, "step": 3825 }, { "epoch": 0.6247960848287113, "grad_norm": 0.1077791154384613, "learning_rate": 0.00031231647634584014, "loss": 0.2936, "num_input_tokens_seen": 8271232, "step": 3830 }, { "epoch": 0.6256117455138662, "grad_norm": 0.2414446771144867, "learning_rate": 0.0003127243066884176, "loss": 0.1355, "num_input_tokens_seen": 8281248, "step": 3835 }, { "epoch": 0.6264274061990212, "grad_norm": 0.13507677614688873, "learning_rate": 0.00031313213703099515, "loss": 0.1825, "num_input_tokens_seen": 8292864, "step": 3840 }, { "epoch": 0.6272430668841762, "grad_norm": 0.26294106245040894, "learning_rate": 0.0003135399673735726, "loss": 0.1879, "num_input_tokens_seen": 8303488, "step": 3845 }, { "epoch": 0.6280587275693311, "grad_norm": 0.3151414096355438, "learning_rate": 0.00031394779771615005, "loss": 0.217, "num_input_tokens_seen": 8315008, "step": 3850 }, { "epoch": 0.6288743882544862, "grad_norm": 0.38112303614616394, "learning_rate": 0.0003143556280587276, "loss": 0.1261, "num_input_tokens_seen": 8325696, "step": 3855 }, { "epoch": 0.6296900489396411, "grad_norm": 0.07693363726139069, "learning_rate": 0.00031476345840130506, "loss": 0.1513, "num_input_tokens_seen": 8336960, "step": 3860 }, { "epoch": 0.6305057096247961, "grad_norm": 0.24605275690555573, "learning_rate": 0.0003151712887438826, "loss": 0.1153, "num_input_tokens_seen": 8348544, "step": 3865 }, { "epoch": 0.6313213703099511, "grad_norm": 0.48214077949523926, "learning_rate": 0.00031557911908646, "loss": 0.1825, "num_input_tokens_seen": 8359072, "step": 3870 }, { "epoch": 0.632137030995106, "grad_norm": 0.4101504385471344, "learning_rate": 0.00031598694942903754, "loss": 0.1694, "num_input_tokens_seen": 8369184, "step": 3875 }, { "epoch": 0.632952691680261, "grad_norm": 0.06815630197525024, "learning_rate": 0.000316394779771615, "loss": 0.0905, "num_input_tokens_seen": 8380352, "step": 3880 }, { "epoch": 0.633768352365416, "grad_norm": 0.11706419289112091, "learning_rate": 0.0003168026101141925, "loss": 0.0495, "num_input_tokens_seen": 8390880, "step": 3885 }, { "epoch": 0.634584013050571, "grad_norm": 0.41242027282714844, "learning_rate": 0.00031721044045677, "loss": 0.1514, "num_input_tokens_seen": 8402176, "step": 3890 }, { "epoch": 0.6353996737357259, "grad_norm": 0.09979145973920822, "learning_rate": 0.00031761827079934744, "loss": 0.0868, "num_input_tokens_seen": 8413280, "step": 3895 }, { "epoch": 0.636215334420881, "grad_norm": 0.043393541127443314, "learning_rate": 0.000318026101141925, "loss": 0.046, "num_input_tokens_seen": 8424960, "step": 3900 }, { "epoch": 0.6370309951060359, "grad_norm": 0.12201043963432312, "learning_rate": 0.00031843393148450245, "loss": 0.0594, "num_input_tokens_seen": 8436160, "step": 3905 }, { "epoch": 0.6378466557911908, "grad_norm": 1.5243480205535889, "learning_rate": 0.00031884176182708, "loss": 0.1185, "num_input_tokens_seen": 8445856, "step": 3910 }, { "epoch": 0.6386623164763459, "grad_norm": 1.0513534545898438, "learning_rate": 0.00031924959216965746, "loss": 0.5507, "num_input_tokens_seen": 8455264, "step": 3915 }, { "epoch": 0.6394779771615008, "grad_norm": 0.4103231132030487, "learning_rate": 0.0003196574225122349, "loss": 0.1775, "num_input_tokens_seen": 8465984, "step": 3920 }, { "epoch": 0.6402936378466558, "grad_norm": 1.263214349746704, "learning_rate": 0.0003200652528548124, "loss": 0.2106, "num_input_tokens_seen": 8477344, "step": 3925 }, { "epoch": 0.6411092985318108, "grad_norm": 0.15126630663871765, "learning_rate": 0.0003204730831973899, "loss": 0.1166, "num_input_tokens_seen": 8488544, "step": 3930 }, { "epoch": 0.6419249592169658, "grad_norm": 0.36729708313941956, "learning_rate": 0.0003208809135399674, "loss": 0.1048, "num_input_tokens_seen": 8499296, "step": 3935 }, { "epoch": 0.6427406199021207, "grad_norm": 0.9425373673439026, "learning_rate": 0.00032128874388254484, "loss": 0.1382, "num_input_tokens_seen": 8510912, "step": 3940 }, { "epoch": 0.6435562805872757, "grad_norm": 0.2685391306877136, "learning_rate": 0.0003216965742251223, "loss": 0.1376, "num_input_tokens_seen": 8521920, "step": 3945 }, { "epoch": 0.6443719412724307, "grad_norm": 0.489003986120224, "learning_rate": 0.00032210440456769985, "loss": 0.1312, "num_input_tokens_seen": 8532448, "step": 3950 }, { "epoch": 0.6451876019575856, "grad_norm": 0.14087380468845367, "learning_rate": 0.0003225122349102773, "loss": 0.1139, "num_input_tokens_seen": 8543936, "step": 3955 }, { "epoch": 0.6460032626427407, "grad_norm": 0.11659581959247589, "learning_rate": 0.00032292006525285486, "loss": 0.0836, "num_input_tokens_seen": 8556160, "step": 3960 }, { "epoch": 0.6468189233278956, "grad_norm": 0.15926118195056915, "learning_rate": 0.0003233278955954323, "loss": 0.0522, "num_input_tokens_seen": 8566816, "step": 3965 }, { "epoch": 0.6476345840130505, "grad_norm": 0.35616812109947205, "learning_rate": 0.0003237357259380098, "loss": 0.2619, "num_input_tokens_seen": 8576992, "step": 3970 }, { "epoch": 0.6484502446982056, "grad_norm": 0.46962714195251465, "learning_rate": 0.0003241435562805873, "loss": 0.2148, "num_input_tokens_seen": 8588224, "step": 3975 }, { "epoch": 0.6492659053833605, "grad_norm": 0.061958249658346176, "learning_rate": 0.00032455138662316476, "loss": 0.0982, "num_input_tokens_seen": 8598976, "step": 3980 }, { "epoch": 0.6500815660685155, "grad_norm": 0.13584494590759277, "learning_rate": 0.0003249592169657423, "loss": 0.0892, "num_input_tokens_seen": 8609216, "step": 3985 }, { "epoch": 0.6508972267536705, "grad_norm": 0.8373795747756958, "learning_rate": 0.0003253670473083197, "loss": 0.1666, "num_input_tokens_seen": 8620032, "step": 3990 }, { "epoch": 0.6517128874388255, "grad_norm": 0.06975753605365753, "learning_rate": 0.00032577487765089724, "loss": 0.2013, "num_input_tokens_seen": 8631456, "step": 3995 }, { "epoch": 0.6525285481239804, "grad_norm": 0.164698988199234, "learning_rate": 0.0003261827079934747, "loss": 0.0616, "num_input_tokens_seen": 8641696, "step": 4000 }, { "epoch": 0.6533442088091354, "grad_norm": 0.8426600098609924, "learning_rate": 0.00032659053833605225, "loss": 0.1575, "num_input_tokens_seen": 8652576, "step": 4005 }, { "epoch": 0.6541598694942904, "grad_norm": 0.6252540349960327, "learning_rate": 0.0003269983686786297, "loss": 0.2312, "num_input_tokens_seen": 8662464, "step": 4010 }, { "epoch": 0.6549755301794453, "grad_norm": 0.43457654118537903, "learning_rate": 0.00032740619902120715, "loss": 0.0936, "num_input_tokens_seen": 8673312, "step": 4015 }, { "epoch": 0.6557911908646004, "grad_norm": 0.4076187312602997, "learning_rate": 0.0003278140293637847, "loss": 0.1401, "num_input_tokens_seen": 8683904, "step": 4020 }, { "epoch": 0.6566068515497553, "grad_norm": 0.28343382477760315, "learning_rate": 0.00032822185970636216, "loss": 0.0753, "num_input_tokens_seen": 8694944, "step": 4025 }, { "epoch": 0.6574225122349103, "grad_norm": 0.19631558656692505, "learning_rate": 0.0003286296900489397, "loss": 0.0465, "num_input_tokens_seen": 8706400, "step": 4030 }, { "epoch": 0.6582381729200653, "grad_norm": 0.06990889459848404, "learning_rate": 0.0003290375203915171, "loss": 0.1025, "num_input_tokens_seen": 8717504, "step": 4035 }, { "epoch": 0.6590538336052202, "grad_norm": 0.39275580644607544, "learning_rate": 0.00032944535073409464, "loss": 0.0554, "num_input_tokens_seen": 8728736, "step": 4040 }, { "epoch": 0.6598694942903752, "grad_norm": 0.7639222741127014, "learning_rate": 0.0003298531810766721, "loss": 0.3712, "num_input_tokens_seen": 8740032, "step": 4045 }, { "epoch": 0.6606851549755302, "grad_norm": 0.0512065626680851, "learning_rate": 0.0003302610114192496, "loss": 0.1152, "num_input_tokens_seen": 8749280, "step": 4050 }, { "epoch": 0.6615008156606852, "grad_norm": 0.07373015582561493, "learning_rate": 0.0003306688417618271, "loss": 0.0288, "num_input_tokens_seen": 8760320, "step": 4055 }, { "epoch": 0.6623164763458401, "grad_norm": 0.5321258902549744, "learning_rate": 0.00033107667210440455, "loss": 0.1337, "num_input_tokens_seen": 8771104, "step": 4060 }, { "epoch": 0.6631321370309952, "grad_norm": 0.03265725448727608, "learning_rate": 0.0003314845024469821, "loss": 0.0709, "num_input_tokens_seen": 8781664, "step": 4065 }, { "epoch": 0.6639477977161501, "grad_norm": 0.15506812930107117, "learning_rate": 0.00033189233278955955, "loss": 0.0715, "num_input_tokens_seen": 8792128, "step": 4070 }, { "epoch": 0.664763458401305, "grad_norm": 0.3224940896034241, "learning_rate": 0.00033230016313213703, "loss": 0.2244, "num_input_tokens_seen": 8803616, "step": 4075 }, { "epoch": 0.6655791190864601, "grad_norm": 0.6347690224647522, "learning_rate": 0.0003327079934747145, "loss": 0.0932, "num_input_tokens_seen": 8813696, "step": 4080 }, { "epoch": 0.666394779771615, "grad_norm": 0.6844305396080017, "learning_rate": 0.000333115823817292, "loss": 0.1977, "num_input_tokens_seen": 8824672, "step": 4085 }, { "epoch": 0.66721044045677, "grad_norm": 0.053750500082969666, "learning_rate": 0.0003335236541598695, "loss": 0.0225, "num_input_tokens_seen": 8836256, "step": 4090 }, { "epoch": 0.668026101141925, "grad_norm": 0.0979921966791153, "learning_rate": 0.000333931484502447, "loss": 0.1962, "num_input_tokens_seen": 8847168, "step": 4095 }, { "epoch": 0.6688417618270799, "grad_norm": 0.7607890367507935, "learning_rate": 0.0003343393148450245, "loss": 0.1747, "num_input_tokens_seen": 8855008, "step": 4100 }, { "epoch": 0.6696574225122349, "grad_norm": 0.2811325490474701, "learning_rate": 0.00033474714518760194, "loss": 0.1966, "num_input_tokens_seen": 8865728, "step": 4105 }, { "epoch": 0.6704730831973899, "grad_norm": 0.14467936754226685, "learning_rate": 0.0003351549755301794, "loss": 0.1633, "num_input_tokens_seen": 8877440, "step": 4110 }, { "epoch": 0.6712887438825449, "grad_norm": 0.5608596205711365, "learning_rate": 0.00033556280587275695, "loss": 0.2213, "num_input_tokens_seen": 8889248, "step": 4115 }, { "epoch": 0.6721044045676998, "grad_norm": 0.36362361907958984, "learning_rate": 0.0003359706362153344, "loss": 0.0987, "num_input_tokens_seen": 8900640, "step": 4120 }, { "epoch": 0.6729200652528549, "grad_norm": 0.05654023960232735, "learning_rate": 0.00033637846655791196, "loss": 0.1707, "num_input_tokens_seen": 8911232, "step": 4125 }, { "epoch": 0.6737357259380098, "grad_norm": 0.09752820432186127, "learning_rate": 0.0003367862969004894, "loss": 0.1078, "num_input_tokens_seen": 8921952, "step": 4130 }, { "epoch": 0.6745513866231647, "grad_norm": 0.08624225109815598, "learning_rate": 0.0003371941272430669, "loss": 0.1293, "num_input_tokens_seen": 8933856, "step": 4135 }, { "epoch": 0.6753670473083198, "grad_norm": 0.14989924430847168, "learning_rate": 0.0003376019575856444, "loss": 0.061, "num_input_tokens_seen": 8944800, "step": 4140 }, { "epoch": 0.6761827079934747, "grad_norm": 0.11734739691019058, "learning_rate": 0.00033800978792822186, "loss": 0.0411, "num_input_tokens_seen": 8956352, "step": 4145 }, { "epoch": 0.6769983686786297, "grad_norm": 0.35162967443466187, "learning_rate": 0.00033841761827079934, "loss": 0.0789, "num_input_tokens_seen": 8967520, "step": 4150 }, { "epoch": 0.6778140293637847, "grad_norm": 0.03362584114074707, "learning_rate": 0.0003388254486133768, "loss": 0.1638, "num_input_tokens_seen": 8977888, "step": 4155 }, { "epoch": 0.6786296900489397, "grad_norm": 0.1328830122947693, "learning_rate": 0.00033923327895595435, "loss": 0.0441, "num_input_tokens_seen": 8989440, "step": 4160 }, { "epoch": 0.6794453507340946, "grad_norm": 0.18508820235729218, "learning_rate": 0.0003396411092985318, "loss": 0.0742, "num_input_tokens_seen": 9000896, "step": 4165 }, { "epoch": 0.6802610114192496, "grad_norm": 0.026474563404917717, "learning_rate": 0.0003400489396411093, "loss": 0.1311, "num_input_tokens_seen": 9011808, "step": 4170 }, { "epoch": 0.6810766721044046, "grad_norm": 0.793641984462738, "learning_rate": 0.0003404567699836868, "loss": 0.1348, "num_input_tokens_seen": 9024096, "step": 4175 }, { "epoch": 0.6818923327895595, "grad_norm": 0.07803583890199661, "learning_rate": 0.00034086460032626425, "loss": 0.2088, "num_input_tokens_seen": 9035648, "step": 4180 }, { "epoch": 0.6827079934747146, "grad_norm": 0.08670012652873993, "learning_rate": 0.0003412724306688418, "loss": 0.0742, "num_input_tokens_seen": 9045920, "step": 4185 }, { "epoch": 0.6835236541598695, "grad_norm": 0.4543367028236389, "learning_rate": 0.00034168026101141926, "loss": 0.0632, "num_input_tokens_seen": 9057088, "step": 4190 }, { "epoch": 0.6843393148450244, "grad_norm": 0.41005179286003113, "learning_rate": 0.0003420880913539968, "loss": 0.1719, "num_input_tokens_seen": 9066208, "step": 4195 }, { "epoch": 0.6851549755301795, "grad_norm": 0.7371568083763123, "learning_rate": 0.0003424959216965742, "loss": 0.1812, "num_input_tokens_seen": 9077120, "step": 4200 }, { "epoch": 0.6859706362153344, "grad_norm": 0.340640127658844, "learning_rate": 0.0003429037520391517, "loss": 0.2524, "num_input_tokens_seen": 9086592, "step": 4205 }, { "epoch": 0.6867862969004894, "grad_norm": 0.18895219266414642, "learning_rate": 0.0003433115823817292, "loss": 0.1456, "num_input_tokens_seen": 9096864, "step": 4210 }, { "epoch": 0.6876019575856444, "grad_norm": 0.18842971324920654, "learning_rate": 0.0003437194127243067, "loss": 0.1622, "num_input_tokens_seen": 9107424, "step": 4215 }, { "epoch": 0.6884176182707994, "grad_norm": 0.0588395819067955, "learning_rate": 0.00034412724306688417, "loss": 0.0809, "num_input_tokens_seen": 9117696, "step": 4220 }, { "epoch": 0.6892332789559543, "grad_norm": 0.08728792518377304, "learning_rate": 0.00034453507340946165, "loss": 0.0914, "num_input_tokens_seen": 9128096, "step": 4225 }, { "epoch": 0.6900489396411092, "grad_norm": 1.0194220542907715, "learning_rate": 0.0003449429037520392, "loss": 0.1346, "num_input_tokens_seen": 9139104, "step": 4230 }, { "epoch": 0.6908646003262643, "grad_norm": 0.4258745014667511, "learning_rate": 0.00034535073409461666, "loss": 0.1032, "num_input_tokens_seen": 9149408, "step": 4235 }, { "epoch": 0.6916802610114192, "grad_norm": 0.0936698392033577, "learning_rate": 0.00034575856443719413, "loss": 0.064, "num_input_tokens_seen": 9160672, "step": 4240 }, { "epoch": 0.6924959216965743, "grad_norm": 0.8383188843727112, "learning_rate": 0.0003461663947797716, "loss": 0.2815, "num_input_tokens_seen": 9171104, "step": 4245 }, { "epoch": 0.6933115823817292, "grad_norm": 0.05329615995287895, "learning_rate": 0.0003465742251223491, "loss": 0.0785, "num_input_tokens_seen": 9181440, "step": 4250 }, { "epoch": 0.6941272430668842, "grad_norm": 0.044270992279052734, "learning_rate": 0.0003469820554649266, "loss": 0.0703, "num_input_tokens_seen": 9191488, "step": 4255 }, { "epoch": 0.6949429037520392, "grad_norm": 1.0437971353530884, "learning_rate": 0.0003473898858075041, "loss": 0.2139, "num_input_tokens_seen": 9203392, "step": 4260 }, { "epoch": 0.6957585644371941, "grad_norm": 0.3245795667171478, "learning_rate": 0.0003477977161500816, "loss": 0.2403, "num_input_tokens_seen": 9214368, "step": 4265 }, { "epoch": 0.6965742251223491, "grad_norm": 0.08259432762861252, "learning_rate": 0.00034820554649265905, "loss": 0.062, "num_input_tokens_seen": 9225248, "step": 4270 }, { "epoch": 0.697389885807504, "grad_norm": 0.269199401140213, "learning_rate": 0.0003486133768352365, "loss": 0.1181, "num_input_tokens_seen": 9236640, "step": 4275 }, { "epoch": 0.6982055464926591, "grad_norm": 0.38677653670310974, "learning_rate": 0.00034902120717781405, "loss": 0.0849, "num_input_tokens_seen": 9248000, "step": 4280 }, { "epoch": 0.699021207177814, "grad_norm": 0.1369486302137375, "learning_rate": 0.00034942903752039153, "loss": 0.267, "num_input_tokens_seen": 9258752, "step": 4285 }, { "epoch": 0.6998368678629691, "grad_norm": 0.44952574372291565, "learning_rate": 0.000349836867862969, "loss": 0.1027, "num_input_tokens_seen": 9269376, "step": 4290 }, { "epoch": 0.700652528548124, "grad_norm": 0.12477151304483414, "learning_rate": 0.0003502446982055465, "loss": 0.2004, "num_input_tokens_seen": 9281312, "step": 4295 }, { "epoch": 0.7014681892332789, "grad_norm": 0.11125738173723221, "learning_rate": 0.00035065252854812396, "loss": 0.0653, "num_input_tokens_seen": 9291936, "step": 4300 }, { "epoch": 0.702283849918434, "grad_norm": 0.1944471299648285, "learning_rate": 0.0003510603588907015, "loss": 0.1115, "num_input_tokens_seen": 9302528, "step": 4305 }, { "epoch": 0.7030995106035889, "grad_norm": 0.08961895108222961, "learning_rate": 0.00035146818923327897, "loss": 0.0653, "num_input_tokens_seen": 9313536, "step": 4310 }, { "epoch": 0.7039151712887439, "grad_norm": 0.043053120374679565, "learning_rate": 0.00035187601957585644, "loss": 0.1076, "num_input_tokens_seen": 9324384, "step": 4315 }, { "epoch": 0.7047308319738989, "grad_norm": 0.5048277378082275, "learning_rate": 0.0003522838499184339, "loss": 0.2702, "num_input_tokens_seen": 9335104, "step": 4320 }, { "epoch": 0.7055464926590538, "grad_norm": 0.516410768032074, "learning_rate": 0.00035269168026101145, "loss": 0.2341, "num_input_tokens_seen": 9345824, "step": 4325 }, { "epoch": 0.7063621533442088, "grad_norm": 0.15441341698169708, "learning_rate": 0.0003530995106035889, "loss": 0.0657, "num_input_tokens_seen": 9357088, "step": 4330 }, { "epoch": 0.7071778140293637, "grad_norm": 0.7144105434417725, "learning_rate": 0.0003535073409461664, "loss": 0.2489, "num_input_tokens_seen": 9366784, "step": 4335 }, { "epoch": 0.7079934747145188, "grad_norm": 0.1695648729801178, "learning_rate": 0.0003539151712887439, "loss": 0.1474, "num_input_tokens_seen": 9377024, "step": 4340 }, { "epoch": 0.7088091353996737, "grad_norm": 0.2768016457557678, "learning_rate": 0.00035432300163132136, "loss": 0.1546, "num_input_tokens_seen": 9389152, "step": 4345 }, { "epoch": 0.7096247960848288, "grad_norm": 0.1949160099029541, "learning_rate": 0.0003547308319738989, "loss": 0.1935, "num_input_tokens_seen": 9399616, "step": 4350 }, { "epoch": 0.7104404567699837, "grad_norm": 0.09738589823246002, "learning_rate": 0.00035513866231647636, "loss": 0.0919, "num_input_tokens_seen": 9410176, "step": 4355 }, { "epoch": 0.7112561174551386, "grad_norm": 0.14508315920829773, "learning_rate": 0.0003555464926590539, "loss": 0.1165, "num_input_tokens_seen": 9421760, "step": 4360 }, { "epoch": 0.7120717781402937, "grad_norm": 0.07993219792842865, "learning_rate": 0.0003559543230016313, "loss": 0.0539, "num_input_tokens_seen": 9432960, "step": 4365 }, { "epoch": 0.7128874388254486, "grad_norm": 0.3040957450866699, "learning_rate": 0.0003563621533442088, "loss": 0.3573, "num_input_tokens_seen": 9443936, "step": 4370 }, { "epoch": 0.7137030995106036, "grad_norm": 0.043079450726509094, "learning_rate": 0.0003567699836867863, "loss": 0.0536, "num_input_tokens_seen": 9453120, "step": 4375 }, { "epoch": 0.7145187601957586, "grad_norm": 0.12413550168275833, "learning_rate": 0.0003571778140293638, "loss": 0.1671, "num_input_tokens_seen": 9464480, "step": 4380 }, { "epoch": 0.7153344208809136, "grad_norm": 0.45056381821632385, "learning_rate": 0.0003575856443719413, "loss": 0.1845, "num_input_tokens_seen": 9475360, "step": 4385 }, { "epoch": 0.7161500815660685, "grad_norm": 0.3023238182067871, "learning_rate": 0.00035799347471451875, "loss": 0.2689, "num_input_tokens_seen": 9484992, "step": 4390 }, { "epoch": 0.7169657422512234, "grad_norm": 0.5121544003486633, "learning_rate": 0.0003584013050570963, "loss": 0.0575, "num_input_tokens_seen": 9495296, "step": 4395 }, { "epoch": 0.7177814029363785, "grad_norm": 0.2982773184776306, "learning_rate": 0.00035880913539967376, "loss": 0.0646, "num_input_tokens_seen": 9506592, "step": 4400 }, { "epoch": 0.7185970636215334, "grad_norm": 0.4488369822502136, "learning_rate": 0.00035921696574225124, "loss": 0.1364, "num_input_tokens_seen": 9517056, "step": 4405 }, { "epoch": 0.7194127243066885, "grad_norm": 0.037081990391016006, "learning_rate": 0.0003596247960848287, "loss": 0.0476, "num_input_tokens_seen": 9527808, "step": 4410 }, { "epoch": 0.7202283849918434, "grad_norm": 0.4101148545742035, "learning_rate": 0.0003600326264274062, "loss": 0.0485, "num_input_tokens_seen": 9538624, "step": 4415 }, { "epoch": 0.7210440456769984, "grad_norm": 0.6088188290596008, "learning_rate": 0.0003604404567699837, "loss": 0.1898, "num_input_tokens_seen": 9547744, "step": 4420 }, { "epoch": 0.7218597063621534, "grad_norm": 0.4636387526988983, "learning_rate": 0.0003608482871125612, "loss": 0.0678, "num_input_tokens_seen": 9559072, "step": 4425 }, { "epoch": 0.7226753670473083, "grad_norm": 0.4632618725299835, "learning_rate": 0.0003612561174551386, "loss": 0.1456, "num_input_tokens_seen": 9570528, "step": 4430 }, { "epoch": 0.7234910277324633, "grad_norm": 0.5356050729751587, "learning_rate": 0.00036166394779771615, "loss": 0.1853, "num_input_tokens_seen": 9580800, "step": 4435 }, { "epoch": 0.7243066884176182, "grad_norm": 0.06347585469484329, "learning_rate": 0.0003620717781402936, "loss": 0.1803, "num_input_tokens_seen": 9592064, "step": 4440 }, { "epoch": 0.7251223491027733, "grad_norm": 0.03401469439268112, "learning_rate": 0.00036247960848287116, "loss": 0.1577, "num_input_tokens_seen": 9603744, "step": 4445 }, { "epoch": 0.7259380097879282, "grad_norm": 0.08639135956764221, "learning_rate": 0.00036288743882544863, "loss": 0.247, "num_input_tokens_seen": 9615008, "step": 4450 }, { "epoch": 0.7267536704730831, "grad_norm": 0.52489173412323, "learning_rate": 0.0003632952691680261, "loss": 0.198, "num_input_tokens_seen": 9625376, "step": 4455 }, { "epoch": 0.7275693311582382, "grad_norm": 0.23638580739498138, "learning_rate": 0.0003637030995106036, "loss": 0.1478, "num_input_tokens_seen": 9636128, "step": 4460 }, { "epoch": 0.7283849918433931, "grad_norm": 0.2670087218284607, "learning_rate": 0.00036411092985318106, "loss": 0.2096, "num_input_tokens_seen": 9647808, "step": 4465 }, { "epoch": 0.7292006525285482, "grad_norm": 0.5415324568748474, "learning_rate": 0.0003645187601957586, "loss": 0.1935, "num_input_tokens_seen": 9658496, "step": 4470 }, { "epoch": 0.7300163132137031, "grad_norm": 0.27866536378860474, "learning_rate": 0.00036492659053833607, "loss": 0.1422, "num_input_tokens_seen": 9669120, "step": 4475 }, { "epoch": 0.7308319738988581, "grad_norm": 0.20106881856918335, "learning_rate": 0.00036533442088091354, "loss": 0.1005, "num_input_tokens_seen": 9680448, "step": 4480 }, { "epoch": 0.731647634584013, "grad_norm": 0.2943683862686157, "learning_rate": 0.000365742251223491, "loss": 0.0592, "num_input_tokens_seen": 9690592, "step": 4485 }, { "epoch": 0.732463295269168, "grad_norm": 0.8741294741630554, "learning_rate": 0.00036615008156606855, "loss": 0.1675, "num_input_tokens_seen": 9701824, "step": 4490 }, { "epoch": 0.733278955954323, "grad_norm": 0.7757192254066467, "learning_rate": 0.00036655791190864603, "loss": 0.2252, "num_input_tokens_seen": 9712384, "step": 4495 }, { "epoch": 0.734094616639478, "grad_norm": 0.24651999771595, "learning_rate": 0.0003669657422512235, "loss": 0.2337, "num_input_tokens_seen": 9723200, "step": 4500 }, { "epoch": 0.734910277324633, "grad_norm": 0.1742609441280365, "learning_rate": 0.000367373572593801, "loss": 0.138, "num_input_tokens_seen": 9733536, "step": 4505 }, { "epoch": 0.7357259380097879, "grad_norm": 0.20956604182720184, "learning_rate": 0.00036778140293637846, "loss": 0.2098, "num_input_tokens_seen": 9744832, "step": 4510 }, { "epoch": 0.736541598694943, "grad_norm": 0.4425009489059448, "learning_rate": 0.000368189233278956, "loss": 0.1022, "num_input_tokens_seen": 9755520, "step": 4515 }, { "epoch": 0.7373572593800979, "grad_norm": 0.9309787750244141, "learning_rate": 0.00036859706362153346, "loss": 0.1647, "num_input_tokens_seen": 9766208, "step": 4520 }, { "epoch": 0.7381729200652528, "grad_norm": 0.6328949332237244, "learning_rate": 0.0003690048939641109, "loss": 0.1981, "num_input_tokens_seen": 9778272, "step": 4525 }, { "epoch": 0.7389885807504079, "grad_norm": 0.8695969581604004, "learning_rate": 0.0003694127243066884, "loss": 0.1662, "num_input_tokens_seen": 9789760, "step": 4530 }, { "epoch": 0.7398042414355628, "grad_norm": 0.7203797101974487, "learning_rate": 0.0003698205546492659, "loss": 0.1121, "num_input_tokens_seen": 9800800, "step": 4535 }, { "epoch": 0.7406199021207178, "grad_norm": 1.077952265739441, "learning_rate": 0.0003702283849918434, "loss": 0.1574, "num_input_tokens_seen": 9812672, "step": 4540 }, { "epoch": 0.7414355628058727, "grad_norm": 0.6627715229988098, "learning_rate": 0.0003706362153344209, "loss": 0.2866, "num_input_tokens_seen": 9823232, "step": 4545 }, { "epoch": 0.7422512234910277, "grad_norm": 0.1315276026725769, "learning_rate": 0.0003710440456769984, "loss": 0.1755, "num_input_tokens_seen": 9834848, "step": 4550 }, { "epoch": 0.7430668841761827, "grad_norm": 0.05795247107744217, "learning_rate": 0.00037145187601957585, "loss": 0.1221, "num_input_tokens_seen": 9846304, "step": 4555 }, { "epoch": 0.7438825448613376, "grad_norm": 0.21074354648590088, "learning_rate": 0.00037185970636215333, "loss": 0.0962, "num_input_tokens_seen": 9857472, "step": 4560 }, { "epoch": 0.7446982055464927, "grad_norm": 0.11915198713541031, "learning_rate": 0.00037226753670473086, "loss": 0.0883, "num_input_tokens_seen": 9869632, "step": 4565 }, { "epoch": 0.7455138662316476, "grad_norm": 0.12031367421150208, "learning_rate": 0.00037267536704730834, "loss": 0.0608, "num_input_tokens_seen": 9881344, "step": 4570 }, { "epoch": 0.7463295269168027, "grad_norm": 0.14942015707492828, "learning_rate": 0.0003730831973898858, "loss": 0.1219, "num_input_tokens_seen": 9892640, "step": 4575 }, { "epoch": 0.7471451876019576, "grad_norm": 0.525719165802002, "learning_rate": 0.0003734910277324633, "loss": 0.0878, "num_input_tokens_seen": 9903296, "step": 4580 }, { "epoch": 0.7479608482871125, "grad_norm": 0.03644242137670517, "learning_rate": 0.0003738988580750408, "loss": 0.0736, "num_input_tokens_seen": 9913952, "step": 4585 }, { "epoch": 0.7487765089722676, "grad_norm": 0.02505657821893692, "learning_rate": 0.0003743066884176183, "loss": 0.0757, "num_input_tokens_seen": 9924928, "step": 4590 }, { "epoch": 0.7495921696574225, "grad_norm": 0.08508000522851944, "learning_rate": 0.0003747145187601957, "loss": 0.0751, "num_input_tokens_seen": 9936576, "step": 4595 }, { "epoch": 0.7504078303425775, "grad_norm": 0.7894995212554932, "learning_rate": 0.00037512234910277325, "loss": 0.2172, "num_input_tokens_seen": 9948288, "step": 4600 }, { "epoch": 0.7512234910277324, "grad_norm": 0.46785202622413635, "learning_rate": 0.00037553017944535073, "loss": 0.1958, "num_input_tokens_seen": 9958976, "step": 4605 }, { "epoch": 0.7520391517128875, "grad_norm": 0.3128347098827362, "learning_rate": 0.00037593800978792826, "loss": 0.2062, "num_input_tokens_seen": 9969856, "step": 4610 }, { "epoch": 0.7528548123980424, "grad_norm": 0.21548837423324585, "learning_rate": 0.00037634584013050573, "loss": 0.1369, "num_input_tokens_seen": 9982176, "step": 4615 }, { "epoch": 0.7536704730831973, "grad_norm": 0.61204993724823, "learning_rate": 0.0003767536704730832, "loss": 0.1607, "num_input_tokens_seen": 9994176, "step": 4620 }, { "epoch": 0.7544861337683524, "grad_norm": 0.2929581105709076, "learning_rate": 0.0003771615008156607, "loss": 0.127, "num_input_tokens_seen": 10005504, "step": 4625 }, { "epoch": 0.7553017944535073, "grad_norm": 0.2728572487831116, "learning_rate": 0.00037756933115823816, "loss": 0.1428, "num_input_tokens_seen": 10016800, "step": 4630 }, { "epoch": 0.7561174551386624, "grad_norm": 0.5226534008979797, "learning_rate": 0.0003779771615008157, "loss": 0.2997, "num_input_tokens_seen": 10026720, "step": 4635 }, { "epoch": 0.7569331158238173, "grad_norm": 0.32955631613731384, "learning_rate": 0.00037838499184339317, "loss": 0.1112, "num_input_tokens_seen": 10037664, "step": 4640 }, { "epoch": 0.7577487765089723, "grad_norm": 0.2558460235595703, "learning_rate": 0.00037879282218597065, "loss": 0.1725, "num_input_tokens_seen": 10049216, "step": 4645 }, { "epoch": 0.7585644371941273, "grad_norm": 0.34898290038108826, "learning_rate": 0.0003792006525285481, "loss": 0.0869, "num_input_tokens_seen": 10060960, "step": 4650 }, { "epoch": 0.7593800978792822, "grad_norm": 0.2750975489616394, "learning_rate": 0.0003796084828711256, "loss": 0.1028, "num_input_tokens_seen": 10072096, "step": 4655 }, { "epoch": 0.7601957585644372, "grad_norm": 0.4596557319164276, "learning_rate": 0.00038001631321370313, "loss": 0.0883, "num_input_tokens_seen": 10083712, "step": 4660 }, { "epoch": 0.7610114192495921, "grad_norm": 0.21442389488220215, "learning_rate": 0.00038042414355628055, "loss": 0.1009, "num_input_tokens_seen": 10094656, "step": 4665 }, { "epoch": 0.7618270799347472, "grad_norm": 0.41847553849220276, "learning_rate": 0.0003808319738988581, "loss": 0.0981, "num_input_tokens_seen": 10105568, "step": 4670 }, { "epoch": 0.7626427406199021, "grad_norm": 0.258605033159256, "learning_rate": 0.00038123980424143556, "loss": 0.0465, "num_input_tokens_seen": 10116800, "step": 4675 }, { "epoch": 0.763458401305057, "grad_norm": 0.08785971254110336, "learning_rate": 0.0003816476345840131, "loss": 0.151, "num_input_tokens_seen": 10127360, "step": 4680 }, { "epoch": 0.7642740619902121, "grad_norm": 0.44143345952033997, "learning_rate": 0.00038205546492659057, "loss": 0.0895, "num_input_tokens_seen": 10137312, "step": 4685 }, { "epoch": 0.765089722675367, "grad_norm": 1.2000739574432373, "learning_rate": 0.000382463295269168, "loss": 0.2656, "num_input_tokens_seen": 10148960, "step": 4690 }, { "epoch": 0.765905383360522, "grad_norm": 0.045616984367370605, "learning_rate": 0.0003828711256117455, "loss": 0.0484, "num_input_tokens_seen": 10157856, "step": 4695 }, { "epoch": 0.766721044045677, "grad_norm": 0.29947271943092346, "learning_rate": 0.000383278955954323, "loss": 0.0808, "num_input_tokens_seen": 10168672, "step": 4700 }, { "epoch": 0.767536704730832, "grad_norm": 0.0342426560819149, "learning_rate": 0.00038368678629690053, "loss": 0.0645, "num_input_tokens_seen": 10178272, "step": 4705 }, { "epoch": 0.768352365415987, "grad_norm": 0.40731778740882874, "learning_rate": 0.000384094616639478, "loss": 0.0414, "num_input_tokens_seen": 10188896, "step": 4710 }, { "epoch": 0.7691680261011419, "grad_norm": 0.35202035307884216, "learning_rate": 0.0003845024469820555, "loss": 0.1535, "num_input_tokens_seen": 10199488, "step": 4715 }, { "epoch": 0.7699836867862969, "grad_norm": 0.38114434480667114, "learning_rate": 0.00038491027732463296, "loss": 0.1137, "num_input_tokens_seen": 10210720, "step": 4720 }, { "epoch": 0.7707993474714518, "grad_norm": 0.16754403710365295, "learning_rate": 0.00038531810766721043, "loss": 0.0798, "num_input_tokens_seen": 10221472, "step": 4725 }, { "epoch": 0.7716150081566069, "grad_norm": 0.761899471282959, "learning_rate": 0.00038572593800978796, "loss": 0.1995, "num_input_tokens_seen": 10232288, "step": 4730 }, { "epoch": 0.7724306688417618, "grad_norm": 0.18429462611675262, "learning_rate": 0.0003861337683523654, "loss": 0.1711, "num_input_tokens_seen": 10243616, "step": 4735 }, { "epoch": 0.7732463295269169, "grad_norm": 0.5064928531646729, "learning_rate": 0.0003865415986949429, "loss": 0.1026, "num_input_tokens_seen": 10254464, "step": 4740 }, { "epoch": 0.7740619902120718, "grad_norm": 0.21989291906356812, "learning_rate": 0.0003869494290375204, "loss": 0.065, "num_input_tokens_seen": 10265472, "step": 4745 }, { "epoch": 0.7748776508972267, "grad_norm": 0.2017669528722763, "learning_rate": 0.0003873572593800979, "loss": 0.1856, "num_input_tokens_seen": 10277184, "step": 4750 }, { "epoch": 0.7756933115823818, "grad_norm": 0.43002089858055115, "learning_rate": 0.0003877650897226754, "loss": 0.1439, "num_input_tokens_seen": 10287104, "step": 4755 }, { "epoch": 0.7765089722675367, "grad_norm": 0.11221319437026978, "learning_rate": 0.0003881729200652528, "loss": 0.1302, "num_input_tokens_seen": 10297952, "step": 4760 }, { "epoch": 0.7773246329526917, "grad_norm": 0.12216249108314514, "learning_rate": 0.00038858075040783035, "loss": 0.0866, "num_input_tokens_seen": 10309440, "step": 4765 }, { "epoch": 0.7781402936378466, "grad_norm": 0.12690703570842743, "learning_rate": 0.00038898858075040783, "loss": 0.2505, "num_input_tokens_seen": 10319680, "step": 4770 }, { "epoch": 0.7789559543230016, "grad_norm": 0.2630586624145508, "learning_rate": 0.00038939641109298536, "loss": 0.1576, "num_input_tokens_seen": 10331424, "step": 4775 }, { "epoch": 0.7797716150081566, "grad_norm": 0.5038022398948669, "learning_rate": 0.00038980424143556284, "loss": 0.2242, "num_input_tokens_seen": 10341600, "step": 4780 }, { "epoch": 0.7805872756933115, "grad_norm": 0.30624839663505554, "learning_rate": 0.00039021207177814026, "loss": 0.2602, "num_input_tokens_seen": 10353504, "step": 4785 }, { "epoch": 0.7814029363784666, "grad_norm": 0.10484899580478668, "learning_rate": 0.0003906199021207178, "loss": 0.0848, "num_input_tokens_seen": 10365536, "step": 4790 }, { "epoch": 0.7822185970636215, "grad_norm": 0.285604327917099, "learning_rate": 0.00039102773246329527, "loss": 0.1797, "num_input_tokens_seen": 10375456, "step": 4795 }, { "epoch": 0.7830342577487766, "grad_norm": 0.14094938337802887, "learning_rate": 0.0003914355628058728, "loss": 0.1878, "num_input_tokens_seen": 10386336, "step": 4800 }, { "epoch": 0.7838499184339315, "grad_norm": 0.04346349090337753, "learning_rate": 0.0003918433931484502, "loss": 0.1707, "num_input_tokens_seen": 10397504, "step": 4805 }, { "epoch": 0.7846655791190864, "grad_norm": 0.128965362906456, "learning_rate": 0.00039225122349102775, "loss": 0.0681, "num_input_tokens_seen": 10407520, "step": 4810 }, { "epoch": 0.7854812398042414, "grad_norm": 0.3891755938529968, "learning_rate": 0.0003926590538336052, "loss": 0.1559, "num_input_tokens_seen": 10418368, "step": 4815 }, { "epoch": 0.7862969004893964, "grad_norm": 0.0359419621527195, "learning_rate": 0.0003930668841761827, "loss": 0.1942, "num_input_tokens_seen": 10428864, "step": 4820 }, { "epoch": 0.7871125611745514, "grad_norm": 0.36615094542503357, "learning_rate": 0.00039347471451876023, "loss": 0.1266, "num_input_tokens_seen": 10440224, "step": 4825 }, { "epoch": 0.7879282218597063, "grad_norm": 0.2508382499217987, "learning_rate": 0.00039388254486133766, "loss": 0.1644, "num_input_tokens_seen": 10450912, "step": 4830 }, { "epoch": 0.7887438825448614, "grad_norm": 0.19512003660202026, "learning_rate": 0.0003942903752039152, "loss": 0.1277, "num_input_tokens_seen": 10460352, "step": 4835 }, { "epoch": 0.7895595432300163, "grad_norm": 0.23638033866882324, "learning_rate": 0.00039469820554649266, "loss": 0.242, "num_input_tokens_seen": 10471968, "step": 4840 }, { "epoch": 0.7903752039151712, "grad_norm": 0.43455421924591064, "learning_rate": 0.0003951060358890702, "loss": 0.1504, "num_input_tokens_seen": 10481312, "step": 4845 }, { "epoch": 0.7911908646003263, "grad_norm": 0.13896767795085907, "learning_rate": 0.00039551386623164767, "loss": 0.1634, "num_input_tokens_seen": 10491136, "step": 4850 }, { "epoch": 0.7920065252854812, "grad_norm": 0.31423068046569824, "learning_rate": 0.0003959216965742251, "loss": 0.1022, "num_input_tokens_seen": 10501344, "step": 4855 }, { "epoch": 0.7928221859706363, "grad_norm": 0.08108766376972198, "learning_rate": 0.0003963295269168026, "loss": 0.0808, "num_input_tokens_seen": 10511968, "step": 4860 }, { "epoch": 0.7936378466557912, "grad_norm": 0.3036273717880249, "learning_rate": 0.0003967373572593801, "loss": 0.139, "num_input_tokens_seen": 10523296, "step": 4865 }, { "epoch": 0.7944535073409462, "grad_norm": 0.18951745331287384, "learning_rate": 0.00039714518760195763, "loss": 0.068, "num_input_tokens_seen": 10532992, "step": 4870 }, { "epoch": 0.7952691680261011, "grad_norm": 0.030269593000411987, "learning_rate": 0.00039755301794453505, "loss": 0.1123, "num_input_tokens_seen": 10544768, "step": 4875 }, { "epoch": 0.7960848287112561, "grad_norm": 0.29710137844085693, "learning_rate": 0.00039796084828711253, "loss": 0.049, "num_input_tokens_seen": 10555680, "step": 4880 }, { "epoch": 0.7969004893964111, "grad_norm": 0.21235564351081848, "learning_rate": 0.00039836867862969006, "loss": 0.0519, "num_input_tokens_seen": 10566304, "step": 4885 }, { "epoch": 0.797716150081566, "grad_norm": 0.6146203875541687, "learning_rate": 0.00039877650897226754, "loss": 0.0219, "num_input_tokens_seen": 10576128, "step": 4890 }, { "epoch": 0.7985318107667211, "grad_norm": 0.18994970619678497, "learning_rate": 0.00039918433931484507, "loss": 0.032, "num_input_tokens_seen": 10587072, "step": 4895 }, { "epoch": 0.799347471451876, "grad_norm": 1.0278782844543457, "learning_rate": 0.0003995921696574225, "loss": 0.087, "num_input_tokens_seen": 10597696, "step": 4900 }, { "epoch": 0.8001631321370309, "grad_norm": 0.9544143676757812, "learning_rate": 0.0004, "loss": 0.1629, "num_input_tokens_seen": 10608928, "step": 4905 }, { "epoch": 0.800978792822186, "grad_norm": 0.12461934238672256, "learning_rate": 0.0004004078303425775, "loss": 0.04, "num_input_tokens_seen": 10619872, "step": 4910 }, { "epoch": 0.8017944535073409, "grad_norm": 0.027850087732076645, "learning_rate": 0.00040081566068515497, "loss": 0.1131, "num_input_tokens_seen": 10631360, "step": 4915 }, { "epoch": 0.802610114192496, "grad_norm": 0.21807579696178436, "learning_rate": 0.0004012234910277325, "loss": 0.0874, "num_input_tokens_seen": 10642880, "step": 4920 }, { "epoch": 0.8034257748776509, "grad_norm": 0.40161916613578796, "learning_rate": 0.0004016313213703099, "loss": 0.0408, "num_input_tokens_seen": 10651968, "step": 4925 }, { "epoch": 0.8042414355628059, "grad_norm": 0.8437064290046692, "learning_rate": 0.00040203915171288746, "loss": 0.246, "num_input_tokens_seen": 10662240, "step": 4930 }, { "epoch": 0.8050570962479608, "grad_norm": 0.15779760479927063, "learning_rate": 0.00040244698205546493, "loss": 0.0533, "num_input_tokens_seen": 10672864, "step": 4935 }, { "epoch": 0.8058727569331158, "grad_norm": 0.08539305627346039, "learning_rate": 0.00040285481239804246, "loss": 0.1148, "num_input_tokens_seen": 10683936, "step": 4940 }, { "epoch": 0.8066884176182708, "grad_norm": 0.8961646556854248, "learning_rate": 0.0004032626427406199, "loss": 0.2105, "num_input_tokens_seen": 10695104, "step": 4945 }, { "epoch": 0.8075040783034257, "grad_norm": 0.11104848980903625, "learning_rate": 0.00040367047308319736, "loss": 0.1674, "num_input_tokens_seen": 10706528, "step": 4950 }, { "epoch": 0.8083197389885808, "grad_norm": 0.26247522234916687, "learning_rate": 0.0004040783034257749, "loss": 0.1362, "num_input_tokens_seen": 10717728, "step": 4955 }, { "epoch": 0.8091353996737357, "grad_norm": 0.030089763924479485, "learning_rate": 0.00040448613376835237, "loss": 0.1047, "num_input_tokens_seen": 10727104, "step": 4960 }, { "epoch": 0.8099510603588908, "grad_norm": 0.11344542354345322, "learning_rate": 0.0004048939641109299, "loss": 0.1412, "num_input_tokens_seen": 10737952, "step": 4965 }, { "epoch": 0.8107667210440457, "grad_norm": 0.3622676134109497, "learning_rate": 0.0004053017944535073, "loss": 0.2602, "num_input_tokens_seen": 10747744, "step": 4970 }, { "epoch": 0.8115823817292006, "grad_norm": 0.08006960898637772, "learning_rate": 0.00040570962479608485, "loss": 0.1059, "num_input_tokens_seen": 10757920, "step": 4975 }, { "epoch": 0.8123980424143556, "grad_norm": 0.12218235433101654, "learning_rate": 0.00040611745513866233, "loss": 0.0414, "num_input_tokens_seen": 10768896, "step": 4980 }, { "epoch": 0.8132137030995106, "grad_norm": 0.0940176248550415, "learning_rate": 0.0004065252854812398, "loss": 0.0599, "num_input_tokens_seen": 10779136, "step": 4985 }, { "epoch": 0.8140293637846656, "grad_norm": 0.17066459357738495, "learning_rate": 0.00040693311582381734, "loss": 0.1839, "num_input_tokens_seen": 10789280, "step": 4990 }, { "epoch": 0.8148450244698205, "grad_norm": 0.17495228350162506, "learning_rate": 0.00040734094616639476, "loss": 0.1297, "num_input_tokens_seen": 10800192, "step": 4995 }, { "epoch": 0.8156606851549756, "grad_norm": 0.4211640954017639, "learning_rate": 0.0004077487765089723, "loss": 0.0795, "num_input_tokens_seen": 10811296, "step": 5000 }, { "epoch": 0.8164763458401305, "grad_norm": 0.1242627203464508, "learning_rate": 0.00040815660685154977, "loss": 0.0298, "num_input_tokens_seen": 10822272, "step": 5005 }, { "epoch": 0.8172920065252854, "grad_norm": 0.684248149394989, "learning_rate": 0.00040856443719412724, "loss": 0.0643, "num_input_tokens_seen": 10832832, "step": 5010 }, { "epoch": 0.8181076672104405, "grad_norm": 0.5153582692146301, "learning_rate": 0.00040897226753670477, "loss": 0.2636, "num_input_tokens_seen": 10843008, "step": 5015 }, { "epoch": 0.8189233278955954, "grad_norm": 0.13736863434314728, "learning_rate": 0.0004093800978792822, "loss": 0.0742, "num_input_tokens_seen": 10856000, "step": 5020 }, { "epoch": 0.8197389885807504, "grad_norm": 0.07956301420927048, "learning_rate": 0.0004097879282218597, "loss": 0.0642, "num_input_tokens_seen": 10866144, "step": 5025 }, { "epoch": 0.8205546492659054, "grad_norm": 0.7332919239997864, "learning_rate": 0.0004101957585644372, "loss": 0.3499, "num_input_tokens_seen": 10877312, "step": 5030 }, { "epoch": 0.8213703099510603, "grad_norm": 0.054674167186021805, "learning_rate": 0.00041060358890701473, "loss": 0.1298, "num_input_tokens_seen": 10887040, "step": 5035 }, { "epoch": 0.8221859706362153, "grad_norm": 0.20225413143634796, "learning_rate": 0.00041101141924959215, "loss": 0.0691, "num_input_tokens_seen": 10897440, "step": 5040 }, { "epoch": 0.8230016313213703, "grad_norm": 0.6315981149673462, "learning_rate": 0.00041141924959216963, "loss": 0.0708, "num_input_tokens_seen": 10909504, "step": 5045 }, { "epoch": 0.8238172920065253, "grad_norm": 0.7156874537467957, "learning_rate": 0.00041182707993474716, "loss": 0.3704, "num_input_tokens_seen": 10921728, "step": 5050 }, { "epoch": 0.8246329526916802, "grad_norm": 0.16364993155002594, "learning_rate": 0.00041223491027732464, "loss": 0.0518, "num_input_tokens_seen": 10931776, "step": 5055 }, { "epoch": 0.8254486133768353, "grad_norm": 0.12342008948326111, "learning_rate": 0.00041264274061990217, "loss": 0.0695, "num_input_tokens_seen": 10942784, "step": 5060 }, { "epoch": 0.8262642740619902, "grad_norm": 0.04900471493601799, "learning_rate": 0.0004130505709624796, "loss": 0.0299, "num_input_tokens_seen": 10954272, "step": 5065 }, { "epoch": 0.8270799347471451, "grad_norm": 0.40814009308815, "learning_rate": 0.0004134584013050571, "loss": 0.1194, "num_input_tokens_seen": 10964864, "step": 5070 }, { "epoch": 0.8278955954323002, "grad_norm": 0.0919327363371849, "learning_rate": 0.0004138662316476346, "loss": 0.1849, "num_input_tokens_seen": 10975104, "step": 5075 }, { "epoch": 0.8287112561174551, "grad_norm": 0.6414536237716675, "learning_rate": 0.0004142740619902121, "loss": 0.0728, "num_input_tokens_seen": 10985152, "step": 5080 }, { "epoch": 0.8295269168026101, "grad_norm": 0.05294128879904747, "learning_rate": 0.0004146818923327896, "loss": 0.0663, "num_input_tokens_seen": 10996640, "step": 5085 }, { "epoch": 0.8303425774877651, "grad_norm": 0.0711590051651001, "learning_rate": 0.00041508972267536703, "loss": 0.1359, "num_input_tokens_seen": 11007328, "step": 5090 }, { "epoch": 0.8311582381729201, "grad_norm": 0.27603647112846375, "learning_rate": 0.00041549755301794456, "loss": 0.1779, "num_input_tokens_seen": 11017760, "step": 5095 }, { "epoch": 0.831973898858075, "grad_norm": 0.6225929856300354, "learning_rate": 0.00041590538336052203, "loss": 0.0765, "num_input_tokens_seen": 11028768, "step": 5100 }, { "epoch": 0.83278955954323, "grad_norm": 0.2413845807313919, "learning_rate": 0.00041631321370309957, "loss": 0.269, "num_input_tokens_seen": 11039360, "step": 5105 }, { "epoch": 0.833605220228385, "grad_norm": 0.3218750059604645, "learning_rate": 0.000416721044045677, "loss": 0.0634, "num_input_tokens_seen": 11050272, "step": 5110 }, { "epoch": 0.8344208809135399, "grad_norm": 0.42538219690322876, "learning_rate": 0.00041712887438825446, "loss": 0.3043, "num_input_tokens_seen": 11060736, "step": 5115 }, { "epoch": 0.835236541598695, "grad_norm": 0.951379656791687, "learning_rate": 0.000417536704730832, "loss": 0.1503, "num_input_tokens_seen": 11070688, "step": 5120 }, { "epoch": 0.8360522022838499, "grad_norm": 0.5826851725578308, "learning_rate": 0.00041794453507340947, "loss": 0.2831, "num_input_tokens_seen": 11080800, "step": 5125 }, { "epoch": 0.8368678629690048, "grad_norm": 0.1050708070397377, "learning_rate": 0.000418352365415987, "loss": 0.0262, "num_input_tokens_seen": 11091680, "step": 5130 }, { "epoch": 0.8376835236541599, "grad_norm": 0.08830724656581879, "learning_rate": 0.0004187601957585644, "loss": 0.1475, "num_input_tokens_seen": 11102656, "step": 5135 }, { "epoch": 0.8384991843393148, "grad_norm": 0.21703684329986572, "learning_rate": 0.0004191680261011419, "loss": 0.0642, "num_input_tokens_seen": 11112960, "step": 5140 }, { "epoch": 0.8393148450244698, "grad_norm": 0.07175783067941666, "learning_rate": 0.00041957585644371943, "loss": 0.098, "num_input_tokens_seen": 11124512, "step": 5145 }, { "epoch": 0.8401305057096248, "grad_norm": 0.33142393827438354, "learning_rate": 0.0004199836867862969, "loss": 0.2984, "num_input_tokens_seen": 11134976, "step": 5150 }, { "epoch": 0.8409461663947798, "grad_norm": 0.46356773376464844, "learning_rate": 0.00042039151712887444, "loss": 0.1555, "num_input_tokens_seen": 11146144, "step": 5155 }, { "epoch": 0.8417618270799347, "grad_norm": 0.36611565947532654, "learning_rate": 0.00042079934747145186, "loss": 0.0533, "num_input_tokens_seen": 11157152, "step": 5160 }, { "epoch": 0.8425774877650897, "grad_norm": 0.11518274247646332, "learning_rate": 0.0004212071778140294, "loss": 0.0976, "num_input_tokens_seen": 11166400, "step": 5165 }, { "epoch": 0.8433931484502447, "grad_norm": 0.3951309025287628, "learning_rate": 0.00042161500815660687, "loss": 0.0887, "num_input_tokens_seen": 11178016, "step": 5170 }, { "epoch": 0.8442088091353996, "grad_norm": 0.4681323766708374, "learning_rate": 0.00042202283849918434, "loss": 0.1065, "num_input_tokens_seen": 11188992, "step": 5175 }, { "epoch": 0.8450244698205547, "grad_norm": 0.26340165734291077, "learning_rate": 0.0004224306688417618, "loss": 0.0694, "num_input_tokens_seen": 11200160, "step": 5180 }, { "epoch": 0.8458401305057096, "grad_norm": 0.562423586845398, "learning_rate": 0.0004228384991843393, "loss": 0.2449, "num_input_tokens_seen": 11211776, "step": 5185 }, { "epoch": 0.8466557911908646, "grad_norm": 0.7215205430984497, "learning_rate": 0.00042324632952691683, "loss": 0.2644, "num_input_tokens_seen": 11223328, "step": 5190 }, { "epoch": 0.8474714518760196, "grad_norm": 0.04237314313650131, "learning_rate": 0.0004236541598694943, "loss": 0.1336, "num_input_tokens_seen": 11234528, "step": 5195 }, { "epoch": 0.8482871125611745, "grad_norm": 0.09171731770038605, "learning_rate": 0.00042406199021207183, "loss": 0.1566, "num_input_tokens_seen": 11245920, "step": 5200 }, { "epoch": 0.8491027732463295, "grad_norm": 0.135093554854393, "learning_rate": 0.00042446982055464926, "loss": 0.1481, "num_input_tokens_seen": 11258080, "step": 5205 }, { "epoch": 0.8499184339314845, "grad_norm": 0.10724607855081558, "learning_rate": 0.00042487765089722673, "loss": 0.1028, "num_input_tokens_seen": 11270176, "step": 5210 }, { "epoch": 0.8507340946166395, "grad_norm": 0.08377696573734283, "learning_rate": 0.00042528548123980426, "loss": 0.0622, "num_input_tokens_seen": 11282016, "step": 5215 }, { "epoch": 0.8515497553017944, "grad_norm": 0.12989285588264465, "learning_rate": 0.00042569331158238174, "loss": 0.0544, "num_input_tokens_seen": 11292320, "step": 5220 }, { "epoch": 0.8523654159869495, "grad_norm": 0.13682430982589722, "learning_rate": 0.00042610114192495927, "loss": 0.1094, "num_input_tokens_seen": 11303040, "step": 5225 }, { "epoch": 0.8531810766721044, "grad_norm": 0.7620516419410706, "learning_rate": 0.0004265089722675367, "loss": 0.1155, "num_input_tokens_seen": 11312672, "step": 5230 }, { "epoch": 0.8539967373572593, "grad_norm": 0.033026549965143204, "learning_rate": 0.00042691680261011417, "loss": 0.0373, "num_input_tokens_seen": 11323488, "step": 5235 }, { "epoch": 0.8548123980424144, "grad_norm": 0.16827772557735443, "learning_rate": 0.0004273246329526917, "loss": 0.0487, "num_input_tokens_seen": 11334176, "step": 5240 }, { "epoch": 0.8556280587275693, "grad_norm": 0.16478866338729858, "learning_rate": 0.0004277324632952692, "loss": 0.1613, "num_input_tokens_seen": 11345632, "step": 5245 }, { "epoch": 0.8564437194127243, "grad_norm": 0.2556282579898834, "learning_rate": 0.00042814029363784665, "loss": 0.0364, "num_input_tokens_seen": 11355552, "step": 5250 }, { "epoch": 0.8572593800978793, "grad_norm": 0.02794981375336647, "learning_rate": 0.00042854812398042413, "loss": 0.1908, "num_input_tokens_seen": 11367072, "step": 5255 }, { "epoch": 0.8580750407830342, "grad_norm": 0.5714795589447021, "learning_rate": 0.00042895595432300166, "loss": 0.2137, "num_input_tokens_seen": 11377120, "step": 5260 }, { "epoch": 0.8588907014681892, "grad_norm": 0.11488376557826996, "learning_rate": 0.00042936378466557914, "loss": 0.0982, "num_input_tokens_seen": 11388416, "step": 5265 }, { "epoch": 0.8597063621533442, "grad_norm": 0.08155690133571625, "learning_rate": 0.0004297716150081566, "loss": 0.1723, "num_input_tokens_seen": 11398528, "step": 5270 }, { "epoch": 0.8605220228384992, "grad_norm": 0.10925207287073135, "learning_rate": 0.0004301794453507341, "loss": 0.0976, "num_input_tokens_seen": 11408576, "step": 5275 }, { "epoch": 0.8613376835236541, "grad_norm": 0.23385116457939148, "learning_rate": 0.00043058727569331157, "loss": 0.0709, "num_input_tokens_seen": 11420000, "step": 5280 }, { "epoch": 0.8621533442088092, "grad_norm": 0.510635495185852, "learning_rate": 0.0004309951060358891, "loss": 0.1581, "num_input_tokens_seen": 11430528, "step": 5285 }, { "epoch": 0.8629690048939641, "grad_norm": 0.08769966661930084, "learning_rate": 0.0004314029363784666, "loss": 0.0703, "num_input_tokens_seen": 11440992, "step": 5290 }, { "epoch": 0.863784665579119, "grad_norm": 0.09656643867492676, "learning_rate": 0.0004318107667210441, "loss": 0.1036, "num_input_tokens_seen": 11451712, "step": 5295 }, { "epoch": 0.8646003262642741, "grad_norm": 0.5707296133041382, "learning_rate": 0.0004322185970636215, "loss": 0.1564, "num_input_tokens_seen": 11463584, "step": 5300 }, { "epoch": 0.865415986949429, "grad_norm": 0.18754911422729492, "learning_rate": 0.000432626427406199, "loss": 0.0607, "num_input_tokens_seen": 11474560, "step": 5305 }, { "epoch": 0.866231647634584, "grad_norm": 0.0587138757109642, "learning_rate": 0.00043303425774877653, "loss": 0.0883, "num_input_tokens_seen": 11486080, "step": 5310 }, { "epoch": 0.867047308319739, "grad_norm": 0.026419376954436302, "learning_rate": 0.000433442088091354, "loss": 0.1829, "num_input_tokens_seen": 11498464, "step": 5315 }, { "epoch": 0.867862969004894, "grad_norm": 0.5190200209617615, "learning_rate": 0.0004338499184339315, "loss": 0.2018, "num_input_tokens_seen": 11509376, "step": 5320 }, { "epoch": 0.8686786296900489, "grad_norm": 0.07488425821065903, "learning_rate": 0.00043425774877650896, "loss": 0.202, "num_input_tokens_seen": 11520480, "step": 5325 }, { "epoch": 0.8694942903752039, "grad_norm": 0.18614496290683746, "learning_rate": 0.0004346655791190865, "loss": 0.0892, "num_input_tokens_seen": 11531136, "step": 5330 }, { "epoch": 0.8703099510603589, "grad_norm": 0.05950484424829483, "learning_rate": 0.00043507340946166397, "loss": 0.0623, "num_input_tokens_seen": 11541408, "step": 5335 }, { "epoch": 0.8711256117455138, "grad_norm": 0.21500875055789948, "learning_rate": 0.00043548123980424145, "loss": 0.2217, "num_input_tokens_seen": 11552320, "step": 5340 }, { "epoch": 0.8719412724306689, "grad_norm": 0.773648202419281, "learning_rate": 0.0004358890701468189, "loss": 0.2593, "num_input_tokens_seen": 11563232, "step": 5345 }, { "epoch": 0.8727569331158238, "grad_norm": 0.14433734118938446, "learning_rate": 0.0004362969004893964, "loss": 0.0955, "num_input_tokens_seen": 11573856, "step": 5350 }, { "epoch": 0.8735725938009788, "grad_norm": 0.10043247789144516, "learning_rate": 0.00043670473083197393, "loss": 0.1101, "num_input_tokens_seen": 11584960, "step": 5355 }, { "epoch": 0.8743882544861338, "grad_norm": 0.03573020547628403, "learning_rate": 0.0004371125611745514, "loss": 0.1191, "num_input_tokens_seen": 11595168, "step": 5360 }, { "epoch": 0.8752039151712887, "grad_norm": 0.25010839104652405, "learning_rate": 0.0004375203915171289, "loss": 0.1045, "num_input_tokens_seen": 11605472, "step": 5365 }, { "epoch": 0.8760195758564437, "grad_norm": 0.22654956579208374, "learning_rate": 0.00043792822185970636, "loss": 0.035, "num_input_tokens_seen": 11616224, "step": 5370 }, { "epoch": 0.8768352365415987, "grad_norm": 0.20199733972549438, "learning_rate": 0.00043833605220228384, "loss": 0.077, "num_input_tokens_seen": 11626816, "step": 5375 }, { "epoch": 0.8776508972267537, "grad_norm": 0.20932228863239288, "learning_rate": 0.00043874388254486137, "loss": 0.0706, "num_input_tokens_seen": 11637152, "step": 5380 }, { "epoch": 0.8784665579119086, "grad_norm": 0.268714964389801, "learning_rate": 0.00043915171288743884, "loss": 0.1373, "num_input_tokens_seen": 11648160, "step": 5385 }, { "epoch": 0.8792822185970636, "grad_norm": 0.25124499201774597, "learning_rate": 0.0004395595432300163, "loss": 0.2045, "num_input_tokens_seen": 11660320, "step": 5390 }, { "epoch": 0.8800978792822186, "grad_norm": 0.030649229884147644, "learning_rate": 0.0004399673735725938, "loss": 0.1744, "num_input_tokens_seen": 11670816, "step": 5395 }, { "epoch": 0.8809135399673735, "grad_norm": 0.06209159642457962, "learning_rate": 0.00044037520391517127, "loss": 0.0314, "num_input_tokens_seen": 11682560, "step": 5400 }, { "epoch": 0.8817292006525286, "grad_norm": 0.13285031914710999, "learning_rate": 0.0004407830342577488, "loss": 0.3882, "num_input_tokens_seen": 11694400, "step": 5405 }, { "epoch": 0.8825448613376835, "grad_norm": 0.14116713404655457, "learning_rate": 0.0004411908646003263, "loss": 0.0998, "num_input_tokens_seen": 11705728, "step": 5410 }, { "epoch": 0.8833605220228385, "grad_norm": 0.23796717822551727, "learning_rate": 0.00044159869494290376, "loss": 0.1255, "num_input_tokens_seen": 11716736, "step": 5415 }, { "epoch": 0.8841761827079935, "grad_norm": 0.18344132602214813, "learning_rate": 0.00044200652528548123, "loss": 0.0871, "num_input_tokens_seen": 11726208, "step": 5420 }, { "epoch": 0.8849918433931484, "grad_norm": 0.10778245329856873, "learning_rate": 0.00044241435562805876, "loss": 0.0952, "num_input_tokens_seen": 11737664, "step": 5425 }, { "epoch": 0.8858075040783034, "grad_norm": 0.22092236578464508, "learning_rate": 0.00044282218597063624, "loss": 0.1987, "num_input_tokens_seen": 11749312, "step": 5430 }, { "epoch": 0.8866231647634584, "grad_norm": 0.6242573261260986, "learning_rate": 0.0004432300163132137, "loss": 0.2475, "num_input_tokens_seen": 11760192, "step": 5435 }, { "epoch": 0.8874388254486134, "grad_norm": 0.2279716432094574, "learning_rate": 0.0004436378466557912, "loss": 0.164, "num_input_tokens_seen": 11770464, "step": 5440 }, { "epoch": 0.8882544861337683, "grad_norm": 0.06530027091503143, "learning_rate": 0.00044404567699836867, "loss": 0.1861, "num_input_tokens_seen": 11781344, "step": 5445 }, { "epoch": 0.8890701468189234, "grad_norm": 0.14814841747283936, "learning_rate": 0.0004444535073409462, "loss": 0.116, "num_input_tokens_seen": 11791968, "step": 5450 }, { "epoch": 0.8898858075040783, "grad_norm": 0.3580428659915924, "learning_rate": 0.0004448613376835237, "loss": 0.1099, "num_input_tokens_seen": 11802624, "step": 5455 }, { "epoch": 0.8907014681892332, "grad_norm": 0.944884717464447, "learning_rate": 0.0004452691680261011, "loss": 0.2742, "num_input_tokens_seen": 11813248, "step": 5460 }, { "epoch": 0.8915171288743883, "grad_norm": 0.2670440077781677, "learning_rate": 0.00044567699836867863, "loss": 0.1914, "num_input_tokens_seen": 11823040, "step": 5465 }, { "epoch": 0.8923327895595432, "grad_norm": 0.3020407259464264, "learning_rate": 0.0004460848287112561, "loss": 0.1478, "num_input_tokens_seen": 11834528, "step": 5470 }, { "epoch": 0.8931484502446982, "grad_norm": 0.21428096294403076, "learning_rate": 0.00044649265905383364, "loss": 0.1098, "num_input_tokens_seen": 11844896, "step": 5475 }, { "epoch": 0.8939641109298532, "grad_norm": 0.2839694321155548, "learning_rate": 0.0004469004893964111, "loss": 0.0678, "num_input_tokens_seen": 11855392, "step": 5480 }, { "epoch": 0.8947797716150081, "grad_norm": 0.6894422769546509, "learning_rate": 0.0004473083197389886, "loss": 0.2677, "num_input_tokens_seen": 11864608, "step": 5485 }, { "epoch": 0.8955954323001631, "grad_norm": 0.20967309176921844, "learning_rate": 0.00044771615008156607, "loss": 0.2639, "num_input_tokens_seen": 11875776, "step": 5490 }, { "epoch": 0.8964110929853181, "grad_norm": 0.37381711602211, "learning_rate": 0.00044812398042414354, "loss": 0.0903, "num_input_tokens_seen": 11885472, "step": 5495 }, { "epoch": 0.8972267536704731, "grad_norm": 0.6217723488807678, "learning_rate": 0.00044853181076672107, "loss": 0.178, "num_input_tokens_seen": 11897056, "step": 5500 }, { "epoch": 0.898042414355628, "grad_norm": 0.4900282323360443, "learning_rate": 0.00044893964110929855, "loss": 0.1818, "num_input_tokens_seen": 11907104, "step": 5505 }, { "epoch": 0.8988580750407831, "grad_norm": 0.05841980502009392, "learning_rate": 0.000449347471451876, "loss": 0.1849, "num_input_tokens_seen": 11916960, "step": 5510 }, { "epoch": 0.899673735725938, "grad_norm": 0.0654044821858406, "learning_rate": 0.0004497553017944535, "loss": 0.0843, "num_input_tokens_seen": 11927232, "step": 5515 }, { "epoch": 0.9004893964110929, "grad_norm": 0.1382654458284378, "learning_rate": 0.00045016313213703103, "loss": 0.2114, "num_input_tokens_seen": 11938272, "step": 5520 }, { "epoch": 0.901305057096248, "grad_norm": 0.2675796449184418, "learning_rate": 0.0004505709624796085, "loss": 0.2142, "num_input_tokens_seen": 11949632, "step": 5525 }, { "epoch": 0.9021207177814029, "grad_norm": 0.05411672219634056, "learning_rate": 0.00045097879282218593, "loss": 0.1789, "num_input_tokens_seen": 11959232, "step": 5530 }, { "epoch": 0.9029363784665579, "grad_norm": 0.3251686096191406, "learning_rate": 0.00045138662316476346, "loss": 0.1218, "num_input_tokens_seen": 11970304, "step": 5535 }, { "epoch": 0.9037520391517129, "grad_norm": 0.15985806286334991, "learning_rate": 0.00045179445350734094, "loss": 0.1069, "num_input_tokens_seen": 11980864, "step": 5540 }, { "epoch": 0.9045676998368679, "grad_norm": 0.5636354684829712, "learning_rate": 0.00045220228384991847, "loss": 0.1869, "num_input_tokens_seen": 11990592, "step": 5545 }, { "epoch": 0.9053833605220228, "grad_norm": 0.07867056131362915, "learning_rate": 0.00045261011419249595, "loss": 0.0485, "num_input_tokens_seen": 12002720, "step": 5550 }, { "epoch": 0.9061990212071778, "grad_norm": 0.7367706298828125, "learning_rate": 0.0004530179445350734, "loss": 0.126, "num_input_tokens_seen": 12012896, "step": 5555 }, { "epoch": 0.9070146818923328, "grad_norm": 0.75595623254776, "learning_rate": 0.0004534257748776509, "loss": 0.1054, "num_input_tokens_seen": 12025120, "step": 5560 }, { "epoch": 0.9078303425774877, "grad_norm": 0.9178465008735657, "learning_rate": 0.0004538336052202284, "loss": 0.3257, "num_input_tokens_seen": 12036384, "step": 5565 }, { "epoch": 0.9086460032626428, "grad_norm": 0.47468215227127075, "learning_rate": 0.0004542414355628059, "loss": 0.2105, "num_input_tokens_seen": 12047296, "step": 5570 }, { "epoch": 0.9094616639477977, "grad_norm": 0.175617977976799, "learning_rate": 0.0004546492659053834, "loss": 0.1081, "num_input_tokens_seen": 12057664, "step": 5575 }, { "epoch": 0.9102773246329527, "grad_norm": 0.21897639334201813, "learning_rate": 0.00045505709624796086, "loss": 0.1315, "num_input_tokens_seen": 12069344, "step": 5580 }, { "epoch": 0.9110929853181077, "grad_norm": 0.10874702781438828, "learning_rate": 0.00045546492659053833, "loss": 0.0992, "num_input_tokens_seen": 12080224, "step": 5585 }, { "epoch": 0.9119086460032626, "grad_norm": 0.36471787095069885, "learning_rate": 0.0004558727569331158, "loss": 0.0847, "num_input_tokens_seen": 12089824, "step": 5590 }, { "epoch": 0.9127243066884176, "grad_norm": 0.024809151887893677, "learning_rate": 0.00045628058727569334, "loss": 0.0778, "num_input_tokens_seen": 12101152, "step": 5595 }, { "epoch": 0.9135399673735726, "grad_norm": 0.024391191080212593, "learning_rate": 0.00045668841761827076, "loss": 0.1407, "num_input_tokens_seen": 12111936, "step": 5600 }, { "epoch": 0.9143556280587276, "grad_norm": 0.18645010888576508, "learning_rate": 0.0004570962479608483, "loss": 0.1784, "num_input_tokens_seen": 12122528, "step": 5605 }, { "epoch": 0.9151712887438825, "grad_norm": 0.03236968070268631, "learning_rate": 0.00045750407830342577, "loss": 0.2051, "num_input_tokens_seen": 12132128, "step": 5610 }, { "epoch": 0.9159869494290375, "grad_norm": 0.036100562661886215, "learning_rate": 0.0004579119086460033, "loss": 0.2114, "num_input_tokens_seen": 12143552, "step": 5615 }, { "epoch": 0.9168026101141925, "grad_norm": 0.16130702197551727, "learning_rate": 0.0004583197389885808, "loss": 0.1329, "num_input_tokens_seen": 12152992, "step": 5620 }, { "epoch": 0.9176182707993474, "grad_norm": 0.23771022260189056, "learning_rate": 0.0004587275693311582, "loss": 0.1475, "num_input_tokens_seen": 12163488, "step": 5625 }, { "epoch": 0.9184339314845025, "grad_norm": 0.2382916957139969, "learning_rate": 0.00045913539967373573, "loss": 0.0997, "num_input_tokens_seen": 12174976, "step": 5630 }, { "epoch": 0.9192495921696574, "grad_norm": 0.3301408588886261, "learning_rate": 0.0004595432300163132, "loss": 0.0692, "num_input_tokens_seen": 12185984, "step": 5635 }, { "epoch": 0.9200652528548124, "grad_norm": 0.7716155648231506, "learning_rate": 0.00045995106035889074, "loss": 0.2212, "num_input_tokens_seen": 12196768, "step": 5640 }, { "epoch": 0.9208809135399674, "grad_norm": 0.11273916065692902, "learning_rate": 0.0004603588907014682, "loss": 0.0561, "num_input_tokens_seen": 12209120, "step": 5645 }, { "epoch": 0.9216965742251223, "grad_norm": 0.07322194427251816, "learning_rate": 0.0004607667210440457, "loss": 0.0937, "num_input_tokens_seen": 12220768, "step": 5650 }, { "epoch": 0.9225122349102773, "grad_norm": 0.1523495316505432, "learning_rate": 0.00046117455138662317, "loss": 0.047, "num_input_tokens_seen": 12230720, "step": 5655 }, { "epoch": 0.9233278955954323, "grad_norm": 0.6982433795928955, "learning_rate": 0.00046158238172920064, "loss": 0.1112, "num_input_tokens_seen": 12242880, "step": 5660 }, { "epoch": 0.9241435562805873, "grad_norm": 0.05697758495807648, "learning_rate": 0.0004619902120717782, "loss": 0.0375, "num_input_tokens_seen": 12253760, "step": 5665 }, { "epoch": 0.9249592169657422, "grad_norm": 0.05087430030107498, "learning_rate": 0.0004623980424143556, "loss": 0.1231, "num_input_tokens_seen": 12263712, "step": 5670 }, { "epoch": 0.9257748776508973, "grad_norm": 0.04257337376475334, "learning_rate": 0.00046280587275693313, "loss": 0.145, "num_input_tokens_seen": 12274560, "step": 5675 }, { "epoch": 0.9265905383360522, "grad_norm": 0.3906922936439514, "learning_rate": 0.0004632137030995106, "loss": 0.159, "num_input_tokens_seen": 12285024, "step": 5680 }, { "epoch": 0.9274061990212071, "grad_norm": 0.21243229508399963, "learning_rate": 0.00046362153344208813, "loss": 0.1451, "num_input_tokens_seen": 12295712, "step": 5685 }, { "epoch": 0.9282218597063622, "grad_norm": 0.46332404017448425, "learning_rate": 0.0004640293637846656, "loss": 0.1199, "num_input_tokens_seen": 12305024, "step": 5690 }, { "epoch": 0.9290375203915171, "grad_norm": 0.20840857923030853, "learning_rate": 0.00046443719412724303, "loss": 0.1054, "num_input_tokens_seen": 12315360, "step": 5695 }, { "epoch": 0.9298531810766721, "grad_norm": 0.39110761880874634, "learning_rate": 0.00046484502446982056, "loss": 0.0757, "num_input_tokens_seen": 12326464, "step": 5700 }, { "epoch": 0.9306688417618271, "grad_norm": 0.3890259861946106, "learning_rate": 0.00046525285481239804, "loss": 0.0945, "num_input_tokens_seen": 12337600, "step": 5705 }, { "epoch": 0.9314845024469821, "grad_norm": 0.19361518323421478, "learning_rate": 0.00046566068515497557, "loss": 0.2785, "num_input_tokens_seen": 12347904, "step": 5710 }, { "epoch": 0.932300163132137, "grad_norm": 0.15051914751529694, "learning_rate": 0.00046606851549755305, "loss": 0.1508, "num_input_tokens_seen": 12359712, "step": 5715 }, { "epoch": 0.933115823817292, "grad_norm": 0.12826696038246155, "learning_rate": 0.00046647634584013047, "loss": 0.1295, "num_input_tokens_seen": 12369952, "step": 5720 }, { "epoch": 0.933931484502447, "grad_norm": 0.20581692457199097, "learning_rate": 0.000466884176182708, "loss": 0.1429, "num_input_tokens_seen": 12380384, "step": 5725 }, { "epoch": 0.9347471451876019, "grad_norm": 0.0365518257021904, "learning_rate": 0.0004672920065252855, "loss": 0.1297, "num_input_tokens_seen": 12391904, "step": 5730 }, { "epoch": 0.935562805872757, "grad_norm": 0.04881615564227104, "learning_rate": 0.000467699836867863, "loss": 0.0857, "num_input_tokens_seen": 12402944, "step": 5735 }, { "epoch": 0.9363784665579119, "grad_norm": 0.26118239760398865, "learning_rate": 0.0004681076672104405, "loss": 0.2689, "num_input_tokens_seen": 12414432, "step": 5740 }, { "epoch": 0.9371941272430668, "grad_norm": 0.18851740658283234, "learning_rate": 0.00046851549755301796, "loss": 0.2165, "num_input_tokens_seen": 12425024, "step": 5745 }, { "epoch": 0.9380097879282219, "grad_norm": 0.5728957056999207, "learning_rate": 0.00046892332789559544, "loss": 0.2883, "num_input_tokens_seen": 12435232, "step": 5750 }, { "epoch": 0.9388254486133768, "grad_norm": 0.13215041160583496, "learning_rate": 0.0004693311582381729, "loss": 0.1216, "num_input_tokens_seen": 12445984, "step": 5755 }, { "epoch": 0.9396411092985318, "grad_norm": 0.2319490611553192, "learning_rate": 0.00046973898858075044, "loss": 0.1076, "num_input_tokens_seen": 12456416, "step": 5760 }, { "epoch": 0.9404567699836868, "grad_norm": 0.16915561258792877, "learning_rate": 0.00047014681892332787, "loss": 0.2011, "num_input_tokens_seen": 12466656, "step": 5765 }, { "epoch": 0.9412724306688418, "grad_norm": 0.09713034331798553, "learning_rate": 0.0004705546492659054, "loss": 0.0862, "num_input_tokens_seen": 12477696, "step": 5770 }, { "epoch": 0.9420880913539967, "grad_norm": 0.7483925223350525, "learning_rate": 0.0004709624796084829, "loss": 0.2129, "num_input_tokens_seen": 12488896, "step": 5775 }, { "epoch": 0.9429037520391517, "grad_norm": 0.5672449469566345, "learning_rate": 0.0004713703099510604, "loss": 0.1227, "num_input_tokens_seen": 12499040, "step": 5780 }, { "epoch": 0.9437194127243067, "grad_norm": 0.6804947257041931, "learning_rate": 0.0004717781402936379, "loss": 0.1309, "num_input_tokens_seen": 12509760, "step": 5785 }, { "epoch": 0.9445350734094616, "grad_norm": 0.24877053499221802, "learning_rate": 0.0004721859706362153, "loss": 0.1011, "num_input_tokens_seen": 12521440, "step": 5790 }, { "epoch": 0.9453507340946167, "grad_norm": 0.5431973338127136, "learning_rate": 0.00047259380097879283, "loss": 0.2528, "num_input_tokens_seen": 12531840, "step": 5795 }, { "epoch": 0.9461663947797716, "grad_norm": 0.1831020563840866, "learning_rate": 0.0004730016313213703, "loss": 0.2248, "num_input_tokens_seen": 12543136, "step": 5800 }, { "epoch": 0.9469820554649266, "grad_norm": 0.33074751496315, "learning_rate": 0.00047340946166394784, "loss": 0.2949, "num_input_tokens_seen": 12554240, "step": 5805 }, { "epoch": 0.9477977161500816, "grad_norm": 0.10430339723825455, "learning_rate": 0.0004738172920065253, "loss": 0.1823, "num_input_tokens_seen": 12564640, "step": 5810 }, { "epoch": 0.9486133768352365, "grad_norm": 0.17456623911857605, "learning_rate": 0.00047422512234910274, "loss": 0.2085, "num_input_tokens_seen": 12574976, "step": 5815 }, { "epoch": 0.9494290375203915, "grad_norm": 0.15890410542488098, "learning_rate": 0.00047463295269168027, "loss": 0.1755, "num_input_tokens_seen": 12585632, "step": 5820 }, { "epoch": 0.9502446982055465, "grad_norm": 0.4678882658481598, "learning_rate": 0.00047504078303425775, "loss": 0.2283, "num_input_tokens_seen": 12596096, "step": 5825 }, { "epoch": 0.9510603588907015, "grad_norm": 0.10083203762769699, "learning_rate": 0.0004754486133768353, "loss": 0.0988, "num_input_tokens_seen": 12607712, "step": 5830 }, { "epoch": 0.9518760195758564, "grad_norm": 0.054982513189315796, "learning_rate": 0.0004758564437194127, "loss": 0.0766, "num_input_tokens_seen": 12618272, "step": 5835 }, { "epoch": 0.9526916802610114, "grad_norm": 0.08998695760965347, "learning_rate": 0.00047626427406199023, "loss": 0.0792, "num_input_tokens_seen": 12629856, "step": 5840 }, { "epoch": 0.9535073409461664, "grad_norm": 0.19011932611465454, "learning_rate": 0.0004766721044045677, "loss": 0.1799, "num_input_tokens_seen": 12639456, "step": 5845 }, { "epoch": 0.9543230016313213, "grad_norm": 0.5274903774261475, "learning_rate": 0.0004770799347471452, "loss": 0.1551, "num_input_tokens_seen": 12649088, "step": 5850 }, { "epoch": 0.9551386623164764, "grad_norm": 0.065676748752594, "learning_rate": 0.0004774877650897227, "loss": 0.0948, "num_input_tokens_seen": 12659680, "step": 5855 }, { "epoch": 0.9559543230016313, "grad_norm": 0.25573477149009705, "learning_rate": 0.00047789559543230014, "loss": 0.1348, "num_input_tokens_seen": 12670848, "step": 5860 }, { "epoch": 0.9567699836867863, "grad_norm": 0.11124901473522186, "learning_rate": 0.00047830342577487767, "loss": 0.0595, "num_input_tokens_seen": 12681568, "step": 5865 }, { "epoch": 0.9575856443719413, "grad_norm": 0.12954628467559814, "learning_rate": 0.00047871125611745514, "loss": 0.2106, "num_input_tokens_seen": 12691616, "step": 5870 }, { "epoch": 0.9584013050570962, "grad_norm": 0.1547648310661316, "learning_rate": 0.0004791190864600327, "loss": 0.1245, "num_input_tokens_seen": 12701728, "step": 5875 }, { "epoch": 0.9592169657422512, "grad_norm": 0.18160319328308105, "learning_rate": 0.00047952691680261015, "loss": 0.1439, "num_input_tokens_seen": 12712480, "step": 5880 }, { "epoch": 0.9600326264274062, "grad_norm": 0.12603481113910675, "learning_rate": 0.00047993474714518757, "loss": 0.2585, "num_input_tokens_seen": 12724544, "step": 5885 }, { "epoch": 0.9608482871125612, "grad_norm": 0.12232507020235062, "learning_rate": 0.0004803425774877651, "loss": 0.0721, "num_input_tokens_seen": 12735552, "step": 5890 }, { "epoch": 0.9616639477977161, "grad_norm": 0.8182587623596191, "learning_rate": 0.0004807504078303426, "loss": 0.2742, "num_input_tokens_seen": 12747072, "step": 5895 }, { "epoch": 0.9624796084828712, "grad_norm": 0.2686678171157837, "learning_rate": 0.0004811582381729201, "loss": 0.2193, "num_input_tokens_seen": 12757504, "step": 5900 }, { "epoch": 0.9632952691680261, "grad_norm": 0.10753358900547028, "learning_rate": 0.00048156606851549753, "loss": 0.1594, "num_input_tokens_seen": 12768544, "step": 5905 }, { "epoch": 0.964110929853181, "grad_norm": 0.24111835658550262, "learning_rate": 0.00048197389885807506, "loss": 0.069, "num_input_tokens_seen": 12778464, "step": 5910 }, { "epoch": 0.9649265905383361, "grad_norm": 0.1870940625667572, "learning_rate": 0.00048238172920065254, "loss": 0.1426, "num_input_tokens_seen": 12788352, "step": 5915 }, { "epoch": 0.965742251223491, "grad_norm": 0.08543268591165543, "learning_rate": 0.00048278955954323, "loss": 0.1739, "num_input_tokens_seen": 12797344, "step": 5920 }, { "epoch": 0.966557911908646, "grad_norm": 0.17103126645088196, "learning_rate": 0.00048319738988580755, "loss": 0.1457, "num_input_tokens_seen": 12807264, "step": 5925 }, { "epoch": 0.967373572593801, "grad_norm": 0.037070151418447495, "learning_rate": 0.00048360522022838497, "loss": 0.1057, "num_input_tokens_seen": 12817408, "step": 5930 }, { "epoch": 0.968189233278956, "grad_norm": 0.34985965490341187, "learning_rate": 0.0004840130505709625, "loss": 0.1548, "num_input_tokens_seen": 12827616, "step": 5935 }, { "epoch": 0.9690048939641109, "grad_norm": 0.42751795053482056, "learning_rate": 0.00048442088091354, "loss": 0.181, "num_input_tokens_seen": 12837696, "step": 5940 }, { "epoch": 0.9698205546492659, "grad_norm": 0.12427811324596405, "learning_rate": 0.00048482871125611745, "loss": 0.0716, "num_input_tokens_seen": 12848928, "step": 5945 }, { "epoch": 0.9706362153344209, "grad_norm": 0.15662795305252075, "learning_rate": 0.000485236541598695, "loss": 0.2212, "num_input_tokens_seen": 12859264, "step": 5950 }, { "epoch": 0.9714518760195758, "grad_norm": 0.13130423426628113, "learning_rate": 0.0004856443719412724, "loss": 0.1212, "num_input_tokens_seen": 12870336, "step": 5955 }, { "epoch": 0.9722675367047309, "grad_norm": 0.5949497222900391, "learning_rate": 0.00048605220228384994, "loss": 0.308, "num_input_tokens_seen": 12881280, "step": 5960 }, { "epoch": 0.9730831973898858, "grad_norm": 0.24126264452934265, "learning_rate": 0.0004864600326264274, "loss": 0.1112, "num_input_tokens_seen": 12892544, "step": 5965 }, { "epoch": 0.9738988580750407, "grad_norm": 0.18657752871513367, "learning_rate": 0.00048686786296900494, "loss": 0.1458, "num_input_tokens_seen": 12903680, "step": 5970 }, { "epoch": 0.9747145187601958, "grad_norm": 0.2861552834510803, "learning_rate": 0.00048727569331158237, "loss": 0.1987, "num_input_tokens_seen": 12914400, "step": 5975 }, { "epoch": 0.9755301794453507, "grad_norm": 0.11565116047859192, "learning_rate": 0.00048768352365415984, "loss": 0.1782, "num_input_tokens_seen": 12925920, "step": 5980 }, { "epoch": 0.9763458401305057, "grad_norm": 0.09785725176334381, "learning_rate": 0.00048809135399673737, "loss": 0.1816, "num_input_tokens_seen": 12936640, "step": 5985 }, { "epoch": 0.9771615008156607, "grad_norm": 0.4810200035572052, "learning_rate": 0.0004884991843393148, "loss": 0.177, "num_input_tokens_seen": 12948160, "step": 5990 }, { "epoch": 0.9779771615008157, "grad_norm": 0.16747428476810455, "learning_rate": 0.0004889070146818923, "loss": 0.1374, "num_input_tokens_seen": 12959296, "step": 5995 }, { "epoch": 0.9787928221859706, "grad_norm": 0.12989793717861176, "learning_rate": 0.0004893148450244698, "loss": 0.1107, "num_input_tokens_seen": 12970624, "step": 6000 }, { "epoch": 0.9796084828711256, "grad_norm": 0.08526536822319031, "learning_rate": 0.0004897226753670474, "loss": 0.2367, "num_input_tokens_seen": 12981664, "step": 6005 }, { "epoch": 0.9804241435562806, "grad_norm": 0.15927903354167938, "learning_rate": 0.0004901305057096248, "loss": 0.2274, "num_input_tokens_seen": 12991328, "step": 6010 }, { "epoch": 0.9812398042414355, "grad_norm": 0.15431569516658783, "learning_rate": 0.0004905383360522022, "loss": 0.046, "num_input_tokens_seen": 13001632, "step": 6015 }, { "epoch": 0.9820554649265906, "grad_norm": 0.6077148914337158, "learning_rate": 0.0004909461663947798, "loss": 0.1611, "num_input_tokens_seen": 13012256, "step": 6020 }, { "epoch": 0.9828711256117455, "grad_norm": 0.36628520488739014, "learning_rate": 0.0004913539967373573, "loss": 0.1385, "num_input_tokens_seen": 13024480, "step": 6025 }, { "epoch": 0.9836867862969005, "grad_norm": 0.052024632692337036, "learning_rate": 0.0004917618270799348, "loss": 0.0512, "num_input_tokens_seen": 13034432, "step": 6030 }, { "epoch": 0.9845024469820555, "grad_norm": 0.08394888788461685, "learning_rate": 0.0004921696574225122, "loss": 0.163, "num_input_tokens_seen": 13045824, "step": 6035 }, { "epoch": 0.9853181076672104, "grad_norm": 0.21150889992713928, "learning_rate": 0.0004925774877650897, "loss": 0.1086, "num_input_tokens_seen": 13057536, "step": 6040 }, { "epoch": 0.9861337683523654, "grad_norm": 0.09327172487974167, "learning_rate": 0.0004929853181076672, "loss": 0.1038, "num_input_tokens_seen": 13069120, "step": 6045 }, { "epoch": 0.9869494290375204, "grad_norm": 0.2453579306602478, "learning_rate": 0.0004933931484502447, "loss": 0.0449, "num_input_tokens_seen": 13080192, "step": 6050 }, { "epoch": 0.9877650897226754, "grad_norm": 0.04804931953549385, "learning_rate": 0.0004938009787928223, "loss": 0.1282, "num_input_tokens_seen": 13091040, "step": 6055 }, { "epoch": 0.9885807504078303, "grad_norm": 0.01482168585062027, "learning_rate": 0.0004942088091353996, "loss": 0.0451, "num_input_tokens_seen": 13102976, "step": 6060 }, { "epoch": 0.9893964110929854, "grad_norm": 0.13239941000938416, "learning_rate": 0.0004946166394779772, "loss": 0.3, "num_input_tokens_seen": 13114688, "step": 6065 }, { "epoch": 0.9902120717781403, "grad_norm": 0.10520962625741959, "learning_rate": 0.0004950244698205547, "loss": 0.0628, "num_input_tokens_seen": 13126432, "step": 6070 }, { "epoch": 0.9910277324632952, "grad_norm": 0.21707811951637268, "learning_rate": 0.0004954323001631322, "loss": 0.1958, "num_input_tokens_seen": 13137344, "step": 6075 }, { "epoch": 0.9918433931484503, "grad_norm": 0.11555507779121399, "learning_rate": 0.0004958401305057096, "loss": 0.1382, "num_input_tokens_seen": 13148608, "step": 6080 }, { "epoch": 0.9926590538336052, "grad_norm": 0.5580472350120544, "learning_rate": 0.0004962479608482871, "loss": 0.1019, "num_input_tokens_seen": 13159840, "step": 6085 }, { "epoch": 0.9934747145187602, "grad_norm": 0.4591805934906006, "learning_rate": 0.0004966557911908646, "loss": 0.1872, "num_input_tokens_seen": 13169856, "step": 6090 }, { "epoch": 0.9942903752039152, "grad_norm": 0.3633251190185547, "learning_rate": 0.0004970636215334421, "loss": 0.1258, "num_input_tokens_seen": 13181728, "step": 6095 }, { "epoch": 0.9951060358890701, "grad_norm": 0.22325961291790009, "learning_rate": 0.0004974714518760197, "loss": 0.1266, "num_input_tokens_seen": 13193760, "step": 6100 }, { "epoch": 0.9959216965742251, "grad_norm": 0.09583016484975815, "learning_rate": 0.000497879282218597, "loss": 0.0894, "num_input_tokens_seen": 13205280, "step": 6105 }, { "epoch": 0.9967373572593801, "grad_norm": 0.8459984064102173, "learning_rate": 0.0004982871125611745, "loss": 0.2232, "num_input_tokens_seen": 13215584, "step": 6110 }, { "epoch": 0.9975530179445351, "grad_norm": 0.24980811774730682, "learning_rate": 0.0004986949429037521, "loss": 0.1846, "num_input_tokens_seen": 13226496, "step": 6115 }, { "epoch": 0.99836867862969, "grad_norm": 0.04283153638243675, "learning_rate": 0.0004991027732463296, "loss": 0.1192, "num_input_tokens_seen": 13236992, "step": 6120 }, { "epoch": 0.9991843393148451, "grad_norm": 0.5137706995010376, "learning_rate": 0.000499510603588907, "loss": 0.2165, "num_input_tokens_seen": 13247424, "step": 6125 }, { "epoch": 1.0, "grad_norm": 0.39642074704170227, "learning_rate": 0.0004999184339314845, "loss": 0.1799, "num_input_tokens_seen": 13256608, "step": 6130 }, { "epoch": 1.0, "eval_loss": 0.1398823857307434, "eval_runtime": 103.3977, "eval_samples_per_second": 26.355, "eval_steps_per_second": 6.596, "num_input_tokens_seen": 13256608, "step": 6130 }, { "epoch": 1.000815660685155, "grad_norm": 0.11160384863615036, "learning_rate": 0.000500326264274062, "loss": 0.1257, "num_input_tokens_seen": 13268544, "step": 6135 }, { "epoch": 1.0016313213703099, "grad_norm": 0.23075062036514282, "learning_rate": 0.0005007340946166395, "loss": 0.1004, "num_input_tokens_seen": 13279712, "step": 6140 }, { "epoch": 1.002446982055465, "grad_norm": 0.18675552308559418, "learning_rate": 0.0005011419249592169, "loss": 0.1524, "num_input_tokens_seen": 13289344, "step": 6145 }, { "epoch": 1.00326264274062, "grad_norm": 0.2517428398132324, "learning_rate": 0.0005015497553017944, "loss": 0.1255, "num_input_tokens_seen": 13300032, "step": 6150 }, { "epoch": 1.004078303425775, "grad_norm": 0.18479129672050476, "learning_rate": 0.000501957585644372, "loss": 0.0235, "num_input_tokens_seen": 13310176, "step": 6155 }, { "epoch": 1.0048939641109298, "grad_norm": 0.09900052845478058, "learning_rate": 0.0005023654159869494, "loss": 0.1479, "num_input_tokens_seen": 13320256, "step": 6160 }, { "epoch": 1.0057096247960848, "grad_norm": 0.2950863838195801, "learning_rate": 0.000502773246329527, "loss": 0.2501, "num_input_tokens_seen": 13330560, "step": 6165 }, { "epoch": 1.0065252854812399, "grad_norm": 0.0240387711673975, "learning_rate": 0.0005031810766721044, "loss": 0.1392, "num_input_tokens_seen": 13341120, "step": 6170 }, { "epoch": 1.0073409461663947, "grad_norm": 0.08495213836431503, "learning_rate": 0.0005035889070146818, "loss": 0.1341, "num_input_tokens_seen": 13352416, "step": 6175 }, { "epoch": 1.0081566068515497, "grad_norm": 0.23577626049518585, "learning_rate": 0.0005039967373572594, "loss": 0.148, "num_input_tokens_seen": 13363392, "step": 6180 }, { "epoch": 1.0089722675367048, "grad_norm": 0.10276693105697632, "learning_rate": 0.0005044045676998369, "loss": 0.1369, "num_input_tokens_seen": 13373056, "step": 6185 }, { "epoch": 1.0097879282218598, "grad_norm": 0.08981849253177643, "learning_rate": 0.0005048123980424144, "loss": 0.1007, "num_input_tokens_seen": 13384288, "step": 6190 }, { "epoch": 1.0106035889070146, "grad_norm": 0.1138916164636612, "learning_rate": 0.0005052202283849918, "loss": 0.2059, "num_input_tokens_seen": 13394208, "step": 6195 }, { "epoch": 1.0114192495921697, "grad_norm": 0.1942625790834427, "learning_rate": 0.0005056280587275693, "loss": 0.0768, "num_input_tokens_seen": 13404576, "step": 6200 }, { "epoch": 1.0122349102773247, "grad_norm": 0.09755151718854904, "learning_rate": 0.0005060358890701469, "loss": 0.1849, "num_input_tokens_seen": 13415296, "step": 6205 }, { "epoch": 1.0130505709624795, "grad_norm": 0.06156241148710251, "learning_rate": 0.0005064437194127242, "loss": 0.1902, "num_input_tokens_seen": 13426112, "step": 6210 }, { "epoch": 1.0138662316476346, "grad_norm": 0.10398396849632263, "learning_rate": 0.0005068515497553018, "loss": 0.2426, "num_input_tokens_seen": 13436416, "step": 6215 }, { "epoch": 1.0146818923327896, "grad_norm": 0.20540976524353027, "learning_rate": 0.0005072593800978793, "loss": 0.2627, "num_input_tokens_seen": 13446624, "step": 6220 }, { "epoch": 1.0154975530179446, "grad_norm": 0.15334442257881165, "learning_rate": 0.0005076672104404568, "loss": 0.1482, "num_input_tokens_seen": 13457440, "step": 6225 }, { "epoch": 1.0163132137030995, "grad_norm": 0.34090787172317505, "learning_rate": 0.0005080750407830343, "loss": 0.1779, "num_input_tokens_seen": 13468800, "step": 6230 }, { "epoch": 1.0171288743882545, "grad_norm": 0.15002760291099548, "learning_rate": 0.0005084828711256117, "loss": 0.0614, "num_input_tokens_seen": 13480512, "step": 6235 }, { "epoch": 1.0179445350734095, "grad_norm": 0.2431073635816574, "learning_rate": 0.0005088907014681893, "loss": 0.133, "num_input_tokens_seen": 13491232, "step": 6240 }, { "epoch": 1.0187601957585644, "grad_norm": 0.14370810985565186, "learning_rate": 0.0005092985318107667, "loss": 0.0539, "num_input_tokens_seen": 13501920, "step": 6245 }, { "epoch": 1.0195758564437194, "grad_norm": 0.07355213165283203, "learning_rate": 0.0005097063621533442, "loss": 0.0994, "num_input_tokens_seen": 13510016, "step": 6250 }, { "epoch": 1.0203915171288744, "grad_norm": 0.2566724419593811, "learning_rate": 0.0005101141924959218, "loss": 0.2044, "num_input_tokens_seen": 13521280, "step": 6255 }, { "epoch": 1.0212071778140293, "grad_norm": 0.08346903324127197, "learning_rate": 0.0005105220228384992, "loss": 0.1983, "num_input_tokens_seen": 13532032, "step": 6260 }, { "epoch": 1.0220228384991843, "grad_norm": 0.28570470213890076, "learning_rate": 0.0005109298531810767, "loss": 0.1358, "num_input_tokens_seen": 13541568, "step": 6265 }, { "epoch": 1.0228384991843393, "grad_norm": 0.41711992025375366, "learning_rate": 0.0005113376835236542, "loss": 0.1472, "num_input_tokens_seen": 13552064, "step": 6270 }, { "epoch": 1.0236541598694944, "grad_norm": 0.10956079512834549, "learning_rate": 0.0005117455138662317, "loss": 0.1441, "num_input_tokens_seen": 13560928, "step": 6275 }, { "epoch": 1.0244698205546492, "grad_norm": 0.13065916299819946, "learning_rate": 0.0005121533442088091, "loss": 0.0472, "num_input_tokens_seen": 13572096, "step": 6280 }, { "epoch": 1.0252854812398042, "grad_norm": 0.5665595531463623, "learning_rate": 0.0005125611745513866, "loss": 0.1187, "num_input_tokens_seen": 13584448, "step": 6285 }, { "epoch": 1.0261011419249593, "grad_norm": 0.18895985186100006, "learning_rate": 0.0005129690048939642, "loss": 0.1032, "num_input_tokens_seen": 13596352, "step": 6290 }, { "epoch": 1.026916802610114, "grad_norm": 0.1723942905664444, "learning_rate": 0.0005133768352365417, "loss": 0.1629, "num_input_tokens_seen": 13606688, "step": 6295 }, { "epoch": 1.0277324632952691, "grad_norm": 0.5321813821792603, "learning_rate": 0.000513784665579119, "loss": 0.1551, "num_input_tokens_seen": 13618400, "step": 6300 }, { "epoch": 1.0285481239804242, "grad_norm": 0.014531032182276249, "learning_rate": 0.0005141924959216966, "loss": 0.0871, "num_input_tokens_seen": 13629376, "step": 6305 }, { "epoch": 1.0293637846655792, "grad_norm": 0.11209902167320251, "learning_rate": 0.0005146003262642741, "loss": 0.0521, "num_input_tokens_seen": 13640896, "step": 6310 }, { "epoch": 1.030179445350734, "grad_norm": 0.2015608698129654, "learning_rate": 0.0005150081566068515, "loss": 0.143, "num_input_tokens_seen": 13652128, "step": 6315 }, { "epoch": 1.030995106035889, "grad_norm": 0.09540046751499176, "learning_rate": 0.000515415986949429, "loss": 0.019, "num_input_tokens_seen": 13662048, "step": 6320 }, { "epoch": 1.031810766721044, "grad_norm": 0.579704225063324, "learning_rate": 0.0005158238172920065, "loss": 0.1019, "num_input_tokens_seen": 13672160, "step": 6325 }, { "epoch": 1.032626427406199, "grad_norm": 0.1127452477812767, "learning_rate": 0.0005162316476345841, "loss": 0.0413, "num_input_tokens_seen": 13682400, "step": 6330 }, { "epoch": 1.033442088091354, "grad_norm": 0.6191928386688232, "learning_rate": 0.0005166394779771615, "loss": 0.3745, "num_input_tokens_seen": 13692608, "step": 6335 }, { "epoch": 1.034257748776509, "grad_norm": 0.23995548486709595, "learning_rate": 0.000517047308319739, "loss": 0.2638, "num_input_tokens_seen": 13703008, "step": 6340 }, { "epoch": 1.035073409461664, "grad_norm": 0.05872446298599243, "learning_rate": 0.0005174551386623165, "loss": 0.1718, "num_input_tokens_seen": 13714496, "step": 6345 }, { "epoch": 1.0358890701468189, "grad_norm": 0.1604536920785904, "learning_rate": 0.0005178629690048939, "loss": 0.1295, "num_input_tokens_seen": 13725120, "step": 6350 }, { "epoch": 1.036704730831974, "grad_norm": 0.1121409609913826, "learning_rate": 0.0005182707993474715, "loss": 0.049, "num_input_tokens_seen": 13734560, "step": 6355 }, { "epoch": 1.037520391517129, "grad_norm": 0.1634875386953354, "learning_rate": 0.000518678629690049, "loss": 0.1074, "num_input_tokens_seen": 13745248, "step": 6360 }, { "epoch": 1.0383360522022838, "grad_norm": 0.20854128897190094, "learning_rate": 0.0005190864600326263, "loss": 0.1673, "num_input_tokens_seen": 13755584, "step": 6365 }, { "epoch": 1.0391517128874388, "grad_norm": 0.08223803341388702, "learning_rate": 0.0005194942903752039, "loss": 0.0784, "num_input_tokens_seen": 13767456, "step": 6370 }, { "epoch": 1.0399673735725938, "grad_norm": 0.3701033294200897, "learning_rate": 0.0005199021207177814, "loss": 0.2891, "num_input_tokens_seen": 13778176, "step": 6375 }, { "epoch": 1.0407830342577489, "grad_norm": 0.042385704815387726, "learning_rate": 0.000520309951060359, "loss": 0.0874, "num_input_tokens_seen": 13788576, "step": 6380 }, { "epoch": 1.0415986949429037, "grad_norm": 0.04089699313044548, "learning_rate": 0.0005207177814029364, "loss": 0.1409, "num_input_tokens_seen": 13798432, "step": 6385 }, { "epoch": 1.0424143556280587, "grad_norm": 0.1607908308506012, "learning_rate": 0.0005211256117455138, "loss": 0.1119, "num_input_tokens_seen": 13809408, "step": 6390 }, { "epoch": 1.0432300163132138, "grad_norm": 0.2969203591346741, "learning_rate": 0.0005215334420880914, "loss": 0.1187, "num_input_tokens_seen": 13821152, "step": 6395 }, { "epoch": 1.0440456769983686, "grad_norm": 0.41446980834007263, "learning_rate": 0.0005219412724306688, "loss": 0.254, "num_input_tokens_seen": 13832000, "step": 6400 }, { "epoch": 1.0448613376835236, "grad_norm": 0.1455860584974289, "learning_rate": 0.0005223491027732464, "loss": 0.1489, "num_input_tokens_seen": 13842208, "step": 6405 }, { "epoch": 1.0456769983686787, "grad_norm": 0.3523208796977997, "learning_rate": 0.0005227569331158238, "loss": 0.1161, "num_input_tokens_seen": 13852256, "step": 6410 }, { "epoch": 1.0464926590538337, "grad_norm": 0.14199502766132355, "learning_rate": 0.0005231647634584013, "loss": 0.0589, "num_input_tokens_seen": 13864064, "step": 6415 }, { "epoch": 1.0473083197389885, "grad_norm": 0.09805894643068314, "learning_rate": 0.0005235725938009788, "loss": 0.1358, "num_input_tokens_seen": 13874368, "step": 6420 }, { "epoch": 1.0481239804241436, "grad_norm": 0.2984837293624878, "learning_rate": 0.0005239804241435563, "loss": 0.0741, "num_input_tokens_seen": 13885984, "step": 6425 }, { "epoch": 1.0489396411092986, "grad_norm": 0.03853955864906311, "learning_rate": 0.0005243882544861339, "loss": 0.0288, "num_input_tokens_seen": 13896000, "step": 6430 }, { "epoch": 1.0497553017944534, "grad_norm": 0.010538961738348007, "learning_rate": 0.0005247960848287112, "loss": 0.2701, "num_input_tokens_seen": 13906848, "step": 6435 }, { "epoch": 1.0505709624796085, "grad_norm": 0.04958980530500412, "learning_rate": 0.0005252039151712887, "loss": 0.1826, "num_input_tokens_seen": 13916864, "step": 6440 }, { "epoch": 1.0513866231647635, "grad_norm": 0.10969033092260361, "learning_rate": 0.0005256117455138663, "loss": 0.1165, "num_input_tokens_seen": 13926592, "step": 6445 }, { "epoch": 1.0522022838499185, "grad_norm": 0.08144375681877136, "learning_rate": 0.0005260195758564438, "loss": 0.0327, "num_input_tokens_seen": 13937120, "step": 6450 }, { "epoch": 1.0530179445350734, "grad_norm": 0.399844229221344, "learning_rate": 0.0005264274061990211, "loss": 0.1252, "num_input_tokens_seen": 13949120, "step": 6455 }, { "epoch": 1.0538336052202284, "grad_norm": 0.28734290599823, "learning_rate": 0.0005268352365415987, "loss": 0.0848, "num_input_tokens_seen": 13959264, "step": 6460 }, { "epoch": 1.0546492659053834, "grad_norm": 0.4673934876918793, "learning_rate": 0.0005272430668841762, "loss": 0.1745, "num_input_tokens_seen": 13970304, "step": 6465 }, { "epoch": 1.0554649265905383, "grad_norm": 0.15282650291919708, "learning_rate": 0.0005276508972267537, "loss": 0.1067, "num_input_tokens_seen": 13980800, "step": 6470 }, { "epoch": 1.0562805872756933, "grad_norm": 0.11581412702798843, "learning_rate": 0.0005280587275693311, "loss": 0.0991, "num_input_tokens_seen": 13991040, "step": 6475 }, { "epoch": 1.0570962479608483, "grad_norm": 0.023843001574277878, "learning_rate": 0.0005284665579119086, "loss": 0.1086, "num_input_tokens_seen": 14001216, "step": 6480 }, { "epoch": 1.0579119086460032, "grad_norm": 0.027091102674603462, "learning_rate": 0.0005288743882544862, "loss": 0.0865, "num_input_tokens_seen": 14012672, "step": 6485 }, { "epoch": 1.0587275693311582, "grad_norm": 0.051208313554525375, "learning_rate": 0.0005292822185970636, "loss": 0.119, "num_input_tokens_seen": 14023872, "step": 6490 }, { "epoch": 1.0595432300163132, "grad_norm": 0.061948299407958984, "learning_rate": 0.0005296900489396412, "loss": 0.0983, "num_input_tokens_seen": 14035136, "step": 6495 }, { "epoch": 1.0603588907014683, "grad_norm": 0.5826138257980347, "learning_rate": 0.0005300978792822186, "loss": 0.1936, "num_input_tokens_seen": 14046240, "step": 6500 }, { "epoch": 1.061174551386623, "grad_norm": 0.025136886164546013, "learning_rate": 0.000530505709624796, "loss": 0.0611, "num_input_tokens_seen": 14056512, "step": 6505 }, { "epoch": 1.0619902120717781, "grad_norm": 0.3028709888458252, "learning_rate": 0.0005309135399673736, "loss": 0.1867, "num_input_tokens_seen": 14068000, "step": 6510 }, { "epoch": 1.0628058727569332, "grad_norm": 0.04569149389863014, "learning_rate": 0.0005313213703099511, "loss": 0.1099, "num_input_tokens_seen": 14078336, "step": 6515 }, { "epoch": 1.0636215334420882, "grad_norm": 0.48968860507011414, "learning_rate": 0.0005317292006525287, "loss": 0.1447, "num_input_tokens_seen": 14089632, "step": 6520 }, { "epoch": 1.064437194127243, "grad_norm": 0.06684679538011551, "learning_rate": 0.000532137030995106, "loss": 0.1157, "num_input_tokens_seen": 14099104, "step": 6525 }, { "epoch": 1.065252854812398, "grad_norm": 0.5205869078636169, "learning_rate": 0.0005325448613376835, "loss": 0.1275, "num_input_tokens_seen": 14109408, "step": 6530 }, { "epoch": 1.066068515497553, "grad_norm": 0.07348957657814026, "learning_rate": 0.0005329526916802611, "loss": 0.2033, "num_input_tokens_seen": 14119616, "step": 6535 }, { "epoch": 1.066884176182708, "grad_norm": 0.12224670499563217, "learning_rate": 0.0005333605220228385, "loss": 0.0612, "num_input_tokens_seen": 14130112, "step": 6540 }, { "epoch": 1.067699836867863, "grad_norm": 0.03272275999188423, "learning_rate": 0.000533768352365416, "loss": 0.0502, "num_input_tokens_seen": 14140672, "step": 6545 }, { "epoch": 1.068515497553018, "grad_norm": 0.4673844277858734, "learning_rate": 0.0005341761827079935, "loss": 0.1442, "num_input_tokens_seen": 14152128, "step": 6550 }, { "epoch": 1.0693311582381728, "grad_norm": 0.12728549540042877, "learning_rate": 0.000534584013050571, "loss": 0.0641, "num_input_tokens_seen": 14164096, "step": 6555 }, { "epoch": 1.0701468189233279, "grad_norm": 0.48025938868522644, "learning_rate": 0.0005349918433931485, "loss": 0.1081, "num_input_tokens_seen": 14174624, "step": 6560 }, { "epoch": 1.070962479608483, "grad_norm": 0.06705355644226074, "learning_rate": 0.0005353996737357259, "loss": 0.1736, "num_input_tokens_seen": 14186784, "step": 6565 }, { "epoch": 1.071778140293638, "grad_norm": 0.043406713753938675, "learning_rate": 0.0005358075040783035, "loss": 0.0601, "num_input_tokens_seen": 14196064, "step": 6570 }, { "epoch": 1.0725938009787928, "grad_norm": 0.3859328031539917, "learning_rate": 0.0005362153344208809, "loss": 0.0934, "num_input_tokens_seen": 14207232, "step": 6575 }, { "epoch": 1.0734094616639478, "grad_norm": 0.015095439739525318, "learning_rate": 0.0005366231647634584, "loss": 0.0868, "num_input_tokens_seen": 14217920, "step": 6580 }, { "epoch": 1.0742251223491028, "grad_norm": 0.8526964783668518, "learning_rate": 0.000537030995106036, "loss": 0.2762, "num_input_tokens_seen": 14228896, "step": 6585 }, { "epoch": 1.0750407830342577, "grad_norm": 0.045150063931941986, "learning_rate": 0.0005374388254486133, "loss": 0.2702, "num_input_tokens_seen": 14239712, "step": 6590 }, { "epoch": 1.0758564437194127, "grad_norm": 0.4150654077529907, "learning_rate": 0.0005378466557911908, "loss": 0.1477, "num_input_tokens_seen": 14249760, "step": 6595 }, { "epoch": 1.0766721044045677, "grad_norm": 0.23948174715042114, "learning_rate": 0.0005382544861337684, "loss": 0.1037, "num_input_tokens_seen": 14259552, "step": 6600 }, { "epoch": 1.0774877650897228, "grad_norm": 0.2983197271823883, "learning_rate": 0.0005386623164763459, "loss": 0.1766, "num_input_tokens_seen": 14270880, "step": 6605 }, { "epoch": 1.0783034257748776, "grad_norm": 0.08725722879171371, "learning_rate": 0.0005390701468189233, "loss": 0.0909, "num_input_tokens_seen": 14281856, "step": 6610 }, { "epoch": 1.0791190864600326, "grad_norm": 0.17761683464050293, "learning_rate": 0.0005394779771615008, "loss": 0.133, "num_input_tokens_seen": 14293536, "step": 6615 }, { "epoch": 1.0799347471451877, "grad_norm": 0.4125170111656189, "learning_rate": 0.0005398858075040783, "loss": 0.1336, "num_input_tokens_seen": 14303968, "step": 6620 }, { "epoch": 1.0807504078303425, "grad_norm": 0.1900666207075119, "learning_rate": 0.0005402936378466558, "loss": 0.1574, "num_input_tokens_seen": 14315296, "step": 6625 }, { "epoch": 1.0815660685154975, "grad_norm": 0.28400659561157227, "learning_rate": 0.0005407014681892332, "loss": 0.1111, "num_input_tokens_seen": 14326528, "step": 6630 }, { "epoch": 1.0823817292006526, "grad_norm": 0.04672158136963844, "learning_rate": 0.0005411092985318108, "loss": 0.0592, "num_input_tokens_seen": 14337248, "step": 6635 }, { "epoch": 1.0831973898858076, "grad_norm": 0.4267369508743286, "learning_rate": 0.0005415171288743883, "loss": 0.0632, "num_input_tokens_seen": 14347392, "step": 6640 }, { "epoch": 1.0840130505709624, "grad_norm": 0.14387011528015137, "learning_rate": 0.0005419249592169657, "loss": 0.032, "num_input_tokens_seen": 14359008, "step": 6645 }, { "epoch": 1.0848287112561175, "grad_norm": 0.2627013325691223, "learning_rate": 0.0005423327895595433, "loss": 0.1407, "num_input_tokens_seen": 14370464, "step": 6650 }, { "epoch": 1.0856443719412725, "grad_norm": 0.7487311363220215, "learning_rate": 0.0005427406199021207, "loss": 0.1933, "num_input_tokens_seen": 14382304, "step": 6655 }, { "epoch": 1.0864600326264273, "grad_norm": 0.2547962963581085, "learning_rate": 0.0005431484502446982, "loss": 0.1144, "num_input_tokens_seen": 14392288, "step": 6660 }, { "epoch": 1.0872756933115824, "grad_norm": 0.5006257891654968, "learning_rate": 0.0005435562805872757, "loss": 0.117, "num_input_tokens_seen": 14403200, "step": 6665 }, { "epoch": 1.0880913539967374, "grad_norm": 0.10363472998142242, "learning_rate": 0.0005439641109298532, "loss": 0.253, "num_input_tokens_seen": 14414944, "step": 6670 }, { "epoch": 1.0889070146818924, "grad_norm": 0.7664408087730408, "learning_rate": 0.0005443719412724307, "loss": 0.1454, "num_input_tokens_seen": 14426336, "step": 6675 }, { "epoch": 1.0897226753670473, "grad_norm": 0.11776512116193771, "learning_rate": 0.0005447797716150081, "loss": 0.0555, "num_input_tokens_seen": 14437760, "step": 6680 }, { "epoch": 1.0905383360522023, "grad_norm": 0.13282710313796997, "learning_rate": 0.0005451876019575857, "loss": 0.1458, "num_input_tokens_seen": 14449120, "step": 6685 }, { "epoch": 1.0913539967373573, "grad_norm": 0.3319006562232971, "learning_rate": 0.0005455954323001632, "loss": 0.1105, "num_input_tokens_seen": 14460544, "step": 6690 }, { "epoch": 1.0921696574225122, "grad_norm": 0.24569126963615417, "learning_rate": 0.0005460032626427405, "loss": 0.1459, "num_input_tokens_seen": 14472608, "step": 6695 }, { "epoch": 1.0929853181076672, "grad_norm": 0.12690795958042145, "learning_rate": 0.0005464110929853181, "loss": 0.111, "num_input_tokens_seen": 14482176, "step": 6700 }, { "epoch": 1.0938009787928222, "grad_norm": 0.24353094398975372, "learning_rate": 0.0005468189233278956, "loss": 0.0743, "num_input_tokens_seen": 14492768, "step": 6705 }, { "epoch": 1.094616639477977, "grad_norm": 0.3623286187648773, "learning_rate": 0.0005472267536704732, "loss": 0.0524, "num_input_tokens_seen": 14505024, "step": 6710 }, { "epoch": 1.095432300163132, "grad_norm": 0.5367739200592041, "learning_rate": 0.0005476345840130506, "loss": 0.0445, "num_input_tokens_seen": 14515744, "step": 6715 }, { "epoch": 1.0962479608482871, "grad_norm": 0.2832864820957184, "learning_rate": 0.000548042414355628, "loss": 0.0292, "num_input_tokens_seen": 14526784, "step": 6720 }, { "epoch": 1.0970636215334422, "grad_norm": 0.2894558906555176, "learning_rate": 0.0005484502446982056, "loss": 0.0782, "num_input_tokens_seen": 14536960, "step": 6725 }, { "epoch": 1.097879282218597, "grad_norm": 0.07219669222831726, "learning_rate": 0.000548858075040783, "loss": 0.0159, "num_input_tokens_seen": 14549344, "step": 6730 }, { "epoch": 1.098694942903752, "grad_norm": 0.04819165915250778, "learning_rate": 0.0005492659053833605, "loss": 0.0529, "num_input_tokens_seen": 14560224, "step": 6735 }, { "epoch": 1.099510603588907, "grad_norm": 0.08893704414367676, "learning_rate": 0.000549673735725938, "loss": 0.288, "num_input_tokens_seen": 14569440, "step": 6740 }, { "epoch": 1.100326264274062, "grad_norm": 0.5870652794837952, "learning_rate": 0.0005500815660685155, "loss": 0.1119, "num_input_tokens_seen": 14579648, "step": 6745 }, { "epoch": 1.101141924959217, "grad_norm": 0.03373727947473526, "learning_rate": 0.000550489396411093, "loss": 0.0401, "num_input_tokens_seen": 14590528, "step": 6750 }, { "epoch": 1.101957585644372, "grad_norm": 0.10977505147457123, "learning_rate": 0.0005508972267536705, "loss": 0.1862, "num_input_tokens_seen": 14600864, "step": 6755 }, { "epoch": 1.102773246329527, "grad_norm": 1.0331140756607056, "learning_rate": 0.000551305057096248, "loss": 0.186, "num_input_tokens_seen": 14612096, "step": 6760 }, { "epoch": 1.1035889070146818, "grad_norm": 0.3095877170562744, "learning_rate": 0.0005517128874388254, "loss": 0.059, "num_input_tokens_seen": 14623392, "step": 6765 }, { "epoch": 1.1044045676998369, "grad_norm": 0.19286680221557617, "learning_rate": 0.0005521207177814029, "loss": 0.0896, "num_input_tokens_seen": 14634016, "step": 6770 }, { "epoch": 1.105220228384992, "grad_norm": 0.22141914069652557, "learning_rate": 0.0005525285481239805, "loss": 0.2321, "num_input_tokens_seen": 14645216, "step": 6775 }, { "epoch": 1.1060358890701467, "grad_norm": 0.48826268315315247, "learning_rate": 0.000552936378466558, "loss": 0.2204, "num_input_tokens_seen": 14656576, "step": 6780 }, { "epoch": 1.1068515497553018, "grad_norm": 0.13619111478328705, "learning_rate": 0.0005533442088091353, "loss": 0.0632, "num_input_tokens_seen": 14667168, "step": 6785 }, { "epoch": 1.1076672104404568, "grad_norm": 0.5033240914344788, "learning_rate": 0.0005537520391517129, "loss": 0.199, "num_input_tokens_seen": 14678016, "step": 6790 }, { "epoch": 1.1084828711256118, "grad_norm": 0.08465439826250076, "learning_rate": 0.0005541598694942904, "loss": 0.0638, "num_input_tokens_seen": 14688960, "step": 6795 }, { "epoch": 1.1092985318107667, "grad_norm": 0.2793862521648407, "learning_rate": 0.0005545676998368679, "loss": 0.1144, "num_input_tokens_seen": 14700480, "step": 6800 }, { "epoch": 1.1101141924959217, "grad_norm": 0.4719338119029999, "learning_rate": 0.0005549755301794454, "loss": 0.1082, "num_input_tokens_seen": 14711104, "step": 6805 }, { "epoch": 1.1109298531810767, "grad_norm": 0.12284115701913834, "learning_rate": 0.0005553833605220228, "loss": 0.0624, "num_input_tokens_seen": 14722176, "step": 6810 }, { "epoch": 1.1117455138662315, "grad_norm": 0.1608635038137436, "learning_rate": 0.0005557911908646003, "loss": 0.2192, "num_input_tokens_seen": 14733600, "step": 6815 }, { "epoch": 1.1125611745513866, "grad_norm": 0.1508731096982956, "learning_rate": 0.0005561990212071778, "loss": 0.1064, "num_input_tokens_seen": 14744352, "step": 6820 }, { "epoch": 1.1133768352365416, "grad_norm": 0.2695777714252472, "learning_rate": 0.0005566068515497554, "loss": 0.1752, "num_input_tokens_seen": 14755040, "step": 6825 }, { "epoch": 1.1141924959216967, "grad_norm": 0.39584463834762573, "learning_rate": 0.0005570146818923328, "loss": 0.1059, "num_input_tokens_seen": 14766112, "step": 6830 }, { "epoch": 1.1150081566068515, "grad_norm": 0.11579836159944534, "learning_rate": 0.0005574225122349102, "loss": 0.0326, "num_input_tokens_seen": 14776192, "step": 6835 }, { "epoch": 1.1158238172920065, "grad_norm": 0.013752263970673084, "learning_rate": 0.0005578303425774878, "loss": 0.0487, "num_input_tokens_seen": 14786240, "step": 6840 }, { "epoch": 1.1166394779771616, "grad_norm": 0.7901402115821838, "learning_rate": 0.0005582381729200653, "loss": 0.1736, "num_input_tokens_seen": 14798048, "step": 6845 }, { "epoch": 1.1174551386623164, "grad_norm": 0.37859535217285156, "learning_rate": 0.0005586460032626428, "loss": 0.2425, "num_input_tokens_seen": 14808928, "step": 6850 }, { "epoch": 1.1182707993474714, "grad_norm": 0.21328096091747284, "learning_rate": 0.0005590538336052202, "loss": 0.1267, "num_input_tokens_seen": 14820384, "step": 6855 }, { "epoch": 1.1190864600326265, "grad_norm": 0.11841249465942383, "learning_rate": 0.0005594616639477977, "loss": 0.0939, "num_input_tokens_seen": 14831168, "step": 6860 }, { "epoch": 1.1199021207177815, "grad_norm": 0.5339210629463196, "learning_rate": 0.0005598694942903753, "loss": 0.3598, "num_input_tokens_seen": 14842112, "step": 6865 }, { "epoch": 1.1207177814029363, "grad_norm": 0.2543666958808899, "learning_rate": 0.0005602773246329527, "loss": 0.1049, "num_input_tokens_seen": 14852096, "step": 6870 }, { "epoch": 1.1215334420880914, "grad_norm": 0.06985493749380112, "learning_rate": 0.0005606851549755301, "loss": 0.131, "num_input_tokens_seen": 14862240, "step": 6875 }, { "epoch": 1.1223491027732464, "grad_norm": 0.1255275011062622, "learning_rate": 0.0005610929853181077, "loss": 0.0738, "num_input_tokens_seen": 14872448, "step": 6880 }, { "epoch": 1.1231647634584012, "grad_norm": 0.06869206577539444, "learning_rate": 0.0005615008156606851, "loss": 0.1663, "num_input_tokens_seen": 14884000, "step": 6885 }, { "epoch": 1.1239804241435563, "grad_norm": 0.20043504238128662, "learning_rate": 0.0005619086460032627, "loss": 0.2337, "num_input_tokens_seen": 14895552, "step": 6890 }, { "epoch": 1.1247960848287113, "grad_norm": 0.1891249418258667, "learning_rate": 0.0005623164763458401, "loss": 0.0987, "num_input_tokens_seen": 14904896, "step": 6895 }, { "epoch": 1.1256117455138663, "grad_norm": 0.05826778709888458, "learning_rate": 0.0005627243066884176, "loss": 0.1315, "num_input_tokens_seen": 14917184, "step": 6900 }, { "epoch": 1.1264274061990212, "grad_norm": 0.15955139696598053, "learning_rate": 0.0005631321370309951, "loss": 0.1136, "num_input_tokens_seen": 14928064, "step": 6905 }, { "epoch": 1.1272430668841762, "grad_norm": 0.19315731525421143, "learning_rate": 0.0005635399673735726, "loss": 0.0669, "num_input_tokens_seen": 14939008, "step": 6910 }, { "epoch": 1.1280587275693312, "grad_norm": 0.24480724334716797, "learning_rate": 0.0005639477977161502, "loss": 0.1961, "num_input_tokens_seen": 14950464, "step": 6915 }, { "epoch": 1.128874388254486, "grad_norm": 0.4513859450817108, "learning_rate": 0.0005643556280587275, "loss": 0.1275, "num_input_tokens_seen": 14960736, "step": 6920 }, { "epoch": 1.129690048939641, "grad_norm": 0.4298337399959564, "learning_rate": 0.000564763458401305, "loss": 0.2811, "num_input_tokens_seen": 14972128, "step": 6925 }, { "epoch": 1.1305057096247961, "grad_norm": 0.20439466834068298, "learning_rate": 0.0005651712887438826, "loss": 0.1381, "num_input_tokens_seen": 14982400, "step": 6930 }, { "epoch": 1.131321370309951, "grad_norm": 0.25064659118652344, "learning_rate": 0.0005655791190864601, "loss": 0.1285, "num_input_tokens_seen": 14992320, "step": 6935 }, { "epoch": 1.132137030995106, "grad_norm": 0.42060086131095886, "learning_rate": 0.0005659869494290375, "loss": 0.2396, "num_input_tokens_seen": 15003520, "step": 6940 }, { "epoch": 1.132952691680261, "grad_norm": 0.044980768114328384, "learning_rate": 0.000566394779771615, "loss": 0.0594, "num_input_tokens_seen": 15014656, "step": 6945 }, { "epoch": 1.133768352365416, "grad_norm": 0.07436365634202957, "learning_rate": 0.0005668026101141925, "loss": 0.0619, "num_input_tokens_seen": 15025408, "step": 6950 }, { "epoch": 1.1345840130505709, "grad_norm": 0.47422927618026733, "learning_rate": 0.00056721044045677, "loss": 0.1253, "num_input_tokens_seen": 15036672, "step": 6955 }, { "epoch": 1.135399673735726, "grad_norm": 0.29630041122436523, "learning_rate": 0.0005676182707993474, "loss": 0.1597, "num_input_tokens_seen": 15047136, "step": 6960 }, { "epoch": 1.136215334420881, "grad_norm": 0.16470947861671448, "learning_rate": 0.000568026101141925, "loss": 0.134, "num_input_tokens_seen": 15058976, "step": 6965 }, { "epoch": 1.137030995106036, "grad_norm": 0.28484222292900085, "learning_rate": 0.0005684339314845025, "loss": 0.1313, "num_input_tokens_seen": 15069312, "step": 6970 }, { "epoch": 1.1378466557911908, "grad_norm": 0.03445442393422127, "learning_rate": 0.0005688417618270799, "loss": 0.0632, "num_input_tokens_seen": 15080832, "step": 6975 }, { "epoch": 1.1386623164763459, "grad_norm": 0.05196113511919975, "learning_rate": 0.0005692495921696575, "loss": 0.0529, "num_input_tokens_seen": 15092288, "step": 6980 }, { "epoch": 1.139477977161501, "grad_norm": 0.0929301381111145, "learning_rate": 0.0005696574225122349, "loss": 0.1277, "num_input_tokens_seen": 15102432, "step": 6985 }, { "epoch": 1.1402936378466557, "grad_norm": 0.11680106818675995, "learning_rate": 0.0005700652528548124, "loss": 0.1757, "num_input_tokens_seen": 15112256, "step": 6990 }, { "epoch": 1.1411092985318108, "grad_norm": 0.19014939665794373, "learning_rate": 0.0005704730831973899, "loss": 0.2152, "num_input_tokens_seen": 15123808, "step": 6995 }, { "epoch": 1.1419249592169658, "grad_norm": 0.1377144306898117, "learning_rate": 0.0005708809135399674, "loss": 0.0885, "num_input_tokens_seen": 15133184, "step": 7000 }, { "epoch": 1.1427406199021206, "grad_norm": 0.5549722909927368, "learning_rate": 0.000571288743882545, "loss": 0.2517, "num_input_tokens_seen": 15144704, "step": 7005 }, { "epoch": 1.1435562805872757, "grad_norm": 0.24352173507213593, "learning_rate": 0.0005716965742251223, "loss": 0.1248, "num_input_tokens_seen": 15155456, "step": 7010 }, { "epoch": 1.1443719412724307, "grad_norm": 0.11551082879304886, "learning_rate": 0.0005721044045676999, "loss": 0.0901, "num_input_tokens_seen": 15165696, "step": 7015 }, { "epoch": 1.1451876019575857, "grad_norm": 0.278942734003067, "learning_rate": 0.0005725122349102774, "loss": 0.1059, "num_input_tokens_seen": 15176320, "step": 7020 }, { "epoch": 1.1460032626427405, "grad_norm": 0.07334481179714203, "learning_rate": 0.0005729200652528548, "loss": 0.2575, "num_input_tokens_seen": 15187488, "step": 7025 }, { "epoch": 1.1468189233278956, "grad_norm": 0.11356133967638016, "learning_rate": 0.0005733278955954323, "loss": 0.0769, "num_input_tokens_seen": 15198528, "step": 7030 }, { "epoch": 1.1476345840130506, "grad_norm": 0.31060490012168884, "learning_rate": 0.0005737357259380098, "loss": 0.1982, "num_input_tokens_seen": 15209376, "step": 7035 }, { "epoch": 1.1484502446982057, "grad_norm": 0.10984083265066147, "learning_rate": 0.0005741435562805873, "loss": 0.0814, "num_input_tokens_seen": 15220128, "step": 7040 }, { "epoch": 1.1492659053833605, "grad_norm": 0.08647423982620239, "learning_rate": 0.0005745513866231648, "loss": 0.2202, "num_input_tokens_seen": 15231552, "step": 7045 }, { "epoch": 1.1500815660685155, "grad_norm": 0.32499903440475464, "learning_rate": 0.0005749592169657422, "loss": 0.2185, "num_input_tokens_seen": 15241664, "step": 7050 }, { "epoch": 1.1508972267536706, "grad_norm": 0.374118834733963, "learning_rate": 0.0005753670473083198, "loss": 0.112, "num_input_tokens_seen": 15253536, "step": 7055 }, { "epoch": 1.1517128874388254, "grad_norm": 0.4085756242275238, "learning_rate": 0.0005757748776508972, "loss": 0.1844, "num_input_tokens_seen": 15263872, "step": 7060 }, { "epoch": 1.1525285481239804, "grad_norm": 0.08538088202476501, "learning_rate": 0.0005761827079934747, "loss": 0.0696, "num_input_tokens_seen": 15275264, "step": 7065 }, { "epoch": 1.1533442088091355, "grad_norm": 0.37617227435112, "learning_rate": 0.0005765905383360523, "loss": 0.2125, "num_input_tokens_seen": 15286848, "step": 7070 }, { "epoch": 1.1541598694942903, "grad_norm": 0.15897266566753387, "learning_rate": 0.0005769983686786296, "loss": 0.2071, "num_input_tokens_seen": 15297472, "step": 7075 }, { "epoch": 1.1549755301794453, "grad_norm": 0.22269028425216675, "learning_rate": 0.0005774061990212072, "loss": 0.0877, "num_input_tokens_seen": 15308224, "step": 7080 }, { "epoch": 1.1557911908646004, "grad_norm": 0.2394644170999527, "learning_rate": 0.0005778140293637847, "loss": 0.2109, "num_input_tokens_seen": 15319904, "step": 7085 }, { "epoch": 1.1566068515497552, "grad_norm": 0.2885288596153259, "learning_rate": 0.0005782218597063622, "loss": 0.1601, "num_input_tokens_seen": 15329216, "step": 7090 }, { "epoch": 1.1574225122349102, "grad_norm": 0.062343530356884, "learning_rate": 0.0005786296900489396, "loss": 0.1017, "num_input_tokens_seen": 15340288, "step": 7095 }, { "epoch": 1.1582381729200653, "grad_norm": 0.30550217628479004, "learning_rate": 0.0005790375203915171, "loss": 0.1342, "num_input_tokens_seen": 15351648, "step": 7100 }, { "epoch": 1.1590538336052203, "grad_norm": 0.022208329290151596, "learning_rate": 0.0005794453507340947, "loss": 0.1672, "num_input_tokens_seen": 15363488, "step": 7105 }, { "epoch": 1.1598694942903751, "grad_norm": 0.06365969777107239, "learning_rate": 0.0005798531810766721, "loss": 0.1039, "num_input_tokens_seen": 15373792, "step": 7110 }, { "epoch": 1.1606851549755302, "grad_norm": 0.30073338747024536, "learning_rate": 0.0005802610114192495, "loss": 0.1141, "num_input_tokens_seen": 15385120, "step": 7115 }, { "epoch": 1.1615008156606852, "grad_norm": 0.18821366131305695, "learning_rate": 0.0005806688417618271, "loss": 0.1638, "num_input_tokens_seen": 15395264, "step": 7120 }, { "epoch": 1.1623164763458402, "grad_norm": 0.3371999263763428, "learning_rate": 0.0005810766721044046, "loss": 0.2989, "num_input_tokens_seen": 15405696, "step": 7125 }, { "epoch": 1.163132137030995, "grad_norm": 0.3867366909980774, "learning_rate": 0.0005814845024469821, "loss": 0.0885, "num_input_tokens_seen": 15416928, "step": 7130 }, { "epoch": 1.16394779771615, "grad_norm": 0.48066145181655884, "learning_rate": 0.0005818923327895596, "loss": 0.2603, "num_input_tokens_seen": 15427456, "step": 7135 }, { "epoch": 1.1647634584013051, "grad_norm": 0.11666944622993469, "learning_rate": 0.000582300163132137, "loss": 0.0898, "num_input_tokens_seen": 15438336, "step": 7140 }, { "epoch": 1.16557911908646, "grad_norm": 0.15594574809074402, "learning_rate": 0.0005827079934747145, "loss": 0.2309, "num_input_tokens_seen": 15449088, "step": 7145 }, { "epoch": 1.166394779771615, "grad_norm": 0.2819611132144928, "learning_rate": 0.000583115823817292, "loss": 0.1877, "num_input_tokens_seen": 15460256, "step": 7150 }, { "epoch": 1.16721044045677, "grad_norm": 0.1727602779865265, "learning_rate": 0.0005835236541598696, "loss": 0.0619, "num_input_tokens_seen": 15470560, "step": 7155 }, { "epoch": 1.1680261011419248, "grad_norm": 0.19856815040111542, "learning_rate": 0.000583931484502447, "loss": 0.1137, "num_input_tokens_seen": 15482112, "step": 7160 }, { "epoch": 1.1688417618270799, "grad_norm": 0.034124091267585754, "learning_rate": 0.0005843393148450244, "loss": 0.074, "num_input_tokens_seen": 15493312, "step": 7165 }, { "epoch": 1.169657422512235, "grad_norm": 0.05666607618331909, "learning_rate": 0.000584747145187602, "loss": 0.1201, "num_input_tokens_seen": 15503424, "step": 7170 }, { "epoch": 1.17047308319739, "grad_norm": 0.05534420162439346, "learning_rate": 0.0005851549755301795, "loss": 0.1046, "num_input_tokens_seen": 15514400, "step": 7175 }, { "epoch": 1.1712887438825448, "grad_norm": 0.014158536680042744, "learning_rate": 0.0005855628058727568, "loss": 0.0807, "num_input_tokens_seen": 15525760, "step": 7180 }, { "epoch": 1.1721044045676998, "grad_norm": 0.44807717204093933, "learning_rate": 0.0005859706362153344, "loss": 0.179, "num_input_tokens_seen": 15536480, "step": 7185 }, { "epoch": 1.1729200652528549, "grad_norm": 0.06306151300668716, "learning_rate": 0.0005863784665579119, "loss": 0.129, "num_input_tokens_seen": 15546720, "step": 7190 }, { "epoch": 1.17373572593801, "grad_norm": 0.37018027901649475, "learning_rate": 0.0005867862969004895, "loss": 0.0538, "num_input_tokens_seen": 15556704, "step": 7195 }, { "epoch": 1.1745513866231647, "grad_norm": 0.03759448975324631, "learning_rate": 0.0005871941272430669, "loss": 0.0436, "num_input_tokens_seen": 15567776, "step": 7200 }, { "epoch": 1.1753670473083198, "grad_norm": 0.07634948194026947, "learning_rate": 0.0005876019575856443, "loss": 0.1187, "num_input_tokens_seen": 15578976, "step": 7205 }, { "epoch": 1.1761827079934748, "grad_norm": 0.23645785450935364, "learning_rate": 0.0005880097879282219, "loss": 0.1072, "num_input_tokens_seen": 15590816, "step": 7210 }, { "epoch": 1.1769983686786296, "grad_norm": 0.02600211650133133, "learning_rate": 0.0005884176182707993, "loss": 0.0452, "num_input_tokens_seen": 15601120, "step": 7215 }, { "epoch": 1.1778140293637847, "grad_norm": 0.062186602503061295, "learning_rate": 0.0005888254486133769, "loss": 0.1061, "num_input_tokens_seen": 15610816, "step": 7220 }, { "epoch": 1.1786296900489397, "grad_norm": 0.015181249938905239, "learning_rate": 0.0005892332789559544, "loss": 0.1301, "num_input_tokens_seen": 15621664, "step": 7225 }, { "epoch": 1.1794453507340945, "grad_norm": 0.06925445795059204, "learning_rate": 0.0005896411092985318, "loss": 0.1576, "num_input_tokens_seen": 15632768, "step": 7230 }, { "epoch": 1.1802610114192496, "grad_norm": 0.008259747177362442, "learning_rate": 0.0005900489396411093, "loss": 0.1392, "num_input_tokens_seen": 15643872, "step": 7235 }, { "epoch": 1.1810766721044046, "grad_norm": 0.12475767731666565, "learning_rate": 0.0005904567699836868, "loss": 0.0776, "num_input_tokens_seen": 15655200, "step": 7240 }, { "epoch": 1.1818923327895596, "grad_norm": 0.0850701779127121, "learning_rate": 0.0005908646003262644, "loss": 0.2109, "num_input_tokens_seen": 15664512, "step": 7245 }, { "epoch": 1.1827079934747144, "grad_norm": 0.7401888370513916, "learning_rate": 0.0005912724306688417, "loss": 0.2835, "num_input_tokens_seen": 15675072, "step": 7250 }, { "epoch": 1.1835236541598695, "grad_norm": 0.1192028746008873, "learning_rate": 0.0005916802610114192, "loss": 0.0668, "num_input_tokens_seen": 15685152, "step": 7255 }, { "epoch": 1.1843393148450245, "grad_norm": 0.1462324559688568, "learning_rate": 0.0005920880913539968, "loss": 0.1597, "num_input_tokens_seen": 15696064, "step": 7260 }, { "epoch": 1.1851549755301796, "grad_norm": 0.21007820963859558, "learning_rate": 0.0005924959216965743, "loss": 0.1158, "num_input_tokens_seen": 15707296, "step": 7265 }, { "epoch": 1.1859706362153344, "grad_norm": 0.13377432525157928, "learning_rate": 0.0005929037520391517, "loss": 0.095, "num_input_tokens_seen": 15718496, "step": 7270 }, { "epoch": 1.1867862969004894, "grad_norm": 0.23630847036838531, "learning_rate": 0.0005933115823817292, "loss": 0.1359, "num_input_tokens_seen": 15730080, "step": 7275 }, { "epoch": 1.1876019575856445, "grad_norm": 0.05261866748332977, "learning_rate": 0.0005937194127243067, "loss": 0.0328, "num_input_tokens_seen": 15741248, "step": 7280 }, { "epoch": 1.1884176182707993, "grad_norm": 0.09066125005483627, "learning_rate": 0.0005941272430668842, "loss": 0.0561, "num_input_tokens_seen": 15752736, "step": 7285 }, { "epoch": 1.1892332789559543, "grad_norm": 0.5383073687553406, "learning_rate": 0.0005945350734094617, "loss": 0.2609, "num_input_tokens_seen": 15763648, "step": 7290 }, { "epoch": 1.1900489396411094, "grad_norm": 0.2469019889831543, "learning_rate": 0.0005949429037520392, "loss": 0.2318, "num_input_tokens_seen": 15774016, "step": 7295 }, { "epoch": 1.1908646003262642, "grad_norm": 0.10927631705999374, "learning_rate": 0.0005953507340946166, "loss": 0.1849, "num_input_tokens_seen": 15783232, "step": 7300 }, { "epoch": 1.1916802610114192, "grad_norm": 0.14146047830581665, "learning_rate": 0.0005957585644371941, "loss": 0.226, "num_input_tokens_seen": 15794304, "step": 7305 }, { "epoch": 1.1924959216965743, "grad_norm": 0.1634165346622467, "learning_rate": 0.0005961663947797717, "loss": 0.0985, "num_input_tokens_seen": 15804960, "step": 7310 }, { "epoch": 1.1933115823817293, "grad_norm": 0.5072509050369263, "learning_rate": 0.0005965742251223491, "loss": 0.2341, "num_input_tokens_seen": 15815808, "step": 7315 }, { "epoch": 1.1941272430668841, "grad_norm": 0.2947240471839905, "learning_rate": 0.0005969820554649265, "loss": 0.1681, "num_input_tokens_seen": 15826528, "step": 7320 }, { "epoch": 1.1949429037520392, "grad_norm": 0.34804099798202515, "learning_rate": 0.0005973898858075041, "loss": 0.1394, "num_input_tokens_seen": 15837920, "step": 7325 }, { "epoch": 1.1957585644371942, "grad_norm": 0.09488537162542343, "learning_rate": 0.0005977977161500816, "loss": 0.1016, "num_input_tokens_seen": 15849152, "step": 7330 }, { "epoch": 1.196574225122349, "grad_norm": 0.012553676031529903, "learning_rate": 0.000598205546492659, "loss": 0.0936, "num_input_tokens_seen": 15861152, "step": 7335 }, { "epoch": 1.197389885807504, "grad_norm": 0.1785455197095871, "learning_rate": 0.0005986133768352365, "loss": 0.1073, "num_input_tokens_seen": 15872352, "step": 7340 }, { "epoch": 1.198205546492659, "grad_norm": 0.09887787699699402, "learning_rate": 0.000599021207177814, "loss": 0.0886, "num_input_tokens_seen": 15881984, "step": 7345 }, { "epoch": 1.1990212071778141, "grad_norm": 0.5077795386314392, "learning_rate": 0.0005994290375203916, "loss": 0.1468, "num_input_tokens_seen": 15892800, "step": 7350 }, { "epoch": 1.199836867862969, "grad_norm": 0.015568344853818417, "learning_rate": 0.000599836867862969, "loss": 0.2378, "num_input_tokens_seen": 15902912, "step": 7355 }, { "epoch": 1.200652528548124, "grad_norm": 0.02570744976401329, "learning_rate": 0.0006002446982055465, "loss": 0.0258, "num_input_tokens_seen": 15914656, "step": 7360 }, { "epoch": 1.201468189233279, "grad_norm": 0.01589066907763481, "learning_rate": 0.000600652528548124, "loss": 0.0614, "num_input_tokens_seen": 15924960, "step": 7365 }, { "epoch": 1.2022838499184338, "grad_norm": 0.5855007171630859, "learning_rate": 0.0006010603588907014, "loss": 0.22, "num_input_tokens_seen": 15935808, "step": 7370 }, { "epoch": 1.2030995106035889, "grad_norm": 0.19280865788459778, "learning_rate": 0.000601468189233279, "loss": 0.1349, "num_input_tokens_seen": 15945184, "step": 7375 }, { "epoch": 1.203915171288744, "grad_norm": 0.18036264181137085, "learning_rate": 0.0006018760195758564, "loss": 0.0681, "num_input_tokens_seen": 15956000, "step": 7380 }, { "epoch": 1.2047308319738987, "grad_norm": 0.27529698610305786, "learning_rate": 0.000602283849918434, "loss": 0.1275, "num_input_tokens_seen": 15967712, "step": 7385 }, { "epoch": 1.2055464926590538, "grad_norm": 0.6813198924064636, "learning_rate": 0.0006026916802610114, "loss": 0.2154, "num_input_tokens_seen": 15977312, "step": 7390 }, { "epoch": 1.2063621533442088, "grad_norm": 0.6599311232566833, "learning_rate": 0.0006030995106035889, "loss": 0.1632, "num_input_tokens_seen": 15986048, "step": 7395 }, { "epoch": 1.2071778140293639, "grad_norm": 0.2776208817958832, "learning_rate": 0.0006035073409461665, "loss": 0.2229, "num_input_tokens_seen": 15998080, "step": 7400 }, { "epoch": 1.2079934747145187, "grad_norm": 0.08681552112102509, "learning_rate": 0.0006039151712887438, "loss": 0.097, "num_input_tokens_seen": 16008704, "step": 7405 }, { "epoch": 1.2088091353996737, "grad_norm": 0.13826783001422882, "learning_rate": 0.0006043230016313214, "loss": 0.0846, "num_input_tokens_seen": 16020128, "step": 7410 }, { "epoch": 1.2096247960848288, "grad_norm": 0.2015797346830368, "learning_rate": 0.0006047308319738989, "loss": 0.1688, "num_input_tokens_seen": 16031680, "step": 7415 }, { "epoch": 1.2104404567699838, "grad_norm": 0.27414339780807495, "learning_rate": 0.0006051386623164764, "loss": 0.1624, "num_input_tokens_seen": 16042400, "step": 7420 }, { "epoch": 1.2112561174551386, "grad_norm": 0.4146776795387268, "learning_rate": 0.0006055464926590538, "loss": 0.0825, "num_input_tokens_seen": 16052832, "step": 7425 }, { "epoch": 1.2120717781402937, "grad_norm": 0.5340694785118103, "learning_rate": 0.0006059543230016313, "loss": 0.2615, "num_input_tokens_seen": 16062688, "step": 7430 }, { "epoch": 1.2128874388254487, "grad_norm": 0.15199506282806396, "learning_rate": 0.0006063621533442089, "loss": 0.1463, "num_input_tokens_seen": 16073440, "step": 7435 }, { "epoch": 1.2137030995106035, "grad_norm": 0.45297807455062866, "learning_rate": 0.0006067699836867863, "loss": 0.2037, "num_input_tokens_seen": 16085120, "step": 7440 }, { "epoch": 1.2145187601957586, "grad_norm": 0.23709626495838165, "learning_rate": 0.0006071778140293637, "loss": 0.1115, "num_input_tokens_seen": 16095296, "step": 7445 }, { "epoch": 1.2153344208809136, "grad_norm": 0.3890670835971832, "learning_rate": 0.0006075856443719413, "loss": 0.2847, "num_input_tokens_seen": 16106112, "step": 7450 }, { "epoch": 1.2161500815660684, "grad_norm": 0.1603914052248001, "learning_rate": 0.0006079934747145188, "loss": 0.1126, "num_input_tokens_seen": 16115136, "step": 7455 }, { "epoch": 1.2169657422512234, "grad_norm": 0.06594960391521454, "learning_rate": 0.0006084013050570962, "loss": 0.083, "num_input_tokens_seen": 16125344, "step": 7460 }, { "epoch": 1.2177814029363785, "grad_norm": 0.12942712008953094, "learning_rate": 0.0006088091353996738, "loss": 0.1093, "num_input_tokens_seen": 16136928, "step": 7465 }, { "epoch": 1.2185970636215335, "grad_norm": 0.2689228951931, "learning_rate": 0.0006092169657422512, "loss": 0.0951, "num_input_tokens_seen": 16147392, "step": 7470 }, { "epoch": 1.2194127243066883, "grad_norm": 0.142789825797081, "learning_rate": 0.0006096247960848287, "loss": 0.117, "num_input_tokens_seen": 16158080, "step": 7475 }, { "epoch": 1.2202283849918434, "grad_norm": 0.06195086985826492, "learning_rate": 0.0006100326264274062, "loss": 0.1366, "num_input_tokens_seen": 16169568, "step": 7480 }, { "epoch": 1.2210440456769984, "grad_norm": 0.1662866175174713, "learning_rate": 0.0006104404567699837, "loss": 0.0804, "num_input_tokens_seen": 16180768, "step": 7485 }, { "epoch": 1.2218597063621535, "grad_norm": 0.21019388735294342, "learning_rate": 0.0006108482871125613, "loss": 0.0708, "num_input_tokens_seen": 16191520, "step": 7490 }, { "epoch": 1.2226753670473083, "grad_norm": 0.20374208688735962, "learning_rate": 0.0006112561174551386, "loss": 0.1439, "num_input_tokens_seen": 16202624, "step": 7495 }, { "epoch": 1.2234910277324633, "grad_norm": 0.0657731369137764, "learning_rate": 0.0006116639477977162, "loss": 0.0694, "num_input_tokens_seen": 16212192, "step": 7500 }, { "epoch": 1.2243066884176184, "grad_norm": 0.5109583735466003, "learning_rate": 0.0006120717781402937, "loss": 0.0916, "num_input_tokens_seen": 16224416, "step": 7505 }, { "epoch": 1.2251223491027732, "grad_norm": 0.04669109731912613, "learning_rate": 0.000612479608482871, "loss": 0.1749, "num_input_tokens_seen": 16235232, "step": 7510 }, { "epoch": 1.2259380097879282, "grad_norm": 0.04343324527144432, "learning_rate": 0.0006128874388254486, "loss": 0.0244, "num_input_tokens_seen": 16245696, "step": 7515 }, { "epoch": 1.2267536704730833, "grad_norm": 0.124315544962883, "learning_rate": 0.0006132952691680261, "loss": 0.1668, "num_input_tokens_seen": 16257216, "step": 7520 }, { "epoch": 1.227569331158238, "grad_norm": 0.4877174496650696, "learning_rate": 0.0006137030995106036, "loss": 0.1787, "num_input_tokens_seen": 16268896, "step": 7525 }, { "epoch": 1.2283849918433931, "grad_norm": 0.0646728053689003, "learning_rate": 0.0006141109298531811, "loss": 0.074, "num_input_tokens_seen": 16280128, "step": 7530 }, { "epoch": 1.2292006525285482, "grad_norm": 0.07480008155107498, "learning_rate": 0.0006145187601957585, "loss": 0.0611, "num_input_tokens_seen": 16292128, "step": 7535 }, { "epoch": 1.2300163132137032, "grad_norm": 0.25711551308631897, "learning_rate": 0.0006149265905383361, "loss": 0.0798, "num_input_tokens_seen": 16302048, "step": 7540 }, { "epoch": 1.230831973898858, "grad_norm": 0.48960769176483154, "learning_rate": 0.0006153344208809135, "loss": 0.2378, "num_input_tokens_seen": 16312672, "step": 7545 }, { "epoch": 1.231647634584013, "grad_norm": 0.21101155877113342, "learning_rate": 0.0006157422512234911, "loss": 0.0516, "num_input_tokens_seen": 16322528, "step": 7550 }, { "epoch": 1.232463295269168, "grad_norm": 0.6068270206451416, "learning_rate": 0.0006161500815660686, "loss": 0.3582, "num_input_tokens_seen": 16334720, "step": 7555 }, { "epoch": 1.233278955954323, "grad_norm": 0.21375367045402527, "learning_rate": 0.0006165579119086459, "loss": 0.059, "num_input_tokens_seen": 16346272, "step": 7560 }, { "epoch": 1.234094616639478, "grad_norm": 0.13177448511123657, "learning_rate": 0.0006169657422512235, "loss": 0.094, "num_input_tokens_seen": 16357280, "step": 7565 }, { "epoch": 1.234910277324633, "grad_norm": 0.2921614646911621, "learning_rate": 0.000617373572593801, "loss": 0.1991, "num_input_tokens_seen": 16368704, "step": 7570 }, { "epoch": 1.235725938009788, "grad_norm": 0.3497111201286316, "learning_rate": 0.0006177814029363786, "loss": 0.1153, "num_input_tokens_seen": 16379904, "step": 7575 }, { "epoch": 1.2365415986949428, "grad_norm": 0.056018609553575516, "learning_rate": 0.0006181892332789559, "loss": 0.129, "num_input_tokens_seen": 16390144, "step": 7580 }, { "epoch": 1.2373572593800979, "grad_norm": 0.15077915787696838, "learning_rate": 0.0006185970636215334, "loss": 0.0764, "num_input_tokens_seen": 16401024, "step": 7585 }, { "epoch": 1.238172920065253, "grad_norm": 0.13833118975162506, "learning_rate": 0.000619004893964111, "loss": 0.1811, "num_input_tokens_seen": 16410368, "step": 7590 }, { "epoch": 1.2389885807504077, "grad_norm": 0.2312982827425003, "learning_rate": 0.0006194127243066884, "loss": 0.1477, "num_input_tokens_seen": 16420000, "step": 7595 }, { "epoch": 1.2398042414355628, "grad_norm": 0.11060801148414612, "learning_rate": 0.000619820554649266, "loss": 0.08, "num_input_tokens_seen": 16430240, "step": 7600 }, { "epoch": 1.2406199021207178, "grad_norm": 0.1415390968322754, "learning_rate": 0.0006202283849918434, "loss": 0.2042, "num_input_tokens_seen": 16440384, "step": 7605 }, { "epoch": 1.2414355628058726, "grad_norm": 0.36229464411735535, "learning_rate": 0.0006206362153344209, "loss": 0.1486, "num_input_tokens_seen": 16451296, "step": 7610 }, { "epoch": 1.2422512234910277, "grad_norm": 0.13426244258880615, "learning_rate": 0.0006210440456769984, "loss": 0.0735, "num_input_tokens_seen": 16463584, "step": 7615 }, { "epoch": 1.2430668841761827, "grad_norm": 0.24502316117286682, "learning_rate": 0.0006214518760195759, "loss": 0.1116, "num_input_tokens_seen": 16474656, "step": 7620 }, { "epoch": 1.2438825448613378, "grad_norm": 0.39227384328842163, "learning_rate": 0.0006218597063621533, "loss": 0.213, "num_input_tokens_seen": 16485632, "step": 7625 }, { "epoch": 1.2446982055464926, "grad_norm": 0.24334146082401276, "learning_rate": 0.0006222675367047308, "loss": 0.1499, "num_input_tokens_seen": 16496160, "step": 7630 }, { "epoch": 1.2455138662316476, "grad_norm": 0.4462047517299652, "learning_rate": 0.0006226753670473083, "loss": 0.1664, "num_input_tokens_seen": 16506016, "step": 7635 }, { "epoch": 1.2463295269168027, "grad_norm": 0.14899565279483795, "learning_rate": 0.0006230831973898859, "loss": 0.0756, "num_input_tokens_seen": 16516832, "step": 7640 }, { "epoch": 1.2471451876019577, "grad_norm": 0.043367356061935425, "learning_rate": 0.0006234910277324634, "loss": 0.0656, "num_input_tokens_seen": 16528128, "step": 7645 }, { "epoch": 1.2479608482871125, "grad_norm": 0.019222905859351158, "learning_rate": 0.0006238988580750407, "loss": 0.039, "num_input_tokens_seen": 16539168, "step": 7650 }, { "epoch": 1.2487765089722676, "grad_norm": 0.16319647431373596, "learning_rate": 0.0006243066884176183, "loss": 0.1972, "num_input_tokens_seen": 16549280, "step": 7655 }, { "epoch": 1.2495921696574226, "grad_norm": 0.17091651260852814, "learning_rate": 0.0006247145187601958, "loss": 0.0665, "num_input_tokens_seen": 16559680, "step": 7660 }, { "epoch": 1.2504078303425774, "grad_norm": 0.055152345448732376, "learning_rate": 0.0006251223491027733, "loss": 0.0209, "num_input_tokens_seen": 16570048, "step": 7665 }, { "epoch": 1.2512234910277324, "grad_norm": 0.35145047307014465, "learning_rate": 0.0006255301794453507, "loss": 0.123, "num_input_tokens_seen": 16581344, "step": 7670 }, { "epoch": 1.2520391517128875, "grad_norm": 0.08169589936733246, "learning_rate": 0.0006259380097879282, "loss": 0.0798, "num_input_tokens_seen": 16591872, "step": 7675 }, { "epoch": 1.2528548123980423, "grad_norm": 0.20706957578659058, "learning_rate": 0.0006263458401305058, "loss": 0.264, "num_input_tokens_seen": 16602400, "step": 7680 }, { "epoch": 1.2536704730831973, "grad_norm": 0.41825392842292786, "learning_rate": 0.0006267536704730832, "loss": 0.0811, "num_input_tokens_seen": 16614272, "step": 7685 }, { "epoch": 1.2544861337683524, "grad_norm": 0.017080556601285934, "learning_rate": 0.0006271615008156607, "loss": 0.0979, "num_input_tokens_seen": 16625728, "step": 7690 }, { "epoch": 1.2553017944535072, "grad_norm": 0.30033621191978455, "learning_rate": 0.0006275693311582382, "loss": 0.0881, "num_input_tokens_seen": 16637056, "step": 7695 }, { "epoch": 1.2561174551386622, "grad_norm": 0.02396804839372635, "learning_rate": 0.0006279771615008156, "loss": 0.0527, "num_input_tokens_seen": 16648448, "step": 7700 }, { "epoch": 1.2569331158238173, "grad_norm": 0.18351727724075317, "learning_rate": 0.0006283849918433932, "loss": 0.1394, "num_input_tokens_seen": 16659200, "step": 7705 }, { "epoch": 1.2577487765089723, "grad_norm": 0.008948463946580887, "learning_rate": 0.0006287928221859707, "loss": 0.1612, "num_input_tokens_seen": 16668992, "step": 7710 }, { "epoch": 1.2585644371941274, "grad_norm": 0.2766994535923004, "learning_rate": 0.0006292006525285482, "loss": 0.068, "num_input_tokens_seen": 16678720, "step": 7715 }, { "epoch": 1.2593800978792822, "grad_norm": 0.0534847155213356, "learning_rate": 0.0006296084828711256, "loss": 0.0561, "num_input_tokens_seen": 16690656, "step": 7720 }, { "epoch": 1.2601957585644372, "grad_norm": 0.026031237095594406, "learning_rate": 0.0006300163132137031, "loss": 0.0931, "num_input_tokens_seen": 16701536, "step": 7725 }, { "epoch": 1.2610114192495923, "grad_norm": 0.0823201909661293, "learning_rate": 0.0006304241435562807, "loss": 0.1381, "num_input_tokens_seen": 16712096, "step": 7730 }, { "epoch": 1.261827079934747, "grad_norm": 0.058362994343042374, "learning_rate": 0.000630831973898858, "loss": 0.0968, "num_input_tokens_seen": 16724544, "step": 7735 }, { "epoch": 1.2626427406199021, "grad_norm": 0.30868566036224365, "learning_rate": 0.0006312398042414356, "loss": 0.0493, "num_input_tokens_seen": 16736128, "step": 7740 }, { "epoch": 1.2634584013050572, "grad_norm": 0.03179110214114189, "learning_rate": 0.0006316476345840131, "loss": 0.1463, "num_input_tokens_seen": 16746464, "step": 7745 }, { "epoch": 1.264274061990212, "grad_norm": 0.10827480256557465, "learning_rate": 0.0006320554649265906, "loss": 0.0504, "num_input_tokens_seen": 16757632, "step": 7750 }, { "epoch": 1.265089722675367, "grad_norm": 0.4119908809661865, "learning_rate": 0.000632463295269168, "loss": 0.1013, "num_input_tokens_seen": 16767296, "step": 7755 }, { "epoch": 1.265905383360522, "grad_norm": 0.0076904455199837685, "learning_rate": 0.0006328711256117455, "loss": 0.2405, "num_input_tokens_seen": 16779616, "step": 7760 }, { "epoch": 1.2667210440456769, "grad_norm": 0.21104197204113007, "learning_rate": 0.000633278955954323, "loss": 0.0598, "num_input_tokens_seen": 16789216, "step": 7765 }, { "epoch": 1.267536704730832, "grad_norm": 0.47181203961372375, "learning_rate": 0.0006336867862969005, "loss": 0.0737, "num_input_tokens_seen": 16800192, "step": 7770 }, { "epoch": 1.268352365415987, "grad_norm": 0.2550259828567505, "learning_rate": 0.000634094616639478, "loss": 0.088, "num_input_tokens_seen": 16810624, "step": 7775 }, { "epoch": 1.269168026101142, "grad_norm": 0.09608176350593567, "learning_rate": 0.0006345024469820555, "loss": 0.213, "num_input_tokens_seen": 16821280, "step": 7780 }, { "epoch": 1.269983686786297, "grad_norm": 0.026034316048026085, "learning_rate": 0.0006349102773246329, "loss": 0.1053, "num_input_tokens_seen": 16832384, "step": 7785 }, { "epoch": 1.2707993474714518, "grad_norm": 0.12941612303256989, "learning_rate": 0.0006353181076672104, "loss": 0.06, "num_input_tokens_seen": 16843360, "step": 7790 }, { "epoch": 1.2716150081566069, "grad_norm": 0.10202895104885101, "learning_rate": 0.000635725938009788, "loss": 0.1224, "num_input_tokens_seen": 16853216, "step": 7795 }, { "epoch": 1.272430668841762, "grad_norm": 0.3297278583049774, "learning_rate": 0.0006361337683523654, "loss": 0.1465, "num_input_tokens_seen": 16864704, "step": 7800 }, { "epoch": 1.2732463295269167, "grad_norm": 0.5877017974853516, "learning_rate": 0.0006365415986949429, "loss": 0.1138, "num_input_tokens_seen": 16876928, "step": 7805 }, { "epoch": 1.2740619902120718, "grad_norm": 0.4183492362499237, "learning_rate": 0.0006369494290375204, "loss": 0.1534, "num_input_tokens_seen": 16888224, "step": 7810 }, { "epoch": 1.2748776508972268, "grad_norm": 0.31846538186073303, "learning_rate": 0.0006373572593800979, "loss": 0.0547, "num_input_tokens_seen": 16899648, "step": 7815 }, { "epoch": 1.2756933115823816, "grad_norm": 0.38586241006851196, "learning_rate": 0.0006377650897226754, "loss": 0.1134, "num_input_tokens_seen": 16909472, "step": 7820 }, { "epoch": 1.2765089722675367, "grad_norm": 0.15391522645950317, "learning_rate": 0.0006381729200652528, "loss": 0.0538, "num_input_tokens_seen": 16921344, "step": 7825 }, { "epoch": 1.2773246329526917, "grad_norm": 0.2430095225572586, "learning_rate": 0.0006385807504078304, "loss": 0.1289, "num_input_tokens_seen": 16931648, "step": 7830 }, { "epoch": 1.2781402936378465, "grad_norm": 0.10504290461540222, "learning_rate": 0.0006389885807504079, "loss": 0.1039, "num_input_tokens_seen": 16942304, "step": 7835 }, { "epoch": 1.2789559543230016, "grad_norm": 0.10132510960102081, "learning_rate": 0.0006393964110929853, "loss": 0.1656, "num_input_tokens_seen": 16953056, "step": 7840 }, { "epoch": 1.2797716150081566, "grad_norm": 0.14680597186088562, "learning_rate": 0.0006398042414355628, "loss": 0.0431, "num_input_tokens_seen": 16962944, "step": 7845 }, { "epoch": 1.2805872756933117, "grad_norm": 0.04616044834256172, "learning_rate": 0.0006402120717781403, "loss": 0.1193, "num_input_tokens_seen": 16973408, "step": 7850 }, { "epoch": 1.2814029363784667, "grad_norm": 0.03202705457806587, "learning_rate": 0.0006406199021207178, "loss": 0.0673, "num_input_tokens_seen": 16983904, "step": 7855 }, { "epoch": 1.2822185970636215, "grad_norm": 0.0769016444683075, "learning_rate": 0.0006410277324632953, "loss": 0.0692, "num_input_tokens_seen": 16995584, "step": 7860 }, { "epoch": 1.2830342577487766, "grad_norm": 0.2932111620903015, "learning_rate": 0.0006414355628058727, "loss": 0.2171, "num_input_tokens_seen": 17006912, "step": 7865 }, { "epoch": 1.2838499184339316, "grad_norm": 0.0873182862997055, "learning_rate": 0.0006418433931484503, "loss": 0.2605, "num_input_tokens_seen": 17018240, "step": 7870 }, { "epoch": 1.2846655791190864, "grad_norm": 0.1578231006860733, "learning_rate": 0.0006422512234910277, "loss": 0.1284, "num_input_tokens_seen": 17028832, "step": 7875 }, { "epoch": 1.2854812398042414, "grad_norm": 0.27968907356262207, "learning_rate": 0.0006426590538336053, "loss": 0.1061, "num_input_tokens_seen": 17040704, "step": 7880 }, { "epoch": 1.2862969004893965, "grad_norm": 0.04651603102684021, "learning_rate": 0.0006430668841761828, "loss": 0.1679, "num_input_tokens_seen": 17052224, "step": 7885 }, { "epoch": 1.2871125611745513, "grad_norm": 0.5027137398719788, "learning_rate": 0.0006434747145187601, "loss": 0.4001, "num_input_tokens_seen": 17062272, "step": 7890 }, { "epoch": 1.2879282218597063, "grad_norm": 0.08611617237329483, "learning_rate": 0.0006438825448613377, "loss": 0.0792, "num_input_tokens_seen": 17073728, "step": 7895 }, { "epoch": 1.2887438825448614, "grad_norm": 0.29694536328315735, "learning_rate": 0.0006442903752039152, "loss": 0.241, "num_input_tokens_seen": 17084032, "step": 7900 }, { "epoch": 1.2895595432300162, "grad_norm": 0.023007987067103386, "learning_rate": 0.0006446982055464927, "loss": 0.2715, "num_input_tokens_seen": 17095616, "step": 7905 }, { "epoch": 1.2903752039151712, "grad_norm": 0.23659998178482056, "learning_rate": 0.0006451060358890701, "loss": 0.1089, "num_input_tokens_seen": 17105952, "step": 7910 }, { "epoch": 1.2911908646003263, "grad_norm": 0.28056174516677856, "learning_rate": 0.0006455138662316476, "loss": 0.161, "num_input_tokens_seen": 17116064, "step": 7915 }, { "epoch": 1.2920065252854813, "grad_norm": 0.22875936329364777, "learning_rate": 0.0006459216965742252, "loss": 0.1918, "num_input_tokens_seen": 17127456, "step": 7920 }, { "epoch": 1.2928221859706361, "grad_norm": 0.5045974254608154, "learning_rate": 0.0006463295269168026, "loss": 0.1348, "num_input_tokens_seen": 17139168, "step": 7925 }, { "epoch": 1.2936378466557912, "grad_norm": 0.24506209790706635, "learning_rate": 0.00064673735725938, "loss": 0.078, "num_input_tokens_seen": 17150848, "step": 7930 }, { "epoch": 1.2944535073409462, "grad_norm": 0.04896121099591255, "learning_rate": 0.0006471451876019576, "loss": 0.0384, "num_input_tokens_seen": 17162080, "step": 7935 }, { "epoch": 1.2952691680261013, "grad_norm": 0.2526971995830536, "learning_rate": 0.0006475530179445351, "loss": 0.0936, "num_input_tokens_seen": 17172544, "step": 7940 }, { "epoch": 1.296084828711256, "grad_norm": 0.10988382250070572, "learning_rate": 0.0006479608482871126, "loss": 0.0965, "num_input_tokens_seen": 17182848, "step": 7945 }, { "epoch": 1.2969004893964111, "grad_norm": 0.27198734879493713, "learning_rate": 0.0006483686786296901, "loss": 0.0372, "num_input_tokens_seen": 17193408, "step": 7950 }, { "epoch": 1.2977161500815662, "grad_norm": 0.3824102580547333, "learning_rate": 0.0006487765089722675, "loss": 0.176, "num_input_tokens_seen": 17204384, "step": 7955 }, { "epoch": 1.298531810766721, "grad_norm": 0.1732729822397232, "learning_rate": 0.000649184339314845, "loss": 0.1326, "num_input_tokens_seen": 17214656, "step": 7960 }, { "epoch": 1.299347471451876, "grad_norm": 0.2566794753074646, "learning_rate": 0.0006495921696574225, "loss": 0.0654, "num_input_tokens_seen": 17224320, "step": 7965 }, { "epoch": 1.300163132137031, "grad_norm": 0.09076650440692902, "learning_rate": 0.0006500000000000001, "loss": 0.1901, "num_input_tokens_seen": 17236512, "step": 7970 }, { "epoch": 1.3009787928221859, "grad_norm": 0.18000783026218414, "learning_rate": 0.0006504078303425776, "loss": 0.0926, "num_input_tokens_seen": 17247520, "step": 7975 }, { "epoch": 1.301794453507341, "grad_norm": 0.23230217397212982, "learning_rate": 0.0006508156606851549, "loss": 0.1648, "num_input_tokens_seen": 17258752, "step": 7980 }, { "epoch": 1.302610114192496, "grad_norm": 0.11019614338874817, "learning_rate": 0.0006512234910277325, "loss": 0.0771, "num_input_tokens_seen": 17269632, "step": 7985 }, { "epoch": 1.3034257748776508, "grad_norm": 0.04341624677181244, "learning_rate": 0.00065163132137031, "loss": 0.2333, "num_input_tokens_seen": 17280192, "step": 7990 }, { "epoch": 1.3042414355628058, "grad_norm": 0.4747850298881531, "learning_rate": 0.0006520391517128875, "loss": 0.1317, "num_input_tokens_seen": 17291712, "step": 7995 }, { "epoch": 1.3050570962479608, "grad_norm": 0.7965296506881714, "learning_rate": 0.0006524469820554649, "loss": 0.22, "num_input_tokens_seen": 17302272, "step": 8000 }, { "epoch": 1.3058727569331159, "grad_norm": 0.29167118668556213, "learning_rate": 0.0006528548123980424, "loss": 0.1517, "num_input_tokens_seen": 17314304, "step": 8005 }, { "epoch": 1.306688417618271, "grad_norm": 0.3655271828174591, "learning_rate": 0.0006532626427406199, "loss": 0.1185, "num_input_tokens_seen": 17325248, "step": 8010 }, { "epoch": 1.3075040783034257, "grad_norm": 0.26490768790245056, "learning_rate": 0.0006536704730831974, "loss": 0.1989, "num_input_tokens_seen": 17335936, "step": 8015 }, { "epoch": 1.3083197389885808, "grad_norm": 0.3598152995109558, "learning_rate": 0.000654078303425775, "loss": 0.2269, "num_input_tokens_seen": 17346016, "step": 8020 }, { "epoch": 1.3091353996737358, "grad_norm": 0.10805067420005798, "learning_rate": 0.0006544861337683524, "loss": 0.1456, "num_input_tokens_seen": 17357728, "step": 8025 }, { "epoch": 1.3099510603588906, "grad_norm": 0.07780245691537857, "learning_rate": 0.0006548939641109298, "loss": 0.094, "num_input_tokens_seen": 17368032, "step": 8030 }, { "epoch": 1.3107667210440457, "grad_norm": 0.41313520073890686, "learning_rate": 0.0006553017944535074, "loss": 0.1353, "num_input_tokens_seen": 17378816, "step": 8035 }, { "epoch": 1.3115823817292007, "grad_norm": 0.10756899416446686, "learning_rate": 0.0006557096247960849, "loss": 0.1648, "num_input_tokens_seen": 17390048, "step": 8040 }, { "epoch": 1.3123980424143555, "grad_norm": 0.5034075379371643, "learning_rate": 0.0006561174551386622, "loss": 0.1469, "num_input_tokens_seen": 17399680, "step": 8045 }, { "epoch": 1.3132137030995106, "grad_norm": 0.08964333683252335, "learning_rate": 0.0006565252854812398, "loss": 0.0941, "num_input_tokens_seen": 17411424, "step": 8050 }, { "epoch": 1.3140293637846656, "grad_norm": 0.029598180204629898, "learning_rate": 0.0006569331158238173, "loss": 0.0703, "num_input_tokens_seen": 17422272, "step": 8055 }, { "epoch": 1.3148450244698204, "grad_norm": 0.2906266450881958, "learning_rate": 0.0006573409461663949, "loss": 0.1844, "num_input_tokens_seen": 17433216, "step": 8060 }, { "epoch": 1.3156606851549755, "grad_norm": 0.033766523003578186, "learning_rate": 0.0006577487765089722, "loss": 0.0721, "num_input_tokens_seen": 17443200, "step": 8065 }, { "epoch": 1.3164763458401305, "grad_norm": 0.08947774022817612, "learning_rate": 0.0006581566068515497, "loss": 0.1417, "num_input_tokens_seen": 17453792, "step": 8070 }, { "epoch": 1.3172920065252856, "grad_norm": 0.14372272789478302, "learning_rate": 0.0006585644371941273, "loss": 0.1788, "num_input_tokens_seen": 17464192, "step": 8075 }, { "epoch": 1.3181076672104406, "grad_norm": 0.029002483934164047, "learning_rate": 0.0006589722675367047, "loss": 0.1642, "num_input_tokens_seen": 17475200, "step": 8080 }, { "epoch": 1.3189233278955954, "grad_norm": 0.2958846092224121, "learning_rate": 0.0006593800978792823, "loss": 0.1529, "num_input_tokens_seen": 17486176, "step": 8085 }, { "epoch": 1.3197389885807504, "grad_norm": 0.14247475564479828, "learning_rate": 0.0006597879282218597, "loss": 0.2228, "num_input_tokens_seen": 17496416, "step": 8090 }, { "epoch": 1.3205546492659055, "grad_norm": 0.2910315990447998, "learning_rate": 0.0006601957585644372, "loss": 0.1838, "num_input_tokens_seen": 17507200, "step": 8095 }, { "epoch": 1.3213703099510603, "grad_norm": 0.08594219386577606, "learning_rate": 0.0006606035889070147, "loss": 0.1321, "num_input_tokens_seen": 17517792, "step": 8100 }, { "epoch": 1.3221859706362153, "grad_norm": 0.026004578918218613, "learning_rate": 0.0006610114192495922, "loss": 0.12, "num_input_tokens_seen": 17527808, "step": 8105 }, { "epoch": 1.3230016313213704, "grad_norm": 0.030598606914281845, "learning_rate": 0.0006614192495921697, "loss": 0.124, "num_input_tokens_seen": 17538560, "step": 8110 }, { "epoch": 1.3238172920065252, "grad_norm": 0.12475190311670303, "learning_rate": 0.0006618270799347471, "loss": 0.1772, "num_input_tokens_seen": 17550656, "step": 8115 }, { "epoch": 1.3246329526916802, "grad_norm": 0.05746564269065857, "learning_rate": 0.0006622349102773246, "loss": 0.1185, "num_input_tokens_seen": 17561824, "step": 8120 }, { "epoch": 1.3254486133768353, "grad_norm": 0.2370694875717163, "learning_rate": 0.0006626427406199022, "loss": 0.1014, "num_input_tokens_seen": 17573120, "step": 8125 }, { "epoch": 1.32626427406199, "grad_norm": 0.3231610655784607, "learning_rate": 0.0006630505709624797, "loss": 0.1533, "num_input_tokens_seen": 17584224, "step": 8130 }, { "epoch": 1.3270799347471451, "grad_norm": 0.025710172951221466, "learning_rate": 0.0006634584013050571, "loss": 0.0276, "num_input_tokens_seen": 17595232, "step": 8135 }, { "epoch": 1.3278955954323002, "grad_norm": 0.22752685844898224, "learning_rate": 0.0006638662316476346, "loss": 0.1328, "num_input_tokens_seen": 17606624, "step": 8140 }, { "epoch": 1.3287112561174552, "grad_norm": 0.07197009772062302, "learning_rate": 0.0006642740619902121, "loss": 0.0577, "num_input_tokens_seen": 17616864, "step": 8145 }, { "epoch": 1.32952691680261, "grad_norm": 0.11145736277103424, "learning_rate": 0.0006646818923327896, "loss": 0.2121, "num_input_tokens_seen": 17628416, "step": 8150 }, { "epoch": 1.330342577487765, "grad_norm": 0.06519704312086105, "learning_rate": 0.000665089722675367, "loss": 0.1524, "num_input_tokens_seen": 17637472, "step": 8155 }, { "epoch": 1.3311582381729201, "grad_norm": 0.0630141869187355, "learning_rate": 0.0006654975530179446, "loss": 0.1842, "num_input_tokens_seen": 17647744, "step": 8160 }, { "epoch": 1.3319738988580752, "grad_norm": 0.10813954472541809, "learning_rate": 0.0006659053833605221, "loss": 0.0914, "num_input_tokens_seen": 17657824, "step": 8165 }, { "epoch": 1.33278955954323, "grad_norm": 0.0244632288813591, "learning_rate": 0.0006663132137030995, "loss": 0.0866, "num_input_tokens_seen": 17669344, "step": 8170 }, { "epoch": 1.333605220228385, "grad_norm": 0.019405458122491837, "learning_rate": 0.000666721044045677, "loss": 0.0706, "num_input_tokens_seen": 17680000, "step": 8175 }, { "epoch": 1.33442088091354, "grad_norm": 0.013781199231743813, "learning_rate": 0.0006671288743882545, "loss": 0.1345, "num_input_tokens_seen": 17690368, "step": 8180 }, { "epoch": 1.3352365415986949, "grad_norm": 0.012108061462640762, "learning_rate": 0.0006675367047308319, "loss": 0.0377, "num_input_tokens_seen": 17701216, "step": 8185 }, { "epoch": 1.33605220228385, "grad_norm": 0.10047098994255066, "learning_rate": 0.0006679445350734095, "loss": 0.2312, "num_input_tokens_seen": 17712800, "step": 8190 }, { "epoch": 1.336867862969005, "grad_norm": 0.10426725447177887, "learning_rate": 0.000668352365415987, "loss": 0.218, "num_input_tokens_seen": 17723136, "step": 8195 }, { "epoch": 1.3376835236541598, "grad_norm": 0.21119491755962372, "learning_rate": 0.0006687601957585645, "loss": 0.1494, "num_input_tokens_seen": 17733792, "step": 8200 }, { "epoch": 1.3384991843393148, "grad_norm": 0.060859113931655884, "learning_rate": 0.0006691680261011419, "loss": 0.1225, "num_input_tokens_seen": 17744032, "step": 8205 }, { "epoch": 1.3393148450244698, "grad_norm": 0.04675585404038429, "learning_rate": 0.0006695758564437194, "loss": 0.1787, "num_input_tokens_seen": 17753856, "step": 8210 }, { "epoch": 1.3401305057096247, "grad_norm": 0.09033242613077164, "learning_rate": 0.000669983686786297, "loss": 0.2053, "num_input_tokens_seen": 17765184, "step": 8215 }, { "epoch": 1.3409461663947797, "grad_norm": 0.2503712773323059, "learning_rate": 0.0006703915171288743, "loss": 0.1107, "num_input_tokens_seen": 17776032, "step": 8220 }, { "epoch": 1.3417618270799347, "grad_norm": 0.03463561087846756, "learning_rate": 0.0006707993474714519, "loss": 0.0904, "num_input_tokens_seen": 17785472, "step": 8225 }, { "epoch": 1.3425774877650898, "grad_norm": 0.6294701099395752, "learning_rate": 0.0006712071778140294, "loss": 0.1538, "num_input_tokens_seen": 17796832, "step": 8230 }, { "epoch": 1.3433931484502448, "grad_norm": 0.3345804214477539, "learning_rate": 0.0006716150081566068, "loss": 0.1497, "num_input_tokens_seen": 17809344, "step": 8235 }, { "epoch": 1.3442088091353996, "grad_norm": 0.47858479619026184, "learning_rate": 0.0006720228384991843, "loss": 0.3782, "num_input_tokens_seen": 17820000, "step": 8240 }, { "epoch": 1.3450244698205547, "grad_norm": 0.38808053731918335, "learning_rate": 0.0006724306688417618, "loss": 0.1865, "num_input_tokens_seen": 17830912, "step": 8245 }, { "epoch": 1.3458401305057097, "grad_norm": 0.24597151577472687, "learning_rate": 0.0006728384991843394, "loss": 0.1407, "num_input_tokens_seen": 17841856, "step": 8250 }, { "epoch": 1.3466557911908645, "grad_norm": 0.29200631380081177, "learning_rate": 0.0006732463295269168, "loss": 0.1813, "num_input_tokens_seen": 17852320, "step": 8255 }, { "epoch": 1.3474714518760196, "grad_norm": 0.02408430352807045, "learning_rate": 0.0006736541598694943, "loss": 0.1287, "num_input_tokens_seen": 17862624, "step": 8260 }, { "epoch": 1.3482871125611746, "grad_norm": 0.10565165430307388, "learning_rate": 0.0006740619902120718, "loss": 0.2301, "num_input_tokens_seen": 17873280, "step": 8265 }, { "epoch": 1.3491027732463294, "grad_norm": 0.07163897156715393, "learning_rate": 0.0006744698205546492, "loss": 0.1216, "num_input_tokens_seen": 17884608, "step": 8270 }, { "epoch": 1.3499184339314845, "grad_norm": 0.17496277391910553, "learning_rate": 0.0006748776508972268, "loss": 0.0676, "num_input_tokens_seen": 17895072, "step": 8275 }, { "epoch": 1.3507340946166395, "grad_norm": 0.14802797138690948, "learning_rate": 0.0006752854812398043, "loss": 0.0963, "num_input_tokens_seen": 17905248, "step": 8280 }, { "epoch": 1.3515497553017943, "grad_norm": 0.01929861307144165, "learning_rate": 0.0006756933115823817, "loss": 0.0899, "num_input_tokens_seen": 17914336, "step": 8285 }, { "epoch": 1.3523654159869494, "grad_norm": 0.129594624042511, "learning_rate": 0.0006761011419249592, "loss": 0.1605, "num_input_tokens_seen": 17926112, "step": 8290 }, { "epoch": 1.3531810766721044, "grad_norm": 0.23017330467700958, "learning_rate": 0.0006765089722675367, "loss": 0.0561, "num_input_tokens_seen": 17937472, "step": 8295 }, { "epoch": 1.3539967373572595, "grad_norm": 0.044130630791187286, "learning_rate": 0.0006769168026101143, "loss": 0.0399, "num_input_tokens_seen": 17948960, "step": 8300 }, { "epoch": 1.3548123980424145, "grad_norm": 0.32337111234664917, "learning_rate": 0.0006773246329526917, "loss": 0.0665, "num_input_tokens_seen": 17959808, "step": 8305 }, { "epoch": 1.3556280587275693, "grad_norm": 0.5042018890380859, "learning_rate": 0.0006777324632952691, "loss": 0.069, "num_input_tokens_seen": 17972224, "step": 8310 }, { "epoch": 1.3564437194127243, "grad_norm": 0.057626839727163315, "learning_rate": 0.0006781402936378467, "loss": 0.0513, "num_input_tokens_seen": 17982528, "step": 8315 }, { "epoch": 1.3572593800978794, "grad_norm": 0.011388307437300682, "learning_rate": 0.0006785481239804242, "loss": 0.192, "num_input_tokens_seen": 17993408, "step": 8320 }, { "epoch": 1.3580750407830342, "grad_norm": 0.13212941586971283, "learning_rate": 0.0006789559543230017, "loss": 0.054, "num_input_tokens_seen": 18004288, "step": 8325 }, { "epoch": 1.3588907014681892, "grad_norm": 0.025670086964964867, "learning_rate": 0.0006793637846655791, "loss": 0.2387, "num_input_tokens_seen": 18015552, "step": 8330 }, { "epoch": 1.3597063621533443, "grad_norm": 0.37728649377822876, "learning_rate": 0.0006797716150081566, "loss": 0.1111, "num_input_tokens_seen": 18024800, "step": 8335 }, { "epoch": 1.360522022838499, "grad_norm": 0.018351459875702858, "learning_rate": 0.0006801794453507341, "loss": 0.0376, "num_input_tokens_seen": 18035072, "step": 8340 }, { "epoch": 1.3613376835236541, "grad_norm": 0.08784783631563187, "learning_rate": 0.0006805872756933116, "loss": 0.057, "num_input_tokens_seen": 18045728, "step": 8345 }, { "epoch": 1.3621533442088092, "grad_norm": 0.06758838891983032, "learning_rate": 0.000680995106035889, "loss": 0.1178, "num_input_tokens_seen": 18055936, "step": 8350 }, { "epoch": 1.362969004893964, "grad_norm": 0.5933988690376282, "learning_rate": 0.0006814029363784666, "loss": 0.2268, "num_input_tokens_seen": 18067680, "step": 8355 }, { "epoch": 1.363784665579119, "grad_norm": 0.05332661420106888, "learning_rate": 0.000681810766721044, "loss": 0.1218, "num_input_tokens_seen": 18078400, "step": 8360 }, { "epoch": 1.364600326264274, "grad_norm": 0.16641493141651154, "learning_rate": 0.0006822185970636216, "loss": 0.199, "num_input_tokens_seen": 18089568, "step": 8365 }, { "epoch": 1.3654159869494291, "grad_norm": 0.14251184463500977, "learning_rate": 0.0006826264274061991, "loss": 0.1338, "num_input_tokens_seen": 18100928, "step": 8370 }, { "epoch": 1.366231647634584, "grad_norm": 0.21270228922367096, "learning_rate": 0.0006830342577487764, "loss": 0.0976, "num_input_tokens_seen": 18111264, "step": 8375 }, { "epoch": 1.367047308319739, "grad_norm": 0.0460171140730381, "learning_rate": 0.000683442088091354, "loss": 0.0314, "num_input_tokens_seen": 18122080, "step": 8380 }, { "epoch": 1.367862969004894, "grad_norm": 0.4137776792049408, "learning_rate": 0.0006838499184339315, "loss": 0.2193, "num_input_tokens_seen": 18132416, "step": 8385 }, { "epoch": 1.368678629690049, "grad_norm": 0.10444167256355286, "learning_rate": 0.0006842577487765091, "loss": 0.1293, "num_input_tokens_seen": 18143584, "step": 8390 }, { "epoch": 1.3694942903752039, "grad_norm": 0.06799294054508209, "learning_rate": 0.0006846655791190864, "loss": 0.2084, "num_input_tokens_seen": 18153760, "step": 8395 }, { "epoch": 1.370309951060359, "grad_norm": 0.0833783745765686, "learning_rate": 0.0006850734094616639, "loss": 0.1257, "num_input_tokens_seen": 18165216, "step": 8400 }, { "epoch": 1.371125611745514, "grad_norm": 0.031502969563007355, "learning_rate": 0.0006854812398042415, "loss": 0.1021, "num_input_tokens_seen": 18176480, "step": 8405 }, { "epoch": 1.3719412724306688, "grad_norm": 0.3506411910057068, "learning_rate": 0.0006858890701468189, "loss": 0.0863, "num_input_tokens_seen": 18187648, "step": 8410 }, { "epoch": 1.3727569331158238, "grad_norm": 0.45229458808898926, "learning_rate": 0.0006862969004893965, "loss": 0.068, "num_input_tokens_seen": 18198752, "step": 8415 }, { "epoch": 1.3735725938009788, "grad_norm": 0.2910847067832947, "learning_rate": 0.0006867047308319739, "loss": 0.1658, "num_input_tokens_seen": 18210880, "step": 8420 }, { "epoch": 1.3743882544861337, "grad_norm": 0.31154388189315796, "learning_rate": 0.0006871125611745514, "loss": 0.0971, "num_input_tokens_seen": 18222464, "step": 8425 }, { "epoch": 1.3752039151712887, "grad_norm": 0.03552580624818802, "learning_rate": 0.0006875203915171289, "loss": 0.122, "num_input_tokens_seen": 18233216, "step": 8430 }, { "epoch": 1.3760195758564437, "grad_norm": 0.1600065529346466, "learning_rate": 0.0006879282218597064, "loss": 0.0669, "num_input_tokens_seen": 18244832, "step": 8435 }, { "epoch": 1.3768352365415986, "grad_norm": 0.07445048540830612, "learning_rate": 0.000688336052202284, "loss": 0.1999, "num_input_tokens_seen": 18254176, "step": 8440 }, { "epoch": 1.3776508972267536, "grad_norm": 0.47163334488868713, "learning_rate": 0.0006887438825448613, "loss": 0.22, "num_input_tokens_seen": 18265184, "step": 8445 }, { "epoch": 1.3784665579119086, "grad_norm": 0.13729801774024963, "learning_rate": 0.0006891517128874388, "loss": 0.1316, "num_input_tokens_seen": 18275424, "step": 8450 }, { "epoch": 1.3792822185970637, "grad_norm": 0.39802855253219604, "learning_rate": 0.0006895595432300164, "loss": 0.1697, "num_input_tokens_seen": 18285632, "step": 8455 }, { "epoch": 1.3800978792822187, "grad_norm": 0.02712981216609478, "learning_rate": 0.0006899673735725939, "loss": 0.0611, "num_input_tokens_seen": 18297536, "step": 8460 }, { "epoch": 1.3809135399673735, "grad_norm": 0.1002969890832901, "learning_rate": 0.0006903752039151713, "loss": 0.0591, "num_input_tokens_seen": 18307360, "step": 8465 }, { "epoch": 1.3817292006525286, "grad_norm": 0.4240610599517822, "learning_rate": 0.0006907830342577488, "loss": 0.2264, "num_input_tokens_seen": 18317792, "step": 8470 }, { "epoch": 1.3825448613376836, "grad_norm": 0.41298383474349976, "learning_rate": 0.0006911908646003263, "loss": 0.2458, "num_input_tokens_seen": 18329184, "step": 8475 }, { "epoch": 1.3833605220228384, "grad_norm": 0.08825691044330597, "learning_rate": 0.0006915986949429038, "loss": 0.0931, "num_input_tokens_seen": 18340224, "step": 8480 }, { "epoch": 1.3841761827079935, "grad_norm": 0.31725960969924927, "learning_rate": 0.0006920065252854812, "loss": 0.1657, "num_input_tokens_seen": 18352032, "step": 8485 }, { "epoch": 1.3849918433931485, "grad_norm": 0.2807118892669678, "learning_rate": 0.0006924143556280587, "loss": 0.1332, "num_input_tokens_seen": 18361728, "step": 8490 }, { "epoch": 1.3858075040783033, "grad_norm": 0.20829269289970398, "learning_rate": 0.0006928221859706362, "loss": 0.0705, "num_input_tokens_seen": 18371744, "step": 8495 }, { "epoch": 1.3866231647634584, "grad_norm": 0.11227507889270782, "learning_rate": 0.0006932300163132137, "loss": 0.147, "num_input_tokens_seen": 18382752, "step": 8500 }, { "epoch": 1.3874388254486134, "grad_norm": 0.0697614997625351, "learning_rate": 0.0006936378466557913, "loss": 0.1329, "num_input_tokens_seen": 18393152, "step": 8505 }, { "epoch": 1.3882544861337682, "grad_norm": 0.23251527547836304, "learning_rate": 0.0006940456769983687, "loss": 0.0789, "num_input_tokens_seen": 18403168, "step": 8510 }, { "epoch": 1.3890701468189233, "grad_norm": 0.37753644585609436, "learning_rate": 0.0006944535073409461, "loss": 0.3207, "num_input_tokens_seen": 18413664, "step": 8515 }, { "epoch": 1.3898858075040783, "grad_norm": 0.17351676523685455, "learning_rate": 0.0006948613376835237, "loss": 0.112, "num_input_tokens_seen": 18424288, "step": 8520 }, { "epoch": 1.3907014681892333, "grad_norm": 0.21399036049842834, "learning_rate": 0.0006952691680261012, "loss": 0.0894, "num_input_tokens_seen": 18434976, "step": 8525 }, { "epoch": 1.3915171288743884, "grad_norm": 0.20291267335414886, "learning_rate": 0.0006956769983686786, "loss": 0.0709, "num_input_tokens_seen": 18445440, "step": 8530 }, { "epoch": 1.3923327895595432, "grad_norm": 0.04411192238330841, "learning_rate": 0.0006960848287112561, "loss": 0.0533, "num_input_tokens_seen": 18455904, "step": 8535 }, { "epoch": 1.3931484502446982, "grad_norm": 0.0695725828409195, "learning_rate": 0.0006964926590538336, "loss": 0.2081, "num_input_tokens_seen": 18467200, "step": 8540 }, { "epoch": 1.3939641109298533, "grad_norm": 0.0445864163339138, "learning_rate": 0.0006969004893964112, "loss": 0.1561, "num_input_tokens_seen": 18478304, "step": 8545 }, { "epoch": 1.394779771615008, "grad_norm": 0.4487917721271515, "learning_rate": 0.0006973083197389885, "loss": 0.1422, "num_input_tokens_seen": 18489664, "step": 8550 }, { "epoch": 1.3955954323001631, "grad_norm": 0.19967851042747498, "learning_rate": 0.0006977161500815661, "loss": 0.0737, "num_input_tokens_seen": 18501568, "step": 8555 }, { "epoch": 1.3964110929853182, "grad_norm": 0.35850760340690613, "learning_rate": 0.0006981239804241436, "loss": 0.1543, "num_input_tokens_seen": 18513504, "step": 8560 }, { "epoch": 1.397226753670473, "grad_norm": 0.15785285830497742, "learning_rate": 0.000698531810766721, "loss": 0.0517, "num_input_tokens_seen": 18523616, "step": 8565 }, { "epoch": 1.398042414355628, "grad_norm": 0.5196748971939087, "learning_rate": 0.0006989396411092986, "loss": 0.1858, "num_input_tokens_seen": 18534464, "step": 8570 }, { "epoch": 1.398858075040783, "grad_norm": 0.40732455253601074, "learning_rate": 0.000699347471451876, "loss": 0.1775, "num_input_tokens_seen": 18545536, "step": 8575 }, { "epoch": 1.399673735725938, "grad_norm": 0.3885939121246338, "learning_rate": 0.0006997553017944536, "loss": 0.0849, "num_input_tokens_seen": 18556128, "step": 8580 }, { "epoch": 1.400489396411093, "grad_norm": 0.27319011092185974, "learning_rate": 0.000700163132137031, "loss": 0.0458, "num_input_tokens_seen": 18567232, "step": 8585 }, { "epoch": 1.401305057096248, "grad_norm": 0.13056805729866028, "learning_rate": 0.0007005709624796085, "loss": 0.2172, "num_input_tokens_seen": 18577824, "step": 8590 }, { "epoch": 1.402120717781403, "grad_norm": 0.07697630673646927, "learning_rate": 0.000700978792822186, "loss": 0.1189, "num_input_tokens_seen": 18588096, "step": 8595 }, { "epoch": 1.4029363784665578, "grad_norm": 0.18461918830871582, "learning_rate": 0.0007013866231647634, "loss": 0.0482, "num_input_tokens_seen": 18599744, "step": 8600 }, { "epoch": 1.4037520391517129, "grad_norm": 0.3856184482574463, "learning_rate": 0.000701794453507341, "loss": 0.206, "num_input_tokens_seen": 18610592, "step": 8605 }, { "epoch": 1.404567699836868, "grad_norm": 0.06395883858203888, "learning_rate": 0.0007022022838499185, "loss": 0.1081, "num_input_tokens_seen": 18620896, "step": 8610 }, { "epoch": 1.405383360522023, "grad_norm": 0.011114265769720078, "learning_rate": 0.000702610114192496, "loss": 0.0479, "num_input_tokens_seen": 18630848, "step": 8615 }, { "epoch": 1.4061990212071778, "grad_norm": 0.024176809936761856, "learning_rate": 0.0007030179445350734, "loss": 0.0549, "num_input_tokens_seen": 18641600, "step": 8620 }, { "epoch": 1.4070146818923328, "grad_norm": 0.6192775964736938, "learning_rate": 0.0007034257748776509, "loss": 0.1453, "num_input_tokens_seen": 18652448, "step": 8625 }, { "epoch": 1.4078303425774878, "grad_norm": 0.04966207593679428, "learning_rate": 0.0007038336052202285, "loss": 0.1306, "num_input_tokens_seen": 18663392, "step": 8630 }, { "epoch": 1.4086460032626427, "grad_norm": 0.016872374340891838, "learning_rate": 0.0007042414355628059, "loss": 0.063, "num_input_tokens_seen": 18674528, "step": 8635 }, { "epoch": 1.4094616639477977, "grad_norm": 0.02471073530614376, "learning_rate": 0.0007046492659053833, "loss": 0.0279, "num_input_tokens_seen": 18685632, "step": 8640 }, { "epoch": 1.4102773246329527, "grad_norm": 0.012303034774959087, "learning_rate": 0.0007050570962479609, "loss": 0.071, "num_input_tokens_seen": 18695776, "step": 8645 }, { "epoch": 1.4110929853181076, "grad_norm": 0.098455511033535, "learning_rate": 0.0007054649265905384, "loss": 0.249, "num_input_tokens_seen": 18705056, "step": 8650 }, { "epoch": 1.4119086460032626, "grad_norm": 0.3847644627094269, "learning_rate": 0.0007058727569331158, "loss": 0.1436, "num_input_tokens_seen": 18714688, "step": 8655 }, { "epoch": 1.4127243066884176, "grad_norm": 0.024373041465878487, "learning_rate": 0.0007062805872756933, "loss": 0.102, "num_input_tokens_seen": 18725984, "step": 8660 }, { "epoch": 1.4135399673735725, "grad_norm": 0.15685175359249115, "learning_rate": 0.0007066884176182708, "loss": 0.1985, "num_input_tokens_seen": 18736224, "step": 8665 }, { "epoch": 1.4143556280587275, "grad_norm": 0.1535254567861557, "learning_rate": 0.0007070962479608483, "loss": 0.0492, "num_input_tokens_seen": 18748832, "step": 8670 }, { "epoch": 1.4151712887438825, "grad_norm": 0.3047678470611572, "learning_rate": 0.0007075040783034258, "loss": 0.1792, "num_input_tokens_seen": 18757792, "step": 8675 }, { "epoch": 1.4159869494290376, "grad_norm": 0.28612783551216125, "learning_rate": 0.0007079119086460033, "loss": 0.249, "num_input_tokens_seen": 18768160, "step": 8680 }, { "epoch": 1.4168026101141926, "grad_norm": 0.059516116976737976, "learning_rate": 0.0007083197389885808, "loss": 0.0793, "num_input_tokens_seen": 18778336, "step": 8685 }, { "epoch": 1.4176182707993474, "grad_norm": 0.2646983861923218, "learning_rate": 0.0007087275693311582, "loss": 0.1472, "num_input_tokens_seen": 18789632, "step": 8690 }, { "epoch": 1.4184339314845025, "grad_norm": 0.15517950057983398, "learning_rate": 0.0007091353996737358, "loss": 0.2406, "num_input_tokens_seen": 18800224, "step": 8695 }, { "epoch": 1.4192495921696575, "grad_norm": 0.04616353288292885, "learning_rate": 0.0007095432300163133, "loss": 0.108, "num_input_tokens_seen": 18810208, "step": 8700 }, { "epoch": 1.4200652528548123, "grad_norm": 0.04655442386865616, "learning_rate": 0.0007099510603588906, "loss": 0.065, "num_input_tokens_seen": 18821760, "step": 8705 }, { "epoch": 1.4208809135399674, "grad_norm": 0.2469894289970398, "learning_rate": 0.0007103588907014682, "loss": 0.2837, "num_input_tokens_seen": 18833536, "step": 8710 }, { "epoch": 1.4216965742251224, "grad_norm": 0.11056669801473618, "learning_rate": 0.0007107667210440457, "loss": 0.1823, "num_input_tokens_seen": 18844576, "step": 8715 }, { "epoch": 1.4225122349102772, "grad_norm": 0.01934995874762535, "learning_rate": 0.0007111745513866232, "loss": 0.0429, "num_input_tokens_seen": 18856096, "step": 8720 }, { "epoch": 1.4233278955954323, "grad_norm": 0.12118564546108246, "learning_rate": 0.0007115823817292006, "loss": 0.134, "num_input_tokens_seen": 18866944, "step": 8725 }, { "epoch": 1.4241435562805873, "grad_norm": 0.09233374893665314, "learning_rate": 0.0007119902120717781, "loss": 0.189, "num_input_tokens_seen": 18878464, "step": 8730 }, { "epoch": 1.4249592169657421, "grad_norm": 0.10285761952400208, "learning_rate": 0.0007123980424143557, "loss": 0.0487, "num_input_tokens_seen": 18888992, "step": 8735 }, { "epoch": 1.4257748776508972, "grad_norm": 0.03624412789940834, "learning_rate": 0.0007128058727569331, "loss": 0.1002, "num_input_tokens_seen": 18899776, "step": 8740 }, { "epoch": 1.4265905383360522, "grad_norm": 0.6590876579284668, "learning_rate": 0.0007132137030995107, "loss": 0.0809, "num_input_tokens_seen": 18910528, "step": 8745 }, { "epoch": 1.4274061990212072, "grad_norm": 0.06478895992040634, "learning_rate": 0.0007136215334420881, "loss": 0.0835, "num_input_tokens_seen": 18922048, "step": 8750 }, { "epoch": 1.4282218597063623, "grad_norm": 0.020453322678804398, "learning_rate": 0.0007140293637846655, "loss": 0.0671, "num_input_tokens_seen": 18933472, "step": 8755 }, { "epoch": 1.429037520391517, "grad_norm": 0.46330201625823975, "learning_rate": 0.0007144371941272431, "loss": 0.1484, "num_input_tokens_seen": 18944160, "step": 8760 }, { "epoch": 1.4298531810766721, "grad_norm": 0.5292882323265076, "learning_rate": 0.0007148450244698206, "loss": 0.1076, "num_input_tokens_seen": 18954688, "step": 8765 }, { "epoch": 1.4306688417618272, "grad_norm": 0.12839145958423615, "learning_rate": 0.0007152528548123982, "loss": 0.0256, "num_input_tokens_seen": 18965728, "step": 8770 }, { "epoch": 1.431484502446982, "grad_norm": 0.026854708790779114, "learning_rate": 0.0007156606851549755, "loss": 0.1457, "num_input_tokens_seen": 18977088, "step": 8775 }, { "epoch": 1.432300163132137, "grad_norm": 0.0423266664147377, "learning_rate": 0.000716068515497553, "loss": 0.14, "num_input_tokens_seen": 18989056, "step": 8780 }, { "epoch": 1.433115823817292, "grad_norm": 0.022482803091406822, "learning_rate": 0.0007164763458401306, "loss": 0.0264, "num_input_tokens_seen": 18999936, "step": 8785 }, { "epoch": 1.433931484502447, "grad_norm": 0.009021877311170101, "learning_rate": 0.000716884176182708, "loss": 0.0196, "num_input_tokens_seen": 19011328, "step": 8790 }, { "epoch": 1.434747145187602, "grad_norm": 0.34267958998680115, "learning_rate": 0.0007172920065252854, "loss": 0.0449, "num_input_tokens_seen": 19022176, "step": 8795 }, { "epoch": 1.435562805872757, "grad_norm": 0.0407864935696125, "learning_rate": 0.000717699836867863, "loss": 0.2754, "num_input_tokens_seen": 19032832, "step": 8800 }, { "epoch": 1.4363784665579118, "grad_norm": 0.06768330931663513, "learning_rate": 0.0007181076672104405, "loss": 0.0412, "num_input_tokens_seen": 19044384, "step": 8805 }, { "epoch": 1.4371941272430668, "grad_norm": 0.03693476691842079, "learning_rate": 0.000718515497553018, "loss": 0.1054, "num_input_tokens_seen": 19054816, "step": 8810 }, { "epoch": 1.4380097879282219, "grad_norm": 0.05763913691043854, "learning_rate": 0.0007189233278955954, "loss": 0.031, "num_input_tokens_seen": 19065120, "step": 8815 }, { "epoch": 1.438825448613377, "grad_norm": 0.26617172360420227, "learning_rate": 0.0007193311582381729, "loss": 0.389, "num_input_tokens_seen": 19075872, "step": 8820 }, { "epoch": 1.4396411092985317, "grad_norm": 0.06919386237859726, "learning_rate": 0.0007197389885807504, "loss": 0.0492, "num_input_tokens_seen": 19087808, "step": 8825 }, { "epoch": 1.4404567699836868, "grad_norm": 0.2736736238002777, "learning_rate": 0.0007201468189233279, "loss": 0.3008, "num_input_tokens_seen": 19099488, "step": 8830 }, { "epoch": 1.4412724306688418, "grad_norm": 0.05337755009531975, "learning_rate": 0.0007205546492659055, "loss": 0.0528, "num_input_tokens_seen": 19111104, "step": 8835 }, { "epoch": 1.4420880913539968, "grad_norm": 0.03262023627758026, "learning_rate": 0.0007209624796084829, "loss": 0.0659, "num_input_tokens_seen": 19122656, "step": 8840 }, { "epoch": 1.4429037520391517, "grad_norm": 0.19894321262836456, "learning_rate": 0.0007213703099510603, "loss": 0.2232, "num_input_tokens_seen": 19133728, "step": 8845 }, { "epoch": 1.4437194127243067, "grad_norm": 0.031567685306072235, "learning_rate": 0.0007217781402936379, "loss": 0.0757, "num_input_tokens_seen": 19144896, "step": 8850 }, { "epoch": 1.4445350734094617, "grad_norm": 0.050865061581134796, "learning_rate": 0.0007221859706362154, "loss": 0.0726, "num_input_tokens_seen": 19155040, "step": 8855 }, { "epoch": 1.4453507340946166, "grad_norm": 0.3916897177696228, "learning_rate": 0.0007225938009787928, "loss": 0.1051, "num_input_tokens_seen": 19165216, "step": 8860 }, { "epoch": 1.4461663947797716, "grad_norm": 0.02855168841779232, "learning_rate": 0.0007230016313213703, "loss": 0.0636, "num_input_tokens_seen": 19175904, "step": 8865 }, { "epoch": 1.4469820554649266, "grad_norm": 0.009972508065402508, "learning_rate": 0.0007234094616639478, "loss": 0.1216, "num_input_tokens_seen": 19185312, "step": 8870 }, { "epoch": 1.4477977161500815, "grad_norm": 0.05328962206840515, "learning_rate": 0.0007238172920065254, "loss": 0.1923, "num_input_tokens_seen": 19196768, "step": 8875 }, { "epoch": 1.4486133768352365, "grad_norm": 0.20842105150222778, "learning_rate": 0.0007242251223491027, "loss": 0.1206, "num_input_tokens_seen": 19207360, "step": 8880 }, { "epoch": 1.4494290375203915, "grad_norm": 0.298380047082901, "learning_rate": 0.0007246329526916803, "loss": 0.097, "num_input_tokens_seen": 19217792, "step": 8885 }, { "epoch": 1.4502446982055464, "grad_norm": 0.056603509932756424, "learning_rate": 0.0007250407830342578, "loss": 0.0887, "num_input_tokens_seen": 19229920, "step": 8890 }, { "epoch": 1.4510603588907014, "grad_norm": 0.028075775131583214, "learning_rate": 0.0007254486133768352, "loss": 0.0862, "num_input_tokens_seen": 19241952, "step": 8895 }, { "epoch": 1.4518760195758564, "grad_norm": 0.04573468863964081, "learning_rate": 0.0007258564437194128, "loss": 0.157, "num_input_tokens_seen": 19252320, "step": 8900 }, { "epoch": 1.4526916802610115, "grad_norm": 0.06405018270015717, "learning_rate": 0.0007262642740619902, "loss": 0.0865, "num_input_tokens_seen": 19262912, "step": 8905 }, { "epoch": 1.4535073409461665, "grad_norm": 0.17091864347457886, "learning_rate": 0.0007266721044045678, "loss": 0.2396, "num_input_tokens_seen": 19273792, "step": 8910 }, { "epoch": 1.4543230016313213, "grad_norm": 0.15708409249782562, "learning_rate": 0.0007270799347471452, "loss": 0.2161, "num_input_tokens_seen": 19284448, "step": 8915 }, { "epoch": 1.4551386623164764, "grad_norm": 0.05929143726825714, "learning_rate": 0.0007274877650897227, "loss": 0.1374, "num_input_tokens_seen": 19295712, "step": 8920 }, { "epoch": 1.4559543230016314, "grad_norm": 0.03152371942996979, "learning_rate": 0.0007278955954323002, "loss": 0.0991, "num_input_tokens_seen": 19307520, "step": 8925 }, { "epoch": 1.4567699836867862, "grad_norm": 0.06505519896745682, "learning_rate": 0.0007283034257748776, "loss": 0.0437, "num_input_tokens_seen": 19319008, "step": 8930 }, { "epoch": 1.4575856443719413, "grad_norm": 0.1482236683368683, "learning_rate": 0.0007287112561174551, "loss": 0.0816, "num_input_tokens_seen": 19329440, "step": 8935 }, { "epoch": 1.4584013050570963, "grad_norm": 0.10577386617660522, "learning_rate": 0.0007291190864600327, "loss": 0.2586, "num_input_tokens_seen": 19339616, "step": 8940 }, { "epoch": 1.4592169657422511, "grad_norm": 0.18734675645828247, "learning_rate": 0.00072952691680261, "loss": 0.0709, "num_input_tokens_seen": 19351136, "step": 8945 }, { "epoch": 1.4600326264274062, "grad_norm": 0.052861470729112625, "learning_rate": 0.0007299347471451876, "loss": 0.1096, "num_input_tokens_seen": 19360768, "step": 8950 }, { "epoch": 1.4608482871125612, "grad_norm": 0.02653975412249565, "learning_rate": 0.0007303425774877651, "loss": 0.0464, "num_input_tokens_seen": 19370688, "step": 8955 }, { "epoch": 1.461663947797716, "grad_norm": 0.04796244576573372, "learning_rate": 0.0007307504078303426, "loss": 0.0705, "num_input_tokens_seen": 19382336, "step": 8960 }, { "epoch": 1.462479608482871, "grad_norm": 0.02996446006000042, "learning_rate": 0.0007311582381729201, "loss": 0.0502, "num_input_tokens_seen": 19393152, "step": 8965 }, { "epoch": 1.463295269168026, "grad_norm": 0.13484928011894226, "learning_rate": 0.0007315660685154975, "loss": 0.116, "num_input_tokens_seen": 19403744, "step": 8970 }, { "epoch": 1.4641109298531811, "grad_norm": 0.0562857910990715, "learning_rate": 0.0007319738988580751, "loss": 0.0506, "num_input_tokens_seen": 19415008, "step": 8975 }, { "epoch": 1.4649265905383362, "grad_norm": 0.11413650959730148, "learning_rate": 0.0007323817292006525, "loss": 0.1563, "num_input_tokens_seen": 19425888, "step": 8980 }, { "epoch": 1.465742251223491, "grad_norm": 0.2681230902671814, "learning_rate": 0.00073278955954323, "loss": 0.1464, "num_input_tokens_seen": 19437280, "step": 8985 }, { "epoch": 1.466557911908646, "grad_norm": 0.055149346590042114, "learning_rate": 0.0007331973898858076, "loss": 0.2557, "num_input_tokens_seen": 19448544, "step": 8990 }, { "epoch": 1.467373572593801, "grad_norm": 0.07654145359992981, "learning_rate": 0.000733605220228385, "loss": 0.1016, "num_input_tokens_seen": 19459264, "step": 8995 }, { "epoch": 1.468189233278956, "grad_norm": 0.07035654783248901, "learning_rate": 0.0007340130505709625, "loss": 0.1717, "num_input_tokens_seen": 19468896, "step": 9000 }, { "epoch": 1.469004893964111, "grad_norm": 0.3256077170372009, "learning_rate": 0.00073442088091354, "loss": 0.2185, "num_input_tokens_seen": 19480032, "step": 9005 }, { "epoch": 1.469820554649266, "grad_norm": 0.08641856163740158, "learning_rate": 0.0007348287112561175, "loss": 0.1296, "num_input_tokens_seen": 19490816, "step": 9010 }, { "epoch": 1.4706362153344208, "grad_norm": 0.4245319068431854, "learning_rate": 0.0007352365415986949, "loss": 0.1821, "num_input_tokens_seen": 19501248, "step": 9015 }, { "epoch": 1.4714518760195758, "grad_norm": 0.057510893791913986, "learning_rate": 0.0007356443719412724, "loss": 0.0404, "num_input_tokens_seen": 19513152, "step": 9020 }, { "epoch": 1.4722675367047309, "grad_norm": 0.23167626559734344, "learning_rate": 0.00073605220228385, "loss": 0.1153, "num_input_tokens_seen": 19523776, "step": 9025 }, { "epoch": 1.4730831973898857, "grad_norm": 0.03532523289322853, "learning_rate": 0.0007364600326264275, "loss": 0.1117, "num_input_tokens_seen": 19535072, "step": 9030 }, { "epoch": 1.4738988580750407, "grad_norm": 0.015089893713593483, "learning_rate": 0.0007368678629690048, "loss": 0.0937, "num_input_tokens_seen": 19545696, "step": 9035 }, { "epoch": 1.4747145187601958, "grad_norm": 0.4665493667125702, "learning_rate": 0.0007372756933115824, "loss": 0.1995, "num_input_tokens_seen": 19554944, "step": 9040 }, { "epoch": 1.4755301794453508, "grad_norm": 0.21652501821517944, "learning_rate": 0.0007376835236541599, "loss": 0.2747, "num_input_tokens_seen": 19566144, "step": 9045 }, { "epoch": 1.4763458401305056, "grad_norm": 0.329572468996048, "learning_rate": 0.0007380913539967374, "loss": 0.1159, "num_input_tokens_seen": 19576256, "step": 9050 }, { "epoch": 1.4771615008156607, "grad_norm": 0.13731348514556885, "learning_rate": 0.0007384991843393149, "loss": 0.0869, "num_input_tokens_seen": 19587264, "step": 9055 }, { "epoch": 1.4779771615008157, "grad_norm": 0.08550713956356049, "learning_rate": 0.0007389070146818923, "loss": 0.087, "num_input_tokens_seen": 19599904, "step": 9060 }, { "epoch": 1.4787928221859707, "grad_norm": 0.08178147673606873, "learning_rate": 0.0007393148450244699, "loss": 0.1437, "num_input_tokens_seen": 19610848, "step": 9065 }, { "epoch": 1.4796084828711256, "grad_norm": 0.28226912021636963, "learning_rate": 0.0007397226753670473, "loss": 0.1054, "num_input_tokens_seen": 19621344, "step": 9070 }, { "epoch": 1.4804241435562806, "grad_norm": 0.05449576675891876, "learning_rate": 0.0007401305057096248, "loss": 0.1941, "num_input_tokens_seen": 19631616, "step": 9075 }, { "epoch": 1.4812398042414356, "grad_norm": 0.24325771629810333, "learning_rate": 0.0007405383360522023, "loss": 0.2353, "num_input_tokens_seen": 19642528, "step": 9080 }, { "epoch": 1.4820554649265905, "grad_norm": 0.07389519363641739, "learning_rate": 0.0007409461663947797, "loss": 0.1516, "num_input_tokens_seen": 19651840, "step": 9085 }, { "epoch": 1.4828711256117455, "grad_norm": 0.08990222960710526, "learning_rate": 0.0007413539967373573, "loss": 0.0684, "num_input_tokens_seen": 19662848, "step": 9090 }, { "epoch": 1.4836867862969005, "grad_norm": 0.04385393112897873, "learning_rate": 0.0007417618270799348, "loss": 0.1173, "num_input_tokens_seen": 19673984, "step": 9095 }, { "epoch": 1.4845024469820554, "grad_norm": 0.06532658636569977, "learning_rate": 0.0007421696574225123, "loss": 0.0915, "num_input_tokens_seen": 19685504, "step": 9100 }, { "epoch": 1.4853181076672104, "grad_norm": 0.025229470804333687, "learning_rate": 0.0007425774877650897, "loss": 0.0416, "num_input_tokens_seen": 19696832, "step": 9105 }, { "epoch": 1.4861337683523654, "grad_norm": 0.35470104217529297, "learning_rate": 0.0007429853181076672, "loss": 0.1424, "num_input_tokens_seen": 19708064, "step": 9110 }, { "epoch": 1.4869494290375203, "grad_norm": 0.34158971905708313, "learning_rate": 0.0007433931484502448, "loss": 0.2832, "num_input_tokens_seen": 19719264, "step": 9115 }, { "epoch": 1.4877650897226753, "grad_norm": 0.041290439665317535, "learning_rate": 0.0007438009787928222, "loss": 0.0405, "num_input_tokens_seen": 19729280, "step": 9120 }, { "epoch": 1.4885807504078303, "grad_norm": 0.03599981218576431, "learning_rate": 0.0007442088091353996, "loss": 0.0781, "num_input_tokens_seen": 19739840, "step": 9125 }, { "epoch": 1.4893964110929854, "grad_norm": 0.11953336000442505, "learning_rate": 0.0007446166394779772, "loss": 0.0411, "num_input_tokens_seen": 19750304, "step": 9130 }, { "epoch": 1.4902120717781404, "grad_norm": 0.14723843336105347, "learning_rate": 0.0007450244698205547, "loss": 0.1182, "num_input_tokens_seen": 19760288, "step": 9135 }, { "epoch": 1.4910277324632952, "grad_norm": 0.07382355630397797, "learning_rate": 0.0007454323001631322, "loss": 0.0367, "num_input_tokens_seen": 19770080, "step": 9140 }, { "epoch": 1.4918433931484503, "grad_norm": 0.32175618410110474, "learning_rate": 0.0007458401305057096, "loss": 0.1103, "num_input_tokens_seen": 19780512, "step": 9145 }, { "epoch": 1.4926590538336053, "grad_norm": 0.28489458560943604, "learning_rate": 0.0007462479608482871, "loss": 0.1126, "num_input_tokens_seen": 19792032, "step": 9150 }, { "epoch": 1.4934747145187601, "grad_norm": 0.07463068515062332, "learning_rate": 0.0007466557911908646, "loss": 0.1008, "num_input_tokens_seen": 19802496, "step": 9155 }, { "epoch": 1.4942903752039152, "grad_norm": 0.2966947853565216, "learning_rate": 0.0007470636215334421, "loss": 0.1891, "num_input_tokens_seen": 19813024, "step": 9160 }, { "epoch": 1.4951060358890702, "grad_norm": 0.05433628708124161, "learning_rate": 0.0007474714518760197, "loss": 0.1489, "num_input_tokens_seen": 19823968, "step": 9165 }, { "epoch": 1.495921696574225, "grad_norm": 0.10637657344341278, "learning_rate": 0.0007478792822185971, "loss": 0.0561, "num_input_tokens_seen": 19833856, "step": 9170 }, { "epoch": 1.49673735725938, "grad_norm": 0.36320605874061584, "learning_rate": 0.0007482871125611745, "loss": 0.1962, "num_input_tokens_seen": 19843872, "step": 9175 }, { "epoch": 1.497553017944535, "grad_norm": 0.04385019838809967, "learning_rate": 0.0007486949429037521, "loss": 0.0605, "num_input_tokens_seen": 19854144, "step": 9180 }, { "epoch": 1.49836867862969, "grad_norm": 0.04330093413591385, "learning_rate": 0.0007491027732463296, "loss": 0.0296, "num_input_tokens_seen": 19865184, "step": 9185 }, { "epoch": 1.499184339314845, "grad_norm": 0.32716479897499084, "learning_rate": 0.000749510603588907, "loss": 0.2863, "num_input_tokens_seen": 19874528, "step": 9190 }, { "epoch": 1.5, "grad_norm": 0.042429886758327484, "learning_rate": 0.0007499184339314845, "loss": 0.1687, "num_input_tokens_seen": 19885184, "step": 9195 }, { "epoch": 1.5008156606851548, "grad_norm": 0.16900426149368286, "learning_rate": 0.000750326264274062, "loss": 0.1278, "num_input_tokens_seen": 19896768, "step": 9200 }, { "epoch": 1.50163132137031, "grad_norm": 0.04221004992723465, "learning_rate": 0.0007507340946166395, "loss": 0.153, "num_input_tokens_seen": 19906848, "step": 9205 }, { "epoch": 1.502446982055465, "grad_norm": 0.038230542093515396, "learning_rate": 0.000751141924959217, "loss": 0.1196, "num_input_tokens_seen": 19917728, "step": 9210 }, { "epoch": 1.50326264274062, "grad_norm": 0.20852527022361755, "learning_rate": 0.0007515497553017944, "loss": 0.343, "num_input_tokens_seen": 19928224, "step": 9215 }, { "epoch": 1.504078303425775, "grad_norm": 0.134451761841774, "learning_rate": 0.000751957585644372, "loss": 0.1256, "num_input_tokens_seen": 19939904, "step": 9220 }, { "epoch": 1.5048939641109298, "grad_norm": 0.071271613240242, "learning_rate": 0.0007523654159869494, "loss": 0.1314, "num_input_tokens_seen": 19951360, "step": 9225 }, { "epoch": 1.5057096247960848, "grad_norm": 0.12655316293239594, "learning_rate": 0.000752773246329527, "loss": 0.261, "num_input_tokens_seen": 19961792, "step": 9230 }, { "epoch": 1.5065252854812399, "grad_norm": 0.04052901268005371, "learning_rate": 0.0007531810766721044, "loss": 0.0979, "num_input_tokens_seen": 19972288, "step": 9235 }, { "epoch": 1.5073409461663947, "grad_norm": 0.6805994510650635, "learning_rate": 0.0007535889070146818, "loss": 0.1843, "num_input_tokens_seen": 19982688, "step": 9240 }, { "epoch": 1.5081566068515497, "grad_norm": 0.07349798828363419, "learning_rate": 0.0007539967373572594, "loss": 0.1171, "num_input_tokens_seen": 19992992, "step": 9245 }, { "epoch": 1.5089722675367048, "grad_norm": 0.3114171028137207, "learning_rate": 0.0007544045676998369, "loss": 0.1579, "num_input_tokens_seen": 20004384, "step": 9250 }, { "epoch": 1.5097879282218596, "grad_norm": 0.045912064611911774, "learning_rate": 0.0007548123980424145, "loss": 0.2534, "num_input_tokens_seen": 20014784, "step": 9255 }, { "epoch": 1.5106035889070146, "grad_norm": 0.06314994394779205, "learning_rate": 0.0007552202283849918, "loss": 0.0856, "num_input_tokens_seen": 20025536, "step": 9260 }, { "epoch": 1.5114192495921697, "grad_norm": 0.07371212542057037, "learning_rate": 0.0007556280587275693, "loss": 0.0728, "num_input_tokens_seen": 20035712, "step": 9265 }, { "epoch": 1.5122349102773245, "grad_norm": 0.08201035857200623, "learning_rate": 0.0007560358890701469, "loss": 0.1516, "num_input_tokens_seen": 20046912, "step": 9270 }, { "epoch": 1.5130505709624797, "grad_norm": 0.05865674465894699, "learning_rate": 0.0007564437194127243, "loss": 0.0503, "num_input_tokens_seen": 20058304, "step": 9275 }, { "epoch": 1.5138662316476346, "grad_norm": 0.26975804567337036, "learning_rate": 0.0007568515497553018, "loss": 0.0944, "num_input_tokens_seen": 20068256, "step": 9280 }, { "epoch": 1.5146818923327896, "grad_norm": 0.02460741624236107, "learning_rate": 0.0007572593800978793, "loss": 0.1286, "num_input_tokens_seen": 20079232, "step": 9285 }, { "epoch": 1.5154975530179446, "grad_norm": 0.010626477189362049, "learning_rate": 0.0007576672104404568, "loss": 0.0225, "num_input_tokens_seen": 20090176, "step": 9290 }, { "epoch": 1.5163132137030995, "grad_norm": 0.21808360517024994, "learning_rate": 0.0007580750407830343, "loss": 0.09, "num_input_tokens_seen": 20100640, "step": 9295 }, { "epoch": 1.5171288743882545, "grad_norm": 0.31197798252105713, "learning_rate": 0.0007584828711256117, "loss": 0.1233, "num_input_tokens_seen": 20110656, "step": 9300 }, { "epoch": 1.5179445350734095, "grad_norm": 0.07159364223480225, "learning_rate": 0.0007588907014681893, "loss": 0.1392, "num_input_tokens_seen": 20122240, "step": 9305 }, { "epoch": 1.5187601957585644, "grad_norm": 0.011855943128466606, "learning_rate": 0.0007592985318107667, "loss": 0.1328, "num_input_tokens_seen": 20133344, "step": 9310 }, { "epoch": 1.5195758564437194, "grad_norm": 0.39659756422042847, "learning_rate": 0.0007597063621533442, "loss": 0.1872, "num_input_tokens_seen": 20142944, "step": 9315 }, { "epoch": 1.5203915171288744, "grad_norm": 0.23843838274478912, "learning_rate": 0.0007601141924959218, "loss": 0.1653, "num_input_tokens_seen": 20153728, "step": 9320 }, { "epoch": 1.5212071778140293, "grad_norm": 0.17858515679836273, "learning_rate": 0.0007605220228384992, "loss": 0.1768, "num_input_tokens_seen": 20163904, "step": 9325 }, { "epoch": 1.5220228384991843, "grad_norm": 0.07030331343412399, "learning_rate": 0.0007609298531810767, "loss": 0.0681, "num_input_tokens_seen": 20175040, "step": 9330 }, { "epoch": 1.5228384991843393, "grad_norm": 0.09889055788516998, "learning_rate": 0.0007613376835236542, "loss": 0.1594, "num_input_tokens_seen": 20185696, "step": 9335 }, { "epoch": 1.5236541598694942, "grad_norm": 0.08910706639289856, "learning_rate": 0.0007617455138662317, "loss": 0.1579, "num_input_tokens_seen": 20196288, "step": 9340 }, { "epoch": 1.5244698205546494, "grad_norm": 0.11903411149978638, "learning_rate": 0.0007621533442088091, "loss": 0.1586, "num_input_tokens_seen": 20206688, "step": 9345 }, { "epoch": 1.5252854812398042, "grad_norm": 0.2565356194972992, "learning_rate": 0.0007625611745513866, "loss": 0.206, "num_input_tokens_seen": 20217824, "step": 9350 }, { "epoch": 1.5261011419249593, "grad_norm": 0.09693357348442078, "learning_rate": 0.0007629690048939642, "loss": 0.1122, "num_input_tokens_seen": 20227552, "step": 9355 }, { "epoch": 1.5269168026101143, "grad_norm": 0.2581535279750824, "learning_rate": 0.0007633768352365417, "loss": 0.2088, "num_input_tokens_seen": 20237920, "step": 9360 }, { "epoch": 1.5277324632952691, "grad_norm": 0.14089787006378174, "learning_rate": 0.000763784665579119, "loss": 0.1094, "num_input_tokens_seen": 20248928, "step": 9365 }, { "epoch": 1.5285481239804242, "grad_norm": 0.2328333854675293, "learning_rate": 0.0007641924959216966, "loss": 0.1387, "num_input_tokens_seen": 20260448, "step": 9370 }, { "epoch": 1.5293637846655792, "grad_norm": 0.018458805978298187, "learning_rate": 0.0007646003262642741, "loss": 0.06, "num_input_tokens_seen": 20271136, "step": 9375 }, { "epoch": 1.530179445350734, "grad_norm": 0.13734719157218933, "learning_rate": 0.0007650081566068515, "loss": 0.1299, "num_input_tokens_seen": 20282048, "step": 9380 }, { "epoch": 1.530995106035889, "grad_norm": 0.18070201575756073, "learning_rate": 0.0007654159869494291, "loss": 0.1086, "num_input_tokens_seen": 20291936, "step": 9385 }, { "epoch": 1.531810766721044, "grad_norm": 0.09469042718410492, "learning_rate": 0.0007658238172920065, "loss": 0.0774, "num_input_tokens_seen": 20302976, "step": 9390 }, { "epoch": 1.532626427406199, "grad_norm": 0.02390364743769169, "learning_rate": 0.0007662316476345841, "loss": 0.0631, "num_input_tokens_seen": 20314336, "step": 9395 }, { "epoch": 1.533442088091354, "grad_norm": 0.2972950339317322, "learning_rate": 0.0007666394779771615, "loss": 0.1546, "num_input_tokens_seen": 20325248, "step": 9400 }, { "epoch": 1.534257748776509, "grad_norm": 0.39182689785957336, "learning_rate": 0.000767047308319739, "loss": 0.388, "num_input_tokens_seen": 20335936, "step": 9405 }, { "epoch": 1.5350734094616638, "grad_norm": 0.03537694737315178, "learning_rate": 0.0007674551386623165, "loss": 0.0757, "num_input_tokens_seen": 20347424, "step": 9410 }, { "epoch": 1.535889070146819, "grad_norm": 0.06817349791526794, "learning_rate": 0.0007678629690048939, "loss": 0.1584, "num_input_tokens_seen": 20358240, "step": 9415 }, { "epoch": 1.536704730831974, "grad_norm": 0.21510159969329834, "learning_rate": 0.0007682707993474715, "loss": 0.172, "num_input_tokens_seen": 20369184, "step": 9420 }, { "epoch": 1.5375203915171287, "grad_norm": 0.17005428671836853, "learning_rate": 0.000768678629690049, "loss": 0.2609, "num_input_tokens_seen": 20380480, "step": 9425 }, { "epoch": 1.538336052202284, "grad_norm": 0.10062456130981445, "learning_rate": 0.0007690864600326263, "loss": 0.2049, "num_input_tokens_seen": 20389600, "step": 9430 }, { "epoch": 1.5391517128874388, "grad_norm": 0.08677083998918533, "learning_rate": 0.0007694942903752039, "loss": 0.1215, "num_input_tokens_seen": 20399712, "step": 9435 }, { "epoch": 1.5399673735725938, "grad_norm": 0.21858160197734833, "learning_rate": 0.0007699021207177814, "loss": 0.2001, "num_input_tokens_seen": 20410816, "step": 9440 }, { "epoch": 1.5407830342577489, "grad_norm": 0.13692283630371094, "learning_rate": 0.000770309951060359, "loss": 0.08, "num_input_tokens_seen": 20421952, "step": 9445 }, { "epoch": 1.5415986949429037, "grad_norm": 0.07183019816875458, "learning_rate": 0.0007707177814029364, "loss": 0.0549, "num_input_tokens_seen": 20432480, "step": 9450 }, { "epoch": 1.5424143556280587, "grad_norm": 0.04967404529452324, "learning_rate": 0.0007711256117455138, "loss": 0.1739, "num_input_tokens_seen": 20444032, "step": 9455 }, { "epoch": 1.5432300163132138, "grad_norm": 0.05237607657909393, "learning_rate": 0.0007715334420880914, "loss": 0.2333, "num_input_tokens_seen": 20455648, "step": 9460 }, { "epoch": 1.5440456769983686, "grad_norm": 0.2099936455488205, "learning_rate": 0.0007719412724306688, "loss": 0.139, "num_input_tokens_seen": 20466112, "step": 9465 }, { "epoch": 1.5448613376835236, "grad_norm": 0.04406539723277092, "learning_rate": 0.0007723491027732464, "loss": 0.1943, "num_input_tokens_seen": 20477280, "step": 9470 }, { "epoch": 1.5456769983686787, "grad_norm": 0.0637565404176712, "learning_rate": 0.0007727569331158239, "loss": 0.0561, "num_input_tokens_seen": 20488704, "step": 9475 }, { "epoch": 1.5464926590538335, "grad_norm": 0.14393645524978638, "learning_rate": 0.0007731647634584013, "loss": 0.146, "num_input_tokens_seen": 20499168, "step": 9480 }, { "epoch": 1.5473083197389887, "grad_norm": 0.032931309193372726, "learning_rate": 0.0007735725938009788, "loss": 0.0926, "num_input_tokens_seen": 20509696, "step": 9485 }, { "epoch": 1.5481239804241436, "grad_norm": 0.11536554247140884, "learning_rate": 0.0007739804241435563, "loss": 0.1343, "num_input_tokens_seen": 20519424, "step": 9490 }, { "epoch": 1.5489396411092984, "grad_norm": 0.017625289037823677, "learning_rate": 0.0007743882544861339, "loss": 0.1249, "num_input_tokens_seen": 20530304, "step": 9495 }, { "epoch": 1.5497553017944536, "grad_norm": 0.15197528898715973, "learning_rate": 0.0007747960848287112, "loss": 0.1542, "num_input_tokens_seen": 20540384, "step": 9500 }, { "epoch": 1.5505709624796085, "grad_norm": 0.2158111035823822, "learning_rate": 0.0007752039151712887, "loss": 0.096, "num_input_tokens_seen": 20550912, "step": 9505 }, { "epoch": 1.5513866231647635, "grad_norm": 0.04284696653485298, "learning_rate": 0.0007756117455138663, "loss": 0.0918, "num_input_tokens_seen": 20561280, "step": 9510 }, { "epoch": 1.5522022838499185, "grad_norm": 0.1303076297044754, "learning_rate": 0.0007760195758564438, "loss": 0.2241, "num_input_tokens_seen": 20572896, "step": 9515 }, { "epoch": 1.5530179445350734, "grad_norm": 0.044415369629859924, "learning_rate": 0.0007764274061990211, "loss": 0.0969, "num_input_tokens_seen": 20583200, "step": 9520 }, { "epoch": 1.5538336052202284, "grad_norm": 0.09820342063903809, "learning_rate": 0.0007768352365415987, "loss": 0.0713, "num_input_tokens_seen": 20593824, "step": 9525 }, { "epoch": 1.5546492659053834, "grad_norm": 0.0766250267624855, "learning_rate": 0.0007772430668841762, "loss": 0.1066, "num_input_tokens_seen": 20605696, "step": 9530 }, { "epoch": 1.5554649265905383, "grad_norm": 0.11463528871536255, "learning_rate": 0.0007776508972267537, "loss": 0.0916, "num_input_tokens_seen": 20616672, "step": 9535 }, { "epoch": 1.5562805872756933, "grad_norm": 0.3298501968383789, "learning_rate": 0.0007780587275693312, "loss": 0.0981, "num_input_tokens_seen": 20629216, "step": 9540 }, { "epoch": 1.5570962479608483, "grad_norm": 0.15438921749591827, "learning_rate": 0.0007784665579119086, "loss": 0.046, "num_input_tokens_seen": 20640544, "step": 9545 }, { "epoch": 1.5579119086460032, "grad_norm": 0.03456113860011101, "learning_rate": 0.0007788743882544862, "loss": 0.1043, "num_input_tokens_seen": 20651808, "step": 9550 }, { "epoch": 1.5587275693311582, "grad_norm": 0.044604893773794174, "learning_rate": 0.0007792822185970636, "loss": 0.0403, "num_input_tokens_seen": 20663360, "step": 9555 }, { "epoch": 1.5595432300163132, "grad_norm": 0.015352782793343067, "learning_rate": 0.0007796900489396412, "loss": 0.1858, "num_input_tokens_seen": 20674336, "step": 9560 }, { "epoch": 1.560358890701468, "grad_norm": 0.12856276333332062, "learning_rate": 0.0007800978792822186, "loss": 0.0773, "num_input_tokens_seen": 20684960, "step": 9565 }, { "epoch": 1.5611745513866233, "grad_norm": 0.15705958008766174, "learning_rate": 0.000780505709624796, "loss": 0.1627, "num_input_tokens_seen": 20695648, "step": 9570 }, { "epoch": 1.5619902120717781, "grad_norm": 0.26499855518341064, "learning_rate": 0.0007809135399673736, "loss": 0.1323, "num_input_tokens_seen": 20707008, "step": 9575 }, { "epoch": 1.5628058727569332, "grad_norm": 0.44296300411224365, "learning_rate": 0.0007813213703099511, "loss": 0.2125, "num_input_tokens_seen": 20717024, "step": 9580 }, { "epoch": 1.5636215334420882, "grad_norm": 0.10148364305496216, "learning_rate": 0.0007817292006525287, "loss": 0.1699, "num_input_tokens_seen": 20727968, "step": 9585 }, { "epoch": 1.564437194127243, "grad_norm": 0.7346378564834595, "learning_rate": 0.000782137030995106, "loss": 0.3014, "num_input_tokens_seen": 20739520, "step": 9590 }, { "epoch": 1.565252854812398, "grad_norm": 0.25575461983680725, "learning_rate": 0.0007825448613376835, "loss": 0.099, "num_input_tokens_seen": 20751104, "step": 9595 }, { "epoch": 1.566068515497553, "grad_norm": 0.041789885610342026, "learning_rate": 0.0007829526916802611, "loss": 0.0537, "num_input_tokens_seen": 20760672, "step": 9600 }, { "epoch": 1.566884176182708, "grad_norm": 0.2482481747865677, "learning_rate": 0.0007833605220228385, "loss": 0.1595, "num_input_tokens_seen": 20772160, "step": 9605 }, { "epoch": 1.567699836867863, "grad_norm": 0.32589903473854065, "learning_rate": 0.000783768352365416, "loss": 0.076, "num_input_tokens_seen": 20783840, "step": 9610 }, { "epoch": 1.568515497553018, "grad_norm": 0.056545983999967575, "learning_rate": 0.0007841761827079935, "loss": 0.0829, "num_input_tokens_seen": 20795488, "step": 9615 }, { "epoch": 1.5693311582381728, "grad_norm": 0.02571425400674343, "learning_rate": 0.000784584013050571, "loss": 0.1417, "num_input_tokens_seen": 20806272, "step": 9620 }, { "epoch": 1.5701468189233279, "grad_norm": 0.12023447453975677, "learning_rate": 0.0007849918433931485, "loss": 0.1562, "num_input_tokens_seen": 20817472, "step": 9625 }, { "epoch": 1.570962479608483, "grad_norm": 0.17629219591617584, "learning_rate": 0.000785399673735726, "loss": 0.1079, "num_input_tokens_seen": 20828992, "step": 9630 }, { "epoch": 1.5717781402936377, "grad_norm": 0.20702822506427765, "learning_rate": 0.0007858075040783035, "loss": 0.1637, "num_input_tokens_seen": 20841184, "step": 9635 }, { "epoch": 1.572593800978793, "grad_norm": 0.10118906199932098, "learning_rate": 0.0007862153344208809, "loss": 0.1106, "num_input_tokens_seen": 20852096, "step": 9640 }, { "epoch": 1.5734094616639478, "grad_norm": 0.016205577179789543, "learning_rate": 0.0007866231647634584, "loss": 0.1564, "num_input_tokens_seen": 20862784, "step": 9645 }, { "epoch": 1.5742251223491026, "grad_norm": 0.3246265947818756, "learning_rate": 0.000787030995106036, "loss": 0.2121, "num_input_tokens_seen": 20872320, "step": 9650 }, { "epoch": 1.5750407830342579, "grad_norm": 0.12944892048835754, "learning_rate": 0.0007874388254486133, "loss": 0.1364, "num_input_tokens_seen": 20883648, "step": 9655 }, { "epoch": 1.5758564437194127, "grad_norm": 0.2590799629688263, "learning_rate": 0.0007878466557911908, "loss": 0.1175, "num_input_tokens_seen": 20892992, "step": 9660 }, { "epoch": 1.5766721044045677, "grad_norm": 0.049245089292526245, "learning_rate": 0.0007882544861337684, "loss": 0.1526, "num_input_tokens_seen": 20903552, "step": 9665 }, { "epoch": 1.5774877650897228, "grad_norm": 0.15859724581241608, "learning_rate": 0.0007886623164763459, "loss": 0.118, "num_input_tokens_seen": 20914400, "step": 9670 }, { "epoch": 1.5783034257748776, "grad_norm": 0.08111312985420227, "learning_rate": 0.0007890701468189233, "loss": 0.0824, "num_input_tokens_seen": 20925280, "step": 9675 }, { "epoch": 1.5791190864600326, "grad_norm": 0.2263268232345581, "learning_rate": 0.0007894779771615008, "loss": 0.1133, "num_input_tokens_seen": 20936256, "step": 9680 }, { "epoch": 1.5799347471451877, "grad_norm": 0.24854174256324768, "learning_rate": 0.0007898858075040783, "loss": 0.2001, "num_input_tokens_seen": 20946432, "step": 9685 }, { "epoch": 1.5807504078303425, "grad_norm": 0.3115064203739166, "learning_rate": 0.0007902936378466558, "loss": 0.2652, "num_input_tokens_seen": 20956576, "step": 9690 }, { "epoch": 1.5815660685154975, "grad_norm": 0.054324883967638016, "learning_rate": 0.0007907014681892332, "loss": 0.2013, "num_input_tokens_seen": 20967296, "step": 9695 }, { "epoch": 1.5823817292006526, "grad_norm": 0.15358050167560577, "learning_rate": 0.0007911092985318108, "loss": 0.1248, "num_input_tokens_seen": 20978176, "step": 9700 }, { "epoch": 1.5831973898858074, "grad_norm": 0.05537407100200653, "learning_rate": 0.0007915171288743883, "loss": 0.0842, "num_input_tokens_seen": 20989824, "step": 9705 }, { "epoch": 1.5840130505709626, "grad_norm": 0.1706741601228714, "learning_rate": 0.0007919249592169657, "loss": 0.1579, "num_input_tokens_seen": 21000992, "step": 9710 }, { "epoch": 1.5848287112561175, "grad_norm": 0.179428368806839, "learning_rate": 0.0007923327895595433, "loss": 0.086, "num_input_tokens_seen": 21010624, "step": 9715 }, { "epoch": 1.5856443719412723, "grad_norm": 0.19192343950271606, "learning_rate": 0.0007927406199021207, "loss": 0.224, "num_input_tokens_seen": 21021856, "step": 9720 }, { "epoch": 1.5864600326264275, "grad_norm": 0.2749413847923279, "learning_rate": 0.0007931484502446982, "loss": 0.2647, "num_input_tokens_seen": 21032864, "step": 9725 }, { "epoch": 1.5872756933115824, "grad_norm": 0.020634248852729797, "learning_rate": 0.0007935562805872757, "loss": 0.0556, "num_input_tokens_seen": 21044800, "step": 9730 }, { "epoch": 1.5880913539967374, "grad_norm": 0.036429792642593384, "learning_rate": 0.0007939641109298532, "loss": 0.1549, "num_input_tokens_seen": 21055936, "step": 9735 }, { "epoch": 1.5889070146818924, "grad_norm": 0.11412417143583298, "learning_rate": 0.0007943719412724308, "loss": 0.0468, "num_input_tokens_seen": 21066944, "step": 9740 }, { "epoch": 1.5897226753670473, "grad_norm": 0.34290286898612976, "learning_rate": 0.0007947797716150081, "loss": 0.2389, "num_input_tokens_seen": 21077440, "step": 9745 }, { "epoch": 1.5905383360522023, "grad_norm": 0.07320336997509003, "learning_rate": 0.0007951876019575857, "loss": 0.2007, "num_input_tokens_seen": 21087616, "step": 9750 }, { "epoch": 1.5913539967373573, "grad_norm": 0.13163729012012482, "learning_rate": 0.0007955954323001632, "loss": 0.1386, "num_input_tokens_seen": 21099424, "step": 9755 }, { "epoch": 1.5921696574225122, "grad_norm": 0.08553323149681091, "learning_rate": 0.0007960032626427406, "loss": 0.1947, "num_input_tokens_seen": 21109696, "step": 9760 }, { "epoch": 1.5929853181076672, "grad_norm": 0.08144395798444748, "learning_rate": 0.0007964110929853181, "loss": 0.1125, "num_input_tokens_seen": 21120608, "step": 9765 }, { "epoch": 1.5938009787928222, "grad_norm": 0.27438557147979736, "learning_rate": 0.0007968189233278956, "loss": 0.1317, "num_input_tokens_seen": 21131552, "step": 9770 }, { "epoch": 1.594616639477977, "grad_norm": 0.13903331756591797, "learning_rate": 0.0007972267536704732, "loss": 0.116, "num_input_tokens_seen": 21141920, "step": 9775 }, { "epoch": 1.595432300163132, "grad_norm": 0.07145722210407257, "learning_rate": 0.0007976345840130506, "loss": 0.0591, "num_input_tokens_seen": 21152768, "step": 9780 }, { "epoch": 1.5962479608482871, "grad_norm": 0.19481636583805084, "learning_rate": 0.000798042414355628, "loss": 0.0639, "num_input_tokens_seen": 21162816, "step": 9785 }, { "epoch": 1.597063621533442, "grad_norm": 0.1090518906712532, "learning_rate": 0.0007984502446982056, "loss": 0.1006, "num_input_tokens_seen": 21173376, "step": 9790 }, { "epoch": 1.5978792822185972, "grad_norm": 0.07812517881393433, "learning_rate": 0.000798858075040783, "loss": 0.0791, "num_input_tokens_seen": 21184064, "step": 9795 }, { "epoch": 1.598694942903752, "grad_norm": 0.10007583349943161, "learning_rate": 0.0007992659053833605, "loss": 0.0489, "num_input_tokens_seen": 21195136, "step": 9800 }, { "epoch": 1.599510603588907, "grad_norm": 0.00513321440666914, "learning_rate": 0.0007996737357259381, "loss": 0.1956, "num_input_tokens_seen": 21205824, "step": 9805 }, { "epoch": 1.600326264274062, "grad_norm": 0.005637241993099451, "learning_rate": 0.0008000815660685155, "loss": 0.0374, "num_input_tokens_seen": 21215456, "step": 9810 }, { "epoch": 1.601141924959217, "grad_norm": 0.0892510935664177, "learning_rate": 0.000800489396411093, "loss": 0.0899, "num_input_tokens_seen": 21226368, "step": 9815 }, { "epoch": 1.601957585644372, "grad_norm": 0.376203328371048, "learning_rate": 0.0008008972267536705, "loss": 0.1367, "num_input_tokens_seen": 21238240, "step": 9820 }, { "epoch": 1.602773246329527, "grad_norm": 0.36713284254074097, "learning_rate": 0.000801305057096248, "loss": 0.1611, "num_input_tokens_seen": 21248896, "step": 9825 }, { "epoch": 1.6035889070146818, "grad_norm": 0.0800262987613678, "learning_rate": 0.0008017128874388254, "loss": 0.1063, "num_input_tokens_seen": 21259264, "step": 9830 }, { "epoch": 1.6044045676998369, "grad_norm": 0.0729251429438591, "learning_rate": 0.0008021207177814029, "loss": 0.0827, "num_input_tokens_seen": 21269472, "step": 9835 }, { "epoch": 1.605220228384992, "grad_norm": 0.4738904535770416, "learning_rate": 0.0008025285481239805, "loss": 0.1909, "num_input_tokens_seen": 21281280, "step": 9840 }, { "epoch": 1.6060358890701467, "grad_norm": 0.03555934503674507, "learning_rate": 0.000802936378466558, "loss": 0.1532, "num_input_tokens_seen": 21290752, "step": 9845 }, { "epoch": 1.6068515497553018, "grad_norm": 0.2605329751968384, "learning_rate": 0.0008033442088091353, "loss": 0.2615, "num_input_tokens_seen": 21299520, "step": 9850 }, { "epoch": 1.6076672104404568, "grad_norm": 0.21877489984035492, "learning_rate": 0.0008037520391517129, "loss": 0.1428, "num_input_tokens_seen": 21309632, "step": 9855 }, { "epoch": 1.6084828711256116, "grad_norm": 0.2073116898536682, "learning_rate": 0.0008041598694942904, "loss": 0.079, "num_input_tokens_seen": 21320416, "step": 9860 }, { "epoch": 1.6092985318107669, "grad_norm": 0.23353806138038635, "learning_rate": 0.0008045676998368679, "loss": 0.1914, "num_input_tokens_seen": 21330432, "step": 9865 }, { "epoch": 1.6101141924959217, "grad_norm": 0.06838128715753555, "learning_rate": 0.0008049755301794454, "loss": 0.0641, "num_input_tokens_seen": 21341056, "step": 9870 }, { "epoch": 1.6109298531810765, "grad_norm": 0.08493325114250183, "learning_rate": 0.0008053833605220228, "loss": 0.1241, "num_input_tokens_seen": 21352864, "step": 9875 }, { "epoch": 1.6117455138662318, "grad_norm": 0.1823084056377411, "learning_rate": 0.0008057911908646003, "loss": 0.1849, "num_input_tokens_seen": 21364512, "step": 9880 }, { "epoch": 1.6125611745513866, "grad_norm": 0.03267619386315346, "learning_rate": 0.0008061990212071778, "loss": 0.1082, "num_input_tokens_seen": 21374656, "step": 9885 }, { "epoch": 1.6133768352365416, "grad_norm": 0.22920729219913483, "learning_rate": 0.0008066068515497554, "loss": 0.1155, "num_input_tokens_seen": 21384928, "step": 9890 }, { "epoch": 1.6141924959216967, "grad_norm": 0.009968969970941544, "learning_rate": 0.0008070146818923329, "loss": 0.0435, "num_input_tokens_seen": 21395488, "step": 9895 }, { "epoch": 1.6150081566068515, "grad_norm": 0.06347603350877762, "learning_rate": 0.0008074225122349102, "loss": 0.0536, "num_input_tokens_seen": 21405760, "step": 9900 }, { "epoch": 1.6158238172920065, "grad_norm": 0.03512804955244064, "learning_rate": 0.0008078303425774878, "loss": 0.3131, "num_input_tokens_seen": 21416096, "step": 9905 }, { "epoch": 1.6166394779771616, "grad_norm": 0.25602778792381287, "learning_rate": 0.0008082381729200653, "loss": 0.1505, "num_input_tokens_seen": 21426176, "step": 9910 }, { "epoch": 1.6174551386623164, "grad_norm": 0.036331601440906525, "learning_rate": 0.0008086460032626428, "loss": 0.0997, "num_input_tokens_seen": 21437024, "step": 9915 }, { "epoch": 1.6182707993474714, "grad_norm": 0.06043427065014839, "learning_rate": 0.0008090538336052202, "loss": 0.1313, "num_input_tokens_seen": 21447168, "step": 9920 }, { "epoch": 1.6190864600326265, "grad_norm": 0.19334854185581207, "learning_rate": 0.0008094616639477977, "loss": 0.2161, "num_input_tokens_seen": 21457568, "step": 9925 }, { "epoch": 1.6199021207177813, "grad_norm": 0.08765456825494766, "learning_rate": 0.0008098694942903753, "loss": 0.0758, "num_input_tokens_seen": 21467264, "step": 9930 }, { "epoch": 1.6207177814029365, "grad_norm": 0.1640249788761139, "learning_rate": 0.0008102773246329527, "loss": 0.0498, "num_input_tokens_seen": 21479360, "step": 9935 }, { "epoch": 1.6215334420880914, "grad_norm": 0.1942995935678482, "learning_rate": 0.0008106851549755301, "loss": 0.0642, "num_input_tokens_seen": 21490208, "step": 9940 }, { "epoch": 1.6223491027732462, "grad_norm": 0.1993321031332016, "learning_rate": 0.0008110929853181077, "loss": 0.138, "num_input_tokens_seen": 21501472, "step": 9945 }, { "epoch": 1.6231647634584014, "grad_norm": 0.09414301067590714, "learning_rate": 0.0008115008156606851, "loss": 0.1241, "num_input_tokens_seen": 21512896, "step": 9950 }, { "epoch": 1.6239804241435563, "grad_norm": 0.07336173206567764, "learning_rate": 0.0008119086460032627, "loss": 0.1602, "num_input_tokens_seen": 21523744, "step": 9955 }, { "epoch": 1.6247960848287113, "grad_norm": 0.23218612372875214, "learning_rate": 0.0008123164763458402, "loss": 0.0861, "num_input_tokens_seen": 21534592, "step": 9960 }, { "epoch": 1.6256117455138663, "grad_norm": 0.0703354924917221, "learning_rate": 0.0008127243066884176, "loss": 0.0807, "num_input_tokens_seen": 21546784, "step": 9965 }, { "epoch": 1.6264274061990212, "grad_norm": 0.5143048763275146, "learning_rate": 0.0008131321370309951, "loss": 0.1959, "num_input_tokens_seen": 21557792, "step": 9970 }, { "epoch": 1.6272430668841762, "grad_norm": 0.26658895611763, "learning_rate": 0.0008135399673735726, "loss": 0.1729, "num_input_tokens_seen": 21566496, "step": 9975 }, { "epoch": 1.6280587275693312, "grad_norm": 0.007412992883473635, "learning_rate": 0.0008139477977161502, "loss": 0.0649, "num_input_tokens_seen": 21576800, "step": 9980 }, { "epoch": 1.628874388254486, "grad_norm": 0.14374643564224243, "learning_rate": 0.0008143556280587275, "loss": 0.1996, "num_input_tokens_seen": 21587360, "step": 9985 }, { "epoch": 1.629690048939641, "grad_norm": 0.23667171597480774, "learning_rate": 0.000814763458401305, "loss": 0.1, "num_input_tokens_seen": 21597824, "step": 9990 }, { "epoch": 1.6305057096247961, "grad_norm": 0.02756788767874241, "learning_rate": 0.0008151712887438826, "loss": 0.0974, "num_input_tokens_seen": 21608064, "step": 9995 }, { "epoch": 1.631321370309951, "grad_norm": 0.03173388913273811, "learning_rate": 0.0008155791190864601, "loss": 0.129, "num_input_tokens_seen": 21617568, "step": 10000 }, { "epoch": 1.632137030995106, "grad_norm": 0.0286561269313097, "learning_rate": 0.0008159869494290375, "loss": 0.1911, "num_input_tokens_seen": 21628544, "step": 10005 }, { "epoch": 1.632952691680261, "grad_norm": 0.02535759098827839, "learning_rate": 0.000816394779771615, "loss": 0.0685, "num_input_tokens_seen": 21637440, "step": 10010 }, { "epoch": 1.6337683523654158, "grad_norm": 0.26951324939727783, "learning_rate": 0.0008168026101141925, "loss": 0.1693, "num_input_tokens_seen": 21647200, "step": 10015 }, { "epoch": 1.634584013050571, "grad_norm": 0.09645380079746246, "learning_rate": 0.00081721044045677, "loss": 0.181, "num_input_tokens_seen": 21657888, "step": 10020 }, { "epoch": 1.635399673735726, "grad_norm": 0.19887897372245789, "learning_rate": 0.0008176182707993475, "loss": 0.1373, "num_input_tokens_seen": 21668640, "step": 10025 }, { "epoch": 1.636215334420881, "grad_norm": 0.18069353699684143, "learning_rate": 0.000818026101141925, "loss": 0.1123, "num_input_tokens_seen": 21679008, "step": 10030 }, { "epoch": 1.637030995106036, "grad_norm": 0.09430285543203354, "learning_rate": 0.0008184339314845025, "loss": 0.2209, "num_input_tokens_seen": 21689984, "step": 10035 }, { "epoch": 1.6378466557911908, "grad_norm": 0.050288546830415726, "learning_rate": 0.0008188417618270799, "loss": 0.1313, "num_input_tokens_seen": 21700960, "step": 10040 }, { "epoch": 1.6386623164763459, "grad_norm": 0.11963741481304169, "learning_rate": 0.0008192495921696575, "loss": 0.1268, "num_input_tokens_seen": 21711680, "step": 10045 }, { "epoch": 1.639477977161501, "grad_norm": 0.18857711553573608, "learning_rate": 0.0008196574225122349, "loss": 0.1761, "num_input_tokens_seen": 21722432, "step": 10050 }, { "epoch": 1.6402936378466557, "grad_norm": 0.2952326238155365, "learning_rate": 0.0008200652528548124, "loss": 0.0944, "num_input_tokens_seen": 21732608, "step": 10055 }, { "epoch": 1.6411092985318108, "grad_norm": 0.06583889573812485, "learning_rate": 0.0008204730831973899, "loss": 0.1298, "num_input_tokens_seen": 21741760, "step": 10060 }, { "epoch": 1.6419249592169658, "grad_norm": 0.23590274155139923, "learning_rate": 0.0008208809135399674, "loss": 0.1632, "num_input_tokens_seen": 21753472, "step": 10065 }, { "epoch": 1.6427406199021206, "grad_norm": 0.11746193468570709, "learning_rate": 0.000821288743882545, "loss": 0.1314, "num_input_tokens_seen": 21764480, "step": 10070 }, { "epoch": 1.6435562805872757, "grad_norm": 0.11129488050937653, "learning_rate": 0.0008216965742251223, "loss": 0.1275, "num_input_tokens_seen": 21775296, "step": 10075 }, { "epoch": 1.6443719412724307, "grad_norm": 0.25786536931991577, "learning_rate": 0.0008221044045676999, "loss": 0.2348, "num_input_tokens_seen": 21786240, "step": 10080 }, { "epoch": 1.6451876019575855, "grad_norm": 0.1905505508184433, "learning_rate": 0.0008225122349102774, "loss": 0.171, "num_input_tokens_seen": 21797440, "step": 10085 }, { "epoch": 1.6460032626427408, "grad_norm": 0.10690456628799438, "learning_rate": 0.0008229200652528548, "loss": 0.069, "num_input_tokens_seen": 21807456, "step": 10090 }, { "epoch": 1.6468189233278956, "grad_norm": 0.015939027070999146, "learning_rate": 0.0008233278955954323, "loss": 0.0365, "num_input_tokens_seen": 21819968, "step": 10095 }, { "epoch": 1.6476345840130504, "grad_norm": 0.03100682608783245, "learning_rate": 0.0008237357259380098, "loss": 0.1481, "num_input_tokens_seen": 21831328, "step": 10100 }, { "epoch": 1.6484502446982057, "grad_norm": 0.2114262878894806, "learning_rate": 0.0008241435562805873, "loss": 0.2003, "num_input_tokens_seen": 21843648, "step": 10105 }, { "epoch": 1.6492659053833605, "grad_norm": 0.1278071254491806, "learning_rate": 0.0008245513866231648, "loss": 0.0986, "num_input_tokens_seen": 21853120, "step": 10110 }, { "epoch": 1.6500815660685155, "grad_norm": 0.06103214994072914, "learning_rate": 0.0008249592169657422, "loss": 0.129, "num_input_tokens_seen": 21863424, "step": 10115 }, { "epoch": 1.6508972267536706, "grad_norm": 0.048976849764585495, "learning_rate": 0.0008253670473083198, "loss": 0.173, "num_input_tokens_seen": 21873952, "step": 10120 }, { "epoch": 1.6517128874388254, "grad_norm": 0.07527100294828415, "learning_rate": 0.0008257748776508972, "loss": 0.1356, "num_input_tokens_seen": 21884736, "step": 10125 }, { "epoch": 1.6525285481239804, "grad_norm": 0.038339171558618546, "learning_rate": 0.0008261827079934747, "loss": 0.0748, "num_input_tokens_seen": 21895264, "step": 10130 }, { "epoch": 1.6533442088091355, "grad_norm": 0.15123924612998962, "learning_rate": 0.0008265905383360523, "loss": 0.2225, "num_input_tokens_seen": 21906720, "step": 10135 }, { "epoch": 1.6541598694942903, "grad_norm": 0.19199934601783752, "learning_rate": 0.0008269983686786296, "loss": 0.1511, "num_input_tokens_seen": 21916416, "step": 10140 }, { "epoch": 1.6549755301794453, "grad_norm": 0.1354297697544098, "learning_rate": 0.0008274061990212072, "loss": 0.1065, "num_input_tokens_seen": 21927328, "step": 10145 }, { "epoch": 1.6557911908646004, "grad_norm": 0.16701364517211914, "learning_rate": 0.0008278140293637847, "loss": 0.1901, "num_input_tokens_seen": 21938112, "step": 10150 }, { "epoch": 1.6566068515497552, "grad_norm": 0.10553938150405884, "learning_rate": 0.0008282218597063622, "loss": 0.123, "num_input_tokens_seen": 21949472, "step": 10155 }, { "epoch": 1.6574225122349104, "grad_norm": 0.031392499804496765, "learning_rate": 0.0008286296900489396, "loss": 0.0626, "num_input_tokens_seen": 21960384, "step": 10160 }, { "epoch": 1.6582381729200653, "grad_norm": 0.15648214519023895, "learning_rate": 0.0008290375203915171, "loss": 0.1233, "num_input_tokens_seen": 21971104, "step": 10165 }, { "epoch": 1.65905383360522, "grad_norm": 0.06735506653785706, "learning_rate": 0.0008294453507340947, "loss": 0.0851, "num_input_tokens_seen": 21982400, "step": 10170 }, { "epoch": 1.6598694942903753, "grad_norm": 0.017349006608128548, "learning_rate": 0.0008298531810766721, "loss": 0.1242, "num_input_tokens_seen": 21994080, "step": 10175 }, { "epoch": 1.6606851549755302, "grad_norm": 0.1527702659368515, "learning_rate": 0.0008302610114192496, "loss": 0.0567, "num_input_tokens_seen": 22005984, "step": 10180 }, { "epoch": 1.6615008156606852, "grad_norm": 0.07216469943523407, "learning_rate": 0.0008306688417618271, "loss": 0.1481, "num_input_tokens_seen": 22015744, "step": 10185 }, { "epoch": 1.6623164763458402, "grad_norm": 0.15371765196323395, "learning_rate": 0.0008310766721044046, "loss": 0.2624, "num_input_tokens_seen": 22025248, "step": 10190 }, { "epoch": 1.663132137030995, "grad_norm": 0.06531374156475067, "learning_rate": 0.0008314845024469821, "loss": 0.0583, "num_input_tokens_seen": 22035456, "step": 10195 }, { "epoch": 1.66394779771615, "grad_norm": 0.06966865062713623, "learning_rate": 0.0008318923327895596, "loss": 0.1549, "num_input_tokens_seen": 22045568, "step": 10200 }, { "epoch": 1.6647634584013051, "grad_norm": 0.036182425916194916, "learning_rate": 0.000832300163132137, "loss": 0.0536, "num_input_tokens_seen": 22055360, "step": 10205 }, { "epoch": 1.66557911908646, "grad_norm": 0.040477022528648376, "learning_rate": 0.0008327079934747145, "loss": 0.0946, "num_input_tokens_seen": 22066624, "step": 10210 }, { "epoch": 1.666394779771615, "grad_norm": 0.06451041251420975, "learning_rate": 0.000833115823817292, "loss": 0.1723, "num_input_tokens_seen": 22078528, "step": 10215 }, { "epoch": 1.66721044045677, "grad_norm": 0.27383536100387573, "learning_rate": 0.0008335236541598696, "loss": 0.1661, "num_input_tokens_seen": 22089824, "step": 10220 }, { "epoch": 1.6680261011419248, "grad_norm": 0.09901408106088638, "learning_rate": 0.0008339314845024471, "loss": 0.1523, "num_input_tokens_seen": 22099744, "step": 10225 }, { "epoch": 1.6688417618270799, "grad_norm": 0.3080720901489258, "learning_rate": 0.0008343393148450244, "loss": 0.2365, "num_input_tokens_seen": 22110432, "step": 10230 }, { "epoch": 1.669657422512235, "grad_norm": 0.1689985692501068, "learning_rate": 0.000834747145187602, "loss": 0.1776, "num_input_tokens_seen": 22120992, "step": 10235 }, { "epoch": 1.6704730831973897, "grad_norm": 0.36505305767059326, "learning_rate": 0.0008351549755301795, "loss": 0.2088, "num_input_tokens_seen": 22130816, "step": 10240 }, { "epoch": 1.671288743882545, "grad_norm": 0.11974579095840454, "learning_rate": 0.0008355628058727569, "loss": 0.0763, "num_input_tokens_seen": 22142144, "step": 10245 }, { "epoch": 1.6721044045676998, "grad_norm": 0.21502956748008728, "learning_rate": 0.0008359706362153344, "loss": 0.1549, "num_input_tokens_seen": 22152544, "step": 10250 }, { "epoch": 1.6729200652528549, "grad_norm": 0.29304221272468567, "learning_rate": 0.0008363784665579119, "loss": 0.1516, "num_input_tokens_seen": 22162720, "step": 10255 }, { "epoch": 1.67373572593801, "grad_norm": 0.11507556587457657, "learning_rate": 0.0008367862969004895, "loss": 0.1327, "num_input_tokens_seen": 22172832, "step": 10260 }, { "epoch": 1.6745513866231647, "grad_norm": 0.0777512788772583, "learning_rate": 0.0008371941272430669, "loss": 0.0519, "num_input_tokens_seen": 22183360, "step": 10265 }, { "epoch": 1.6753670473083198, "grad_norm": 0.02606668882071972, "learning_rate": 0.0008376019575856443, "loss": 0.1347, "num_input_tokens_seen": 22192640, "step": 10270 }, { "epoch": 1.6761827079934748, "grad_norm": 0.19271859526634216, "learning_rate": 0.0008380097879282219, "loss": 0.2345, "num_input_tokens_seen": 22204416, "step": 10275 }, { "epoch": 1.6769983686786296, "grad_norm": 0.05584167316555977, "learning_rate": 0.0008384176182707993, "loss": 0.0319, "num_input_tokens_seen": 22216288, "step": 10280 }, { "epoch": 1.6778140293637847, "grad_norm": 0.052766405045986176, "learning_rate": 0.0008388254486133769, "loss": 0.2267, "num_input_tokens_seen": 22226752, "step": 10285 }, { "epoch": 1.6786296900489397, "grad_norm": 0.10538561642169952, "learning_rate": 0.0008392332789559544, "loss": 0.225, "num_input_tokens_seen": 22237952, "step": 10290 }, { "epoch": 1.6794453507340945, "grad_norm": 0.06835313141345978, "learning_rate": 0.0008396411092985318, "loss": 0.1137, "num_input_tokens_seen": 22248032, "step": 10295 }, { "epoch": 1.6802610114192496, "grad_norm": 0.27471956610679626, "learning_rate": 0.0008400489396411093, "loss": 0.1408, "num_input_tokens_seen": 22258816, "step": 10300 }, { "epoch": 1.6810766721044046, "grad_norm": 0.07462483644485474, "learning_rate": 0.0008404567699836868, "loss": 0.1832, "num_input_tokens_seen": 22270720, "step": 10305 }, { "epoch": 1.6818923327895594, "grad_norm": 0.22200042009353638, "learning_rate": 0.0008408646003262644, "loss": 0.1152, "num_input_tokens_seen": 22280160, "step": 10310 }, { "epoch": 1.6827079934747147, "grad_norm": 0.018682435154914856, "learning_rate": 0.0008412724306688417, "loss": 0.0519, "num_input_tokens_seen": 22291264, "step": 10315 }, { "epoch": 1.6835236541598695, "grad_norm": 0.1510961651802063, "learning_rate": 0.0008416802610114192, "loss": 0.1112, "num_input_tokens_seen": 22302080, "step": 10320 }, { "epoch": 1.6843393148450243, "grad_norm": 0.46816954016685486, "learning_rate": 0.0008420880913539968, "loss": 0.2358, "num_input_tokens_seen": 22313344, "step": 10325 }, { "epoch": 1.6851549755301796, "grad_norm": 0.07551740109920502, "learning_rate": 0.0008424959216965743, "loss": 0.1266, "num_input_tokens_seen": 22324416, "step": 10330 }, { "epoch": 1.6859706362153344, "grad_norm": 0.0401376448571682, "learning_rate": 0.0008429037520391518, "loss": 0.1085, "num_input_tokens_seen": 22334528, "step": 10335 }, { "epoch": 1.6867862969004894, "grad_norm": 0.0454762764275074, "learning_rate": 0.0008433115823817292, "loss": 0.0972, "num_input_tokens_seen": 22344864, "step": 10340 }, { "epoch": 1.6876019575856445, "grad_norm": 0.0823223739862442, "learning_rate": 0.0008437194127243067, "loss": 0.1238, "num_input_tokens_seen": 22355360, "step": 10345 }, { "epoch": 1.6884176182707993, "grad_norm": 0.043828509747982025, "learning_rate": 0.0008441272430668842, "loss": 0.0374, "num_input_tokens_seen": 22366016, "step": 10350 }, { "epoch": 1.6892332789559543, "grad_norm": 0.21091154217720032, "learning_rate": 0.0008445350734094617, "loss": 0.0975, "num_input_tokens_seen": 22376128, "step": 10355 }, { "epoch": 1.6900489396411094, "grad_norm": 0.19217798113822937, "learning_rate": 0.0008449429037520392, "loss": 0.2432, "num_input_tokens_seen": 22385632, "step": 10360 }, { "epoch": 1.6908646003262642, "grad_norm": 0.17423127591609955, "learning_rate": 0.0008453507340946166, "loss": 0.2182, "num_input_tokens_seen": 22397152, "step": 10365 }, { "epoch": 1.6916802610114192, "grad_norm": 0.06854557245969772, "learning_rate": 0.0008457585644371941, "loss": 0.1772, "num_input_tokens_seen": 22409952, "step": 10370 }, { "epoch": 1.6924959216965743, "grad_norm": 0.05031463876366615, "learning_rate": 0.0008461663947797717, "loss": 0.1286, "num_input_tokens_seen": 22422016, "step": 10375 }, { "epoch": 1.693311582381729, "grad_norm": 0.10461442172527313, "learning_rate": 0.0008465742251223492, "loss": 0.1723, "num_input_tokens_seen": 22432960, "step": 10380 }, { "epoch": 1.6941272430668843, "grad_norm": 0.06757992506027222, "learning_rate": 0.0008469820554649265, "loss": 0.082, "num_input_tokens_seen": 22443648, "step": 10385 }, { "epoch": 1.6949429037520392, "grad_norm": 0.09296396374702454, "learning_rate": 0.0008473898858075041, "loss": 0.1124, "num_input_tokens_seen": 22454560, "step": 10390 }, { "epoch": 1.695758564437194, "grad_norm": 0.16991855204105377, "learning_rate": 0.0008477977161500816, "loss": 0.127, "num_input_tokens_seen": 22466400, "step": 10395 }, { "epoch": 1.6965742251223492, "grad_norm": 0.05406171828508377, "learning_rate": 0.0008482055464926591, "loss": 0.0847, "num_input_tokens_seen": 22476800, "step": 10400 }, { "epoch": 1.697389885807504, "grad_norm": 0.12532663345336914, "learning_rate": 0.0008486133768352365, "loss": 0.0764, "num_input_tokens_seen": 22488288, "step": 10405 }, { "epoch": 1.698205546492659, "grad_norm": 0.23430512845516205, "learning_rate": 0.000849021207177814, "loss": 0.1187, "num_input_tokens_seen": 22499264, "step": 10410 }, { "epoch": 1.6990212071778141, "grad_norm": 0.08874372392892838, "learning_rate": 0.0008494290375203916, "loss": 0.1074, "num_input_tokens_seen": 22510080, "step": 10415 }, { "epoch": 1.699836867862969, "grad_norm": 0.1635916829109192, "learning_rate": 0.000849836867862969, "loss": 0.0719, "num_input_tokens_seen": 22520096, "step": 10420 }, { "epoch": 1.700652528548124, "grad_norm": 0.07796313613653183, "learning_rate": 0.0008502446982055465, "loss": 0.0887, "num_input_tokens_seen": 22531168, "step": 10425 }, { "epoch": 1.701468189233279, "grad_norm": 0.2718281149864197, "learning_rate": 0.000850652528548124, "loss": 0.1157, "num_input_tokens_seen": 22541600, "step": 10430 }, { "epoch": 1.7022838499184338, "grad_norm": 0.17568756639957428, "learning_rate": 0.0008510603588907014, "loss": 0.0538, "num_input_tokens_seen": 22552864, "step": 10435 }, { "epoch": 1.7030995106035889, "grad_norm": 0.006328089628368616, "learning_rate": 0.000851468189233279, "loss": 0.2101, "num_input_tokens_seen": 22563776, "step": 10440 }, { "epoch": 1.703915171288744, "grad_norm": 0.20163559913635254, "learning_rate": 0.0008518760195758565, "loss": 0.0462, "num_input_tokens_seen": 22575328, "step": 10445 }, { "epoch": 1.7047308319738987, "grad_norm": 0.3907875418663025, "learning_rate": 0.000852283849918434, "loss": 0.1548, "num_input_tokens_seen": 22586240, "step": 10450 }, { "epoch": 1.7055464926590538, "grad_norm": 0.010955499485135078, "learning_rate": 0.0008526916802610114, "loss": 0.0301, "num_input_tokens_seen": 22596512, "step": 10455 }, { "epoch": 1.7063621533442088, "grad_norm": 0.4692709147930145, "learning_rate": 0.0008530995106035889, "loss": 0.279, "num_input_tokens_seen": 22607200, "step": 10460 }, { "epoch": 1.7071778140293636, "grad_norm": 0.17287231981754303, "learning_rate": 0.0008535073409461665, "loss": 0.0531, "num_input_tokens_seen": 22618368, "step": 10465 }, { "epoch": 1.707993474714519, "grad_norm": 0.022568654268980026, "learning_rate": 0.0008539151712887438, "loss": 0.1089, "num_input_tokens_seen": 22629312, "step": 10470 }, { "epoch": 1.7088091353996737, "grad_norm": 0.09624893218278885, "learning_rate": 0.0008543230016313214, "loss": 0.1738, "num_input_tokens_seen": 22641568, "step": 10475 }, { "epoch": 1.7096247960848288, "grad_norm": 0.12691551446914673, "learning_rate": 0.0008547308319738989, "loss": 0.0738, "num_input_tokens_seen": 22652224, "step": 10480 }, { "epoch": 1.7104404567699838, "grad_norm": 0.17793002724647522, "learning_rate": 0.0008551386623164764, "loss": 0.057, "num_input_tokens_seen": 22662816, "step": 10485 }, { "epoch": 1.7112561174551386, "grad_norm": 0.07302019745111465, "learning_rate": 0.0008555464926590538, "loss": 0.1733, "num_input_tokens_seen": 22674368, "step": 10490 }, { "epoch": 1.7120717781402937, "grad_norm": 0.15017388761043549, "learning_rate": 0.0008559543230016313, "loss": 0.1191, "num_input_tokens_seen": 22686528, "step": 10495 }, { "epoch": 1.7128874388254487, "grad_norm": 0.2169235348701477, "learning_rate": 0.0008563621533442089, "loss": 0.1601, "num_input_tokens_seen": 22696384, "step": 10500 }, { "epoch": 1.7137030995106035, "grad_norm": 0.12006626278162003, "learning_rate": 0.0008567699836867863, "loss": 0.2777, "num_input_tokens_seen": 22707552, "step": 10505 }, { "epoch": 1.7145187601957586, "grad_norm": 0.1256376951932907, "learning_rate": 0.0008571778140293638, "loss": 0.2857, "num_input_tokens_seen": 22718496, "step": 10510 }, { "epoch": 1.7153344208809136, "grad_norm": 0.24016325175762177, "learning_rate": 0.0008575856443719413, "loss": 0.2836, "num_input_tokens_seen": 22729088, "step": 10515 }, { "epoch": 1.7161500815660684, "grad_norm": 0.08173760026693344, "learning_rate": 0.0008579934747145188, "loss": 0.1304, "num_input_tokens_seen": 22738944, "step": 10520 }, { "epoch": 1.7169657422512234, "grad_norm": 0.11104224622249603, "learning_rate": 0.0008584013050570962, "loss": 0.1197, "num_input_tokens_seen": 22749952, "step": 10525 }, { "epoch": 1.7177814029363785, "grad_norm": 0.05892535671591759, "learning_rate": 0.0008588091353996738, "loss": 0.0683, "num_input_tokens_seen": 22759200, "step": 10530 }, { "epoch": 1.7185970636215333, "grad_norm": 0.14461533725261688, "learning_rate": 0.0008592169657422512, "loss": 0.1721, "num_input_tokens_seen": 22769632, "step": 10535 }, { "epoch": 1.7194127243066886, "grad_norm": 0.05228950455784798, "learning_rate": 0.0008596247960848287, "loss": 0.0866, "num_input_tokens_seen": 22781056, "step": 10540 }, { "epoch": 1.7202283849918434, "grad_norm": 0.29761654138565063, "learning_rate": 0.0008600326264274062, "loss": 0.1655, "num_input_tokens_seen": 22791776, "step": 10545 }, { "epoch": 1.7210440456769984, "grad_norm": 0.018953580409288406, "learning_rate": 0.0008604404567699837, "loss": 0.0932, "num_input_tokens_seen": 22801952, "step": 10550 }, { "epoch": 1.7218597063621535, "grad_norm": 0.05711786821484566, "learning_rate": 0.0008608482871125613, "loss": 0.0713, "num_input_tokens_seen": 22813024, "step": 10555 }, { "epoch": 1.7226753670473083, "grad_norm": 0.08064857870340347, "learning_rate": 0.0008612561174551386, "loss": 0.1857, "num_input_tokens_seen": 22823904, "step": 10560 }, { "epoch": 1.7234910277324633, "grad_norm": 0.03695710375905037, "learning_rate": 0.0008616639477977162, "loss": 0.136, "num_input_tokens_seen": 22834400, "step": 10565 }, { "epoch": 1.7243066884176184, "grad_norm": 0.132895827293396, "learning_rate": 0.0008620717781402937, "loss": 0.0797, "num_input_tokens_seen": 22845856, "step": 10570 }, { "epoch": 1.7251223491027732, "grad_norm": 0.07648682594299316, "learning_rate": 0.0008624796084828711, "loss": 0.1009, "num_input_tokens_seen": 22856608, "step": 10575 }, { "epoch": 1.7259380097879282, "grad_norm": 0.2530839741230011, "learning_rate": 0.0008628874388254486, "loss": 0.1728, "num_input_tokens_seen": 22867168, "step": 10580 }, { "epoch": 1.7267536704730833, "grad_norm": 0.14033333957195282, "learning_rate": 0.0008632952691680261, "loss": 0.1443, "num_input_tokens_seen": 22877312, "step": 10585 }, { "epoch": 1.727569331158238, "grad_norm": 0.12076137959957123, "learning_rate": 0.0008637030995106036, "loss": 0.0775, "num_input_tokens_seen": 22888832, "step": 10590 }, { "epoch": 1.7283849918433931, "grad_norm": 0.05201772227883339, "learning_rate": 0.0008641109298531811, "loss": 0.1281, "num_input_tokens_seen": 22898464, "step": 10595 }, { "epoch": 1.7292006525285482, "grad_norm": 0.05368548259139061, "learning_rate": 0.0008645187601957585, "loss": 0.0989, "num_input_tokens_seen": 22909568, "step": 10600 }, { "epoch": 1.730016313213703, "grad_norm": 0.21361590921878815, "learning_rate": 0.0008649265905383361, "loss": 0.1372, "num_input_tokens_seen": 22920992, "step": 10605 }, { "epoch": 1.7308319738988582, "grad_norm": 0.18085242807865143, "learning_rate": 0.0008653344208809135, "loss": 0.1765, "num_input_tokens_seen": 22930880, "step": 10610 }, { "epoch": 1.731647634584013, "grad_norm": 0.060455434024333954, "learning_rate": 0.0008657422512234911, "loss": 0.1894, "num_input_tokens_seen": 22941568, "step": 10615 }, { "epoch": 1.7324632952691679, "grad_norm": 0.12104543298482895, "learning_rate": 0.0008661500815660686, "loss": 0.0868, "num_input_tokens_seen": 22952576, "step": 10620 }, { "epoch": 1.7332789559543231, "grad_norm": 0.16045480966567993, "learning_rate": 0.0008665579119086459, "loss": 0.1066, "num_input_tokens_seen": 22962144, "step": 10625 }, { "epoch": 1.734094616639478, "grad_norm": 0.012985051609575748, "learning_rate": 0.0008669657422512235, "loss": 0.1631, "num_input_tokens_seen": 22973312, "step": 10630 }, { "epoch": 1.734910277324633, "grad_norm": 0.07617738097906113, "learning_rate": 0.000867373572593801, "loss": 0.1551, "num_input_tokens_seen": 22983424, "step": 10635 }, { "epoch": 1.735725938009788, "grad_norm": 0.16659033298492432, "learning_rate": 0.0008677814029363786, "loss": 0.0858, "num_input_tokens_seen": 22992800, "step": 10640 }, { "epoch": 1.7365415986949428, "grad_norm": 0.012401281856000423, "learning_rate": 0.0008681892332789559, "loss": 0.0629, "num_input_tokens_seen": 23003552, "step": 10645 }, { "epoch": 1.7373572593800979, "grad_norm": 0.010529414750635624, "learning_rate": 0.0008685970636215334, "loss": 0.1084, "num_input_tokens_seen": 23014688, "step": 10650 }, { "epoch": 1.738172920065253, "grad_norm": 0.006768247112631798, "learning_rate": 0.000869004893964111, "loss": 0.0348, "num_input_tokens_seen": 23025344, "step": 10655 }, { "epoch": 1.7389885807504077, "grad_norm": 0.058623116463422775, "learning_rate": 0.0008694127243066884, "loss": 0.0389, "num_input_tokens_seen": 23035328, "step": 10660 }, { "epoch": 1.7398042414355628, "grad_norm": 0.007725914474576712, "learning_rate": 0.000869820554649266, "loss": 0.1048, "num_input_tokens_seen": 23045504, "step": 10665 }, { "epoch": 1.7406199021207178, "grad_norm": 0.34186941385269165, "learning_rate": 0.0008702283849918434, "loss": 0.1211, "num_input_tokens_seen": 23056736, "step": 10670 }, { "epoch": 1.7414355628058726, "grad_norm": 0.47554755210876465, "learning_rate": 0.0008706362153344209, "loss": 0.1373, "num_input_tokens_seen": 23065760, "step": 10675 }, { "epoch": 1.7422512234910277, "grad_norm": 0.206298828125, "learning_rate": 0.0008710440456769984, "loss": 0.1016, "num_input_tokens_seen": 23077376, "step": 10680 }, { "epoch": 1.7430668841761827, "grad_norm": 0.016806311905384064, "learning_rate": 0.0008714518760195759, "loss": 0.0164, "num_input_tokens_seen": 23088736, "step": 10685 }, { "epoch": 1.7438825448613375, "grad_norm": 0.05761105194687843, "learning_rate": 0.0008718597063621533, "loss": 0.2616, "num_input_tokens_seen": 23101088, "step": 10690 }, { "epoch": 1.7446982055464928, "grad_norm": 0.03953443840146065, "learning_rate": 0.0008722675367047308, "loss": 0.2475, "num_input_tokens_seen": 23112576, "step": 10695 }, { "epoch": 1.7455138662316476, "grad_norm": 0.36860281229019165, "learning_rate": 0.0008726753670473083, "loss": 0.1334, "num_input_tokens_seen": 23123744, "step": 10700 }, { "epoch": 1.7463295269168027, "grad_norm": 0.23439127206802368, "learning_rate": 0.0008730831973898859, "loss": 0.1546, "num_input_tokens_seen": 23134784, "step": 10705 }, { "epoch": 1.7471451876019577, "grad_norm": 0.04523128643631935, "learning_rate": 0.0008734910277324634, "loss": 0.1541, "num_input_tokens_seen": 23146240, "step": 10710 }, { "epoch": 1.7479608482871125, "grad_norm": 0.2866004705429077, "learning_rate": 0.0008738988580750407, "loss": 0.1121, "num_input_tokens_seen": 23158208, "step": 10715 }, { "epoch": 1.7487765089722676, "grad_norm": 0.0991467610001564, "learning_rate": 0.0008743066884176183, "loss": 0.0889, "num_input_tokens_seen": 23169280, "step": 10720 }, { "epoch": 1.7495921696574226, "grad_norm": 0.05290234833955765, "learning_rate": 0.0008747145187601958, "loss": 0.0835, "num_input_tokens_seen": 23181728, "step": 10725 }, { "epoch": 1.7504078303425774, "grad_norm": 0.2909699082374573, "learning_rate": 0.0008751223491027733, "loss": 0.1154, "num_input_tokens_seen": 23191328, "step": 10730 }, { "epoch": 1.7512234910277324, "grad_norm": 0.08580849319696426, "learning_rate": 0.0008755301794453507, "loss": 0.0663, "num_input_tokens_seen": 23200832, "step": 10735 }, { "epoch": 1.7520391517128875, "grad_norm": 0.3593541085720062, "learning_rate": 0.0008759380097879282, "loss": 0.2485, "num_input_tokens_seen": 23211456, "step": 10740 }, { "epoch": 1.7528548123980423, "grad_norm": 0.09344206005334854, "learning_rate": 0.0008763458401305058, "loss": 0.111, "num_input_tokens_seen": 23223104, "step": 10745 }, { "epoch": 1.7536704730831973, "grad_norm": 0.18612459301948547, "learning_rate": 0.0008767536704730832, "loss": 0.2546, "num_input_tokens_seen": 23234752, "step": 10750 }, { "epoch": 1.7544861337683524, "grad_norm": 0.4155328869819641, "learning_rate": 0.0008771615008156608, "loss": 0.1174, "num_input_tokens_seen": 23246944, "step": 10755 }, { "epoch": 1.7553017944535072, "grad_norm": 0.02744222804903984, "learning_rate": 0.0008775693311582382, "loss": 0.1136, "num_input_tokens_seen": 23258848, "step": 10760 }, { "epoch": 1.7561174551386625, "grad_norm": 0.060500990599393845, "learning_rate": 0.0008779771615008156, "loss": 0.1549, "num_input_tokens_seen": 23269056, "step": 10765 }, { "epoch": 1.7569331158238173, "grad_norm": 0.06255891919136047, "learning_rate": 0.0008783849918433932, "loss": 0.1089, "num_input_tokens_seen": 23279328, "step": 10770 }, { "epoch": 1.7577487765089723, "grad_norm": 0.05365055426955223, "learning_rate": 0.0008787928221859707, "loss": 0.0844, "num_input_tokens_seen": 23288192, "step": 10775 }, { "epoch": 1.7585644371941274, "grad_norm": 0.02608959935605526, "learning_rate": 0.0008792006525285482, "loss": 0.0549, "num_input_tokens_seen": 23299232, "step": 10780 }, { "epoch": 1.7593800978792822, "grad_norm": 0.0558866485953331, "learning_rate": 0.0008796084828711256, "loss": 0.0902, "num_input_tokens_seen": 23309536, "step": 10785 }, { "epoch": 1.7601957585644372, "grad_norm": 0.09061188995838165, "learning_rate": 0.0008800163132137031, "loss": 0.0936, "num_input_tokens_seen": 23321120, "step": 10790 }, { "epoch": 1.7610114192495923, "grad_norm": 0.025436315685510635, "learning_rate": 0.0008804241435562807, "loss": 0.0201, "num_input_tokens_seen": 23331904, "step": 10795 }, { "epoch": 1.761827079934747, "grad_norm": 0.021344909444451332, "learning_rate": 0.000880831973898858, "loss": 0.1558, "num_input_tokens_seen": 23342016, "step": 10800 }, { "epoch": 1.7626427406199021, "grad_norm": 0.23954921960830688, "learning_rate": 0.0008812398042414356, "loss": 0.2992, "num_input_tokens_seen": 23352320, "step": 10805 }, { "epoch": 1.7634584013050572, "grad_norm": 0.353021502494812, "learning_rate": 0.0008816476345840131, "loss": 0.2506, "num_input_tokens_seen": 23363840, "step": 10810 }, { "epoch": 1.764274061990212, "grad_norm": 0.2084723562002182, "learning_rate": 0.0008820554649265906, "loss": 0.2649, "num_input_tokens_seen": 23375040, "step": 10815 }, { "epoch": 1.765089722675367, "grad_norm": 0.07466694712638855, "learning_rate": 0.000882463295269168, "loss": 0.19, "num_input_tokens_seen": 23386752, "step": 10820 }, { "epoch": 1.765905383360522, "grad_norm": 0.08622830361127853, "learning_rate": 0.0008828711256117455, "loss": 0.0898, "num_input_tokens_seen": 23398496, "step": 10825 }, { "epoch": 1.7667210440456769, "grad_norm": 0.168392151594162, "learning_rate": 0.000883278955954323, "loss": 0.1449, "num_input_tokens_seen": 23409984, "step": 10830 }, { "epoch": 1.7675367047308321, "grad_norm": 0.18816818296909332, "learning_rate": 0.0008836867862969005, "loss": 0.2378, "num_input_tokens_seen": 23419968, "step": 10835 }, { "epoch": 1.768352365415987, "grad_norm": 0.08224303275346756, "learning_rate": 0.000884094616639478, "loss": 0.0756, "num_input_tokens_seen": 23430720, "step": 10840 }, { "epoch": 1.7691680261011418, "grad_norm": 0.07841552048921585, "learning_rate": 0.0008845024469820555, "loss": 0.1262, "num_input_tokens_seen": 23440960, "step": 10845 }, { "epoch": 1.769983686786297, "grad_norm": 0.03823342174291611, "learning_rate": 0.0008849102773246329, "loss": 0.0482, "num_input_tokens_seen": 23451040, "step": 10850 }, { "epoch": 1.7707993474714518, "grad_norm": 0.369051456451416, "learning_rate": 0.0008853181076672104, "loss": 0.2453, "num_input_tokens_seen": 23461024, "step": 10855 }, { "epoch": 1.7716150081566069, "grad_norm": 0.08475756645202637, "learning_rate": 0.000885725938009788, "loss": 0.0929, "num_input_tokens_seen": 23472192, "step": 10860 }, { "epoch": 1.772430668841762, "grad_norm": 0.2269689291715622, "learning_rate": 0.0008861337683523655, "loss": 0.2801, "num_input_tokens_seen": 23482624, "step": 10865 }, { "epoch": 1.7732463295269167, "grad_norm": 0.10074033588171005, "learning_rate": 0.0008865415986949429, "loss": 0.0721, "num_input_tokens_seen": 23492928, "step": 10870 }, { "epoch": 1.7740619902120718, "grad_norm": 0.04861301928758621, "learning_rate": 0.0008869494290375204, "loss": 0.1541, "num_input_tokens_seen": 23505312, "step": 10875 }, { "epoch": 1.7748776508972268, "grad_norm": 0.05188162997364998, "learning_rate": 0.0008873572593800979, "loss": 0.1303, "num_input_tokens_seen": 23516608, "step": 10880 }, { "epoch": 1.7756933115823816, "grad_norm": 0.21070732176303864, "learning_rate": 0.0008877650897226754, "loss": 0.2544, "num_input_tokens_seen": 23527552, "step": 10885 }, { "epoch": 1.7765089722675367, "grad_norm": 0.04607458412647247, "learning_rate": 0.0008881729200652528, "loss": 0.0782, "num_input_tokens_seen": 23537888, "step": 10890 }, { "epoch": 1.7773246329526917, "grad_norm": 0.09887990355491638, "learning_rate": 0.0008885807504078304, "loss": 0.1516, "num_input_tokens_seen": 23548480, "step": 10895 }, { "epoch": 1.7781402936378465, "grad_norm": 0.13077348470687866, "learning_rate": 0.0008889885807504079, "loss": 0.1197, "num_input_tokens_seen": 23560192, "step": 10900 }, { "epoch": 1.7789559543230016, "grad_norm": 0.176055908203125, "learning_rate": 0.0008893964110929853, "loss": 0.1321, "num_input_tokens_seen": 23570432, "step": 10905 }, { "epoch": 1.7797716150081566, "grad_norm": 0.2664521634578705, "learning_rate": 0.0008898042414355628, "loss": 0.1728, "num_input_tokens_seen": 23582080, "step": 10910 }, { "epoch": 1.7805872756933114, "grad_norm": 0.17070364952087402, "learning_rate": 0.0008902120717781403, "loss": 0.1953, "num_input_tokens_seen": 23592096, "step": 10915 }, { "epoch": 1.7814029363784667, "grad_norm": 0.31275373697280884, "learning_rate": 0.0008906199021207178, "loss": 0.0856, "num_input_tokens_seen": 23603712, "step": 10920 }, { "epoch": 1.7822185970636215, "grad_norm": 0.04737719148397446, "learning_rate": 0.0008910277324632953, "loss": 0.1324, "num_input_tokens_seen": 23615168, "step": 10925 }, { "epoch": 1.7830342577487766, "grad_norm": 0.0940323919057846, "learning_rate": 0.0008914355628058728, "loss": 0.1321, "num_input_tokens_seen": 23623936, "step": 10930 }, { "epoch": 1.7838499184339316, "grad_norm": 0.23389218747615814, "learning_rate": 0.0008918433931484503, "loss": 0.1104, "num_input_tokens_seen": 23634656, "step": 10935 }, { "epoch": 1.7846655791190864, "grad_norm": 0.16723588109016418, "learning_rate": 0.0008922512234910277, "loss": 0.1185, "num_input_tokens_seen": 23644992, "step": 10940 }, { "epoch": 1.7854812398042414, "grad_norm": 0.3634952902793884, "learning_rate": 0.0008926590538336053, "loss": 0.2453, "num_input_tokens_seen": 23656800, "step": 10945 }, { "epoch": 1.7862969004893965, "grad_norm": 0.2302580624818802, "learning_rate": 0.0008930668841761828, "loss": 0.0792, "num_input_tokens_seen": 23668832, "step": 10950 }, { "epoch": 1.7871125611745513, "grad_norm": 0.34366220235824585, "learning_rate": 0.0008934747145187601, "loss": 0.2055, "num_input_tokens_seen": 23678464, "step": 10955 }, { "epoch": 1.7879282218597063, "grad_norm": 0.041612409055233, "learning_rate": 0.0008938825448613377, "loss": 0.0343, "num_input_tokens_seen": 23689376, "step": 10960 }, { "epoch": 1.7887438825448614, "grad_norm": 0.02797471545636654, "learning_rate": 0.0008942903752039152, "loss": 0.0632, "num_input_tokens_seen": 23700096, "step": 10965 }, { "epoch": 1.7895595432300162, "grad_norm": 0.19312946498394012, "learning_rate": 0.0008946982055464927, "loss": 0.0759, "num_input_tokens_seen": 23711552, "step": 10970 }, { "epoch": 1.7903752039151712, "grad_norm": 0.18493857979774475, "learning_rate": 0.0008951060358890701, "loss": 0.1243, "num_input_tokens_seen": 23722912, "step": 10975 }, { "epoch": 1.7911908646003263, "grad_norm": 0.026496220380067825, "learning_rate": 0.0008955138662316476, "loss": 0.2482, "num_input_tokens_seen": 23733248, "step": 10980 }, { "epoch": 1.792006525285481, "grad_norm": 0.2228316068649292, "learning_rate": 0.0008959216965742252, "loss": 0.1109, "num_input_tokens_seen": 23744384, "step": 10985 }, { "epoch": 1.7928221859706364, "grad_norm": 0.40756627917289734, "learning_rate": 0.0008963295269168026, "loss": 0.1965, "num_input_tokens_seen": 23755040, "step": 10990 }, { "epoch": 1.7936378466557912, "grad_norm": 0.30325761437416077, "learning_rate": 0.0008967373572593801, "loss": 0.1768, "num_input_tokens_seen": 23766240, "step": 10995 }, { "epoch": 1.7944535073409462, "grad_norm": 0.19416655600070953, "learning_rate": 0.0008971451876019576, "loss": 0.1114, "num_input_tokens_seen": 23777024, "step": 11000 }, { "epoch": 1.7952691680261013, "grad_norm": 0.0678112730383873, "learning_rate": 0.0008975530179445351, "loss": 0.0636, "num_input_tokens_seen": 23789920, "step": 11005 }, { "epoch": 1.796084828711256, "grad_norm": 0.2991323471069336, "learning_rate": 0.0008979608482871126, "loss": 0.1564, "num_input_tokens_seen": 23801600, "step": 11010 }, { "epoch": 1.7969004893964111, "grad_norm": 0.2331552803516388, "learning_rate": 0.0008983686786296901, "loss": 0.1613, "num_input_tokens_seen": 23812032, "step": 11015 }, { "epoch": 1.7977161500815662, "grad_norm": 0.12368535250425339, "learning_rate": 0.0008987765089722675, "loss": 0.1444, "num_input_tokens_seen": 23822752, "step": 11020 }, { "epoch": 1.798531810766721, "grad_norm": 0.2614479959011078, "learning_rate": 0.000899184339314845, "loss": 0.1571, "num_input_tokens_seen": 23833024, "step": 11025 }, { "epoch": 1.799347471451876, "grad_norm": 0.2386416345834732, "learning_rate": 0.0008995921696574225, "loss": 0.1573, "num_input_tokens_seen": 23843328, "step": 11030 }, { "epoch": 1.800163132137031, "grad_norm": 0.04170841723680496, "learning_rate": 0.0009000000000000001, "loss": 0.0955, "num_input_tokens_seen": 23853568, "step": 11035 }, { "epoch": 1.8009787928221859, "grad_norm": 0.21823978424072266, "learning_rate": 0.0009004078303425776, "loss": 0.1304, "num_input_tokens_seen": 23864000, "step": 11040 }, { "epoch": 1.801794453507341, "grad_norm": 0.1484844982624054, "learning_rate": 0.0009008156606851549, "loss": 0.1727, "num_input_tokens_seen": 23875136, "step": 11045 }, { "epoch": 1.802610114192496, "grad_norm": 0.09188838303089142, "learning_rate": 0.0009012234910277325, "loss": 0.0887, "num_input_tokens_seen": 23886784, "step": 11050 }, { "epoch": 1.8034257748776508, "grad_norm": 0.06821305304765701, "learning_rate": 0.00090163132137031, "loss": 0.0782, "num_input_tokens_seen": 23897440, "step": 11055 }, { "epoch": 1.804241435562806, "grad_norm": 0.08660285174846649, "learning_rate": 0.0009020391517128875, "loss": 0.0435, "num_input_tokens_seen": 23909248, "step": 11060 }, { "epoch": 1.8050570962479608, "grad_norm": 0.10734372586011887, "learning_rate": 0.0009024469820554649, "loss": 0.075, "num_input_tokens_seen": 23920224, "step": 11065 }, { "epoch": 1.8058727569331157, "grad_norm": 0.009611149318516254, "learning_rate": 0.0009028548123980424, "loss": 0.0941, "num_input_tokens_seen": 23930048, "step": 11070 }, { "epoch": 1.806688417618271, "grad_norm": 0.039050959050655365, "learning_rate": 0.0009032626427406199, "loss": 0.1033, "num_input_tokens_seen": 23940992, "step": 11075 }, { "epoch": 1.8075040783034257, "grad_norm": 0.0733620673418045, "learning_rate": 0.0009036704730831974, "loss": 0.0577, "num_input_tokens_seen": 23952288, "step": 11080 }, { "epoch": 1.8083197389885808, "grad_norm": 0.09018149226903915, "learning_rate": 0.000904078303425775, "loss": 0.085, "num_input_tokens_seen": 23962848, "step": 11085 }, { "epoch": 1.8091353996737358, "grad_norm": 0.005015532020479441, "learning_rate": 0.0009044861337683524, "loss": 0.0439, "num_input_tokens_seen": 23973504, "step": 11090 }, { "epoch": 1.8099510603588906, "grad_norm": 0.11613103747367859, "learning_rate": 0.0009048939641109298, "loss": 0.0687, "num_input_tokens_seen": 23983840, "step": 11095 }, { "epoch": 1.8107667210440457, "grad_norm": 0.14860443770885468, "learning_rate": 0.0009053017944535074, "loss": 0.1707, "num_input_tokens_seen": 23994720, "step": 11100 }, { "epoch": 1.8115823817292007, "grad_norm": 0.6425272226333618, "learning_rate": 0.0009057096247960849, "loss": 0.277, "num_input_tokens_seen": 24003488, "step": 11105 }, { "epoch": 1.8123980424143555, "grad_norm": 0.22874023020267487, "learning_rate": 0.0009061174551386622, "loss": 0.19, "num_input_tokens_seen": 24014336, "step": 11110 }, { "epoch": 1.8132137030995106, "grad_norm": 0.15151821076869965, "learning_rate": 0.0009065252854812398, "loss": 0.1462, "num_input_tokens_seen": 24025600, "step": 11115 }, { "epoch": 1.8140293637846656, "grad_norm": 0.11738032102584839, "learning_rate": 0.0009069331158238173, "loss": 0.1265, "num_input_tokens_seen": 24035392, "step": 11120 }, { "epoch": 1.8148450244698204, "grad_norm": 0.6219301819801331, "learning_rate": 0.0009073409461663949, "loss": 0.3286, "num_input_tokens_seen": 24046080, "step": 11125 }, { "epoch": 1.8156606851549757, "grad_norm": 0.08909933269023895, "learning_rate": 0.0009077487765089722, "loss": 0.1256, "num_input_tokens_seen": 24057536, "step": 11130 }, { "epoch": 1.8164763458401305, "grad_norm": 0.04366849735379219, "learning_rate": 0.0009081566068515497, "loss": 0.0288, "num_input_tokens_seen": 24068736, "step": 11135 }, { "epoch": 1.8172920065252853, "grad_norm": 0.11872971057891846, "learning_rate": 0.0009085644371941273, "loss": 0.1215, "num_input_tokens_seen": 24079360, "step": 11140 }, { "epoch": 1.8181076672104406, "grad_norm": 0.3688180446624756, "learning_rate": 0.0009089722675367047, "loss": 0.1734, "num_input_tokens_seen": 24090656, "step": 11145 }, { "epoch": 1.8189233278955954, "grad_norm": 0.01831037551164627, "learning_rate": 0.0009093800978792823, "loss": 0.1089, "num_input_tokens_seen": 24102336, "step": 11150 }, { "epoch": 1.8197389885807504, "grad_norm": 0.0956871286034584, "learning_rate": 0.0009097879282218597, "loss": 0.1478, "num_input_tokens_seen": 24113120, "step": 11155 }, { "epoch": 1.8205546492659055, "grad_norm": 0.04308653995394707, "learning_rate": 0.0009101957585644372, "loss": 0.0524, "num_input_tokens_seen": 24122688, "step": 11160 }, { "epoch": 1.8213703099510603, "grad_norm": 0.21079613268375397, "learning_rate": 0.0009106035889070147, "loss": 0.1034, "num_input_tokens_seen": 24133984, "step": 11165 }, { "epoch": 1.8221859706362153, "grad_norm": 0.01858745887875557, "learning_rate": 0.0009110114192495922, "loss": 0.1008, "num_input_tokens_seen": 24144928, "step": 11170 }, { "epoch": 1.8230016313213704, "grad_norm": 0.0403125137090683, "learning_rate": 0.0009114192495921697, "loss": 0.0812, "num_input_tokens_seen": 24154240, "step": 11175 }, { "epoch": 1.8238172920065252, "grad_norm": 0.027002638205885887, "learning_rate": 0.0009118270799347471, "loss": 0.1107, "num_input_tokens_seen": 24163584, "step": 11180 }, { "epoch": 1.8246329526916802, "grad_norm": 0.042410269379615784, "learning_rate": 0.0009122349102773246, "loss": 0.1227, "num_input_tokens_seen": 24175136, "step": 11185 }, { "epoch": 1.8254486133768353, "grad_norm": 0.12024813145399094, "learning_rate": 0.0009126427406199022, "loss": 0.0818, "num_input_tokens_seen": 24185856, "step": 11190 }, { "epoch": 1.82626427406199, "grad_norm": 0.054859358817338943, "learning_rate": 0.0009130505709624797, "loss": 0.0702, "num_input_tokens_seen": 24197152, "step": 11195 }, { "epoch": 1.8270799347471451, "grad_norm": 0.14947378635406494, "learning_rate": 0.0009134584013050571, "loss": 0.2348, "num_input_tokens_seen": 24206720, "step": 11200 }, { "epoch": 1.8278955954323002, "grad_norm": 0.01208100188523531, "learning_rate": 0.0009138662316476346, "loss": 0.0458, "num_input_tokens_seen": 24217696, "step": 11205 }, { "epoch": 1.828711256117455, "grad_norm": 0.03552878648042679, "learning_rate": 0.0009142740619902121, "loss": 0.0615, "num_input_tokens_seen": 24229088, "step": 11210 }, { "epoch": 1.8295269168026103, "grad_norm": 0.12704972922801971, "learning_rate": 0.0009146818923327896, "loss": 0.1118, "num_input_tokens_seen": 24240128, "step": 11215 }, { "epoch": 1.830342577487765, "grad_norm": 0.049700263887643814, "learning_rate": 0.000915089722675367, "loss": 0.0909, "num_input_tokens_seen": 24251520, "step": 11220 }, { "epoch": 1.8311582381729201, "grad_norm": 0.05248570069670677, "learning_rate": 0.0009154975530179446, "loss": 0.0693, "num_input_tokens_seen": 24262048, "step": 11225 }, { "epoch": 1.8319738988580752, "grad_norm": 0.040520548820495605, "learning_rate": 0.0009159053833605221, "loss": 0.1184, "num_input_tokens_seen": 24272480, "step": 11230 }, { "epoch": 1.83278955954323, "grad_norm": 0.123872309923172, "learning_rate": 0.0009163132137030995, "loss": 0.1531, "num_input_tokens_seen": 24282848, "step": 11235 }, { "epoch": 1.833605220228385, "grad_norm": 0.050551868975162506, "learning_rate": 0.000916721044045677, "loss": 0.0674, "num_input_tokens_seen": 24295072, "step": 11240 }, { "epoch": 1.83442088091354, "grad_norm": 0.014776119962334633, "learning_rate": 0.0009171288743882545, "loss": 0.2557, "num_input_tokens_seen": 24305920, "step": 11245 }, { "epoch": 1.8352365415986949, "grad_norm": 0.08579311519861221, "learning_rate": 0.0009175367047308319, "loss": 0.1091, "num_input_tokens_seen": 24317056, "step": 11250 }, { "epoch": 1.83605220228385, "grad_norm": 0.05025889351963997, "learning_rate": 0.0009179445350734095, "loss": 0.1928, "num_input_tokens_seen": 24327488, "step": 11255 }, { "epoch": 1.836867862969005, "grad_norm": 0.25533077120780945, "learning_rate": 0.000918352365415987, "loss": 0.1287, "num_input_tokens_seen": 24336896, "step": 11260 }, { "epoch": 1.8376835236541598, "grad_norm": 0.14637209475040436, "learning_rate": 0.0009187601957585645, "loss": 0.0772, "num_input_tokens_seen": 24348864, "step": 11265 }, { "epoch": 1.8384991843393148, "grad_norm": 0.02450774982571602, "learning_rate": 0.0009191680261011419, "loss": 0.1115, "num_input_tokens_seen": 24359520, "step": 11270 }, { "epoch": 1.8393148450244698, "grad_norm": 0.1265328973531723, "learning_rate": 0.0009195758564437194, "loss": 0.128, "num_input_tokens_seen": 24370304, "step": 11275 }, { "epoch": 1.8401305057096247, "grad_norm": 0.06586892902851105, "learning_rate": 0.000919983686786297, "loss": 0.1629, "num_input_tokens_seen": 24381312, "step": 11280 }, { "epoch": 1.84094616639478, "grad_norm": 0.2524750530719757, "learning_rate": 0.0009203915171288743, "loss": 0.0922, "num_input_tokens_seen": 24391552, "step": 11285 }, { "epoch": 1.8417618270799347, "grad_norm": 0.07396470755338669, "learning_rate": 0.0009207993474714519, "loss": 0.0868, "num_input_tokens_seen": 24402112, "step": 11290 }, { "epoch": 1.8425774877650896, "grad_norm": 0.20884621143341064, "learning_rate": 0.0009212071778140294, "loss": 0.0339, "num_input_tokens_seen": 24413600, "step": 11295 }, { "epoch": 1.8433931484502448, "grad_norm": 0.009276431985199451, "learning_rate": 0.0009216150081566068, "loss": 0.0742, "num_input_tokens_seen": 24423968, "step": 11300 }, { "epoch": 1.8442088091353996, "grad_norm": 0.17926661670207977, "learning_rate": 0.0009220228384991844, "loss": 0.0935, "num_input_tokens_seen": 24434688, "step": 11305 }, { "epoch": 1.8450244698205547, "grad_norm": 0.4188963770866394, "learning_rate": 0.0009224306688417618, "loss": 0.1193, "num_input_tokens_seen": 24444992, "step": 11310 }, { "epoch": 1.8458401305057097, "grad_norm": 0.015882406383752823, "learning_rate": 0.0009228384991843394, "loss": 0.1212, "num_input_tokens_seen": 24456512, "step": 11315 }, { "epoch": 1.8466557911908645, "grad_norm": 0.01852530613541603, "learning_rate": 0.0009232463295269168, "loss": 0.0301, "num_input_tokens_seen": 24467424, "step": 11320 }, { "epoch": 1.8474714518760196, "grad_norm": 0.04548301175236702, "learning_rate": 0.0009236541598694943, "loss": 0.1594, "num_input_tokens_seen": 24478720, "step": 11325 }, { "epoch": 1.8482871125611746, "grad_norm": 0.24560341238975525, "learning_rate": 0.0009240619902120718, "loss": 0.0453, "num_input_tokens_seen": 24489792, "step": 11330 }, { "epoch": 1.8491027732463294, "grad_norm": 0.017956508323550224, "learning_rate": 0.0009244698205546492, "loss": 0.1174, "num_input_tokens_seen": 24501760, "step": 11335 }, { "epoch": 1.8499184339314845, "grad_norm": 0.13720254600048065, "learning_rate": 0.0009248776508972268, "loss": 0.1975, "num_input_tokens_seen": 24511584, "step": 11340 }, { "epoch": 1.8507340946166395, "grad_norm": 0.03751807659864426, "learning_rate": 0.0009252854812398043, "loss": 0.0923, "num_input_tokens_seen": 24522432, "step": 11345 }, { "epoch": 1.8515497553017943, "grad_norm": 0.310740202665329, "learning_rate": 0.0009256933115823818, "loss": 0.2647, "num_input_tokens_seen": 24532544, "step": 11350 }, { "epoch": 1.8523654159869496, "grad_norm": 0.13542063534259796, "learning_rate": 0.0009261011419249592, "loss": 0.1742, "num_input_tokens_seen": 24543488, "step": 11355 }, { "epoch": 1.8531810766721044, "grad_norm": 0.2352442741394043, "learning_rate": 0.0009265089722675367, "loss": 0.3112, "num_input_tokens_seen": 24554080, "step": 11360 }, { "epoch": 1.8539967373572592, "grad_norm": 0.08593737334012985, "learning_rate": 0.0009269168026101143, "loss": 0.081, "num_input_tokens_seen": 24564992, "step": 11365 }, { "epoch": 1.8548123980424145, "grad_norm": 0.07849381864070892, "learning_rate": 0.0009273246329526917, "loss": 0.0747, "num_input_tokens_seen": 24576608, "step": 11370 }, { "epoch": 1.8556280587275693, "grad_norm": 0.0883590504527092, "learning_rate": 0.0009277324632952691, "loss": 0.134, "num_input_tokens_seen": 24587872, "step": 11375 }, { "epoch": 1.8564437194127243, "grad_norm": 0.037716735154390335, "learning_rate": 0.0009281402936378467, "loss": 0.1817, "num_input_tokens_seen": 24599136, "step": 11380 }, { "epoch": 1.8572593800978794, "grad_norm": 0.08465716242790222, "learning_rate": 0.0009285481239804242, "loss": 0.1005, "num_input_tokens_seen": 24610080, "step": 11385 }, { "epoch": 1.8580750407830342, "grad_norm": 0.10003326833248138, "learning_rate": 0.0009289559543230017, "loss": 0.0937, "num_input_tokens_seen": 24621568, "step": 11390 }, { "epoch": 1.8588907014681892, "grad_norm": 0.08781938254833221, "learning_rate": 0.0009293637846655791, "loss": 0.112, "num_input_tokens_seen": 24632384, "step": 11395 }, { "epoch": 1.8597063621533443, "grad_norm": 0.1371522694826126, "learning_rate": 0.0009297716150081566, "loss": 0.2002, "num_input_tokens_seen": 24643648, "step": 11400 }, { "epoch": 1.860522022838499, "grad_norm": 0.18471182882785797, "learning_rate": 0.0009301794453507341, "loss": 0.0949, "num_input_tokens_seen": 24655392, "step": 11405 }, { "epoch": 1.8613376835236541, "grad_norm": 0.11153913289308548, "learning_rate": 0.0009305872756933116, "loss": 0.1083, "num_input_tokens_seen": 24666112, "step": 11410 }, { "epoch": 1.8621533442088092, "grad_norm": 0.2752339243888855, "learning_rate": 0.000930995106035889, "loss": 0.3004, "num_input_tokens_seen": 24677248, "step": 11415 }, { "epoch": 1.862969004893964, "grad_norm": 0.11731915175914764, "learning_rate": 0.0009314029363784666, "loss": 0.0804, "num_input_tokens_seen": 24688192, "step": 11420 }, { "epoch": 1.863784665579119, "grad_norm": 0.3206159770488739, "learning_rate": 0.000931810766721044, "loss": 0.1983, "num_input_tokens_seen": 24699424, "step": 11425 }, { "epoch": 1.864600326264274, "grad_norm": 0.19717612862586975, "learning_rate": 0.0009322185970636216, "loss": 0.2276, "num_input_tokens_seen": 24709760, "step": 11430 }, { "epoch": 1.865415986949429, "grad_norm": 0.06198367476463318, "learning_rate": 0.0009326264274061991, "loss": 0.1171, "num_input_tokens_seen": 24720064, "step": 11435 }, { "epoch": 1.8662316476345842, "grad_norm": 0.14472417533397675, "learning_rate": 0.0009330342577487764, "loss": 0.1029, "num_input_tokens_seen": 24730912, "step": 11440 }, { "epoch": 1.867047308319739, "grad_norm": 0.06060084328055382, "learning_rate": 0.000933442088091354, "loss": 0.045, "num_input_tokens_seen": 24739808, "step": 11445 }, { "epoch": 1.867862969004894, "grad_norm": 0.26887786388397217, "learning_rate": 0.0009338499184339315, "loss": 0.0618, "num_input_tokens_seen": 24750976, "step": 11450 }, { "epoch": 1.868678629690049, "grad_norm": 0.07969934493303299, "learning_rate": 0.0009342577487765091, "loss": 0.1042, "num_input_tokens_seen": 24760672, "step": 11455 }, { "epoch": 1.8694942903752039, "grad_norm": 0.025502964854240417, "learning_rate": 0.0009346655791190864, "loss": 0.083, "num_input_tokens_seen": 24771968, "step": 11460 }, { "epoch": 1.870309951060359, "grad_norm": 0.10868193954229355, "learning_rate": 0.0009350734094616639, "loss": 0.1087, "num_input_tokens_seen": 24783776, "step": 11465 }, { "epoch": 1.871125611745514, "grad_norm": 0.03004133701324463, "learning_rate": 0.0009354812398042415, "loss": 0.0305, "num_input_tokens_seen": 24793472, "step": 11470 }, { "epoch": 1.8719412724306688, "grad_norm": 0.12336234748363495, "learning_rate": 0.0009358890701468189, "loss": 0.0977, "num_input_tokens_seen": 24802912, "step": 11475 }, { "epoch": 1.8727569331158238, "grad_norm": 0.07441865652799606, "learning_rate": 0.0009362969004893965, "loss": 0.0828, "num_input_tokens_seen": 24812416, "step": 11480 }, { "epoch": 1.8735725938009788, "grad_norm": 0.08542287349700928, "learning_rate": 0.0009367047308319739, "loss": 0.2089, "num_input_tokens_seen": 24823616, "step": 11485 }, { "epoch": 1.8743882544861337, "grad_norm": 0.055280644446611404, "learning_rate": 0.0009371125611745514, "loss": 0.1477, "num_input_tokens_seen": 24834656, "step": 11490 }, { "epoch": 1.8752039151712887, "grad_norm": 0.051045581698417664, "learning_rate": 0.0009375203915171289, "loss": 0.1889, "num_input_tokens_seen": 24845600, "step": 11495 }, { "epoch": 1.8760195758564437, "grad_norm": 0.1657111495733261, "learning_rate": 0.0009379282218597064, "loss": 0.0902, "num_input_tokens_seen": 24858208, "step": 11500 }, { "epoch": 1.8768352365415986, "grad_norm": 0.2820875942707062, "learning_rate": 0.000938336052202284, "loss": 0.1677, "num_input_tokens_seen": 24868416, "step": 11505 }, { "epoch": 1.8776508972267538, "grad_norm": 0.07757575809955597, "learning_rate": 0.0009387438825448613, "loss": 0.0578, "num_input_tokens_seen": 24879616, "step": 11510 }, { "epoch": 1.8784665579119086, "grad_norm": 0.0790976956486702, "learning_rate": 0.0009391517128874388, "loss": 0.09, "num_input_tokens_seen": 24890336, "step": 11515 }, { "epoch": 1.8792822185970635, "grad_norm": 0.2218760997056961, "learning_rate": 0.0009395595432300164, "loss": 0.1424, "num_input_tokens_seen": 24901760, "step": 11520 }, { "epoch": 1.8800978792822187, "grad_norm": 0.03744116052985191, "learning_rate": 0.0009399673735725939, "loss": 0.1792, "num_input_tokens_seen": 24911520, "step": 11525 }, { "epoch": 1.8809135399673735, "grad_norm": 0.16890190541744232, "learning_rate": 0.0009403752039151713, "loss": 0.1157, "num_input_tokens_seen": 24922016, "step": 11530 }, { "epoch": 1.8817292006525286, "grad_norm": 0.031172795221209526, "learning_rate": 0.0009407830342577488, "loss": 0.0411, "num_input_tokens_seen": 24932832, "step": 11535 }, { "epoch": 1.8825448613376836, "grad_norm": 0.03609168902039528, "learning_rate": 0.0009411908646003263, "loss": 0.1289, "num_input_tokens_seen": 24945632, "step": 11540 }, { "epoch": 1.8833605220228384, "grad_norm": 0.4785745441913605, "learning_rate": 0.0009415986949429038, "loss": 0.3079, "num_input_tokens_seen": 24954176, "step": 11545 }, { "epoch": 1.8841761827079935, "grad_norm": 0.06320811808109283, "learning_rate": 0.0009420065252854812, "loss": 0.1004, "num_input_tokens_seen": 24964512, "step": 11550 }, { "epoch": 1.8849918433931485, "grad_norm": 0.0489787794649601, "learning_rate": 0.0009424143556280587, "loss": 0.0694, "num_input_tokens_seen": 24976416, "step": 11555 }, { "epoch": 1.8858075040783033, "grad_norm": 0.20213648676872253, "learning_rate": 0.0009428221859706362, "loss": 0.3237, "num_input_tokens_seen": 24986656, "step": 11560 }, { "epoch": 1.8866231647634584, "grad_norm": 0.14317026734352112, "learning_rate": 0.0009432300163132137, "loss": 0.1493, "num_input_tokens_seen": 24996384, "step": 11565 }, { "epoch": 1.8874388254486134, "grad_norm": 0.04424556717276573, "learning_rate": 0.0009436378466557913, "loss": 0.0939, "num_input_tokens_seen": 25007808, "step": 11570 }, { "epoch": 1.8882544861337682, "grad_norm": 0.08489039540290833, "learning_rate": 0.0009440456769983687, "loss": 0.1333, "num_input_tokens_seen": 25018816, "step": 11575 }, { "epoch": 1.8890701468189235, "grad_norm": 0.30416834354400635, "learning_rate": 0.0009444535073409461, "loss": 0.1223, "num_input_tokens_seen": 25029472, "step": 11580 }, { "epoch": 1.8898858075040783, "grad_norm": 0.031548064202070236, "learning_rate": 0.0009448613376835237, "loss": 0.1407, "num_input_tokens_seen": 25041280, "step": 11585 }, { "epoch": 1.8907014681892331, "grad_norm": 0.05067252740263939, "learning_rate": 0.0009452691680261012, "loss": 0.1139, "num_input_tokens_seen": 25052256, "step": 11590 }, { "epoch": 1.8915171288743884, "grad_norm": 0.18682821094989777, "learning_rate": 0.0009456769983686786, "loss": 0.1019, "num_input_tokens_seen": 25063328, "step": 11595 }, { "epoch": 1.8923327895595432, "grad_norm": 0.04457815736532211, "learning_rate": 0.0009460848287112561, "loss": 0.0436, "num_input_tokens_seen": 25075296, "step": 11600 }, { "epoch": 1.8931484502446982, "grad_norm": 0.09983167052268982, "learning_rate": 0.0009464926590538336, "loss": 0.1379, "num_input_tokens_seen": 25086144, "step": 11605 }, { "epoch": 1.8939641109298533, "grad_norm": 0.19143344461917877, "learning_rate": 0.0009469004893964112, "loss": 0.1299, "num_input_tokens_seen": 25096736, "step": 11610 }, { "epoch": 1.894779771615008, "grad_norm": 0.06157934293150902, "learning_rate": 0.0009473083197389885, "loss": 0.0483, "num_input_tokens_seen": 25107296, "step": 11615 }, { "epoch": 1.8955954323001631, "grad_norm": 0.016641128808259964, "learning_rate": 0.0009477161500815661, "loss": 0.0616, "num_input_tokens_seen": 25118784, "step": 11620 }, { "epoch": 1.8964110929853182, "grad_norm": 0.049104683101177216, "learning_rate": 0.0009481239804241436, "loss": 0.2264, "num_input_tokens_seen": 25128448, "step": 11625 }, { "epoch": 1.897226753670473, "grad_norm": 0.29125604033470154, "learning_rate": 0.000948531810766721, "loss": 0.0607, "num_input_tokens_seen": 25139040, "step": 11630 }, { "epoch": 1.898042414355628, "grad_norm": 0.011295678094029427, "learning_rate": 0.0009489396411092986, "loss": 0.1261, "num_input_tokens_seen": 25149312, "step": 11635 }, { "epoch": 1.898858075040783, "grad_norm": 0.27017614245414734, "learning_rate": 0.000949347471451876, "loss": 0.1949, "num_input_tokens_seen": 25160544, "step": 11640 }, { "epoch": 1.899673735725938, "grad_norm": 0.09900322556495667, "learning_rate": 0.0009497553017944536, "loss": 0.1791, "num_input_tokens_seen": 25171168, "step": 11645 }, { "epoch": 1.900489396411093, "grad_norm": 0.015560412779450417, "learning_rate": 0.000950163132137031, "loss": 0.1213, "num_input_tokens_seen": 25181920, "step": 11650 }, { "epoch": 1.901305057096248, "grad_norm": 0.13649839162826538, "learning_rate": 0.0009505709624796085, "loss": 0.0536, "num_input_tokens_seen": 25192704, "step": 11655 }, { "epoch": 1.9021207177814028, "grad_norm": 0.04336768016219139, "learning_rate": 0.000950978792822186, "loss": 0.0182, "num_input_tokens_seen": 25204544, "step": 11660 }, { "epoch": 1.902936378466558, "grad_norm": 0.20540879666805267, "learning_rate": 0.0009513866231647634, "loss": 0.2157, "num_input_tokens_seen": 25215488, "step": 11665 }, { "epoch": 1.9037520391517129, "grad_norm": 0.31003397703170776, "learning_rate": 0.000951794453507341, "loss": 0.2263, "num_input_tokens_seen": 25225824, "step": 11670 }, { "epoch": 1.904567699836868, "grad_norm": 0.37362805008888245, "learning_rate": 0.0009522022838499185, "loss": 0.2027, "num_input_tokens_seen": 25236096, "step": 11675 }, { "epoch": 1.905383360522023, "grad_norm": 0.14676491916179657, "learning_rate": 0.000952610114192496, "loss": 0.219, "num_input_tokens_seen": 25246656, "step": 11680 }, { "epoch": 1.9061990212071778, "grad_norm": 0.2860686779022217, "learning_rate": 0.0009530179445350734, "loss": 0.2504, "num_input_tokens_seen": 25258048, "step": 11685 }, { "epoch": 1.9070146818923328, "grad_norm": 0.12542608380317688, "learning_rate": 0.0009534257748776509, "loss": 0.12, "num_input_tokens_seen": 25269088, "step": 11690 }, { "epoch": 1.9078303425774878, "grad_norm": 0.040885381400585175, "learning_rate": 0.0009538336052202285, "loss": 0.1084, "num_input_tokens_seen": 25280544, "step": 11695 }, { "epoch": 1.9086460032626427, "grad_norm": 0.053815603256225586, "learning_rate": 0.0009542414355628059, "loss": 0.1517, "num_input_tokens_seen": 25288896, "step": 11700 }, { "epoch": 1.9094616639477977, "grad_norm": 0.15390309691429138, "learning_rate": 0.0009546492659053833, "loss": 0.2707, "num_input_tokens_seen": 25299200, "step": 11705 }, { "epoch": 1.9102773246329527, "grad_norm": 0.046468086540699005, "learning_rate": 0.0009550570962479609, "loss": 0.0945, "num_input_tokens_seen": 25310240, "step": 11710 }, { "epoch": 1.9110929853181076, "grad_norm": 0.11353089660406113, "learning_rate": 0.0009554649265905384, "loss": 0.0926, "num_input_tokens_seen": 25320992, "step": 11715 }, { "epoch": 1.9119086460032626, "grad_norm": 0.04293264448642731, "learning_rate": 0.0009558727569331158, "loss": 0.0586, "num_input_tokens_seen": 25333280, "step": 11720 }, { "epoch": 1.9127243066884176, "grad_norm": 0.030097907409071922, "learning_rate": 0.0009562805872756934, "loss": 0.0627, "num_input_tokens_seen": 25344608, "step": 11725 }, { "epoch": 1.9135399673735725, "grad_norm": 0.10203356295824051, "learning_rate": 0.0009566884176182708, "loss": 0.0644, "num_input_tokens_seen": 25355264, "step": 11730 }, { "epoch": 1.9143556280587277, "grad_norm": 0.33191055059432983, "learning_rate": 0.0009570962479608483, "loss": 0.0924, "num_input_tokens_seen": 25365600, "step": 11735 }, { "epoch": 1.9151712887438825, "grad_norm": 0.33127424120903015, "learning_rate": 0.0009575040783034258, "loss": 0.2759, "num_input_tokens_seen": 25376416, "step": 11740 }, { "epoch": 1.9159869494290374, "grad_norm": 0.2595071792602539, "learning_rate": 0.0009579119086460033, "loss": 0.2841, "num_input_tokens_seen": 25387168, "step": 11745 }, { "epoch": 1.9168026101141926, "grad_norm": 0.03523773327469826, "learning_rate": 0.0009583197389885808, "loss": 0.1388, "num_input_tokens_seen": 25399008, "step": 11750 }, { "epoch": 1.9176182707993474, "grad_norm": 0.06833466142416, "learning_rate": 0.0009587275693311582, "loss": 0.0641, "num_input_tokens_seen": 25409952, "step": 11755 }, { "epoch": 1.9184339314845025, "grad_norm": 0.10483044385910034, "learning_rate": 0.0009591353996737358, "loss": 0.1451, "num_input_tokens_seen": 25421984, "step": 11760 }, { "epoch": 1.9192495921696575, "grad_norm": 0.04970962181687355, "learning_rate": 0.0009595432300163133, "loss": 0.1608, "num_input_tokens_seen": 25432416, "step": 11765 }, { "epoch": 1.9200652528548123, "grad_norm": 0.16871914267539978, "learning_rate": 0.0009599510603588906, "loss": 0.3064, "num_input_tokens_seen": 25444192, "step": 11770 }, { "epoch": 1.9208809135399674, "grad_norm": 0.03458542376756668, "learning_rate": 0.0009603588907014682, "loss": 0.0789, "num_input_tokens_seen": 25455104, "step": 11775 }, { "epoch": 1.9216965742251224, "grad_norm": 0.04260988160967827, "learning_rate": 0.0009607667210440457, "loss": 0.1354, "num_input_tokens_seen": 25465856, "step": 11780 }, { "epoch": 1.9225122349102772, "grad_norm": 0.05124415084719658, "learning_rate": 0.0009611745513866232, "loss": 0.1117, "num_input_tokens_seen": 25476000, "step": 11785 }, { "epoch": 1.9233278955954323, "grad_norm": 0.07688180357217789, "learning_rate": 0.0009615823817292007, "loss": 0.08, "num_input_tokens_seen": 25488160, "step": 11790 }, { "epoch": 1.9241435562805873, "grad_norm": 0.11089295148849487, "learning_rate": 0.0009619902120717781, "loss": 0.1051, "num_input_tokens_seen": 25499328, "step": 11795 }, { "epoch": 1.9249592169657421, "grad_norm": 0.04999072477221489, "learning_rate": 0.0009623980424143557, "loss": 0.1119, "num_input_tokens_seen": 25510592, "step": 11800 }, { "epoch": 1.9257748776508974, "grad_norm": 0.06794946640729904, "learning_rate": 0.0009628058727569331, "loss": 0.0912, "num_input_tokens_seen": 25521184, "step": 11805 }, { "epoch": 1.9265905383360522, "grad_norm": 0.21613682806491852, "learning_rate": 0.0009632137030995107, "loss": 0.0978, "num_input_tokens_seen": 25532608, "step": 11810 }, { "epoch": 1.927406199021207, "grad_norm": 0.034385792911052704, "learning_rate": 0.0009636215334420881, "loss": 0.1519, "num_input_tokens_seen": 25543936, "step": 11815 }, { "epoch": 1.9282218597063623, "grad_norm": 0.19377191364765167, "learning_rate": 0.0009640293637846655, "loss": 0.1283, "num_input_tokens_seen": 25555104, "step": 11820 }, { "epoch": 1.929037520391517, "grad_norm": 0.16980724036693573, "learning_rate": 0.0009644371941272431, "loss": 0.061, "num_input_tokens_seen": 25565472, "step": 11825 }, { "epoch": 1.9298531810766721, "grad_norm": 0.014253470115363598, "learning_rate": 0.0009648450244698206, "loss": 0.1105, "num_input_tokens_seen": 25576832, "step": 11830 }, { "epoch": 1.9306688417618272, "grad_norm": 0.03056260570883751, "learning_rate": 0.0009652528548123982, "loss": 0.0344, "num_input_tokens_seen": 25588256, "step": 11835 }, { "epoch": 1.931484502446982, "grad_norm": 0.2445419281721115, "learning_rate": 0.0009656606851549755, "loss": 0.274, "num_input_tokens_seen": 25599072, "step": 11840 }, { "epoch": 1.932300163132137, "grad_norm": 0.4470367729663849, "learning_rate": 0.000966068515497553, "loss": 0.1761, "num_input_tokens_seen": 25609696, "step": 11845 }, { "epoch": 1.933115823817292, "grad_norm": 0.03975436091423035, "learning_rate": 0.0009664763458401306, "loss": 0.0952, "num_input_tokens_seen": 25621984, "step": 11850 }, { "epoch": 1.933931484502447, "grad_norm": 0.035750702023506165, "learning_rate": 0.000966884176182708, "loss": 0.1447, "num_input_tokens_seen": 25633024, "step": 11855 }, { "epoch": 1.934747145187602, "grad_norm": 0.05805574357509613, "learning_rate": 0.0009672920065252854, "loss": 0.278, "num_input_tokens_seen": 25644320, "step": 11860 }, { "epoch": 1.935562805872757, "grad_norm": 0.246421679854393, "learning_rate": 0.000967699836867863, "loss": 0.1166, "num_input_tokens_seen": 25655456, "step": 11865 }, { "epoch": 1.9363784665579118, "grad_norm": 0.1580600142478943, "learning_rate": 0.0009681076672104405, "loss": 0.1623, "num_input_tokens_seen": 25666592, "step": 11870 }, { "epoch": 1.9371941272430668, "grad_norm": 0.14604364335536957, "learning_rate": 0.000968515497553018, "loss": 0.1526, "num_input_tokens_seen": 25676800, "step": 11875 }, { "epoch": 1.9380097879282219, "grad_norm": 0.10736247897148132, "learning_rate": 0.0009689233278955954, "loss": 0.0648, "num_input_tokens_seen": 25686560, "step": 11880 }, { "epoch": 1.9388254486133767, "grad_norm": 0.15189337730407715, "learning_rate": 0.0009693311582381729, "loss": 0.1006, "num_input_tokens_seen": 25697120, "step": 11885 }, { "epoch": 1.939641109298532, "grad_norm": 0.06999967247247696, "learning_rate": 0.0009697389885807504, "loss": 0.0706, "num_input_tokens_seen": 25708736, "step": 11890 }, { "epoch": 1.9404567699836868, "grad_norm": 0.06257314234972, "learning_rate": 0.0009701468189233279, "loss": 0.1763, "num_input_tokens_seen": 25720064, "step": 11895 }, { "epoch": 1.9412724306688418, "grad_norm": 0.23246027529239655, "learning_rate": 0.0009705546492659055, "loss": 0.2333, "num_input_tokens_seen": 25731616, "step": 11900 }, { "epoch": 1.9420880913539968, "grad_norm": 0.2732833921909332, "learning_rate": 0.0009709624796084829, "loss": 0.1856, "num_input_tokens_seen": 25743072, "step": 11905 }, { "epoch": 1.9429037520391517, "grad_norm": 0.054060909897089005, "learning_rate": 0.0009713703099510603, "loss": 0.1246, "num_input_tokens_seen": 25753920, "step": 11910 }, { "epoch": 1.9437194127243067, "grad_norm": 0.10430661588907242, "learning_rate": 0.0009717781402936379, "loss": 0.087, "num_input_tokens_seen": 25764640, "step": 11915 }, { "epoch": 1.9445350734094617, "grad_norm": 0.030959485098719597, "learning_rate": 0.0009721859706362154, "loss": 0.0374, "num_input_tokens_seen": 25777152, "step": 11920 }, { "epoch": 1.9453507340946166, "grad_norm": 0.03141267970204353, "learning_rate": 0.0009725938009787928, "loss": 0.0367, "num_input_tokens_seen": 25787040, "step": 11925 }, { "epoch": 1.9461663947797716, "grad_norm": 0.1135735735297203, "learning_rate": 0.0009730016313213703, "loss": 0.1172, "num_input_tokens_seen": 25796544, "step": 11930 }, { "epoch": 1.9469820554649266, "grad_norm": 0.1543119102716446, "learning_rate": 0.0009734094616639478, "loss": 0.1302, "num_input_tokens_seen": 25807392, "step": 11935 }, { "epoch": 1.9477977161500815, "grad_norm": 0.008437680080533028, "learning_rate": 0.0009738172920065254, "loss": 0.06, "num_input_tokens_seen": 25817952, "step": 11940 }, { "epoch": 1.9486133768352365, "grad_norm": 0.0668170228600502, "learning_rate": 0.0009742251223491027, "loss": 0.0682, "num_input_tokens_seen": 25829568, "step": 11945 }, { "epoch": 1.9494290375203915, "grad_norm": 0.10054466128349304, "learning_rate": 0.0009746329526916803, "loss": 0.0361, "num_input_tokens_seen": 25840160, "step": 11950 }, { "epoch": 1.9502446982055464, "grad_norm": 0.16265834867954254, "learning_rate": 0.0009750407830342578, "loss": 0.2625, "num_input_tokens_seen": 25850816, "step": 11955 }, { "epoch": 1.9510603588907016, "grad_norm": 0.32785764336586, "learning_rate": 0.0009754486133768352, "loss": 0.3579, "num_input_tokens_seen": 25862400, "step": 11960 }, { "epoch": 1.9518760195758564, "grad_norm": 0.06142808124423027, "learning_rate": 0.0009758564437194128, "loss": 0.203, "num_input_tokens_seen": 25872096, "step": 11965 }, { "epoch": 1.9526916802610113, "grad_norm": 0.04707051441073418, "learning_rate": 0.0009762642740619902, "loss": 0.1397, "num_input_tokens_seen": 25884000, "step": 11970 }, { "epoch": 1.9535073409461665, "grad_norm": 0.03933320939540863, "learning_rate": 0.0009766721044045677, "loss": 0.1543, "num_input_tokens_seen": 25895776, "step": 11975 }, { "epoch": 1.9543230016313213, "grad_norm": 0.2816435992717743, "learning_rate": 0.0009770799347471452, "loss": 0.2767, "num_input_tokens_seen": 25906624, "step": 11980 }, { "epoch": 1.9551386623164764, "grad_norm": 0.2596624791622162, "learning_rate": 0.0009774877650897227, "loss": 0.1901, "num_input_tokens_seen": 25918592, "step": 11985 }, { "epoch": 1.9559543230016314, "grad_norm": 0.12683138251304626, "learning_rate": 0.0009778955954323001, "loss": 0.1409, "num_input_tokens_seen": 25929056, "step": 11990 }, { "epoch": 1.9567699836867862, "grad_norm": 0.106838159263134, "learning_rate": 0.0009783034257748776, "loss": 0.1512, "num_input_tokens_seen": 25938912, "step": 11995 }, { "epoch": 1.9575856443719413, "grad_norm": 0.17070820927619934, "learning_rate": 0.000978711256117455, "loss": 0.1901, "num_input_tokens_seen": 25948192, "step": 12000 }, { "epoch": 1.9584013050570963, "grad_norm": 0.19078631699085236, "learning_rate": 0.0009791190864600326, "loss": 0.224, "num_input_tokens_seen": 25959552, "step": 12005 }, { "epoch": 1.9592169657422511, "grad_norm": 0.04365871846675873, "learning_rate": 0.00097952691680261, "loss": 0.2889, "num_input_tokens_seen": 25971232, "step": 12010 }, { "epoch": 1.9600326264274062, "grad_norm": 0.06310081481933594, "learning_rate": 0.0009799347471451875, "loss": 0.1247, "num_input_tokens_seen": 25982080, "step": 12015 }, { "epoch": 1.9608482871125612, "grad_norm": 0.09681393951177597, "learning_rate": 0.0009803425774877652, "loss": 0.1131, "num_input_tokens_seen": 25993824, "step": 12020 }, { "epoch": 1.961663947797716, "grad_norm": 0.06323209404945374, "learning_rate": 0.0009807504078303427, "loss": 0.0543, "num_input_tokens_seen": 26004512, "step": 12025 }, { "epoch": 1.9624796084828713, "grad_norm": 0.10774081945419312, "learning_rate": 0.00098115823817292, "loss": 0.2355, "num_input_tokens_seen": 26016064, "step": 12030 }, { "epoch": 1.963295269168026, "grad_norm": 0.0556272454559803, "learning_rate": 0.0009815660685154977, "loss": 0.1216, "num_input_tokens_seen": 26027392, "step": 12035 }, { "epoch": 1.964110929853181, "grad_norm": 0.007031048182398081, "learning_rate": 0.0009819738988580751, "loss": 0.0434, "num_input_tokens_seen": 26038112, "step": 12040 }, { "epoch": 1.9649265905383362, "grad_norm": 0.24994535744190216, "learning_rate": 0.0009823817292006526, "loss": 0.1626, "num_input_tokens_seen": 26049792, "step": 12045 }, { "epoch": 1.965742251223491, "grad_norm": 0.007116135209798813, "learning_rate": 0.00098278955954323, "loss": 0.1201, "num_input_tokens_seen": 26060352, "step": 12050 }, { "epoch": 1.966557911908646, "grad_norm": 0.13843262195587158, "learning_rate": 0.0009831973898858076, "loss": 0.1493, "num_input_tokens_seen": 26071520, "step": 12055 }, { "epoch": 1.967373572593801, "grad_norm": 0.047937799245119095, "learning_rate": 0.000983605220228385, "loss": 0.0618, "num_input_tokens_seen": 26082976, "step": 12060 }, { "epoch": 1.968189233278956, "grad_norm": 0.21617697179317474, "learning_rate": 0.0009840130505709625, "loss": 0.1505, "num_input_tokens_seen": 26092960, "step": 12065 }, { "epoch": 1.969004893964111, "grad_norm": 0.1679522842168808, "learning_rate": 0.00098442088091354, "loss": 0.1382, "num_input_tokens_seen": 26103776, "step": 12070 }, { "epoch": 1.969820554649266, "grad_norm": 0.09036083519458771, "learning_rate": 0.0009848287112561175, "loss": 0.0951, "num_input_tokens_seen": 26115040, "step": 12075 }, { "epoch": 1.9706362153344208, "grad_norm": 0.03721405193209648, "learning_rate": 0.000985236541598695, "loss": 0.2005, "num_input_tokens_seen": 26125952, "step": 12080 }, { "epoch": 1.9714518760195758, "grad_norm": 0.040085360407829285, "learning_rate": 0.0009856443719412724, "loss": 0.1522, "num_input_tokens_seen": 26136512, "step": 12085 }, { "epoch": 1.9722675367047309, "grad_norm": 0.16589096188545227, "learning_rate": 0.00098605220228385, "loss": 0.2404, "num_input_tokens_seen": 26146944, "step": 12090 }, { "epoch": 1.9730831973898857, "grad_norm": 0.11659594625234604, "learning_rate": 0.0009864600326264274, "loss": 0.0901, "num_input_tokens_seen": 26157920, "step": 12095 }, { "epoch": 1.9738988580750407, "grad_norm": 0.16950438916683197, "learning_rate": 0.0009868678629690048, "loss": 0.0963, "num_input_tokens_seen": 26168864, "step": 12100 }, { "epoch": 1.9747145187601958, "grad_norm": 0.09831640124320984, "learning_rate": 0.0009872756933115823, "loss": 0.0675, "num_input_tokens_seen": 26179968, "step": 12105 }, { "epoch": 1.9755301794453506, "grad_norm": 0.21385249495506287, "learning_rate": 0.00098768352365416, "loss": 0.1444, "num_input_tokens_seen": 26192096, "step": 12110 }, { "epoch": 1.9763458401305058, "grad_norm": 0.0602976493537426, "learning_rate": 0.0009880913539967373, "loss": 0.1203, "num_input_tokens_seen": 26202400, "step": 12115 }, { "epoch": 1.9771615008156607, "grad_norm": 0.2599957287311554, "learning_rate": 0.0009884991843393148, "loss": 0.0832, "num_input_tokens_seen": 26212608, "step": 12120 }, { "epoch": 1.9779771615008157, "grad_norm": 0.04638943821191788, "learning_rate": 0.0009889070146818924, "loss": 0.1196, "num_input_tokens_seen": 26222912, "step": 12125 }, { "epoch": 1.9787928221859707, "grad_norm": 0.4077025353908539, "learning_rate": 0.00098931484502447, "loss": 0.2744, "num_input_tokens_seen": 26233856, "step": 12130 }, { "epoch": 1.9796084828711256, "grad_norm": 0.18510301411151886, "learning_rate": 0.0009897226753670474, "loss": 0.1087, "num_input_tokens_seen": 26242976, "step": 12135 }, { "epoch": 1.9804241435562806, "grad_norm": 0.04004143178462982, "learning_rate": 0.0009901305057096249, "loss": 0.2139, "num_input_tokens_seen": 26253600, "step": 12140 }, { "epoch": 1.9812398042414356, "grad_norm": 0.16456164419651031, "learning_rate": 0.0009905383360522024, "loss": 0.1437, "num_input_tokens_seen": 26263296, "step": 12145 }, { "epoch": 1.9820554649265905, "grad_norm": 0.05468539148569107, "learning_rate": 0.0009909461663947798, "loss": 0.1029, "num_input_tokens_seen": 26274656, "step": 12150 }, { "epoch": 1.9828711256117455, "grad_norm": 0.11041852831840515, "learning_rate": 0.0009913539967373573, "loss": 0.0795, "num_input_tokens_seen": 26286368, "step": 12155 }, { "epoch": 1.9836867862969005, "grad_norm": 0.1570504903793335, "learning_rate": 0.0009917618270799348, "loss": 0.0905, "num_input_tokens_seen": 26297792, "step": 12160 }, { "epoch": 1.9845024469820554, "grad_norm": 0.02780609205365181, "learning_rate": 0.0009921696574225123, "loss": 0.0639, "num_input_tokens_seen": 26309024, "step": 12165 }, { "epoch": 1.9853181076672104, "grad_norm": 0.0053548384457826614, "learning_rate": 0.0009925774877650897, "loss": 0.1691, "num_input_tokens_seen": 26319360, "step": 12170 }, { "epoch": 1.9861337683523654, "grad_norm": 0.20225311815738678, "learning_rate": 0.0009929853181076672, "loss": 0.1344, "num_input_tokens_seen": 26328256, "step": 12175 }, { "epoch": 1.9869494290375203, "grad_norm": 0.02028091996908188, "learning_rate": 0.0009933931484502447, "loss": 0.0445, "num_input_tokens_seen": 26339648, "step": 12180 }, { "epoch": 1.9877650897226755, "grad_norm": 0.475198894739151, "learning_rate": 0.0009938009787928222, "loss": 0.1998, "num_input_tokens_seen": 26350528, "step": 12185 }, { "epoch": 1.9885807504078303, "grad_norm": 0.10536182671785355, "learning_rate": 0.0009942088091353996, "loss": 0.1896, "num_input_tokens_seen": 26361568, "step": 12190 }, { "epoch": 1.9893964110929854, "grad_norm": 0.06540945172309875, "learning_rate": 0.0009946166394779771, "loss": 0.1007, "num_input_tokens_seen": 26372832, "step": 12195 }, { "epoch": 1.9902120717781404, "grad_norm": 0.010280012153089046, "learning_rate": 0.0009950244698205548, "loss": 0.0371, "num_input_tokens_seen": 26384000, "step": 12200 }, { "epoch": 1.9910277324632952, "grad_norm": 0.2649214267730713, "learning_rate": 0.000995432300163132, "loss": 0.0944, "num_input_tokens_seen": 26395008, "step": 12205 }, { "epoch": 1.9918433931484503, "grad_norm": 0.05911831930279732, "learning_rate": 0.0009958401305057095, "loss": 0.0362, "num_input_tokens_seen": 26406304, "step": 12210 }, { "epoch": 1.9926590538336053, "grad_norm": 0.2364213466644287, "learning_rate": 0.0009962479608482872, "loss": 0.164, "num_input_tokens_seen": 26416576, "step": 12215 }, { "epoch": 1.9934747145187601, "grad_norm": 0.07283175736665726, "learning_rate": 0.0009966557911908645, "loss": 0.0485, "num_input_tokens_seen": 26426784, "step": 12220 }, { "epoch": 1.9942903752039152, "grad_norm": 0.1471281498670578, "learning_rate": 0.0009970636215334422, "loss": 0.0946, "num_input_tokens_seen": 26436512, "step": 12225 }, { "epoch": 1.9951060358890702, "grad_norm": 0.0576576367020607, "learning_rate": 0.0009974714518760197, "loss": 0.0836, "num_input_tokens_seen": 26446880, "step": 12230 }, { "epoch": 1.995921696574225, "grad_norm": 0.016541773453354836, "learning_rate": 0.0009978792822185971, "loss": 0.1503, "num_input_tokens_seen": 26457984, "step": 12235 }, { "epoch": 1.99673735725938, "grad_norm": 0.26312413811683655, "learning_rate": 0.0009982871125611746, "loss": 0.1184, "num_input_tokens_seen": 26469440, "step": 12240 }, { "epoch": 1.997553017944535, "grad_norm": 0.027293941006064415, "learning_rate": 0.000998694942903752, "loss": 0.0629, "num_input_tokens_seen": 26479648, "step": 12245 }, { "epoch": 1.99836867862969, "grad_norm": 0.1137554869055748, "learning_rate": 0.0009991027732463296, "loss": 0.1678, "num_input_tokens_seen": 26489920, "step": 12250 }, { "epoch": 1.9991843393148452, "grad_norm": 0.05962604284286499, "learning_rate": 0.000999510603588907, "loss": 0.0611, "num_input_tokens_seen": 26501248, "step": 12255 }, { "epoch": 2.0, "grad_norm": 0.03831448405981064, "learning_rate": 0.0009999184339314845, "loss": 0.0262, "num_input_tokens_seen": 26510112, "step": 12260 }, { "epoch": 2.0, "eval_loss": 0.13076965510845184, "eval_runtime": 103.3641, "eval_samples_per_second": 26.363, "eval_steps_per_second": 6.598, "num_input_tokens_seen": 26510112, "step": 12260 }, { "epoch": 2.000815660685155, "grad_norm": 0.02669798582792282, "learning_rate": 0.000999999996757397, "loss": 0.0441, "num_input_tokens_seen": 26521088, "step": 12265 }, { "epoch": 2.00163132137031, "grad_norm": 0.09440121054649353, "learning_rate": 0.0009999999835843226, "loss": 0.0782, "num_input_tokens_seen": 26530976, "step": 12270 }, { "epoch": 2.002446982055465, "grad_norm": 0.013201319612562656, "learning_rate": 0.000999999960278114, "loss": 0.0368, "num_input_tokens_seen": 26541536, "step": 12275 }, { "epoch": 2.0032626427406197, "grad_norm": 0.3132156729698181, "learning_rate": 0.000999999926838772, "loss": 0.24, "num_input_tokens_seen": 26551776, "step": 12280 }, { "epoch": 2.004078303425775, "grad_norm": 0.3435702621936798, "learning_rate": 0.0009999998832662972, "loss": 0.2266, "num_input_tokens_seen": 26562528, "step": 12285 }, { "epoch": 2.00489396411093, "grad_norm": 0.07973910868167877, "learning_rate": 0.0009999998295606907, "loss": 0.1712, "num_input_tokens_seen": 26572480, "step": 12290 }, { "epoch": 2.0057096247960846, "grad_norm": 0.09572141617536545, "learning_rate": 0.000999999765721953, "loss": 0.0815, "num_input_tokens_seen": 26582400, "step": 12295 }, { "epoch": 2.00652528548124, "grad_norm": 0.10497356951236725, "learning_rate": 0.000999999691750086, "loss": 0.0754, "num_input_tokens_seen": 26593248, "step": 12300 }, { "epoch": 2.0073409461663947, "grad_norm": 0.021210532635450363, "learning_rate": 0.0009999996076450908, "loss": 0.1709, "num_input_tokens_seen": 26603616, "step": 12305 }, { "epoch": 2.00815660685155, "grad_norm": 0.08911927789449692, "learning_rate": 0.0009999995134069692, "loss": 0.0257, "num_input_tokens_seen": 26614560, "step": 12310 }, { "epoch": 2.0089722675367048, "grad_norm": 0.043773408979177475, "learning_rate": 0.0009999994090357234, "loss": 0.281, "num_input_tokens_seen": 26624672, "step": 12315 }, { "epoch": 2.0097879282218596, "grad_norm": 0.12832224369049072, "learning_rate": 0.0009999992945313551, "loss": 0.1412, "num_input_tokens_seen": 26635552, "step": 12320 }, { "epoch": 2.010603588907015, "grad_norm": 0.078923799097538, "learning_rate": 0.0009999991698938669, "loss": 0.1098, "num_input_tokens_seen": 26645984, "step": 12325 }, { "epoch": 2.0114192495921697, "grad_norm": 0.13948754966259003, "learning_rate": 0.000999999035123261, "loss": 0.2246, "num_input_tokens_seen": 26656192, "step": 12330 }, { "epoch": 2.0122349102773245, "grad_norm": 0.06741499155759811, "learning_rate": 0.0009999988902195407, "loss": 0.0859, "num_input_tokens_seen": 26667200, "step": 12335 }, { "epoch": 2.0130505709624797, "grad_norm": 0.21579128503799438, "learning_rate": 0.0009999987351827085, "loss": 0.2156, "num_input_tokens_seen": 26678080, "step": 12340 }, { "epoch": 2.0138662316476346, "grad_norm": 0.045807160437107086, "learning_rate": 0.0009999985700127674, "loss": 0.0783, "num_input_tokens_seen": 26690272, "step": 12345 }, { "epoch": 2.0146818923327894, "grad_norm": 0.06333373486995697, "learning_rate": 0.0009999983947097213, "loss": 0.104, "num_input_tokens_seen": 26701408, "step": 12350 }, { "epoch": 2.0154975530179446, "grad_norm": 0.16630201041698456, "learning_rate": 0.0009999982092735733, "loss": 0.1589, "num_input_tokens_seen": 26711680, "step": 12355 }, { "epoch": 2.0163132137030995, "grad_norm": 0.1822613775730133, "learning_rate": 0.0009999980137043274, "loss": 0.1364, "num_input_tokens_seen": 26722336, "step": 12360 }, { "epoch": 2.0171288743882543, "grad_norm": 0.073283351957798, "learning_rate": 0.0009999978080019872, "loss": 0.1224, "num_input_tokens_seen": 26732832, "step": 12365 }, { "epoch": 2.0179445350734095, "grad_norm": 0.37038156390190125, "learning_rate": 0.0009999975921665574, "loss": 0.2476, "num_input_tokens_seen": 26744608, "step": 12370 }, { "epoch": 2.0187601957585644, "grad_norm": 0.020198166370391846, "learning_rate": 0.000999997366198042, "loss": 0.0229, "num_input_tokens_seen": 26755776, "step": 12375 }, { "epoch": 2.0195758564437196, "grad_norm": 0.0995674729347229, "learning_rate": 0.0009999971300964456, "loss": 0.1549, "num_input_tokens_seen": 26765472, "step": 12380 }, { "epoch": 2.0203915171288744, "grad_norm": 0.0052305120043456554, "learning_rate": 0.0009999968838617732, "loss": 0.0756, "num_input_tokens_seen": 26776224, "step": 12385 }, { "epoch": 2.0212071778140293, "grad_norm": 0.12994590401649475, "learning_rate": 0.0009999966274940296, "loss": 0.2399, "num_input_tokens_seen": 26787520, "step": 12390 }, { "epoch": 2.0220228384991845, "grad_norm": 0.019414570182561874, "learning_rate": 0.00099999636099322, "loss": 0.0252, "num_input_tokens_seen": 26798048, "step": 12395 }, { "epoch": 2.0228384991843393, "grad_norm": 0.2989323139190674, "learning_rate": 0.0009999960843593498, "loss": 0.2631, "num_input_tokens_seen": 26808064, "step": 12400 }, { "epoch": 2.023654159869494, "grad_norm": 0.03400423377752304, "learning_rate": 0.0009999957975924249, "loss": 0.0305, "num_input_tokens_seen": 26818304, "step": 12405 }, { "epoch": 2.0244698205546494, "grad_norm": 0.03117789700627327, "learning_rate": 0.0009999955006924507, "loss": 0.0789, "num_input_tokens_seen": 26829632, "step": 12410 }, { "epoch": 2.0252854812398042, "grad_norm": 0.01920362561941147, "learning_rate": 0.0009999951936594334, "loss": 0.0514, "num_input_tokens_seen": 26839264, "step": 12415 }, { "epoch": 2.026101141924959, "grad_norm": 0.1514592468738556, "learning_rate": 0.0009999948764933793, "loss": 0.1578, "num_input_tokens_seen": 26850688, "step": 12420 }, { "epoch": 2.0269168026101143, "grad_norm": 0.023600779473781586, "learning_rate": 0.0009999945491942946, "loss": 0.0543, "num_input_tokens_seen": 26861376, "step": 12425 }, { "epoch": 2.027732463295269, "grad_norm": 0.14419586956501007, "learning_rate": 0.0009999942117621863, "loss": 0.0864, "num_input_tokens_seen": 26872288, "step": 12430 }, { "epoch": 2.028548123980424, "grad_norm": 0.04900144413113594, "learning_rate": 0.0009999938641970607, "loss": 0.0924, "num_input_tokens_seen": 26883360, "step": 12435 }, { "epoch": 2.029363784665579, "grad_norm": 0.10072429478168488, "learning_rate": 0.0009999935064989255, "loss": 0.1094, "num_input_tokens_seen": 26894816, "step": 12440 }, { "epoch": 2.030179445350734, "grad_norm": 0.024320529773831367, "learning_rate": 0.0009999931386677873, "loss": 0.0679, "num_input_tokens_seen": 26905344, "step": 12445 }, { "epoch": 2.0309951060358893, "grad_norm": 0.15190035104751587, "learning_rate": 0.000999992760703654, "loss": 0.1935, "num_input_tokens_seen": 26916960, "step": 12450 }, { "epoch": 2.031810766721044, "grad_norm": 0.1964467167854309, "learning_rate": 0.000999992372606533, "loss": 0.1, "num_input_tokens_seen": 26926304, "step": 12455 }, { "epoch": 2.032626427406199, "grad_norm": 0.12315433472394943, "learning_rate": 0.0009999919743764324, "loss": 0.2653, "num_input_tokens_seen": 26935776, "step": 12460 }, { "epoch": 2.033442088091354, "grad_norm": 0.18422943353652954, "learning_rate": 0.00099999156601336, "loss": 0.0925, "num_input_tokens_seen": 26945792, "step": 12465 }, { "epoch": 2.034257748776509, "grad_norm": 0.296763151884079, "learning_rate": 0.0009999911475173245, "loss": 0.15, "num_input_tokens_seen": 26958336, "step": 12470 }, { "epoch": 2.035073409461664, "grad_norm": 0.16706916689872742, "learning_rate": 0.000999990718888334, "loss": 0.1491, "num_input_tokens_seen": 26969184, "step": 12475 }, { "epoch": 2.035889070146819, "grad_norm": 0.08804943412542343, "learning_rate": 0.0009999902801263974, "loss": 0.1627, "num_input_tokens_seen": 26979328, "step": 12480 }, { "epoch": 2.036704730831974, "grad_norm": 0.08816880732774734, "learning_rate": 0.0009999898312315232, "loss": 0.0732, "num_input_tokens_seen": 26989920, "step": 12485 }, { "epoch": 2.0375203915171287, "grad_norm": 0.021312927827239037, "learning_rate": 0.000999989372203721, "loss": 0.1219, "num_input_tokens_seen": 27000544, "step": 12490 }, { "epoch": 2.038336052202284, "grad_norm": 0.0042571392841637135, "learning_rate": 0.0009999889030429998, "loss": 0.0286, "num_input_tokens_seen": 27011360, "step": 12495 }, { "epoch": 2.039151712887439, "grad_norm": 0.09083344042301178, "learning_rate": 0.0009999884237493692, "loss": 0.0313, "num_input_tokens_seen": 27021312, "step": 12500 }, { "epoch": 2.0399673735725936, "grad_norm": 0.028655050322413445, "learning_rate": 0.000999987934322839, "loss": 0.0255, "num_input_tokens_seen": 27032640, "step": 12505 }, { "epoch": 2.040783034257749, "grad_norm": 0.047613725066185, "learning_rate": 0.000999987434763419, "loss": 0.1954, "num_input_tokens_seen": 27043648, "step": 12510 }, { "epoch": 2.0415986949429037, "grad_norm": 0.3595113158226013, "learning_rate": 0.0009999869250711193, "loss": 0.1717, "num_input_tokens_seen": 27055232, "step": 12515 }, { "epoch": 2.0424143556280585, "grad_norm": 0.05471295118331909, "learning_rate": 0.0009999864052459503, "loss": 0.2655, "num_input_tokens_seen": 27066464, "step": 12520 }, { "epoch": 2.0432300163132138, "grad_norm": 0.14621756970882416, "learning_rate": 0.0009999858752879228, "loss": 0.1095, "num_input_tokens_seen": 27077888, "step": 12525 }, { "epoch": 2.0440456769983686, "grad_norm": 0.09596377611160278, "learning_rate": 0.0009999853351970469, "loss": 0.0755, "num_input_tokens_seen": 27088064, "step": 12530 }, { "epoch": 2.044861337683524, "grad_norm": 0.05375010520219803, "learning_rate": 0.000999984784973334, "loss": 0.2147, "num_input_tokens_seen": 27099296, "step": 12535 }, { "epoch": 2.0456769983686787, "grad_norm": 0.0923907533288002, "learning_rate": 0.0009999842246167952, "loss": 0.0616, "num_input_tokens_seen": 27110176, "step": 12540 }, { "epoch": 2.0464926590538335, "grad_norm": 0.019899077713489532, "learning_rate": 0.0009999836541274417, "loss": 0.0729, "num_input_tokens_seen": 27120768, "step": 12545 }, { "epoch": 2.0473083197389887, "grad_norm": 0.13260437548160553, "learning_rate": 0.0009999830735052853, "loss": 0.0526, "num_input_tokens_seen": 27131904, "step": 12550 }, { "epoch": 2.0481239804241436, "grad_norm": 0.009612596593797207, "learning_rate": 0.0009999824827503377, "loss": 0.0405, "num_input_tokens_seen": 27142976, "step": 12555 }, { "epoch": 2.0489396411092984, "grad_norm": 0.010959037579596043, "learning_rate": 0.0009999818818626105, "loss": 0.0632, "num_input_tokens_seen": 27154656, "step": 12560 }, { "epoch": 2.0497553017944536, "grad_norm": 0.03808634728193283, "learning_rate": 0.0009999812708421166, "loss": 0.06, "num_input_tokens_seen": 27165344, "step": 12565 }, { "epoch": 2.0505709624796085, "grad_norm": 0.006856338586658239, "learning_rate": 0.0009999806496888677, "loss": 0.0447, "num_input_tokens_seen": 27175104, "step": 12570 }, { "epoch": 2.0513866231647633, "grad_norm": 0.10554108023643494, "learning_rate": 0.0009999800184028766, "loss": 0.0212, "num_input_tokens_seen": 27186528, "step": 12575 }, { "epoch": 2.0522022838499185, "grad_norm": 0.0567990280687809, "learning_rate": 0.0009999793769841564, "loss": 0.165, "num_input_tokens_seen": 27197664, "step": 12580 }, { "epoch": 2.0530179445350734, "grad_norm": 0.0033737735357135534, "learning_rate": 0.0009999787254327196, "loss": 0.3187, "num_input_tokens_seen": 27209440, "step": 12585 }, { "epoch": 2.053833605220228, "grad_norm": 0.2872142791748047, "learning_rate": 0.00099997806374858, "loss": 0.229, "num_input_tokens_seen": 27222464, "step": 12590 }, { "epoch": 2.0546492659053834, "grad_norm": 0.1510654091835022, "learning_rate": 0.0009999773919317505, "loss": 0.1552, "num_input_tokens_seen": 27233664, "step": 12595 }, { "epoch": 2.0554649265905383, "grad_norm": 0.1906013935804367, "learning_rate": 0.000999976709982245, "loss": 0.2621, "num_input_tokens_seen": 27245632, "step": 12600 }, { "epoch": 2.0562805872756935, "grad_norm": 0.10470928251743317, "learning_rate": 0.000999976017900077, "loss": 0.0961, "num_input_tokens_seen": 27255872, "step": 12605 }, { "epoch": 2.0570962479608483, "grad_norm": 0.08966370671987534, "learning_rate": 0.0009999753156852609, "loss": 0.0501, "num_input_tokens_seen": 27266368, "step": 12610 }, { "epoch": 2.057911908646003, "grad_norm": 0.12156267464160919, "learning_rate": 0.0009999746033378105, "loss": 0.0726, "num_input_tokens_seen": 27276960, "step": 12615 }, { "epoch": 2.0587275693311584, "grad_norm": 0.2356519103050232, "learning_rate": 0.0009999738808577408, "loss": 0.1518, "num_input_tokens_seen": 27288416, "step": 12620 }, { "epoch": 2.0595432300163132, "grad_norm": 0.12507210671901703, "learning_rate": 0.000999973148245066, "loss": 0.0652, "num_input_tokens_seen": 27300032, "step": 12625 }, { "epoch": 2.060358890701468, "grad_norm": 0.20876716077327728, "learning_rate": 0.000999972405499801, "loss": 0.1517, "num_input_tokens_seen": 27310688, "step": 12630 }, { "epoch": 2.0611745513866233, "grad_norm": 0.09181343019008636, "learning_rate": 0.0009999716526219611, "loss": 0.0955, "num_input_tokens_seen": 27323424, "step": 12635 }, { "epoch": 2.061990212071778, "grad_norm": 0.134469673037529, "learning_rate": 0.0009999708896115613, "loss": 0.1265, "num_input_tokens_seen": 27333024, "step": 12640 }, { "epoch": 2.062805872756933, "grad_norm": 0.06973686814308167, "learning_rate": 0.0009999701164686173, "loss": 0.0691, "num_input_tokens_seen": 27344512, "step": 12645 }, { "epoch": 2.063621533442088, "grad_norm": 0.27596315741539, "learning_rate": 0.0009999693331931446, "loss": 0.0942, "num_input_tokens_seen": 27355328, "step": 12650 }, { "epoch": 2.064437194127243, "grad_norm": 0.029866395518183708, "learning_rate": 0.000999968539785159, "loss": 0.189, "num_input_tokens_seen": 27366688, "step": 12655 }, { "epoch": 2.065252854812398, "grad_norm": 0.07480645924806595, "learning_rate": 0.0009999677362446768, "loss": 0.1079, "num_input_tokens_seen": 27377376, "step": 12660 }, { "epoch": 2.066068515497553, "grad_norm": 0.11446642875671387, "learning_rate": 0.000999966922571714, "loss": 0.1362, "num_input_tokens_seen": 27387328, "step": 12665 }, { "epoch": 2.066884176182708, "grad_norm": 0.20092760026454926, "learning_rate": 0.0009999660987662876, "loss": 0.0884, "num_input_tokens_seen": 27399136, "step": 12670 }, { "epoch": 2.067699836867863, "grad_norm": 0.48023906350135803, "learning_rate": 0.0009999652648284136, "loss": 0.1254, "num_input_tokens_seen": 27409984, "step": 12675 }, { "epoch": 2.068515497553018, "grad_norm": 0.3658871352672577, "learning_rate": 0.0009999644207581092, "loss": 0.2625, "num_input_tokens_seen": 27421088, "step": 12680 }, { "epoch": 2.069331158238173, "grad_norm": 0.10626986622810364, "learning_rate": 0.000999963566555392, "loss": 0.0983, "num_input_tokens_seen": 27431648, "step": 12685 }, { "epoch": 2.070146818923328, "grad_norm": 0.19185417890548706, "learning_rate": 0.0009999627022202785, "loss": 0.2449, "num_input_tokens_seen": 27441440, "step": 12690 }, { "epoch": 2.070962479608483, "grad_norm": 0.2664777338504791, "learning_rate": 0.0009999618277527868, "loss": 0.1815, "num_input_tokens_seen": 27450496, "step": 12695 }, { "epoch": 2.0717781402936377, "grad_norm": 0.0740237906575203, "learning_rate": 0.0009999609431529345, "loss": 0.1454, "num_input_tokens_seen": 27461248, "step": 12700 }, { "epoch": 2.072593800978793, "grad_norm": 0.112830251455307, "learning_rate": 0.0009999600484207392, "loss": 0.1482, "num_input_tokens_seen": 27471104, "step": 12705 }, { "epoch": 2.073409461663948, "grad_norm": 0.07632242888212204, "learning_rate": 0.0009999591435562193, "loss": 0.1155, "num_input_tokens_seen": 27481376, "step": 12710 }, { "epoch": 2.0742251223491026, "grad_norm": 0.06909924000501633, "learning_rate": 0.0009999582285593932, "loss": 0.031, "num_input_tokens_seen": 27491872, "step": 12715 }, { "epoch": 2.075040783034258, "grad_norm": 0.046119239181280136, "learning_rate": 0.0009999573034302793, "loss": 0.0963, "num_input_tokens_seen": 27503712, "step": 12720 }, { "epoch": 2.0758564437194127, "grad_norm": 0.0311514250934124, "learning_rate": 0.0009999563681688964, "loss": 0.1303, "num_input_tokens_seen": 27514720, "step": 12725 }, { "epoch": 2.0766721044045675, "grad_norm": 0.030699364840984344, "learning_rate": 0.0009999554227752634, "loss": 0.0915, "num_input_tokens_seen": 27526016, "step": 12730 }, { "epoch": 2.0774877650897228, "grad_norm": 0.16419100761413574, "learning_rate": 0.0009999544672493997, "loss": 0.0642, "num_input_tokens_seen": 27536160, "step": 12735 }, { "epoch": 2.0783034257748776, "grad_norm": 0.08112215995788574, "learning_rate": 0.0009999535015913243, "loss": 0.1317, "num_input_tokens_seen": 27547296, "step": 12740 }, { "epoch": 2.0791190864600324, "grad_norm": 0.04285610839724541, "learning_rate": 0.0009999525258010571, "loss": 0.26, "num_input_tokens_seen": 27558688, "step": 12745 }, { "epoch": 2.0799347471451877, "grad_norm": 0.05731838569045067, "learning_rate": 0.0009999515398786177, "loss": 0.2337, "num_input_tokens_seen": 27570144, "step": 12750 }, { "epoch": 2.0807504078303425, "grad_norm": 0.02599678374826908, "learning_rate": 0.000999950543824026, "loss": 0.0605, "num_input_tokens_seen": 27581376, "step": 12755 }, { "epoch": 2.0815660685154977, "grad_norm": 0.05650242790579796, "learning_rate": 0.0009999495376373025, "loss": 0.1582, "num_input_tokens_seen": 27592480, "step": 12760 }, { "epoch": 2.0823817292006526, "grad_norm": 0.10208354890346527, "learning_rate": 0.0009999485213184672, "loss": 0.0866, "num_input_tokens_seen": 27603584, "step": 12765 }, { "epoch": 2.0831973898858074, "grad_norm": 0.06093911826610565, "learning_rate": 0.000999947494867541, "loss": 0.1478, "num_input_tokens_seen": 27615264, "step": 12770 }, { "epoch": 2.0840130505709626, "grad_norm": 0.15886317193508148, "learning_rate": 0.0009999464582845445, "loss": 0.1507, "num_input_tokens_seen": 27624800, "step": 12775 }, { "epoch": 2.0848287112561175, "grad_norm": 0.1170632541179657, "learning_rate": 0.0009999454115694989, "loss": 0.132, "num_input_tokens_seen": 27635456, "step": 12780 }, { "epoch": 2.0856443719412723, "grad_norm": 0.23480327427387238, "learning_rate": 0.0009999443547224253, "loss": 0.0773, "num_input_tokens_seen": 27646304, "step": 12785 }, { "epoch": 2.0864600326264275, "grad_norm": 0.051176343113183975, "learning_rate": 0.0009999432877433449, "loss": 0.1447, "num_input_tokens_seen": 27655456, "step": 12790 }, { "epoch": 2.0872756933115824, "grad_norm": 0.11672715097665787, "learning_rate": 0.0009999422106322798, "loss": 0.0529, "num_input_tokens_seen": 27665632, "step": 12795 }, { "epoch": 2.088091353996737, "grad_norm": 0.15267214179039001, "learning_rate": 0.0009999411233892516, "loss": 0.1734, "num_input_tokens_seen": 27677504, "step": 12800 }, { "epoch": 2.0889070146818924, "grad_norm": 0.011678201146423817, "learning_rate": 0.000999940026014282, "loss": 0.0411, "num_input_tokens_seen": 27688768, "step": 12805 }, { "epoch": 2.0897226753670473, "grad_norm": 0.06737668067216873, "learning_rate": 0.000999938918507394, "loss": 0.0999, "num_input_tokens_seen": 27699680, "step": 12810 }, { "epoch": 2.090538336052202, "grad_norm": 0.03292597457766533, "learning_rate": 0.0009999378008686093, "loss": 0.1877, "num_input_tokens_seen": 27709504, "step": 12815 }, { "epoch": 2.0913539967373573, "grad_norm": 0.12016480416059494, "learning_rate": 0.000999936673097951, "loss": 0.1935, "num_input_tokens_seen": 27720704, "step": 12820 }, { "epoch": 2.092169657422512, "grad_norm": 0.018072044476866722, "learning_rate": 0.0009999355351954418, "loss": 0.178, "num_input_tokens_seen": 27730656, "step": 12825 }, { "epoch": 2.0929853181076674, "grad_norm": 0.1713826060295105, "learning_rate": 0.0009999343871611045, "loss": 0.1951, "num_input_tokens_seen": 27740864, "step": 12830 }, { "epoch": 2.0938009787928222, "grad_norm": 0.10080626606941223, "learning_rate": 0.000999933228994963, "loss": 0.1041, "num_input_tokens_seen": 27751104, "step": 12835 }, { "epoch": 2.094616639477977, "grad_norm": 0.10050133615732193, "learning_rate": 0.00099993206069704, "loss": 0.0784, "num_input_tokens_seen": 27761760, "step": 12840 }, { "epoch": 2.0954323001631323, "grad_norm": 0.02649875171482563, "learning_rate": 0.0009999308822673599, "loss": 0.2457, "num_input_tokens_seen": 27772864, "step": 12845 }, { "epoch": 2.096247960848287, "grad_norm": 0.09067980200052261, "learning_rate": 0.000999929693705946, "loss": 0.1051, "num_input_tokens_seen": 27783936, "step": 12850 }, { "epoch": 2.097063621533442, "grad_norm": 0.13061358034610748, "learning_rate": 0.000999928495012823, "loss": 0.1225, "num_input_tokens_seen": 27794368, "step": 12855 }, { "epoch": 2.097879282218597, "grad_norm": 0.11418254673480988, "learning_rate": 0.0009999272861880148, "loss": 0.0877, "num_input_tokens_seen": 27804224, "step": 12860 }, { "epoch": 2.098694942903752, "grad_norm": 0.1471940577030182, "learning_rate": 0.0009999260672315456, "loss": 0.1568, "num_input_tokens_seen": 27815168, "step": 12865 }, { "epoch": 2.099510603588907, "grad_norm": 0.45913568139076233, "learning_rate": 0.0009999248381434406, "loss": 0.1507, "num_input_tokens_seen": 27825088, "step": 12870 }, { "epoch": 2.100326264274062, "grad_norm": 0.21608853340148926, "learning_rate": 0.0009999235989237249, "loss": 0.2859, "num_input_tokens_seen": 27834880, "step": 12875 }, { "epoch": 2.101141924959217, "grad_norm": 0.06812023371458054, "learning_rate": 0.0009999223495724228, "loss": 0.1004, "num_input_tokens_seen": 27845248, "step": 12880 }, { "epoch": 2.1019575856443717, "grad_norm": 0.02336515672504902, "learning_rate": 0.0009999210900895603, "loss": 0.1676, "num_input_tokens_seen": 27854144, "step": 12885 }, { "epoch": 2.102773246329527, "grad_norm": 0.10959352552890778, "learning_rate": 0.0009999198204751628, "loss": 0.2141, "num_input_tokens_seen": 27864000, "step": 12890 }, { "epoch": 2.103588907014682, "grad_norm": 0.10948460549116135, "learning_rate": 0.0009999185407292557, "loss": 0.1374, "num_input_tokens_seen": 27873856, "step": 12895 }, { "epoch": 2.104404567699837, "grad_norm": 0.05279287323355675, "learning_rate": 0.0009999172508518654, "loss": 0.0531, "num_input_tokens_seen": 27885088, "step": 12900 }, { "epoch": 2.105220228384992, "grad_norm": 0.11285223066806793, "learning_rate": 0.0009999159508430177, "loss": 0.1392, "num_input_tokens_seen": 27896256, "step": 12905 }, { "epoch": 2.1060358890701467, "grad_norm": 0.12476310133934021, "learning_rate": 0.000999914640702739, "loss": 0.169, "num_input_tokens_seen": 27905696, "step": 12910 }, { "epoch": 2.106851549755302, "grad_norm": 0.21431344747543335, "learning_rate": 0.000999913320431056, "loss": 0.0857, "num_input_tokens_seen": 27915808, "step": 12915 }, { "epoch": 2.107667210440457, "grad_norm": 0.04303182661533356, "learning_rate": 0.0009999119900279956, "loss": 0.0545, "num_input_tokens_seen": 27926528, "step": 12920 }, { "epoch": 2.1084828711256116, "grad_norm": 0.029864931479096413, "learning_rate": 0.0009999106494935843, "loss": 0.162, "num_input_tokens_seen": 27937120, "step": 12925 }, { "epoch": 2.109298531810767, "grad_norm": 0.05805061757564545, "learning_rate": 0.0009999092988278496, "loss": 0.0684, "num_input_tokens_seen": 27947648, "step": 12930 }, { "epoch": 2.1101141924959217, "grad_norm": 0.012730555608868599, "learning_rate": 0.0009999079380308186, "loss": 0.0532, "num_input_tokens_seen": 27958752, "step": 12935 }, { "epoch": 2.1109298531810765, "grad_norm": 0.05740470066666603, "learning_rate": 0.000999906567102519, "loss": 0.1548, "num_input_tokens_seen": 27969248, "step": 12940 }, { "epoch": 2.1117455138662318, "grad_norm": 0.2081340104341507, "learning_rate": 0.0009999051860429791, "loss": 0.2038, "num_input_tokens_seen": 27980256, "step": 12945 }, { "epoch": 2.1125611745513866, "grad_norm": 0.057117413729429245, "learning_rate": 0.000999903794852226, "loss": 0.0831, "num_input_tokens_seen": 27990016, "step": 12950 }, { "epoch": 2.1133768352365414, "grad_norm": 0.03829582408070564, "learning_rate": 0.0009999023935302886, "loss": 0.0685, "num_input_tokens_seen": 28000928, "step": 12955 }, { "epoch": 2.1141924959216967, "grad_norm": 0.05271393433213234, "learning_rate": 0.000999900982077195, "loss": 0.0568, "num_input_tokens_seen": 28009984, "step": 12960 }, { "epoch": 2.1150081566068515, "grad_norm": 0.050736140459775925, "learning_rate": 0.0009998995604929735, "loss": 0.0266, "num_input_tokens_seen": 28022560, "step": 12965 }, { "epoch": 2.1158238172920063, "grad_norm": 0.018366431817412376, "learning_rate": 0.0009998981287776536, "loss": 0.0346, "num_input_tokens_seen": 28032960, "step": 12970 }, { "epoch": 2.1166394779771616, "grad_norm": 0.18964123725891113, "learning_rate": 0.0009998966869312637, "loss": 0.0733, "num_input_tokens_seen": 28044512, "step": 12975 }, { "epoch": 2.1174551386623164, "grad_norm": 0.14065471291542053, "learning_rate": 0.0009998952349538335, "loss": 0.103, "num_input_tokens_seen": 28055040, "step": 12980 }, { "epoch": 2.1182707993474716, "grad_norm": 0.022199753671884537, "learning_rate": 0.000999893772845392, "loss": 0.0426, "num_input_tokens_seen": 28065952, "step": 12985 }, { "epoch": 2.1190864600326265, "grad_norm": 0.06245112419128418, "learning_rate": 0.0009998923006059692, "loss": 0.0593, "num_input_tokens_seen": 28077280, "step": 12990 }, { "epoch": 2.1199021207177813, "grad_norm": 0.08001261949539185, "learning_rate": 0.0009998908182355948, "loss": 0.0577, "num_input_tokens_seen": 28088448, "step": 12995 }, { "epoch": 2.1207177814029365, "grad_norm": 0.04466702789068222, "learning_rate": 0.0009998893257342986, "loss": 0.0837, "num_input_tokens_seen": 28099584, "step": 13000 }, { "epoch": 2.1215334420880914, "grad_norm": 0.0033461377024650574, "learning_rate": 0.000999887823102111, "loss": 0.1053, "num_input_tokens_seen": 28110496, "step": 13005 }, { "epoch": 2.122349102773246, "grad_norm": 0.029360774904489517, "learning_rate": 0.0009998863103390628, "loss": 0.2003, "num_input_tokens_seen": 28119616, "step": 13010 }, { "epoch": 2.1231647634584014, "grad_norm": 0.047707974910736084, "learning_rate": 0.0009998847874451843, "loss": 0.1026, "num_input_tokens_seen": 28129664, "step": 13015 }, { "epoch": 2.1239804241435563, "grad_norm": 0.39130014181137085, "learning_rate": 0.0009998832544205064, "loss": 0.273, "num_input_tokens_seen": 28140576, "step": 13020 }, { "epoch": 2.124796084828711, "grad_norm": 0.1589263379573822, "learning_rate": 0.0009998817112650603, "loss": 0.1321, "num_input_tokens_seen": 28150976, "step": 13025 }, { "epoch": 2.1256117455138663, "grad_norm": 0.03926459699869156, "learning_rate": 0.000999880157978877, "loss": 0.0811, "num_input_tokens_seen": 28161728, "step": 13030 }, { "epoch": 2.126427406199021, "grad_norm": 0.09237375855445862, "learning_rate": 0.0009998785945619882, "loss": 0.0559, "num_input_tokens_seen": 28172896, "step": 13035 }, { "epoch": 2.1272430668841764, "grad_norm": 0.06005003675818443, "learning_rate": 0.0009998770210144256, "loss": 0.1213, "num_input_tokens_seen": 28181184, "step": 13040 }, { "epoch": 2.1280587275693312, "grad_norm": 0.15633663535118103, "learning_rate": 0.000999875437336221, "loss": 0.1371, "num_input_tokens_seen": 28191552, "step": 13045 }, { "epoch": 2.128874388254486, "grad_norm": 0.12029604613780975, "learning_rate": 0.0009998738435274064, "loss": 0.194, "num_input_tokens_seen": 28203168, "step": 13050 }, { "epoch": 2.1296900489396413, "grad_norm": 0.10934768617153168, "learning_rate": 0.0009998722395880145, "loss": 0.056, "num_input_tokens_seen": 28214720, "step": 13055 }, { "epoch": 2.130505709624796, "grad_norm": 0.016308283433318138, "learning_rate": 0.0009998706255180774, "loss": 0.1291, "num_input_tokens_seen": 28225472, "step": 13060 }, { "epoch": 2.131321370309951, "grad_norm": 0.35608285665512085, "learning_rate": 0.0009998690013176279, "loss": 0.2248, "num_input_tokens_seen": 28235328, "step": 13065 }, { "epoch": 2.132137030995106, "grad_norm": 0.29491209983825684, "learning_rate": 0.0009998673669866988, "loss": 0.3122, "num_input_tokens_seen": 28246912, "step": 13070 }, { "epoch": 2.132952691680261, "grad_norm": 0.09003280103206635, "learning_rate": 0.0009998657225253236, "loss": 0.2082, "num_input_tokens_seen": 28258368, "step": 13075 }, { "epoch": 2.133768352365416, "grad_norm": 0.11987301707267761, "learning_rate": 0.0009998640679335354, "loss": 0.0845, "num_input_tokens_seen": 28268544, "step": 13080 }, { "epoch": 2.134584013050571, "grad_norm": 0.031110180541872978, "learning_rate": 0.0009998624032113677, "loss": 0.0505, "num_input_tokens_seen": 28280576, "step": 13085 }, { "epoch": 2.135399673735726, "grad_norm": 0.11604081839323044, "learning_rate": 0.0009998607283588543, "loss": 0.0855, "num_input_tokens_seen": 28290944, "step": 13090 }, { "epoch": 2.1362153344208807, "grad_norm": 0.09189382195472717, "learning_rate": 0.000999859043376029, "loss": 0.1483, "num_input_tokens_seen": 28302176, "step": 13095 }, { "epoch": 2.137030995106036, "grad_norm": 0.026161260902881622, "learning_rate": 0.0009998573482629264, "loss": 0.0861, "num_input_tokens_seen": 28312864, "step": 13100 }, { "epoch": 2.137846655791191, "grad_norm": 0.10673241317272186, "learning_rate": 0.0009998556430195803, "loss": 0.0502, "num_input_tokens_seen": 28323328, "step": 13105 }, { "epoch": 2.1386623164763456, "grad_norm": 0.06484927237033844, "learning_rate": 0.0009998539276460255, "loss": 0.0325, "num_input_tokens_seen": 28335264, "step": 13110 }, { "epoch": 2.139477977161501, "grad_norm": 0.08133813738822937, "learning_rate": 0.0009998522021422967, "loss": 0.2518, "num_input_tokens_seen": 28346400, "step": 13115 }, { "epoch": 2.1402936378466557, "grad_norm": 0.2059800624847412, "learning_rate": 0.000999850466508429, "loss": 0.1144, "num_input_tokens_seen": 28357888, "step": 13120 }, { "epoch": 2.141109298531811, "grad_norm": 0.008014354854822159, "learning_rate": 0.0009998487207444574, "loss": 0.0368, "num_input_tokens_seen": 28369888, "step": 13125 }, { "epoch": 2.141924959216966, "grad_norm": 0.0397895947098732, "learning_rate": 0.0009998469648504174, "loss": 0.0487, "num_input_tokens_seen": 28380736, "step": 13130 }, { "epoch": 2.1427406199021206, "grad_norm": 0.05694444105029106, "learning_rate": 0.0009998451988263444, "loss": 0.238, "num_input_tokens_seen": 28390816, "step": 13135 }, { "epoch": 2.143556280587276, "grad_norm": 0.1789853572845459, "learning_rate": 0.0009998434226722746, "loss": 0.0606, "num_input_tokens_seen": 28402080, "step": 13140 }, { "epoch": 2.1443719412724307, "grad_norm": 0.24429011344909668, "learning_rate": 0.0009998416363882438, "loss": 0.1706, "num_input_tokens_seen": 28412768, "step": 13145 }, { "epoch": 2.1451876019575855, "grad_norm": 0.032388217747211456, "learning_rate": 0.0009998398399742878, "loss": 0.0746, "num_input_tokens_seen": 28425056, "step": 13150 }, { "epoch": 2.1460032626427408, "grad_norm": 0.08640412241220474, "learning_rate": 0.0009998380334304436, "loss": 0.1357, "num_input_tokens_seen": 28436800, "step": 13155 }, { "epoch": 2.1468189233278956, "grad_norm": 0.08437643945217133, "learning_rate": 0.0009998362167567476, "loss": 0.0239, "num_input_tokens_seen": 28447360, "step": 13160 }, { "epoch": 2.1476345840130504, "grad_norm": 0.03582681342959404, "learning_rate": 0.0009998343899532364, "loss": 0.0438, "num_input_tokens_seen": 28457888, "step": 13165 }, { "epoch": 2.1484502446982057, "grad_norm": 0.4298367202281952, "learning_rate": 0.0009998325530199473, "loss": 0.1974, "num_input_tokens_seen": 28469472, "step": 13170 }, { "epoch": 2.1492659053833605, "grad_norm": 0.034253429621458054, "learning_rate": 0.0009998307059569174, "loss": 0.2651, "num_input_tokens_seen": 28479584, "step": 13175 }, { "epoch": 2.1500815660685153, "grad_norm": 0.044094908982515335, "learning_rate": 0.0009998288487641843, "loss": 0.0908, "num_input_tokens_seen": 28491520, "step": 13180 }, { "epoch": 2.1508972267536706, "grad_norm": 0.020100802183151245, "learning_rate": 0.0009998269814417854, "loss": 0.1055, "num_input_tokens_seen": 28501472, "step": 13185 }, { "epoch": 2.1517128874388254, "grad_norm": 0.05620182305574417, "learning_rate": 0.0009998251039897586, "loss": 0.1607, "num_input_tokens_seen": 28512320, "step": 13190 }, { "epoch": 2.15252854812398, "grad_norm": 0.27030977606773376, "learning_rate": 0.000999823216408142, "loss": 0.1351, "num_input_tokens_seen": 28523776, "step": 13195 }, { "epoch": 2.1533442088091355, "grad_norm": 0.1298169493675232, "learning_rate": 0.0009998213186969739, "loss": 0.157, "num_input_tokens_seen": 28534016, "step": 13200 }, { "epoch": 2.1541598694942903, "grad_norm": 0.06606268137693405, "learning_rate": 0.0009998194108562927, "loss": 0.0791, "num_input_tokens_seen": 28544096, "step": 13205 }, { "epoch": 2.1549755301794455, "grad_norm": 0.07334471493959427, "learning_rate": 0.000999817492886137, "loss": 0.0535, "num_input_tokens_seen": 28554912, "step": 13210 }, { "epoch": 2.1557911908646004, "grad_norm": 0.08001653105020523, "learning_rate": 0.000999815564786546, "loss": 0.0618, "num_input_tokens_seen": 28564416, "step": 13215 }, { "epoch": 2.156606851549755, "grad_norm": 0.09251904487609863, "learning_rate": 0.0009998136265575582, "loss": 0.1092, "num_input_tokens_seen": 28575328, "step": 13220 }, { "epoch": 2.1574225122349104, "grad_norm": 0.16219399869441986, "learning_rate": 0.0009998116781992133, "loss": 0.176, "num_input_tokens_seen": 28584992, "step": 13225 }, { "epoch": 2.1582381729200653, "grad_norm": 0.1895899772644043, "learning_rate": 0.0009998097197115507, "loss": 0.0768, "num_input_tokens_seen": 28594720, "step": 13230 }, { "epoch": 2.15905383360522, "grad_norm": 0.10811378061771393, "learning_rate": 0.00099980775109461, "loss": 0.0716, "num_input_tokens_seen": 28606560, "step": 13235 }, { "epoch": 2.1598694942903753, "grad_norm": 0.006182703655213118, "learning_rate": 0.0009998057723484312, "loss": 0.0508, "num_input_tokens_seen": 28617312, "step": 13240 }, { "epoch": 2.16068515497553, "grad_norm": 0.04205413907766342, "learning_rate": 0.0009998037834730545, "loss": 0.2351, "num_input_tokens_seen": 28629696, "step": 13245 }, { "epoch": 2.161500815660685, "grad_norm": 0.05634569004178047, "learning_rate": 0.0009998017844685201, "loss": 0.0308, "num_input_tokens_seen": 28640704, "step": 13250 }, { "epoch": 2.1623164763458402, "grad_norm": 0.006111837457865477, "learning_rate": 0.0009997997753348684, "loss": 0.2064, "num_input_tokens_seen": 28650432, "step": 13255 }, { "epoch": 2.163132137030995, "grad_norm": 0.23091314733028412, "learning_rate": 0.0009997977560721402, "loss": 0.134, "num_input_tokens_seen": 28662848, "step": 13260 }, { "epoch": 2.1639477977161503, "grad_norm": 0.07334164530038834, "learning_rate": 0.0009997957266803766, "loss": 0.1186, "num_input_tokens_seen": 28673568, "step": 13265 }, { "epoch": 2.164763458401305, "grad_norm": 0.0321493074297905, "learning_rate": 0.0009997936871596182, "loss": 0.1529, "num_input_tokens_seen": 28682784, "step": 13270 }, { "epoch": 2.16557911908646, "grad_norm": 0.055742476135492325, "learning_rate": 0.000999791637509907, "loss": 0.1244, "num_input_tokens_seen": 28692480, "step": 13275 }, { "epoch": 2.166394779771615, "grad_norm": 0.19681106507778168, "learning_rate": 0.0009997895777312843, "loss": 0.1311, "num_input_tokens_seen": 28702144, "step": 13280 }, { "epoch": 2.16721044045677, "grad_norm": 0.059670474380254745, "learning_rate": 0.0009997875078237915, "loss": 0.1195, "num_input_tokens_seen": 28711072, "step": 13285 }, { "epoch": 2.168026101141925, "grad_norm": 0.1475917249917984, "learning_rate": 0.000999785427787471, "loss": 0.0813, "num_input_tokens_seen": 28721504, "step": 13290 }, { "epoch": 2.16884176182708, "grad_norm": 0.005357819609344006, "learning_rate": 0.0009997833376223647, "loss": 0.0831, "num_input_tokens_seen": 28733184, "step": 13295 }, { "epoch": 2.169657422512235, "grad_norm": 0.25688982009887695, "learning_rate": 0.000999781237328515, "loss": 0.1804, "num_input_tokens_seen": 28743584, "step": 13300 }, { "epoch": 2.1704730831973897, "grad_norm": 0.15253715217113495, "learning_rate": 0.0009997791269059646, "loss": 0.1502, "num_input_tokens_seen": 28754432, "step": 13305 }, { "epoch": 2.171288743882545, "grad_norm": 0.050005823373794556, "learning_rate": 0.0009997770063547562, "loss": 0.0221, "num_input_tokens_seen": 28764928, "step": 13310 }, { "epoch": 2.1721044045677, "grad_norm": 0.09376704692840576, "learning_rate": 0.0009997748756749327, "loss": 0.053, "num_input_tokens_seen": 28775648, "step": 13315 }, { "epoch": 2.1729200652528546, "grad_norm": 0.1947200894355774, "learning_rate": 0.0009997727348665373, "loss": 0.07, "num_input_tokens_seen": 28787040, "step": 13320 }, { "epoch": 2.17373572593801, "grad_norm": 0.04367386922240257, "learning_rate": 0.0009997705839296135, "loss": 0.0663, "num_input_tokens_seen": 28798304, "step": 13325 }, { "epoch": 2.1745513866231647, "grad_norm": 0.15603747963905334, "learning_rate": 0.0009997684228642049, "loss": 0.1732, "num_input_tokens_seen": 28808608, "step": 13330 }, { "epoch": 2.1753670473083195, "grad_norm": 0.052698567509651184, "learning_rate": 0.0009997662516703552, "loss": 0.0443, "num_input_tokens_seen": 28819520, "step": 13335 }, { "epoch": 2.176182707993475, "grad_norm": 0.016307028010487556, "learning_rate": 0.0009997640703481082, "loss": 0.1428, "num_input_tokens_seen": 28830912, "step": 13340 }, { "epoch": 2.1769983686786296, "grad_norm": 0.029360493645071983, "learning_rate": 0.0009997618788975084, "loss": 0.0631, "num_input_tokens_seen": 28842208, "step": 13345 }, { "epoch": 2.177814029363785, "grad_norm": 0.061105113476514816, "learning_rate": 0.0009997596773186, "loss": 0.0664, "num_input_tokens_seen": 28852864, "step": 13350 }, { "epoch": 2.1786296900489397, "grad_norm": 0.024250203743577003, "learning_rate": 0.000999757465611428, "loss": 0.0284, "num_input_tokens_seen": 28862400, "step": 13355 }, { "epoch": 2.1794453507340945, "grad_norm": 0.22527538239955902, "learning_rate": 0.000999755243776037, "loss": 0.0893, "num_input_tokens_seen": 28872832, "step": 13360 }, { "epoch": 2.1802610114192498, "grad_norm": 0.27681607007980347, "learning_rate": 0.000999753011812472, "loss": 0.0944, "num_input_tokens_seen": 28883008, "step": 13365 }, { "epoch": 2.1810766721044046, "grad_norm": 0.020333116874098778, "learning_rate": 0.000999750769720778, "loss": 0.0301, "num_input_tokens_seen": 28894976, "step": 13370 }, { "epoch": 2.1818923327895594, "grad_norm": 0.2983275055885315, "learning_rate": 0.0009997485175010008, "loss": 0.1387, "num_input_tokens_seen": 28905888, "step": 13375 }, { "epoch": 2.1827079934747147, "grad_norm": 0.15638279914855957, "learning_rate": 0.000999746255153186, "loss": 0.1873, "num_input_tokens_seen": 28916640, "step": 13380 }, { "epoch": 2.1835236541598695, "grad_norm": 0.34206530451774597, "learning_rate": 0.0009997439826773791, "loss": 0.175, "num_input_tokens_seen": 28927424, "step": 13385 }, { "epoch": 2.1843393148450243, "grad_norm": 0.013786377385258675, "learning_rate": 0.0009997417000736266, "loss": 0.1524, "num_input_tokens_seen": 28938240, "step": 13390 }, { "epoch": 2.1851549755301796, "grad_norm": 0.02515227347612381, "learning_rate": 0.0009997394073419747, "loss": 0.0392, "num_input_tokens_seen": 28948192, "step": 13395 }, { "epoch": 2.1859706362153344, "grad_norm": 0.24523992836475372, "learning_rate": 0.0009997371044824697, "loss": 0.0854, "num_input_tokens_seen": 28958784, "step": 13400 }, { "epoch": 2.186786296900489, "grad_norm": 0.011088637635111809, "learning_rate": 0.0009997347914951582, "loss": 0.1012, "num_input_tokens_seen": 28969568, "step": 13405 }, { "epoch": 2.1876019575856445, "grad_norm": 0.21824337542057037, "learning_rate": 0.0009997324683800872, "loss": 0.1296, "num_input_tokens_seen": 28980096, "step": 13410 }, { "epoch": 2.1884176182707993, "grad_norm": 0.07013077288866043, "learning_rate": 0.0009997301351373038, "loss": 0.0685, "num_input_tokens_seen": 28990752, "step": 13415 }, { "epoch": 2.189233278955954, "grad_norm": 0.008021237328648567, "learning_rate": 0.0009997277917668552, "loss": 0.1336, "num_input_tokens_seen": 29001248, "step": 13420 }, { "epoch": 2.1900489396411094, "grad_norm": 0.3219771087169647, "learning_rate": 0.000999725438268789, "loss": 0.1138, "num_input_tokens_seen": 29011520, "step": 13425 }, { "epoch": 2.190864600326264, "grad_norm": 0.05944007635116577, "learning_rate": 0.0009997230746431529, "loss": 0.0874, "num_input_tokens_seen": 29022336, "step": 13430 }, { "epoch": 2.1916802610114194, "grad_norm": 0.24976307153701782, "learning_rate": 0.0009997207008899946, "loss": 0.1174, "num_input_tokens_seen": 29034464, "step": 13435 }, { "epoch": 2.1924959216965743, "grad_norm": 0.09191533923149109, "learning_rate": 0.0009997183170093625, "loss": 0.1743, "num_input_tokens_seen": 29045728, "step": 13440 }, { "epoch": 2.193311582381729, "grad_norm": 0.01558864489197731, "learning_rate": 0.000999715923001305, "loss": 0.2273, "num_input_tokens_seen": 29055392, "step": 13445 }, { "epoch": 2.1941272430668843, "grad_norm": 0.03274226933717728, "learning_rate": 0.00099971351886587, "loss": 0.0776, "num_input_tokens_seen": 29066304, "step": 13450 }, { "epoch": 2.194942903752039, "grad_norm": 0.08572755008935928, "learning_rate": 0.0009997111046031067, "loss": 0.0711, "num_input_tokens_seen": 29077344, "step": 13455 }, { "epoch": 2.195758564437194, "grad_norm": 0.033109501004219055, "learning_rate": 0.000999708680213064, "loss": 0.0327, "num_input_tokens_seen": 29088672, "step": 13460 }, { "epoch": 2.1965742251223492, "grad_norm": 0.15404462814331055, "learning_rate": 0.000999706245695791, "loss": 0.1834, "num_input_tokens_seen": 29100224, "step": 13465 }, { "epoch": 2.197389885807504, "grad_norm": 0.1067809909582138, "learning_rate": 0.0009997038010513368, "loss": 0.0657, "num_input_tokens_seen": 29110336, "step": 13470 }, { "epoch": 2.198205546492659, "grad_norm": 0.3593546450138092, "learning_rate": 0.0009997013462797514, "loss": 0.1793, "num_input_tokens_seen": 29121280, "step": 13475 }, { "epoch": 2.199021207177814, "grad_norm": 0.13226577639579773, "learning_rate": 0.000999698881381084, "loss": 0.234, "num_input_tokens_seen": 29133056, "step": 13480 }, { "epoch": 2.199836867862969, "grad_norm": 0.037532739341259, "learning_rate": 0.0009996964063553851, "loss": 0.0819, "num_input_tokens_seen": 29144544, "step": 13485 }, { "epoch": 2.200652528548124, "grad_norm": 0.03458193317055702, "learning_rate": 0.0009996939212027045, "loss": 0.126, "num_input_tokens_seen": 29155936, "step": 13490 }, { "epoch": 2.201468189233279, "grad_norm": 0.11294587701559067, "learning_rate": 0.0009996914259230928, "loss": 0.1229, "num_input_tokens_seen": 29166688, "step": 13495 }, { "epoch": 2.202283849918434, "grad_norm": 0.1393410861492157, "learning_rate": 0.0009996889205166003, "loss": 0.124, "num_input_tokens_seen": 29177568, "step": 13500 }, { "epoch": 2.203099510603589, "grad_norm": 0.06924308836460114, "learning_rate": 0.000999686404983278, "loss": 0.1475, "num_input_tokens_seen": 29187936, "step": 13505 }, { "epoch": 2.203915171288744, "grad_norm": 0.023421689867973328, "learning_rate": 0.0009996838793231771, "loss": 0.155, "num_input_tokens_seen": 29198208, "step": 13510 }, { "epoch": 2.2047308319738987, "grad_norm": 0.1216554045677185, "learning_rate": 0.0009996813435363481, "loss": 0.1129, "num_input_tokens_seen": 29209248, "step": 13515 }, { "epoch": 2.205546492659054, "grad_norm": 0.29664355516433716, "learning_rate": 0.000999678797622843, "loss": 0.2195, "num_input_tokens_seen": 29220032, "step": 13520 }, { "epoch": 2.206362153344209, "grad_norm": 0.22446954250335693, "learning_rate": 0.000999676241582713, "loss": 0.1816, "num_input_tokens_seen": 29230432, "step": 13525 }, { "epoch": 2.2071778140293636, "grad_norm": 0.077766552567482, "learning_rate": 0.0009996736754160102, "loss": 0.0407, "num_input_tokens_seen": 29241024, "step": 13530 }, { "epoch": 2.207993474714519, "grad_norm": 0.12766964733600616, "learning_rate": 0.0009996710991227865, "loss": 0.1988, "num_input_tokens_seen": 29252384, "step": 13535 }, { "epoch": 2.2088091353996737, "grad_norm": 0.04737265780568123, "learning_rate": 0.000999668512703094, "loss": 0.13, "num_input_tokens_seen": 29263648, "step": 13540 }, { "epoch": 2.2096247960848285, "grad_norm": 0.1462884545326233, "learning_rate": 0.0009996659161569852, "loss": 0.1695, "num_input_tokens_seen": 29274016, "step": 13545 }, { "epoch": 2.210440456769984, "grad_norm": 0.026576614007353783, "learning_rate": 0.0009996633094845127, "loss": 0.163, "num_input_tokens_seen": 29284128, "step": 13550 }, { "epoch": 2.2112561174551386, "grad_norm": 0.16052138805389404, "learning_rate": 0.0009996606926857296, "loss": 0.0733, "num_input_tokens_seen": 29294976, "step": 13555 }, { "epoch": 2.2120717781402934, "grad_norm": 0.29137226939201355, "learning_rate": 0.0009996580657606886, "loss": 0.2453, "num_input_tokens_seen": 29305696, "step": 13560 }, { "epoch": 2.2128874388254487, "grad_norm": 0.14441095292568207, "learning_rate": 0.0009996554287094428, "loss": 0.1043, "num_input_tokens_seen": 29316000, "step": 13565 }, { "epoch": 2.2137030995106035, "grad_norm": 0.10074219852685928, "learning_rate": 0.0009996527815320463, "loss": 0.1148, "num_input_tokens_seen": 29327872, "step": 13570 }, { "epoch": 2.2145187601957588, "grad_norm": 0.043885741382837296, "learning_rate": 0.000999650124228552, "loss": 0.0642, "num_input_tokens_seen": 29338528, "step": 13575 }, { "epoch": 2.2153344208809136, "grad_norm": 0.29121461510658264, "learning_rate": 0.0009996474567990142, "loss": 0.2077, "num_input_tokens_seen": 29349568, "step": 13580 }, { "epoch": 2.2161500815660684, "grad_norm": 0.026214681565761566, "learning_rate": 0.0009996447792434868, "loss": 0.1421, "num_input_tokens_seen": 29360544, "step": 13585 }, { "epoch": 2.2169657422512237, "grad_norm": 0.11622193455696106, "learning_rate": 0.000999642091562024, "loss": 0.129, "num_input_tokens_seen": 29371424, "step": 13590 }, { "epoch": 2.2177814029363785, "grad_norm": 0.09396478533744812, "learning_rate": 0.0009996393937546806, "loss": 0.1663, "num_input_tokens_seen": 29382176, "step": 13595 }, { "epoch": 2.2185970636215333, "grad_norm": 0.19870883226394653, "learning_rate": 0.000999636685821511, "loss": 0.0664, "num_input_tokens_seen": 29392672, "step": 13600 }, { "epoch": 2.2194127243066886, "grad_norm": 0.013889658264815807, "learning_rate": 0.0009996339677625702, "loss": 0.0608, "num_input_tokens_seen": 29402816, "step": 13605 }, { "epoch": 2.2202283849918434, "grad_norm": 0.20676304399967194, "learning_rate": 0.000999631239577913, "loss": 0.1154, "num_input_tokens_seen": 29415104, "step": 13610 }, { "epoch": 2.221044045676998, "grad_norm": 0.3030164837837219, "learning_rate": 0.000999628501267595, "loss": 0.158, "num_input_tokens_seen": 29425888, "step": 13615 }, { "epoch": 2.2218597063621535, "grad_norm": 0.0975450873374939, "learning_rate": 0.0009996257528316716, "loss": 0.0996, "num_input_tokens_seen": 29436672, "step": 13620 }, { "epoch": 2.2226753670473083, "grad_norm": 0.027641698718070984, "learning_rate": 0.0009996229942701984, "loss": 0.2582, "num_input_tokens_seen": 29447744, "step": 13625 }, { "epoch": 2.223491027732463, "grad_norm": 0.3274204730987549, "learning_rate": 0.0009996202255832317, "loss": 0.1159, "num_input_tokens_seen": 29458080, "step": 13630 }, { "epoch": 2.2243066884176184, "grad_norm": 0.1280737817287445, "learning_rate": 0.000999617446770827, "loss": 0.1324, "num_input_tokens_seen": 29468928, "step": 13635 }, { "epoch": 2.225122349102773, "grad_norm": 0.027391565963625908, "learning_rate": 0.0009996146578330409, "loss": 0.0873, "num_input_tokens_seen": 29478624, "step": 13640 }, { "epoch": 2.225938009787928, "grad_norm": 0.03806430846452713, "learning_rate": 0.0009996118587699302, "loss": 0.0518, "num_input_tokens_seen": 29489408, "step": 13645 }, { "epoch": 2.2267536704730833, "grad_norm": 0.10042430460453033, "learning_rate": 0.0009996090495815514, "loss": 0.0661, "num_input_tokens_seen": 29498912, "step": 13650 }, { "epoch": 2.227569331158238, "grad_norm": 0.2514718770980835, "learning_rate": 0.000999606230267961, "loss": 0.1197, "num_input_tokens_seen": 29509824, "step": 13655 }, { "epoch": 2.2283849918433933, "grad_norm": 0.39634665846824646, "learning_rate": 0.000999603400829217, "loss": 0.2653, "num_input_tokens_seen": 29521344, "step": 13660 }, { "epoch": 2.229200652528548, "grad_norm": 0.3142750561237335, "learning_rate": 0.0009996005612653762, "loss": 0.1665, "num_input_tokens_seen": 29533152, "step": 13665 }, { "epoch": 2.230016313213703, "grad_norm": 0.06737440079450607, "learning_rate": 0.000999597711576496, "loss": 0.0555, "num_input_tokens_seen": 29543360, "step": 13670 }, { "epoch": 2.2308319738988582, "grad_norm": 0.06384480744600296, "learning_rate": 0.0009995948517626347, "loss": 0.1319, "num_input_tokens_seen": 29555520, "step": 13675 }, { "epoch": 2.231647634584013, "grad_norm": 0.057452812790870667, "learning_rate": 0.0009995919818238496, "loss": 0.0955, "num_input_tokens_seen": 29566528, "step": 13680 }, { "epoch": 2.232463295269168, "grad_norm": 0.17521750926971436, "learning_rate": 0.0009995891017601996, "loss": 0.0957, "num_input_tokens_seen": 29578400, "step": 13685 }, { "epoch": 2.233278955954323, "grad_norm": 0.017670799046754837, "learning_rate": 0.0009995862115717426, "loss": 0.116, "num_input_tokens_seen": 29590528, "step": 13690 }, { "epoch": 2.234094616639478, "grad_norm": 0.03287632763385773, "learning_rate": 0.000999583311258537, "loss": 0.1225, "num_input_tokens_seen": 29601856, "step": 13695 }, { "epoch": 2.2349102773246328, "grad_norm": 0.12917554378509521, "learning_rate": 0.000999580400820642, "loss": 0.118, "num_input_tokens_seen": 29613344, "step": 13700 }, { "epoch": 2.235725938009788, "grad_norm": 0.060185208916664124, "learning_rate": 0.0009995774802581165, "loss": 0.0905, "num_input_tokens_seen": 29622656, "step": 13705 }, { "epoch": 2.236541598694943, "grad_norm": 0.08099085092544556, "learning_rate": 0.0009995745495710194, "loss": 0.1697, "num_input_tokens_seen": 29633824, "step": 13710 }, { "epoch": 2.237357259380098, "grad_norm": 0.015069660730659962, "learning_rate": 0.0009995716087594104, "loss": 0.0533, "num_input_tokens_seen": 29644608, "step": 13715 }, { "epoch": 2.238172920065253, "grad_norm": 0.1912614107131958, "learning_rate": 0.000999568657823349, "loss": 0.1524, "num_input_tokens_seen": 29655648, "step": 13720 }, { "epoch": 2.2389885807504077, "grad_norm": 0.07988587021827698, "learning_rate": 0.000999565696762895, "loss": 0.0696, "num_input_tokens_seen": 29666336, "step": 13725 }, { "epoch": 2.239804241435563, "grad_norm": 0.10382115095853806, "learning_rate": 0.0009995627255781083, "loss": 0.1417, "num_input_tokens_seen": 29677792, "step": 13730 }, { "epoch": 2.240619902120718, "grad_norm": 0.004169138614088297, "learning_rate": 0.0009995597442690493, "loss": 0.0632, "num_input_tokens_seen": 29687840, "step": 13735 }, { "epoch": 2.2414355628058726, "grad_norm": 0.08007065951824188, "learning_rate": 0.0009995567528357785, "loss": 0.0196, "num_input_tokens_seen": 29698880, "step": 13740 }, { "epoch": 2.242251223491028, "grad_norm": 0.04466477409005165, "learning_rate": 0.0009995537512783562, "loss": 0.0859, "num_input_tokens_seen": 29709408, "step": 13745 }, { "epoch": 2.2430668841761827, "grad_norm": 0.003576258197426796, "learning_rate": 0.0009995507395968435, "loss": 0.0557, "num_input_tokens_seen": 29719712, "step": 13750 }, { "epoch": 2.2438825448613375, "grad_norm": 0.2553325593471527, "learning_rate": 0.0009995477177913014, "loss": 0.2313, "num_input_tokens_seen": 29730144, "step": 13755 }, { "epoch": 2.244698205546493, "grad_norm": 0.10004813224077225, "learning_rate": 0.0009995446858617908, "loss": 0.1032, "num_input_tokens_seen": 29741408, "step": 13760 }, { "epoch": 2.2455138662316476, "grad_norm": 0.08300987631082535, "learning_rate": 0.0009995416438083736, "loss": 0.169, "num_input_tokens_seen": 29752736, "step": 13765 }, { "epoch": 2.2463295269168024, "grad_norm": 0.11108089983463287, "learning_rate": 0.0009995385916311112, "loss": 0.0882, "num_input_tokens_seen": 29763680, "step": 13770 }, { "epoch": 2.2471451876019577, "grad_norm": 0.21327409148216248, "learning_rate": 0.0009995355293300656, "loss": 0.1918, "num_input_tokens_seen": 29773472, "step": 13775 }, { "epoch": 2.2479608482871125, "grad_norm": 0.0419314019382, "learning_rate": 0.0009995324569052988, "loss": 0.3502, "num_input_tokens_seen": 29783776, "step": 13780 }, { "epoch": 2.2487765089722673, "grad_norm": 0.06031573563814163, "learning_rate": 0.000999529374356873, "loss": 0.1638, "num_input_tokens_seen": 29794752, "step": 13785 }, { "epoch": 2.2495921696574226, "grad_norm": 0.03626589849591255, "learning_rate": 0.0009995262816848507, "loss": 0.0725, "num_input_tokens_seen": 29805920, "step": 13790 }, { "epoch": 2.2504078303425774, "grad_norm": 0.009249306283891201, "learning_rate": 0.0009995231788892949, "loss": 0.055, "num_input_tokens_seen": 29817184, "step": 13795 }, { "epoch": 2.2512234910277327, "grad_norm": 0.015564526431262493, "learning_rate": 0.000999520065970268, "loss": 0.0539, "num_input_tokens_seen": 29828448, "step": 13800 }, { "epoch": 2.2520391517128875, "grad_norm": 0.13924640417099, "learning_rate": 0.000999516942927833, "loss": 0.1173, "num_input_tokens_seen": 29840512, "step": 13805 }, { "epoch": 2.2528548123980423, "grad_norm": 0.09099038690328598, "learning_rate": 0.0009995138097620537, "loss": 0.0805, "num_input_tokens_seen": 29852608, "step": 13810 }, { "epoch": 2.2536704730831976, "grad_norm": 0.2963365912437439, "learning_rate": 0.0009995106664729934, "loss": 0.1063, "num_input_tokens_seen": 29863488, "step": 13815 }, { "epoch": 2.2544861337683524, "grad_norm": 0.11496601998806, "learning_rate": 0.0009995075130607158, "loss": 0.1043, "num_input_tokens_seen": 29874752, "step": 13820 }, { "epoch": 2.255301794453507, "grad_norm": 0.09994488954544067, "learning_rate": 0.0009995043495252848, "loss": 0.2477, "num_input_tokens_seen": 29886144, "step": 13825 }, { "epoch": 2.2561174551386625, "grad_norm": 0.11016276478767395, "learning_rate": 0.0009995011758667644, "loss": 0.3073, "num_input_tokens_seen": 29896960, "step": 13830 }, { "epoch": 2.2569331158238173, "grad_norm": 0.036299578845500946, "learning_rate": 0.000999497992085219, "loss": 0.0813, "num_input_tokens_seen": 29908096, "step": 13835 }, { "epoch": 2.257748776508972, "grad_norm": 0.0857962816953659, "learning_rate": 0.0009994947981807132, "loss": 0.1517, "num_input_tokens_seen": 29918880, "step": 13840 }, { "epoch": 2.2585644371941274, "grad_norm": 0.37297701835632324, "learning_rate": 0.0009994915941533115, "loss": 0.2363, "num_input_tokens_seen": 29929152, "step": 13845 }, { "epoch": 2.259380097879282, "grad_norm": 0.08345893025398254, "learning_rate": 0.0009994883800030791, "loss": 0.0882, "num_input_tokens_seen": 29938560, "step": 13850 }, { "epoch": 2.2601957585644374, "grad_norm": 0.006022712681442499, "learning_rate": 0.0009994851557300812, "loss": 0.1289, "num_input_tokens_seen": 29948512, "step": 13855 }, { "epoch": 2.2610114192495923, "grad_norm": 0.04013175144791603, "learning_rate": 0.000999481921334383, "loss": 0.2283, "num_input_tokens_seen": 29958944, "step": 13860 }, { "epoch": 2.261827079934747, "grad_norm": 0.07237616926431656, "learning_rate": 0.0009994786768160496, "loss": 0.2818, "num_input_tokens_seen": 29970080, "step": 13865 }, { "epoch": 2.262642740619902, "grad_norm": 0.1106957495212555, "learning_rate": 0.0009994754221751474, "loss": 0.087, "num_input_tokens_seen": 29980256, "step": 13870 }, { "epoch": 2.263458401305057, "grad_norm": 0.12279194593429565, "learning_rate": 0.0009994721574117422, "loss": 0.1116, "num_input_tokens_seen": 29991776, "step": 13875 }, { "epoch": 2.264274061990212, "grad_norm": 0.09282184392213821, "learning_rate": 0.0009994688825259001, "loss": 0.1075, "num_input_tokens_seen": 30003232, "step": 13880 }, { "epoch": 2.2650897226753672, "grad_norm": 0.036214679479599, "learning_rate": 0.0009994655975176874, "loss": 0.0976, "num_input_tokens_seen": 30013504, "step": 13885 }, { "epoch": 2.265905383360522, "grad_norm": 0.19235451519489288, "learning_rate": 0.0009994623023871709, "loss": 0.1041, "num_input_tokens_seen": 30024256, "step": 13890 }, { "epoch": 2.266721044045677, "grad_norm": 0.19567914307117462, "learning_rate": 0.000999458997134417, "loss": 0.1421, "num_input_tokens_seen": 30034656, "step": 13895 }, { "epoch": 2.267536704730832, "grad_norm": 0.3461471199989319, "learning_rate": 0.000999455681759493, "loss": 0.2955, "num_input_tokens_seen": 30045856, "step": 13900 }, { "epoch": 2.268352365415987, "grad_norm": 0.10061849653720856, "learning_rate": 0.0009994523562624662, "loss": 0.2066, "num_input_tokens_seen": 30056032, "step": 13905 }, { "epoch": 2.2691680261011418, "grad_norm": 0.23414430022239685, "learning_rate": 0.0009994490206434038, "loss": 0.0969, "num_input_tokens_seen": 30067456, "step": 13910 }, { "epoch": 2.269983686786297, "grad_norm": 0.16487205028533936, "learning_rate": 0.000999445674902373, "loss": 0.14, "num_input_tokens_seen": 30078944, "step": 13915 }, { "epoch": 2.270799347471452, "grad_norm": 0.1677953600883484, "learning_rate": 0.0009994423190394423, "loss": 0.1317, "num_input_tokens_seen": 30089536, "step": 13920 }, { "epoch": 2.2716150081566067, "grad_norm": 0.10743577778339386, "learning_rate": 0.0009994389530546795, "loss": 0.0725, "num_input_tokens_seen": 30100256, "step": 13925 }, { "epoch": 2.272430668841762, "grad_norm": 0.17737969756126404, "learning_rate": 0.0009994355769481524, "loss": 0.14, "num_input_tokens_seen": 30110368, "step": 13930 }, { "epoch": 2.2732463295269167, "grad_norm": 0.1521010547876358, "learning_rate": 0.00099943219071993, "loss": 0.2311, "num_input_tokens_seen": 30122080, "step": 13935 }, { "epoch": 2.274061990212072, "grad_norm": 0.12245091050863266, "learning_rate": 0.0009994287943700807, "loss": 0.0706, "num_input_tokens_seen": 30132736, "step": 13940 }, { "epoch": 2.274877650897227, "grad_norm": 0.13133811950683594, "learning_rate": 0.0009994253878986732, "loss": 0.1867, "num_input_tokens_seen": 30142624, "step": 13945 }, { "epoch": 2.2756933115823816, "grad_norm": 0.13661529123783112, "learning_rate": 0.0009994219713057768, "loss": 0.1389, "num_input_tokens_seen": 30153632, "step": 13950 }, { "epoch": 2.2765089722675365, "grad_norm": 0.08187350630760193, "learning_rate": 0.0009994185445914604, "loss": 0.0999, "num_input_tokens_seen": 30165056, "step": 13955 }, { "epoch": 2.2773246329526917, "grad_norm": 0.12019728869199753, "learning_rate": 0.000999415107755794, "loss": 0.0526, "num_input_tokens_seen": 30176480, "step": 13960 }, { "epoch": 2.2781402936378465, "grad_norm": 0.04871026799082756, "learning_rate": 0.0009994116607988464, "loss": 0.2142, "num_input_tokens_seen": 30187200, "step": 13965 }, { "epoch": 2.278955954323002, "grad_norm": 0.03447539359331131, "learning_rate": 0.0009994082037206881, "loss": 0.0814, "num_input_tokens_seen": 30198336, "step": 13970 }, { "epoch": 2.2797716150081566, "grad_norm": 0.09334293007850647, "learning_rate": 0.0009994047365213892, "loss": 0.1331, "num_input_tokens_seen": 30208416, "step": 13975 }, { "epoch": 2.2805872756933114, "grad_norm": 0.07840663939714432, "learning_rate": 0.0009994012592010196, "loss": 0.0942, "num_input_tokens_seen": 30219424, "step": 13980 }, { "epoch": 2.2814029363784667, "grad_norm": 0.24991539120674133, "learning_rate": 0.00099939777175965, "loss": 0.1466, "num_input_tokens_seen": 30229440, "step": 13985 }, { "epoch": 2.2822185970636215, "grad_norm": 0.02840086817741394, "learning_rate": 0.000999394274197351, "loss": 0.1704, "num_input_tokens_seen": 30241088, "step": 13990 }, { "epoch": 2.2830342577487763, "grad_norm": 0.11454634368419647, "learning_rate": 0.0009993907665141934, "loss": 0.0365, "num_input_tokens_seen": 30252672, "step": 13995 }, { "epoch": 2.2838499184339316, "grad_norm": 0.43254274129867554, "learning_rate": 0.0009993872487102486, "loss": 0.1782, "num_input_tokens_seen": 30263712, "step": 14000 }, { "epoch": 2.2846655791190864, "grad_norm": 0.27640798687934875, "learning_rate": 0.0009993837207855876, "loss": 0.2381, "num_input_tokens_seen": 30275200, "step": 14005 }, { "epoch": 2.2854812398042412, "grad_norm": 0.36231812834739685, "learning_rate": 0.000999380182740282, "loss": 0.1693, "num_input_tokens_seen": 30286080, "step": 14010 }, { "epoch": 2.2862969004893965, "grad_norm": 0.07490170747041702, "learning_rate": 0.0009993766345744036, "loss": 0.0939, "num_input_tokens_seen": 30297504, "step": 14015 }, { "epoch": 2.2871125611745513, "grad_norm": 0.07344070076942444, "learning_rate": 0.000999373076288024, "loss": 0.0515, "num_input_tokens_seen": 30308704, "step": 14020 }, { "epoch": 2.2879282218597066, "grad_norm": 0.027155442163348198, "learning_rate": 0.0009993695078812156, "loss": 0.064, "num_input_tokens_seen": 30318208, "step": 14025 }, { "epoch": 2.2887438825448614, "grad_norm": 0.04195243865251541, "learning_rate": 0.0009993659293540506, "loss": 0.3038, "num_input_tokens_seen": 30328480, "step": 14030 }, { "epoch": 2.289559543230016, "grad_norm": 0.05106004700064659, "learning_rate": 0.0009993623407066016, "loss": 0.1349, "num_input_tokens_seen": 30339200, "step": 14035 }, { "epoch": 2.2903752039151715, "grad_norm": 0.032931577414274216, "learning_rate": 0.0009993587419389412, "loss": 0.0565, "num_input_tokens_seen": 30348864, "step": 14040 }, { "epoch": 2.2911908646003263, "grad_norm": 0.1499442309141159, "learning_rate": 0.0009993551330511423, "loss": 0.0761, "num_input_tokens_seen": 30360064, "step": 14045 }, { "epoch": 2.292006525285481, "grad_norm": 0.07077664136886597, "learning_rate": 0.0009993515140432783, "loss": 0.1002, "num_input_tokens_seen": 30371616, "step": 14050 }, { "epoch": 2.2928221859706364, "grad_norm": 0.11122244596481323, "learning_rate": 0.0009993478849154224, "loss": 0.0986, "num_input_tokens_seen": 30382976, "step": 14055 }, { "epoch": 2.293637846655791, "grad_norm": 0.016836611554026604, "learning_rate": 0.0009993442456676482, "loss": 0.1041, "num_input_tokens_seen": 30395040, "step": 14060 }, { "epoch": 2.294453507340946, "grad_norm": 0.1562366485595703, "learning_rate": 0.0009993405963000294, "loss": 0.083, "num_input_tokens_seen": 30405792, "step": 14065 }, { "epoch": 2.2952691680261013, "grad_norm": 0.07218848168849945, "learning_rate": 0.00099933693681264, "loss": 0.1354, "num_input_tokens_seen": 30418272, "step": 14070 }, { "epoch": 2.296084828711256, "grad_norm": 0.2335197776556015, "learning_rate": 0.000999333267205554, "loss": 0.0964, "num_input_tokens_seen": 30429472, "step": 14075 }, { "epoch": 2.2969004893964113, "grad_norm": 0.0899466797709465, "learning_rate": 0.000999329587478846, "loss": 0.0773, "num_input_tokens_seen": 30439456, "step": 14080 }, { "epoch": 2.297716150081566, "grad_norm": 0.14783619344234467, "learning_rate": 0.0009993258976325903, "loss": 0.1321, "num_input_tokens_seen": 30451168, "step": 14085 }, { "epoch": 2.298531810766721, "grad_norm": 0.08867207169532776, "learning_rate": 0.0009993221976668618, "loss": 0.1167, "num_input_tokens_seen": 30461888, "step": 14090 }, { "epoch": 2.299347471451876, "grad_norm": 0.09165129065513611, "learning_rate": 0.0009993184875817357, "loss": 0.1071, "num_input_tokens_seen": 30472128, "step": 14095 }, { "epoch": 2.300163132137031, "grad_norm": 0.5241231918334961, "learning_rate": 0.0009993147673772868, "loss": 0.3387, "num_input_tokens_seen": 30482624, "step": 14100 }, { "epoch": 2.300978792822186, "grad_norm": 0.1067451611161232, "learning_rate": 0.000999311037053591, "loss": 0.0684, "num_input_tokens_seen": 30495104, "step": 14105 }, { "epoch": 2.301794453507341, "grad_norm": 0.19375212490558624, "learning_rate": 0.0009993072966107235, "loss": 0.1801, "num_input_tokens_seen": 30506368, "step": 14110 }, { "epoch": 2.302610114192496, "grad_norm": 0.05557816103100777, "learning_rate": 0.0009993035460487602, "loss": 0.1223, "num_input_tokens_seen": 30516608, "step": 14115 }, { "epoch": 2.3034257748776508, "grad_norm": 0.15730692446231842, "learning_rate": 0.0009992997853677773, "loss": 0.1223, "num_input_tokens_seen": 30526944, "step": 14120 }, { "epoch": 2.304241435562806, "grad_norm": 0.07870891690254211, "learning_rate": 0.0009992960145678506, "loss": 0.0757, "num_input_tokens_seen": 30537216, "step": 14125 }, { "epoch": 2.305057096247961, "grad_norm": 0.19272860884666443, "learning_rate": 0.0009992922336490568, "loss": 0.1123, "num_input_tokens_seen": 30547904, "step": 14130 }, { "epoch": 2.3058727569331157, "grad_norm": 0.13096141815185547, "learning_rate": 0.0009992884426114725, "loss": 0.2245, "num_input_tokens_seen": 30559328, "step": 14135 }, { "epoch": 2.306688417618271, "grad_norm": 0.12387151271104813, "learning_rate": 0.0009992846414551746, "loss": 0.2344, "num_input_tokens_seen": 30571424, "step": 14140 }, { "epoch": 2.3075040783034257, "grad_norm": 0.06615098565816879, "learning_rate": 0.00099928083018024, "loss": 0.1153, "num_input_tokens_seen": 30582560, "step": 14145 }, { "epoch": 2.3083197389885806, "grad_norm": 0.156528040766716, "learning_rate": 0.000999277008786746, "loss": 0.1359, "num_input_tokens_seen": 30593568, "step": 14150 }, { "epoch": 2.309135399673736, "grad_norm": 0.0884905606508255, "learning_rate": 0.0009992731772747701, "loss": 0.188, "num_input_tokens_seen": 30604704, "step": 14155 }, { "epoch": 2.3099510603588906, "grad_norm": 0.1028173565864563, "learning_rate": 0.0009992693356443898, "loss": 0.0918, "num_input_tokens_seen": 30615200, "step": 14160 }, { "epoch": 2.310766721044046, "grad_norm": 0.06769051402807236, "learning_rate": 0.0009992654838956831, "loss": 0.0695, "num_input_tokens_seen": 30626016, "step": 14165 }, { "epoch": 2.3115823817292007, "grad_norm": 0.06213633716106415, "learning_rate": 0.000999261622028728, "loss": 0.1648, "num_input_tokens_seen": 30637056, "step": 14170 }, { "epoch": 2.3123980424143555, "grad_norm": 0.1374928504228592, "learning_rate": 0.0009992577500436027, "loss": 0.0828, "num_input_tokens_seen": 30647616, "step": 14175 }, { "epoch": 2.3132137030995104, "grad_norm": 0.027195928618311882, "learning_rate": 0.0009992538679403857, "loss": 0.1721, "num_input_tokens_seen": 30658848, "step": 14180 }, { "epoch": 2.3140293637846656, "grad_norm": 0.07334202527999878, "learning_rate": 0.0009992499757191559, "loss": 0.129, "num_input_tokens_seen": 30669952, "step": 14185 }, { "epoch": 2.3148450244698204, "grad_norm": 0.16840721666812897, "learning_rate": 0.000999246073379992, "loss": 0.1011, "num_input_tokens_seen": 30680576, "step": 14190 }, { "epoch": 2.3156606851549757, "grad_norm": 0.006261528003960848, "learning_rate": 0.0009992421609229729, "loss": 0.1487, "num_input_tokens_seen": 30689984, "step": 14195 }, { "epoch": 2.3164763458401305, "grad_norm": 0.11172261834144592, "learning_rate": 0.0009992382383481782, "loss": 0.0794, "num_input_tokens_seen": 30701696, "step": 14200 }, { "epoch": 2.3172920065252853, "grad_norm": 0.21091987192630768, "learning_rate": 0.0009992343056556873, "loss": 0.1625, "num_input_tokens_seen": 30712064, "step": 14205 }, { "epoch": 2.3181076672104406, "grad_norm": 0.19095903635025024, "learning_rate": 0.0009992303628455796, "loss": 0.154, "num_input_tokens_seen": 30723040, "step": 14210 }, { "epoch": 2.3189233278955954, "grad_norm": 0.14958035945892334, "learning_rate": 0.0009992264099179355, "loss": 0.1231, "num_input_tokens_seen": 30734720, "step": 14215 }, { "epoch": 2.3197389885807502, "grad_norm": 0.207402765750885, "learning_rate": 0.000999222446872835, "loss": 0.0863, "num_input_tokens_seen": 30747008, "step": 14220 }, { "epoch": 2.3205546492659055, "grad_norm": 0.07774131745100021, "learning_rate": 0.0009992184737103583, "loss": 0.0618, "num_input_tokens_seen": 30757248, "step": 14225 }, { "epoch": 2.3213703099510603, "grad_norm": 0.06501632183790207, "learning_rate": 0.0009992144904305857, "loss": 0.119, "num_input_tokens_seen": 30768736, "step": 14230 }, { "epoch": 2.322185970636215, "grad_norm": 0.19293227791786194, "learning_rate": 0.0009992104970335982, "loss": 0.0865, "num_input_tokens_seen": 30779264, "step": 14235 }, { "epoch": 2.3230016313213704, "grad_norm": 0.32836616039276123, "learning_rate": 0.0009992064935194767, "loss": 0.2237, "num_input_tokens_seen": 30790592, "step": 14240 }, { "epoch": 2.323817292006525, "grad_norm": 0.04832938686013222, "learning_rate": 0.0009992024798883025, "loss": 0.1201, "num_input_tokens_seen": 30802208, "step": 14245 }, { "epoch": 2.3246329526916805, "grad_norm": 0.20137490332126617, "learning_rate": 0.0009991984561401566, "loss": 0.1799, "num_input_tokens_seen": 30812160, "step": 14250 }, { "epoch": 2.3254486133768353, "grad_norm": 0.12982739508152008, "learning_rate": 0.0009991944222751208, "loss": 0.0397, "num_input_tokens_seen": 30823040, "step": 14255 }, { "epoch": 2.32626427406199, "grad_norm": 0.11527493596076965, "learning_rate": 0.0009991903782932765, "loss": 0.1478, "num_input_tokens_seen": 30833760, "step": 14260 }, { "epoch": 2.3270799347471454, "grad_norm": 0.009630398824810982, "learning_rate": 0.0009991863241947062, "loss": 0.1292, "num_input_tokens_seen": 30845216, "step": 14265 }, { "epoch": 2.3278955954323, "grad_norm": 0.2665800154209137, "learning_rate": 0.0009991822599794916, "loss": 0.1223, "num_input_tokens_seen": 30855200, "step": 14270 }, { "epoch": 2.328711256117455, "grad_norm": 0.21644163131713867, "learning_rate": 0.0009991781856477156, "loss": 0.0993, "num_input_tokens_seen": 30865696, "step": 14275 }, { "epoch": 2.3295269168026103, "grad_norm": 0.11290088295936584, "learning_rate": 0.00099917410119946, "loss": 0.0717, "num_input_tokens_seen": 30877472, "step": 14280 }, { "epoch": 2.330342577487765, "grad_norm": 0.09280723333358765, "learning_rate": 0.0009991700066348081, "loss": 0.0898, "num_input_tokens_seen": 30887552, "step": 14285 }, { "epoch": 2.33115823817292, "grad_norm": 0.04525647312402725, "learning_rate": 0.000999165901953843, "loss": 0.0806, "num_input_tokens_seen": 30898432, "step": 14290 }, { "epoch": 2.331973898858075, "grad_norm": 0.1614445596933365, "learning_rate": 0.0009991617871566473, "loss": 0.0871, "num_input_tokens_seen": 30908000, "step": 14295 }, { "epoch": 2.33278955954323, "grad_norm": 0.08257835358381271, "learning_rate": 0.000999157662243305, "loss": 0.1466, "num_input_tokens_seen": 30920256, "step": 14300 }, { "epoch": 2.3336052202283852, "grad_norm": 0.010302538052201271, "learning_rate": 0.0009991535272138995, "loss": 0.3769, "num_input_tokens_seen": 30931232, "step": 14305 }, { "epoch": 2.33442088091354, "grad_norm": 0.05618816241621971, "learning_rate": 0.0009991493820685142, "loss": 0.0733, "num_input_tokens_seen": 30941536, "step": 14310 }, { "epoch": 2.335236541598695, "grad_norm": 0.04763595759868622, "learning_rate": 0.000999145226807234, "loss": 0.0348, "num_input_tokens_seen": 30951872, "step": 14315 }, { "epoch": 2.3360522022838497, "grad_norm": 0.028318610042333603, "learning_rate": 0.000999141061430142, "loss": 0.1134, "num_input_tokens_seen": 30961792, "step": 14320 }, { "epoch": 2.336867862969005, "grad_norm": 0.20109055936336517, "learning_rate": 0.0009991368859373236, "loss": 0.1174, "num_input_tokens_seen": 30971968, "step": 14325 }, { "epoch": 2.3376835236541598, "grad_norm": 0.057134952396154404, "learning_rate": 0.0009991327003288626, "loss": 0.0911, "num_input_tokens_seen": 30983456, "step": 14330 }, { "epoch": 2.338499184339315, "grad_norm": 0.07061900943517685, "learning_rate": 0.0009991285046048446, "loss": 0.1042, "num_input_tokens_seen": 30994400, "step": 14335 }, { "epoch": 2.33931484502447, "grad_norm": 0.06494476646184921, "learning_rate": 0.0009991242987653541, "loss": 0.2292, "num_input_tokens_seen": 31005280, "step": 14340 }, { "epoch": 2.3401305057096247, "grad_norm": 0.18833774328231812, "learning_rate": 0.0009991200828104766, "loss": 0.1026, "num_input_tokens_seen": 31015264, "step": 14345 }, { "epoch": 2.34094616639478, "grad_norm": 0.06600786000490189, "learning_rate": 0.0009991158567402973, "loss": 0.1364, "num_input_tokens_seen": 31023776, "step": 14350 }, { "epoch": 2.3417618270799347, "grad_norm": 0.3423004746437073, "learning_rate": 0.0009991116205549022, "loss": 0.3825, "num_input_tokens_seen": 31035232, "step": 14355 }, { "epoch": 2.3425774877650896, "grad_norm": 0.21730433404445648, "learning_rate": 0.0009991073742543768, "loss": 0.1815, "num_input_tokens_seen": 31046688, "step": 14360 }, { "epoch": 2.343393148450245, "grad_norm": 0.03402172401547432, "learning_rate": 0.0009991031178388072, "loss": 0.086, "num_input_tokens_seen": 31056928, "step": 14365 }, { "epoch": 2.3442088091353996, "grad_norm": 0.05237003415822983, "learning_rate": 0.0009990988513082799, "loss": 0.1465, "num_input_tokens_seen": 31067616, "step": 14370 }, { "epoch": 2.3450244698205545, "grad_norm": 0.06425706297159195, "learning_rate": 0.0009990945746628812, "loss": 0.0777, "num_input_tokens_seen": 31079648, "step": 14375 }, { "epoch": 2.3458401305057097, "grad_norm": 0.07050355523824692, "learning_rate": 0.0009990902879026978, "loss": 0.1368, "num_input_tokens_seen": 31091040, "step": 14380 }, { "epoch": 2.3466557911908645, "grad_norm": 0.09159641712903976, "learning_rate": 0.0009990859910278167, "loss": 0.0728, "num_input_tokens_seen": 31101024, "step": 14385 }, { "epoch": 2.34747145187602, "grad_norm": 0.17783799767494202, "learning_rate": 0.0009990816840383247, "loss": 0.113, "num_input_tokens_seen": 31111520, "step": 14390 }, { "epoch": 2.3482871125611746, "grad_norm": 0.12338680028915405, "learning_rate": 0.0009990773669343092, "loss": 0.136, "num_input_tokens_seen": 31123680, "step": 14395 }, { "epoch": 2.3491027732463294, "grad_norm": 0.08523198962211609, "learning_rate": 0.0009990730397158578, "loss": 0.1999, "num_input_tokens_seen": 31132768, "step": 14400 }, { "epoch": 2.3499184339314847, "grad_norm": 0.27966123819351196, "learning_rate": 0.0009990687023830583, "loss": 0.0596, "num_input_tokens_seen": 31144960, "step": 14405 }, { "epoch": 2.3507340946166395, "grad_norm": 0.06837616860866547, "learning_rate": 0.0009990643549359982, "loss": 0.06, "num_input_tokens_seen": 31155872, "step": 14410 }, { "epoch": 2.3515497553017943, "grad_norm": 0.034586962312459946, "learning_rate": 0.0009990599973747657, "loss": 0.0702, "num_input_tokens_seen": 31166912, "step": 14415 }, { "epoch": 2.3523654159869496, "grad_norm": 0.1494152545928955, "learning_rate": 0.0009990556296994497, "loss": 0.1052, "num_input_tokens_seen": 31177504, "step": 14420 }, { "epoch": 2.3531810766721044, "grad_norm": 0.03763037547469139, "learning_rate": 0.000999051251910138, "loss": 0.1022, "num_input_tokens_seen": 31188576, "step": 14425 }, { "epoch": 2.3539967373572592, "grad_norm": 0.05461564660072327, "learning_rate": 0.0009990468640069196, "loss": 0.085, "num_input_tokens_seen": 31199136, "step": 14430 }, { "epoch": 2.3548123980424145, "grad_norm": 0.02698604017496109, "learning_rate": 0.0009990424659898833, "loss": 0.0803, "num_input_tokens_seen": 31209408, "step": 14435 }, { "epoch": 2.3556280587275693, "grad_norm": 0.05417114123702049, "learning_rate": 0.0009990380578591186, "loss": 0.1756, "num_input_tokens_seen": 31220128, "step": 14440 }, { "epoch": 2.356443719412724, "grad_norm": 0.1396542489528656, "learning_rate": 0.0009990336396147144, "loss": 0.232, "num_input_tokens_seen": 31230944, "step": 14445 }, { "epoch": 2.3572593800978794, "grad_norm": 0.07993250340223312, "learning_rate": 0.0009990292112567606, "loss": 0.1641, "num_input_tokens_seen": 31241888, "step": 14450 }, { "epoch": 2.358075040783034, "grad_norm": 0.22863461077213287, "learning_rate": 0.0009990247727853466, "loss": 0.114, "num_input_tokens_seen": 31252480, "step": 14455 }, { "epoch": 2.358890701468189, "grad_norm": 0.039005108177661896, "learning_rate": 0.0009990203242005626, "loss": 0.1857, "num_input_tokens_seen": 31263296, "step": 14460 }, { "epoch": 2.3597063621533443, "grad_norm": 0.06166834011673927, "learning_rate": 0.0009990158655024985, "loss": 0.3229, "num_input_tokens_seen": 31274880, "step": 14465 }, { "epoch": 2.360522022838499, "grad_norm": 0.2376435250043869, "learning_rate": 0.0009990113966912451, "loss": 0.2014, "num_input_tokens_seen": 31285600, "step": 14470 }, { "epoch": 2.3613376835236544, "grad_norm": 0.17259903252124786, "learning_rate": 0.0009990069177668926, "loss": 0.1741, "num_input_tokens_seen": 31296288, "step": 14475 }, { "epoch": 2.362153344208809, "grad_norm": 0.06873729079961777, "learning_rate": 0.0009990024287295318, "loss": 0.1471, "num_input_tokens_seen": 31307264, "step": 14480 }, { "epoch": 2.362969004893964, "grad_norm": 0.042510055005550385, "learning_rate": 0.000998997929579254, "loss": 0.083, "num_input_tokens_seen": 31317568, "step": 14485 }, { "epoch": 2.3637846655791193, "grad_norm": 0.02149435691535473, "learning_rate": 0.0009989934203161498, "loss": 0.0877, "num_input_tokens_seen": 31329792, "step": 14490 }, { "epoch": 2.364600326264274, "grad_norm": 0.07500947266817093, "learning_rate": 0.0009989889009403112, "loss": 0.0941, "num_input_tokens_seen": 31341824, "step": 14495 }, { "epoch": 2.365415986949429, "grad_norm": 0.16817909479141235, "learning_rate": 0.0009989843714518294, "loss": 0.2337, "num_input_tokens_seen": 31354112, "step": 14500 }, { "epoch": 2.366231647634584, "grad_norm": 0.12466907501220703, "learning_rate": 0.0009989798318507962, "loss": 0.0941, "num_input_tokens_seen": 31365664, "step": 14505 }, { "epoch": 2.367047308319739, "grad_norm": 0.020459629595279694, "learning_rate": 0.0009989752821373038, "loss": 0.1951, "num_input_tokens_seen": 31376864, "step": 14510 }, { "epoch": 2.367862969004894, "grad_norm": 0.11784857511520386, "learning_rate": 0.0009989707223114444, "loss": 0.1683, "num_input_tokens_seen": 31386816, "step": 14515 }, { "epoch": 2.368678629690049, "grad_norm": 0.008937754668295383, "learning_rate": 0.0009989661523733102, "loss": 0.1142, "num_input_tokens_seen": 31398368, "step": 14520 }, { "epoch": 2.369494290375204, "grad_norm": 0.09518945962190628, "learning_rate": 0.000998961572322994, "loss": 0.3046, "num_input_tokens_seen": 31408864, "step": 14525 }, { "epoch": 2.370309951060359, "grad_norm": 0.061172470450401306, "learning_rate": 0.0009989569821605886, "loss": 0.2233, "num_input_tokens_seen": 31420256, "step": 14530 }, { "epoch": 2.371125611745514, "grad_norm": 0.12996791303157806, "learning_rate": 0.0009989523818861867, "loss": 0.2008, "num_input_tokens_seen": 31430016, "step": 14535 }, { "epoch": 2.3719412724306688, "grad_norm": 0.04465119168162346, "learning_rate": 0.0009989477714998822, "loss": 0.0848, "num_input_tokens_seen": 31440128, "step": 14540 }, { "epoch": 2.3727569331158236, "grad_norm": 0.17468681931495667, "learning_rate": 0.000998943151001768, "loss": 0.1192, "num_input_tokens_seen": 31451872, "step": 14545 }, { "epoch": 2.373572593800979, "grad_norm": 0.04149039462208748, "learning_rate": 0.0009989385203919379, "loss": 0.1115, "num_input_tokens_seen": 31463776, "step": 14550 }, { "epoch": 2.3743882544861337, "grad_norm": 0.1784275621175766, "learning_rate": 0.0009989338796704856, "loss": 0.1233, "num_input_tokens_seen": 31475136, "step": 14555 }, { "epoch": 2.375203915171289, "grad_norm": 0.06148466467857361, "learning_rate": 0.0009989292288375053, "loss": 0.1171, "num_input_tokens_seen": 31486176, "step": 14560 }, { "epoch": 2.3760195758564437, "grad_norm": 0.054308172315359116, "learning_rate": 0.0009989245678930915, "loss": 0.0486, "num_input_tokens_seen": 31497504, "step": 14565 }, { "epoch": 2.3768352365415986, "grad_norm": 0.23977619409561157, "learning_rate": 0.0009989198968373381, "loss": 0.1074, "num_input_tokens_seen": 31509664, "step": 14570 }, { "epoch": 2.377650897226754, "grad_norm": 0.59047532081604, "learning_rate": 0.0009989152156703403, "loss": 0.1587, "num_input_tokens_seen": 31520352, "step": 14575 }, { "epoch": 2.3784665579119086, "grad_norm": 0.14900071918964386, "learning_rate": 0.0009989105243921926, "loss": 0.1093, "num_input_tokens_seen": 31530496, "step": 14580 }, { "epoch": 2.3792822185970635, "grad_norm": 0.012715587392449379, "learning_rate": 0.0009989058230029904, "loss": 0.067, "num_input_tokens_seen": 31540480, "step": 14585 }, { "epoch": 2.3800978792822187, "grad_norm": 0.06799346208572388, "learning_rate": 0.0009989011115028286, "loss": 0.1453, "num_input_tokens_seen": 31551808, "step": 14590 }, { "epoch": 2.3809135399673735, "grad_norm": 0.01626054011285305, "learning_rate": 0.0009988963898918029, "loss": 0.0401, "num_input_tokens_seen": 31562752, "step": 14595 }, { "epoch": 2.3817292006525284, "grad_norm": 0.0843578651547432, "learning_rate": 0.000998891658170009, "loss": 0.1734, "num_input_tokens_seen": 31573600, "step": 14600 }, { "epoch": 2.3825448613376836, "grad_norm": 0.09764565527439117, "learning_rate": 0.0009988869163375428, "loss": 0.0758, "num_input_tokens_seen": 31584128, "step": 14605 }, { "epoch": 2.3833605220228384, "grad_norm": 0.10026843100786209, "learning_rate": 0.0009988821643945002, "loss": 0.107, "num_input_tokens_seen": 31594880, "step": 14610 }, { "epoch": 2.3841761827079937, "grad_norm": 0.07060685753822327, "learning_rate": 0.0009988774023409776, "loss": 0.1273, "num_input_tokens_seen": 31605984, "step": 14615 }, { "epoch": 2.3849918433931485, "grad_norm": 0.013278050348162651, "learning_rate": 0.0009988726301770718, "loss": 0.2176, "num_input_tokens_seen": 31616960, "step": 14620 }, { "epoch": 2.3858075040783033, "grad_norm": 0.05854358151555061, "learning_rate": 0.0009988678479028793, "loss": 0.025, "num_input_tokens_seen": 31628896, "step": 14625 }, { "epoch": 2.3866231647634586, "grad_norm": 0.27456170320510864, "learning_rate": 0.000998863055518497, "loss": 0.1917, "num_input_tokens_seen": 31640480, "step": 14630 }, { "epoch": 2.3874388254486134, "grad_norm": 0.016536332666873932, "learning_rate": 0.0009988582530240217, "loss": 0.0856, "num_input_tokens_seen": 31651392, "step": 14635 }, { "epoch": 2.3882544861337682, "grad_norm": 0.09446101635694504, "learning_rate": 0.0009988534404195516, "loss": 0.1164, "num_input_tokens_seen": 31661472, "step": 14640 }, { "epoch": 2.3890701468189235, "grad_norm": 0.5649963021278381, "learning_rate": 0.000998848617705183, "loss": 0.1488, "num_input_tokens_seen": 31672224, "step": 14645 }, { "epoch": 2.3898858075040783, "grad_norm": 0.03826959431171417, "learning_rate": 0.000998843784881015, "loss": 0.2156, "num_input_tokens_seen": 31683392, "step": 14650 }, { "epoch": 2.390701468189233, "grad_norm": 0.12697307765483856, "learning_rate": 0.0009988389419471446, "loss": 0.0878, "num_input_tokens_seen": 31694720, "step": 14655 }, { "epoch": 2.3915171288743884, "grad_norm": 0.10490674525499344, "learning_rate": 0.0009988340889036701, "loss": 0.1254, "num_input_tokens_seen": 31705024, "step": 14660 }, { "epoch": 2.392332789559543, "grad_norm": 0.29427623748779297, "learning_rate": 0.0009988292257506902, "loss": 0.3219, "num_input_tokens_seen": 31717440, "step": 14665 }, { "epoch": 2.393148450244698, "grad_norm": 0.20007169246673584, "learning_rate": 0.000998824352488303, "loss": 0.1459, "num_input_tokens_seen": 31729632, "step": 14670 }, { "epoch": 2.3939641109298533, "grad_norm": 0.19052647054195404, "learning_rate": 0.0009988194691166077, "loss": 0.155, "num_input_tokens_seen": 31739648, "step": 14675 }, { "epoch": 2.394779771615008, "grad_norm": 0.060261037200689316, "learning_rate": 0.000998814575635703, "loss": 0.1356, "num_input_tokens_seen": 31750944, "step": 14680 }, { "epoch": 2.395595432300163, "grad_norm": 0.11980581283569336, "learning_rate": 0.000998809672045688, "loss": 0.1422, "num_input_tokens_seen": 31760416, "step": 14685 }, { "epoch": 2.396411092985318, "grad_norm": 0.0499906986951828, "learning_rate": 0.0009988047583466622, "loss": 0.1829, "num_input_tokens_seen": 31770336, "step": 14690 }, { "epoch": 2.397226753670473, "grad_norm": 0.09711778163909912, "learning_rate": 0.0009987998345387255, "loss": 0.0789, "num_input_tokens_seen": 31782208, "step": 14695 }, { "epoch": 2.3980424143556283, "grad_norm": 0.171736478805542, "learning_rate": 0.000998794900621977, "loss": 0.1197, "num_input_tokens_seen": 31792576, "step": 14700 }, { "epoch": 2.398858075040783, "grad_norm": 0.27212202548980713, "learning_rate": 0.0009987899565965172, "loss": 0.104, "num_input_tokens_seen": 31802656, "step": 14705 }, { "epoch": 2.399673735725938, "grad_norm": 0.01339148823171854, "learning_rate": 0.0009987850024624463, "loss": 0.0807, "num_input_tokens_seen": 31812896, "step": 14710 }, { "epoch": 2.400489396411093, "grad_norm": 0.05710975453257561, "learning_rate": 0.0009987800382198647, "loss": 0.0603, "num_input_tokens_seen": 31823872, "step": 14715 }, { "epoch": 2.401305057096248, "grad_norm": 0.027438897639513016, "learning_rate": 0.0009987750638688726, "loss": 0.0612, "num_input_tokens_seen": 31835840, "step": 14720 }, { "epoch": 2.402120717781403, "grad_norm": 0.011026641353964806, "learning_rate": 0.000998770079409571, "loss": 0.1365, "num_input_tokens_seen": 31847008, "step": 14725 }, { "epoch": 2.402936378466558, "grad_norm": 0.15247893333435059, "learning_rate": 0.0009987650848420613, "loss": 0.0311, "num_input_tokens_seen": 31857888, "step": 14730 }, { "epoch": 2.403752039151713, "grad_norm": 0.561492919921875, "learning_rate": 0.0009987600801664442, "loss": 0.2133, "num_input_tokens_seen": 31867712, "step": 14735 }, { "epoch": 2.4045676998368677, "grad_norm": 0.04680028185248375, "learning_rate": 0.0009987550653828214, "loss": 0.1001, "num_input_tokens_seen": 31878656, "step": 14740 }, { "epoch": 2.405383360522023, "grad_norm": 0.018265612423419952, "learning_rate": 0.0009987500404912946, "loss": 0.0544, "num_input_tokens_seen": 31889792, "step": 14745 }, { "epoch": 2.4061990212071778, "grad_norm": 0.09335828572511673, "learning_rate": 0.0009987450054919655, "loss": 0.0635, "num_input_tokens_seen": 31901216, "step": 14750 }, { "epoch": 2.407014681892333, "grad_norm": 0.04732209071516991, "learning_rate": 0.000998739960384936, "loss": 0.1827, "num_input_tokens_seen": 31912256, "step": 14755 }, { "epoch": 2.407830342577488, "grad_norm": 0.07890065014362335, "learning_rate": 0.0009987349051703088, "loss": 0.0249, "num_input_tokens_seen": 31922336, "step": 14760 }, { "epoch": 2.4086460032626427, "grad_norm": 0.20765246450901031, "learning_rate": 0.0009987298398481859, "loss": 0.1302, "num_input_tokens_seen": 31933664, "step": 14765 }, { "epoch": 2.4094616639477975, "grad_norm": 0.1883508414030075, "learning_rate": 0.00099872476441867, "loss": 0.0663, "num_input_tokens_seen": 31945856, "step": 14770 }, { "epoch": 2.4102773246329527, "grad_norm": 0.01676262356340885, "learning_rate": 0.0009987196788818643, "loss": 0.0757, "num_input_tokens_seen": 31955872, "step": 14775 }, { "epoch": 2.4110929853181076, "grad_norm": 0.1437556892633438, "learning_rate": 0.0009987145832378713, "loss": 0.0811, "num_input_tokens_seen": 31965952, "step": 14780 }, { "epoch": 2.411908646003263, "grad_norm": 0.082915298640728, "learning_rate": 0.0009987094774867949, "loss": 0.0544, "num_input_tokens_seen": 31977024, "step": 14785 }, { "epoch": 2.4127243066884176, "grad_norm": 0.012319295667111874, "learning_rate": 0.000998704361628738, "loss": 0.0436, "num_input_tokens_seen": 31987232, "step": 14790 }, { "epoch": 2.4135399673735725, "grad_norm": 0.027199752628803253, "learning_rate": 0.000998699235663805, "loss": 0.1137, "num_input_tokens_seen": 31997952, "step": 14795 }, { "epoch": 2.4143556280587277, "grad_norm": 0.21190786361694336, "learning_rate": 0.000998694099592099, "loss": 0.1201, "num_input_tokens_seen": 32008480, "step": 14800 }, { "epoch": 2.4151712887438825, "grad_norm": 0.16720622777938843, "learning_rate": 0.0009986889534137245, "loss": 0.0574, "num_input_tokens_seen": 32019360, "step": 14805 }, { "epoch": 2.4159869494290374, "grad_norm": 0.4074651300907135, "learning_rate": 0.0009986837971287857, "loss": 0.1117, "num_input_tokens_seen": 32030752, "step": 14810 }, { "epoch": 2.4168026101141926, "grad_norm": 0.14166052639484406, "learning_rate": 0.0009986786307373873, "loss": 0.1184, "num_input_tokens_seen": 32042688, "step": 14815 }, { "epoch": 2.4176182707993474, "grad_norm": 0.04240216687321663, "learning_rate": 0.0009986734542396336, "loss": 0.0605, "num_input_tokens_seen": 32053184, "step": 14820 }, { "epoch": 2.4184339314845023, "grad_norm": 0.024739380925893784, "learning_rate": 0.0009986682676356299, "loss": 0.1312, "num_input_tokens_seen": 32063840, "step": 14825 }, { "epoch": 2.4192495921696575, "grad_norm": 0.060678571462631226, "learning_rate": 0.000998663070925481, "loss": 0.1352, "num_input_tokens_seen": 32075584, "step": 14830 }, { "epoch": 2.4200652528548123, "grad_norm": 0.012930831871926785, "learning_rate": 0.0009986578641092924, "loss": 0.3656, "num_input_tokens_seen": 32085504, "step": 14835 }, { "epoch": 2.4208809135399676, "grad_norm": 0.03932429105043411, "learning_rate": 0.0009986526471871698, "loss": 0.0843, "num_input_tokens_seen": 32096992, "step": 14840 }, { "epoch": 2.4216965742251224, "grad_norm": 0.131380096077919, "learning_rate": 0.0009986474201592187, "loss": 0.1235, "num_input_tokens_seen": 32107072, "step": 14845 }, { "epoch": 2.4225122349102772, "grad_norm": 0.10655047744512558, "learning_rate": 0.0009986421830255447, "loss": 0.2237, "num_input_tokens_seen": 32118560, "step": 14850 }, { "epoch": 2.4233278955954325, "grad_norm": 0.2773621678352356, "learning_rate": 0.0009986369357862545, "loss": 0.1452, "num_input_tokens_seen": 32130080, "step": 14855 }, { "epoch": 2.4241435562805873, "grad_norm": 0.06350556761026382, "learning_rate": 0.0009986316784414543, "loss": 0.1017, "num_input_tokens_seen": 32139456, "step": 14860 }, { "epoch": 2.424959216965742, "grad_norm": 0.036143578588962555, "learning_rate": 0.0009986264109912507, "loss": 0.1017, "num_input_tokens_seen": 32151552, "step": 14865 }, { "epoch": 2.4257748776508974, "grad_norm": 0.13741706311702728, "learning_rate": 0.00099862113343575, "loss": 0.0588, "num_input_tokens_seen": 32163456, "step": 14870 }, { "epoch": 2.426590538336052, "grad_norm": 0.03913474828004837, "learning_rate": 0.0009986158457750596, "loss": 0.0753, "num_input_tokens_seen": 32173888, "step": 14875 }, { "epoch": 2.427406199021207, "grad_norm": 0.01649622619152069, "learning_rate": 0.0009986105480092866, "loss": 0.1942, "num_input_tokens_seen": 32185504, "step": 14880 }, { "epoch": 2.4282218597063623, "grad_norm": 0.02170804888010025, "learning_rate": 0.0009986052401385385, "loss": 0.1132, "num_input_tokens_seen": 32195968, "step": 14885 }, { "epoch": 2.429037520391517, "grad_norm": 0.0716899037361145, "learning_rate": 0.0009985999221629224, "loss": 0.2186, "num_input_tokens_seen": 32205312, "step": 14890 }, { "epoch": 2.429853181076672, "grad_norm": 0.02708481065928936, "learning_rate": 0.0009985945940825464, "loss": 0.0402, "num_input_tokens_seen": 32216576, "step": 14895 }, { "epoch": 2.430668841761827, "grad_norm": 0.13017131388187408, "learning_rate": 0.0009985892558975185, "loss": 0.0907, "num_input_tokens_seen": 32227200, "step": 14900 }, { "epoch": 2.431484502446982, "grad_norm": 0.04850441962480545, "learning_rate": 0.0009985839076079469, "loss": 0.1175, "num_input_tokens_seen": 32237152, "step": 14905 }, { "epoch": 2.432300163132137, "grad_norm": 0.11114905774593353, "learning_rate": 0.0009985785492139397, "loss": 0.1311, "num_input_tokens_seen": 32247520, "step": 14910 }, { "epoch": 2.433115823817292, "grad_norm": 0.07037919014692307, "learning_rate": 0.0009985731807156057, "loss": 0.1295, "num_input_tokens_seen": 32257632, "step": 14915 }, { "epoch": 2.433931484502447, "grad_norm": 0.08659728616476059, "learning_rate": 0.0009985678021130538, "loss": 0.2184, "num_input_tokens_seen": 32267808, "step": 14920 }, { "epoch": 2.434747145187602, "grad_norm": 0.06102270260453224, "learning_rate": 0.000998562413406393, "loss": 0.1491, "num_input_tokens_seen": 32278752, "step": 14925 }, { "epoch": 2.435562805872757, "grad_norm": 0.010404076427221298, "learning_rate": 0.0009985570145957324, "loss": 0.2626, "num_input_tokens_seen": 32289696, "step": 14930 }, { "epoch": 2.436378466557912, "grad_norm": 0.06372539699077606, "learning_rate": 0.0009985516056811815, "loss": 0.0724, "num_input_tokens_seen": 32299392, "step": 14935 }, { "epoch": 2.437194127243067, "grad_norm": 0.07917524129152298, "learning_rate": 0.0009985461866628496, "loss": 0.0539, "num_input_tokens_seen": 32310592, "step": 14940 }, { "epoch": 2.438009787928222, "grad_norm": 0.20440097153186798, "learning_rate": 0.000998540757540847, "loss": 0.1825, "num_input_tokens_seen": 32319968, "step": 14945 }, { "epoch": 2.4388254486133767, "grad_norm": 0.007162266410887241, "learning_rate": 0.0009985353183152835, "loss": 0.088, "num_input_tokens_seen": 32330272, "step": 14950 }, { "epoch": 2.439641109298532, "grad_norm": 0.11462079733610153, "learning_rate": 0.0009985298689862692, "loss": 0.1583, "num_input_tokens_seen": 32341376, "step": 14955 }, { "epoch": 2.4404567699836868, "grad_norm": 0.07103787362575531, "learning_rate": 0.0009985244095539149, "loss": 0.1189, "num_input_tokens_seen": 32352704, "step": 14960 }, { "epoch": 2.4412724306688416, "grad_norm": 0.11806105077266693, "learning_rate": 0.0009985189400183306, "loss": 0.1283, "num_input_tokens_seen": 32363616, "step": 14965 }, { "epoch": 2.442088091353997, "grad_norm": 0.061703894287347794, "learning_rate": 0.0009985134603796278, "loss": 0.1217, "num_input_tokens_seen": 32372960, "step": 14970 }, { "epoch": 2.4429037520391517, "grad_norm": 0.03348749130964279, "learning_rate": 0.0009985079706379175, "loss": 0.1353, "num_input_tokens_seen": 32383456, "step": 14975 }, { "epoch": 2.443719412724307, "grad_norm": 0.03957496955990791, "learning_rate": 0.0009985024707933107, "loss": 0.0475, "num_input_tokens_seen": 32395136, "step": 14980 }, { "epoch": 2.4445350734094617, "grad_norm": 0.06971059739589691, "learning_rate": 0.0009984969608459186, "loss": 0.0367, "num_input_tokens_seen": 32403744, "step": 14985 }, { "epoch": 2.4453507340946166, "grad_norm": 0.07785134762525558, "learning_rate": 0.0009984914407958536, "loss": 0.1242, "num_input_tokens_seen": 32414848, "step": 14990 }, { "epoch": 2.4461663947797714, "grad_norm": 0.15177929401397705, "learning_rate": 0.000998485910643227, "loss": 0.2226, "num_input_tokens_seen": 32426112, "step": 14995 }, { "epoch": 2.4469820554649266, "grad_norm": 0.1218811422586441, "learning_rate": 0.000998480370388151, "loss": 0.113, "num_input_tokens_seen": 32436640, "step": 15000 }, { "epoch": 2.4477977161500815, "grad_norm": 0.18148620426654816, "learning_rate": 0.000998474820030738, "loss": 0.0694, "num_input_tokens_seen": 32447584, "step": 15005 }, { "epoch": 2.4486133768352367, "grad_norm": 0.1163652166724205, "learning_rate": 0.0009984692595711004, "loss": 0.0809, "num_input_tokens_seen": 32458272, "step": 15010 }, { "epoch": 2.4494290375203915, "grad_norm": 0.034473199397325516, "learning_rate": 0.0009984636890093509, "loss": 0.0824, "num_input_tokens_seen": 32469152, "step": 15015 }, { "epoch": 2.4502446982055464, "grad_norm": 0.13606807589530945, "learning_rate": 0.0009984581083456023, "loss": 0.14, "num_input_tokens_seen": 32480576, "step": 15020 }, { "epoch": 2.4510603588907016, "grad_norm": 0.16098394989967346, "learning_rate": 0.000998452517579968, "loss": 0.0327, "num_input_tokens_seen": 32490976, "step": 15025 }, { "epoch": 2.4518760195758564, "grad_norm": 0.04360827058553696, "learning_rate": 0.000998446916712561, "loss": 0.0645, "num_input_tokens_seen": 32501216, "step": 15030 }, { "epoch": 2.4526916802610113, "grad_norm": 0.10341744124889374, "learning_rate": 0.0009984413057434948, "loss": 0.0638, "num_input_tokens_seen": 32513120, "step": 15035 }, { "epoch": 2.4535073409461665, "grad_norm": 0.15271392464637756, "learning_rate": 0.0009984356846728835, "loss": 0.2005, "num_input_tokens_seen": 32524320, "step": 15040 }, { "epoch": 2.4543230016313213, "grad_norm": 0.20539811253547668, "learning_rate": 0.0009984300535008405, "loss": 0.1879, "num_input_tokens_seen": 32534208, "step": 15045 }, { "epoch": 2.455138662316476, "grad_norm": 0.11480668932199478, "learning_rate": 0.0009984244122274802, "loss": 0.1111, "num_input_tokens_seen": 32545792, "step": 15050 }, { "epoch": 2.4559543230016314, "grad_norm": 0.050684988498687744, "learning_rate": 0.000998418760852917, "loss": 0.0521, "num_input_tokens_seen": 32556576, "step": 15055 }, { "epoch": 2.4567699836867862, "grad_norm": 0.014801833778619766, "learning_rate": 0.0009984130993772652, "loss": 0.0767, "num_input_tokens_seen": 32568000, "step": 15060 }, { "epoch": 2.4575856443719415, "grad_norm": 0.009958263486623764, "learning_rate": 0.0009984074278006397, "loss": 0.0668, "num_input_tokens_seen": 32577472, "step": 15065 }, { "epoch": 2.4584013050570963, "grad_norm": 0.3374749422073364, "learning_rate": 0.0009984017461231553, "loss": 0.1985, "num_input_tokens_seen": 32588096, "step": 15070 }, { "epoch": 2.459216965742251, "grad_norm": 0.07289399951696396, "learning_rate": 0.0009983960543449276, "loss": 0.0832, "num_input_tokens_seen": 32599328, "step": 15075 }, { "epoch": 2.4600326264274064, "grad_norm": 0.11521682888269424, "learning_rate": 0.0009983903524660711, "loss": 0.0833, "num_input_tokens_seen": 32609216, "step": 15080 }, { "epoch": 2.460848287112561, "grad_norm": 0.4067881107330322, "learning_rate": 0.0009983846404867022, "loss": 0.2918, "num_input_tokens_seen": 32620416, "step": 15085 }, { "epoch": 2.461663947797716, "grad_norm": 0.028215084224939346, "learning_rate": 0.0009983789184069363, "loss": 0.0462, "num_input_tokens_seen": 32631776, "step": 15090 }, { "epoch": 2.4624796084828713, "grad_norm": 0.18957918882369995, "learning_rate": 0.0009983731862268893, "loss": 0.1796, "num_input_tokens_seen": 32642304, "step": 15095 }, { "epoch": 2.463295269168026, "grad_norm": 0.009655492380261421, "learning_rate": 0.0009983674439466774, "loss": 0.0251, "num_input_tokens_seen": 32653088, "step": 15100 }, { "epoch": 2.464110929853181, "grad_norm": 0.06055706366896629, "learning_rate": 0.000998361691566417, "loss": 0.1191, "num_input_tokens_seen": 32662464, "step": 15105 }, { "epoch": 2.464926590538336, "grad_norm": 0.03933952748775482, "learning_rate": 0.0009983559290862247, "loss": 0.0662, "num_input_tokens_seen": 32672992, "step": 15110 }, { "epoch": 2.465742251223491, "grad_norm": 0.10756219178438187, "learning_rate": 0.0009983501565062173, "loss": 0.0942, "num_input_tokens_seen": 32684256, "step": 15115 }, { "epoch": 2.466557911908646, "grad_norm": 0.04777355492115021, "learning_rate": 0.000998344373826512, "loss": 0.1098, "num_input_tokens_seen": 32694816, "step": 15120 }, { "epoch": 2.467373572593801, "grad_norm": 0.3761058747768402, "learning_rate": 0.0009983385810472256, "loss": 0.3218, "num_input_tokens_seen": 32705568, "step": 15125 }, { "epoch": 2.468189233278956, "grad_norm": 0.19796237349510193, "learning_rate": 0.0009983327781684756, "loss": 0.1266, "num_input_tokens_seen": 32716128, "step": 15130 }, { "epoch": 2.4690048939641107, "grad_norm": 0.016280511394143105, "learning_rate": 0.0009983269651903798, "loss": 0.1654, "num_input_tokens_seen": 32728480, "step": 15135 }, { "epoch": 2.469820554649266, "grad_norm": 0.11683381348848343, "learning_rate": 0.0009983211421130558, "loss": 0.2111, "num_input_tokens_seen": 32738848, "step": 15140 }, { "epoch": 2.470636215334421, "grad_norm": 0.12451004981994629, "learning_rate": 0.0009983153089366218, "loss": 0.1187, "num_input_tokens_seen": 32749728, "step": 15145 }, { "epoch": 2.471451876019576, "grad_norm": 0.11421272903680801, "learning_rate": 0.0009983094656611958, "loss": 0.1476, "num_input_tokens_seen": 32761824, "step": 15150 }, { "epoch": 2.472267536704731, "grad_norm": 0.20336616039276123, "learning_rate": 0.0009983036122868962, "loss": 0.1398, "num_input_tokens_seen": 32773216, "step": 15155 }, { "epoch": 2.4730831973898857, "grad_norm": 0.013980901800096035, "learning_rate": 0.000998297748813842, "loss": 0.1411, "num_input_tokens_seen": 32785344, "step": 15160 }, { "epoch": 2.473898858075041, "grad_norm": 0.026078760623931885, "learning_rate": 0.0009982918752421516, "loss": 0.0482, "num_input_tokens_seen": 32795648, "step": 15165 }, { "epoch": 2.4747145187601958, "grad_norm": 0.07515005022287369, "learning_rate": 0.0009982859915719444, "loss": 0.04, "num_input_tokens_seen": 32806848, "step": 15170 }, { "epoch": 2.4755301794453506, "grad_norm": 0.1675223857164383, "learning_rate": 0.0009982800978033395, "loss": 0.1084, "num_input_tokens_seen": 32819552, "step": 15175 }, { "epoch": 2.476345840130506, "grad_norm": 0.020948603749275208, "learning_rate": 0.000998274193936456, "loss": 0.117, "num_input_tokens_seen": 32830944, "step": 15180 }, { "epoch": 2.4771615008156607, "grad_norm": 0.12657499313354492, "learning_rate": 0.000998268279971414, "loss": 0.1146, "num_input_tokens_seen": 32840512, "step": 15185 }, { "epoch": 2.4779771615008155, "grad_norm": 0.2049468606710434, "learning_rate": 0.0009982623559083332, "loss": 0.0701, "num_input_tokens_seen": 32851424, "step": 15190 }, { "epoch": 2.4787928221859707, "grad_norm": 0.19748122990131378, "learning_rate": 0.0009982564217473338, "loss": 0.183, "num_input_tokens_seen": 32861856, "step": 15195 }, { "epoch": 2.4796084828711256, "grad_norm": 0.35869279503822327, "learning_rate": 0.000998250477488536, "loss": 0.1523, "num_input_tokens_seen": 32873696, "step": 15200 }, { "epoch": 2.480424143556281, "grad_norm": 0.03681536018848419, "learning_rate": 0.0009982445231320597, "loss": 0.2374, "num_input_tokens_seen": 32883488, "step": 15205 }, { "epoch": 2.4812398042414356, "grad_norm": 0.022419409826397896, "learning_rate": 0.0009982385586780264, "loss": 0.1162, "num_input_tokens_seen": 32894720, "step": 15210 }, { "epoch": 2.4820554649265905, "grad_norm": 0.2048080563545227, "learning_rate": 0.0009982325841265567, "loss": 0.1677, "num_input_tokens_seen": 32905696, "step": 15215 }, { "epoch": 2.4828711256117453, "grad_norm": 0.04616566747426987, "learning_rate": 0.0009982265994777717, "loss": 0.1081, "num_input_tokens_seen": 32916704, "step": 15220 }, { "epoch": 2.4836867862969005, "grad_norm": 0.04768180847167969, "learning_rate": 0.0009982206047317926, "loss": 0.0489, "num_input_tokens_seen": 32927392, "step": 15225 }, { "epoch": 2.4845024469820554, "grad_norm": 0.034913014620542526, "learning_rate": 0.0009982145998887406, "loss": 0.0482, "num_input_tokens_seen": 32939232, "step": 15230 }, { "epoch": 2.4853181076672106, "grad_norm": 0.12986132502555847, "learning_rate": 0.000998208584948738, "loss": 0.1679, "num_input_tokens_seen": 32950144, "step": 15235 }, { "epoch": 2.4861337683523654, "grad_norm": 0.06647571176290512, "learning_rate": 0.0009982025599119062, "loss": 0.1219, "num_input_tokens_seen": 32961184, "step": 15240 }, { "epoch": 2.4869494290375203, "grad_norm": 0.08212022483348846, "learning_rate": 0.0009981965247783677, "loss": 0.0928, "num_input_tokens_seen": 32971584, "step": 15245 }, { "epoch": 2.4877650897226755, "grad_norm": 0.05855432525277138, "learning_rate": 0.0009981904795482446, "loss": 0.1524, "num_input_tokens_seen": 32982496, "step": 15250 }, { "epoch": 2.4885807504078303, "grad_norm": 2.2332639694213867, "learning_rate": 0.0009981844242216594, "loss": 0.2134, "num_input_tokens_seen": 32992192, "step": 15255 }, { "epoch": 2.489396411092985, "grad_norm": 0.030122999101877213, "learning_rate": 0.0009981783587987348, "loss": 0.0564, "num_input_tokens_seen": 33003136, "step": 15260 }, { "epoch": 2.4902120717781404, "grad_norm": 0.09303940832614899, "learning_rate": 0.0009981722832795937, "loss": 0.0588, "num_input_tokens_seen": 33014592, "step": 15265 }, { "epoch": 2.4910277324632952, "grad_norm": 0.019270701333880424, "learning_rate": 0.0009981661976643595, "loss": 0.1582, "num_input_tokens_seen": 33025568, "step": 15270 }, { "epoch": 2.49184339314845, "grad_norm": 0.16244956851005554, "learning_rate": 0.0009981601019531552, "loss": 0.1883, "num_input_tokens_seen": 33037632, "step": 15275 }, { "epoch": 2.4926590538336053, "grad_norm": 0.06669965386390686, "learning_rate": 0.0009981539961461045, "loss": 0.0413, "num_input_tokens_seen": 33047840, "step": 15280 }, { "epoch": 2.49347471451876, "grad_norm": 0.027973853051662445, "learning_rate": 0.000998147880243331, "loss": 0.1188, "num_input_tokens_seen": 33057824, "step": 15285 }, { "epoch": 2.4942903752039154, "grad_norm": 0.23098739981651306, "learning_rate": 0.000998141754244959, "loss": 0.2423, "num_input_tokens_seen": 33069056, "step": 15290 }, { "epoch": 2.49510603588907, "grad_norm": 0.06371300667524338, "learning_rate": 0.0009981356181511124, "loss": 0.0366, "num_input_tokens_seen": 33080352, "step": 15295 }, { "epoch": 2.495921696574225, "grad_norm": 0.056606777012348175, "learning_rate": 0.0009981294719619152, "loss": 0.064, "num_input_tokens_seen": 33090592, "step": 15300 }, { "epoch": 2.4967373572593803, "grad_norm": 0.05140992999076843, "learning_rate": 0.0009981233156774927, "loss": 0.0346, "num_input_tokens_seen": 33102304, "step": 15305 }, { "epoch": 2.497553017944535, "grad_norm": 0.101639524102211, "learning_rate": 0.0009981171492979691, "loss": 0.0721, "num_input_tokens_seen": 33112192, "step": 15310 }, { "epoch": 2.49836867862969, "grad_norm": 0.012121300213038921, "learning_rate": 0.0009981109728234698, "loss": 0.2219, "num_input_tokens_seen": 33122336, "step": 15315 }, { "epoch": 2.499184339314845, "grad_norm": 0.11129625886678696, "learning_rate": 0.0009981047862541194, "loss": 0.1776, "num_input_tokens_seen": 33133376, "step": 15320 }, { "epoch": 2.5, "grad_norm": 0.2766715884208679, "learning_rate": 0.0009980985895900439, "loss": 0.2495, "num_input_tokens_seen": 33145248, "step": 15325 }, { "epoch": 2.500815660685155, "grad_norm": 0.091251902282238, "learning_rate": 0.0009980923828313685, "loss": 0.0637, "num_input_tokens_seen": 33156320, "step": 15330 }, { "epoch": 2.50163132137031, "grad_norm": 0.025461995974183083, "learning_rate": 0.000998086165978219, "loss": 0.1226, "num_input_tokens_seen": 33165600, "step": 15335 }, { "epoch": 2.502446982055465, "grad_norm": 0.028699345886707306, "learning_rate": 0.0009980799390307215, "loss": 0.0736, "num_input_tokens_seen": 33176992, "step": 15340 }, { "epoch": 2.50326264274062, "grad_norm": 0.06483875215053558, "learning_rate": 0.0009980737019890024, "loss": 0.0747, "num_input_tokens_seen": 33188800, "step": 15345 }, { "epoch": 2.504078303425775, "grad_norm": 0.2196149379014969, "learning_rate": 0.0009980674548531877, "loss": 0.1588, "num_input_tokens_seen": 33198784, "step": 15350 }, { "epoch": 2.50489396411093, "grad_norm": 0.01191483624279499, "learning_rate": 0.0009980611976234041, "loss": 0.0171, "num_input_tokens_seen": 33210336, "step": 15355 }, { "epoch": 2.5057096247960846, "grad_norm": 0.010378982871770859, "learning_rate": 0.0009980549302997788, "loss": 0.0217, "num_input_tokens_seen": 33221088, "step": 15360 }, { "epoch": 2.50652528548124, "grad_norm": 0.05322907119989395, "learning_rate": 0.000998048652882438, "loss": 0.143, "num_input_tokens_seen": 33231232, "step": 15365 }, { "epoch": 2.5073409461663947, "grad_norm": 0.13478170335292816, "learning_rate": 0.00099804236537151, "loss": 0.1536, "num_input_tokens_seen": 33243776, "step": 15370 }, { "epoch": 2.50815660685155, "grad_norm": 0.07025640457868576, "learning_rate": 0.0009980360677671214, "loss": 0.1515, "num_input_tokens_seen": 33253632, "step": 15375 }, { "epoch": 2.5089722675367048, "grad_norm": 0.08173404633998871, "learning_rate": 0.0009980297600694, "loss": 0.0829, "num_input_tokens_seen": 33264000, "step": 15380 }, { "epoch": 2.5097879282218596, "grad_norm": 0.041357748210430145, "learning_rate": 0.0009980234422784738, "loss": 0.1647, "num_input_tokens_seen": 33275008, "step": 15385 }, { "epoch": 2.5106035889070144, "grad_norm": 0.14917460083961487, "learning_rate": 0.0009980171143944708, "loss": 0.2091, "num_input_tokens_seen": 33285632, "step": 15390 }, { "epoch": 2.5114192495921697, "grad_norm": 0.0907067358493805, "learning_rate": 0.000998010776417519, "loss": 0.1932, "num_input_tokens_seen": 33297376, "step": 15395 }, { "epoch": 2.5122349102773245, "grad_norm": 0.060952670872211456, "learning_rate": 0.0009980044283477473, "loss": 0.0709, "num_input_tokens_seen": 33307712, "step": 15400 }, { "epoch": 2.5130505709624797, "grad_norm": 0.2798716425895691, "learning_rate": 0.000997998070185284, "loss": 0.0892, "num_input_tokens_seen": 33318304, "step": 15405 }, { "epoch": 2.5138662316476346, "grad_norm": 0.04096159338951111, "learning_rate": 0.000997991701930258, "loss": 0.1341, "num_input_tokens_seen": 33328544, "step": 15410 }, { "epoch": 2.5146818923327894, "grad_norm": 0.053559333086013794, "learning_rate": 0.0009979853235827984, "loss": 0.1205, "num_input_tokens_seen": 33338016, "step": 15415 }, { "epoch": 2.5154975530179446, "grad_norm": 0.06617650389671326, "learning_rate": 0.0009979789351430347, "loss": 0.0993, "num_input_tokens_seen": 33346496, "step": 15420 }, { "epoch": 2.5163132137030995, "grad_norm": 0.05221320688724518, "learning_rate": 0.0009979725366110958, "loss": 0.0364, "num_input_tokens_seen": 33356448, "step": 15425 }, { "epoch": 2.5171288743882547, "grad_norm": 0.030037062242627144, "learning_rate": 0.0009979661279871119, "loss": 0.0951, "num_input_tokens_seen": 33366432, "step": 15430 }, { "epoch": 2.5179445350734095, "grad_norm": 0.11865063011646271, "learning_rate": 0.0009979597092712128, "loss": 0.0728, "num_input_tokens_seen": 33377344, "step": 15435 }, { "epoch": 2.5187601957585644, "grad_norm": 0.08982488512992859, "learning_rate": 0.0009979532804635283, "loss": 0.0634, "num_input_tokens_seen": 33387776, "step": 15440 }, { "epoch": 2.519575856443719, "grad_norm": 0.0816715732216835, "learning_rate": 0.000997946841564189, "loss": 0.1627, "num_input_tokens_seen": 33398208, "step": 15445 }, { "epoch": 2.5203915171288744, "grad_norm": 0.0918974056839943, "learning_rate": 0.0009979403925733253, "loss": 0.0532, "num_input_tokens_seen": 33408864, "step": 15450 }, { "epoch": 2.5212071778140293, "grad_norm": 0.0044286069460213184, "learning_rate": 0.0009979339334910678, "loss": 0.1216, "num_input_tokens_seen": 33419040, "step": 15455 }, { "epoch": 2.5220228384991845, "grad_norm": 0.05251329392194748, "learning_rate": 0.0009979274643175473, "loss": 0.0569, "num_input_tokens_seen": 33430048, "step": 15460 }, { "epoch": 2.5228384991843393, "grad_norm": 0.3440323770046234, "learning_rate": 0.0009979209850528954, "loss": 0.1986, "num_input_tokens_seen": 33440288, "step": 15465 }, { "epoch": 2.523654159869494, "grad_norm": 0.1386038362979889, "learning_rate": 0.0009979144956972427, "loss": 0.0708, "num_input_tokens_seen": 33451616, "step": 15470 }, { "epoch": 2.5244698205546494, "grad_norm": 0.0037523547653108835, "learning_rate": 0.0009979079962507214, "loss": 0.1202, "num_input_tokens_seen": 33463520, "step": 15475 }, { "epoch": 2.5252854812398042, "grad_norm": 0.05357692763209343, "learning_rate": 0.0009979014867134628, "loss": 0.116, "num_input_tokens_seen": 33474272, "step": 15480 }, { "epoch": 2.5261011419249595, "grad_norm": 0.13688050210475922, "learning_rate": 0.000997894967085599, "loss": 0.128, "num_input_tokens_seen": 33485024, "step": 15485 }, { "epoch": 2.5269168026101143, "grad_norm": 0.13154840469360352, "learning_rate": 0.000997888437367262, "loss": 0.1607, "num_input_tokens_seen": 33494912, "step": 15490 }, { "epoch": 2.527732463295269, "grad_norm": 0.0538487546145916, "learning_rate": 0.0009978818975585843, "loss": 0.1917, "num_input_tokens_seen": 33506560, "step": 15495 }, { "epoch": 2.528548123980424, "grad_norm": 0.048410579562187195, "learning_rate": 0.0009978753476596982, "loss": 0.1062, "num_input_tokens_seen": 33518752, "step": 15500 }, { "epoch": 2.529363784665579, "grad_norm": 0.2545541226863861, "learning_rate": 0.0009978687876707366, "loss": 0.0737, "num_input_tokens_seen": 33529856, "step": 15505 }, { "epoch": 2.530179445350734, "grad_norm": 0.21288903057575226, "learning_rate": 0.0009978622175918323, "loss": 0.2061, "num_input_tokens_seen": 33540896, "step": 15510 }, { "epoch": 2.5309951060358893, "grad_norm": 0.06659834831953049, "learning_rate": 0.0009978556374231188, "loss": 0.0653, "num_input_tokens_seen": 33552224, "step": 15515 }, { "epoch": 2.531810766721044, "grad_norm": 0.056002382189035416, "learning_rate": 0.0009978490471647292, "loss": 0.1134, "num_input_tokens_seen": 33562176, "step": 15520 }, { "epoch": 2.532626427406199, "grad_norm": 0.026309235021471977, "learning_rate": 0.000997842446816797, "loss": 0.0851, "num_input_tokens_seen": 33573984, "step": 15525 }, { "epoch": 2.5334420880913537, "grad_norm": 0.13522249460220337, "learning_rate": 0.0009978358363794562, "loss": 0.1014, "num_input_tokens_seen": 33584032, "step": 15530 }, { "epoch": 2.534257748776509, "grad_norm": 0.35306116938591003, "learning_rate": 0.0009978292158528406, "loss": 0.1917, "num_input_tokens_seen": 33595744, "step": 15535 }, { "epoch": 2.535073409461664, "grad_norm": 0.1873067319393158, "learning_rate": 0.0009978225852370843, "loss": 0.2297, "num_input_tokens_seen": 33606304, "step": 15540 }, { "epoch": 2.535889070146819, "grad_norm": 0.0513768270611763, "learning_rate": 0.000997815944532322, "loss": 0.1684, "num_input_tokens_seen": 33616224, "step": 15545 }, { "epoch": 2.536704730831974, "grad_norm": 0.30809465050697327, "learning_rate": 0.0009978092937386878, "loss": 0.1936, "num_input_tokens_seen": 33628000, "step": 15550 }, { "epoch": 2.5375203915171287, "grad_norm": 0.10597193241119385, "learning_rate": 0.0009978026328563167, "loss": 0.1492, "num_input_tokens_seen": 33639296, "step": 15555 }, { "epoch": 2.538336052202284, "grad_norm": 0.0677858367562294, "learning_rate": 0.0009977959618853438, "loss": 0.3077, "num_input_tokens_seen": 33650496, "step": 15560 }, { "epoch": 2.539151712887439, "grad_norm": 0.09901798516511917, "learning_rate": 0.0009977892808259044, "loss": 0.1203, "num_input_tokens_seen": 33659712, "step": 15565 }, { "epoch": 2.539967373572594, "grad_norm": 0.10196952521800995, "learning_rate": 0.0009977825896781336, "loss": 0.2513, "num_input_tokens_seen": 33670656, "step": 15570 }, { "epoch": 2.540783034257749, "grad_norm": 0.1093427911400795, "learning_rate": 0.0009977758884421673, "loss": 0.1093, "num_input_tokens_seen": 33682176, "step": 15575 }, { "epoch": 2.5415986949429037, "grad_norm": 0.08146216720342636, "learning_rate": 0.000997769177118141, "loss": 0.0914, "num_input_tokens_seen": 33693280, "step": 15580 }, { "epoch": 2.5424143556280585, "grad_norm": 0.02189205028116703, "learning_rate": 0.0009977624557061908, "loss": 0.0368, "num_input_tokens_seen": 33704032, "step": 15585 }, { "epoch": 2.5432300163132138, "grad_norm": 0.039141327142715454, "learning_rate": 0.000997755724206453, "loss": 0.0615, "num_input_tokens_seen": 33715168, "step": 15590 }, { "epoch": 2.5440456769983686, "grad_norm": 0.08967316895723343, "learning_rate": 0.0009977489826190641, "loss": 0.0734, "num_input_tokens_seen": 33724896, "step": 15595 }, { "epoch": 2.544861337683524, "grad_norm": 0.18973685801029205, "learning_rate": 0.0009977422309441605, "loss": 0.1065, "num_input_tokens_seen": 33735904, "step": 15600 }, { "epoch": 2.5456769983686787, "grad_norm": 0.011004339903593063, "learning_rate": 0.0009977354691818794, "loss": 0.3357, "num_input_tokens_seen": 33746752, "step": 15605 }, { "epoch": 2.5464926590538335, "grad_norm": 0.089126817882061, "learning_rate": 0.0009977286973323575, "loss": 0.1619, "num_input_tokens_seen": 33757056, "step": 15610 }, { "epoch": 2.5473083197389887, "grad_norm": 0.10468865931034088, "learning_rate": 0.000997721915395732, "loss": 0.0873, "num_input_tokens_seen": 33767616, "step": 15615 }, { "epoch": 2.5481239804241436, "grad_norm": 0.1721217781305313, "learning_rate": 0.0009977151233721406, "loss": 0.0854, "num_input_tokens_seen": 33777152, "step": 15620 }, { "epoch": 2.5489396411092984, "grad_norm": 0.09493706375360489, "learning_rate": 0.0009977083212617207, "loss": 0.2463, "num_input_tokens_seen": 33787904, "step": 15625 }, { "epoch": 2.5497553017944536, "grad_norm": 0.2843787968158722, "learning_rate": 0.0009977015090646105, "loss": 0.1284, "num_input_tokens_seen": 33798464, "step": 15630 }, { "epoch": 2.5505709624796085, "grad_norm": 0.02873399294912815, "learning_rate": 0.0009976946867809476, "loss": 0.0721, "num_input_tokens_seen": 33809824, "step": 15635 }, { "epoch": 2.5513866231647633, "grad_norm": 0.0776677280664444, "learning_rate": 0.0009976878544108705, "loss": 0.0718, "num_input_tokens_seen": 33819936, "step": 15640 }, { "epoch": 2.5522022838499185, "grad_norm": 0.05906492844223976, "learning_rate": 0.000997681011954518, "loss": 0.1859, "num_input_tokens_seen": 33830784, "step": 15645 }, { "epoch": 2.5530179445350734, "grad_norm": 0.08871506154537201, "learning_rate": 0.0009976741594120281, "loss": 0.078, "num_input_tokens_seen": 33842272, "step": 15650 }, { "epoch": 2.5538336052202286, "grad_norm": 0.172580748796463, "learning_rate": 0.00099766729678354, "loss": 0.1249, "num_input_tokens_seen": 33853184, "step": 15655 }, { "epoch": 2.5546492659053834, "grad_norm": 0.2699727416038513, "learning_rate": 0.0009976604240691932, "loss": 0.181, "num_input_tokens_seen": 33865248, "step": 15660 }, { "epoch": 2.5554649265905383, "grad_norm": 0.2737541198730469, "learning_rate": 0.0009976535412691261, "loss": 0.2657, "num_input_tokens_seen": 33876544, "step": 15665 }, { "epoch": 2.556280587275693, "grad_norm": 0.056653060019016266, "learning_rate": 0.0009976466483834789, "loss": 0.0694, "num_input_tokens_seen": 33886816, "step": 15670 }, { "epoch": 2.5570962479608483, "grad_norm": 0.060865480452775955, "learning_rate": 0.0009976397454123911, "loss": 0.3578, "num_input_tokens_seen": 33897728, "step": 15675 }, { "epoch": 2.557911908646003, "grad_norm": 0.1835509091615677, "learning_rate": 0.0009976328323560025, "loss": 0.067, "num_input_tokens_seen": 33907936, "step": 15680 }, { "epoch": 2.5587275693311584, "grad_norm": 0.07976268976926804, "learning_rate": 0.0009976259092144533, "loss": 0.0566, "num_input_tokens_seen": 33917280, "step": 15685 }, { "epoch": 2.5595432300163132, "grad_norm": 0.05880413204431534, "learning_rate": 0.0009976189759878836, "loss": 0.148, "num_input_tokens_seen": 33928896, "step": 15690 }, { "epoch": 2.560358890701468, "grad_norm": 0.04047093167901039, "learning_rate": 0.0009976120326764342, "loss": 0.1329, "num_input_tokens_seen": 33939008, "step": 15695 }, { "epoch": 2.5611745513866233, "grad_norm": 0.036515820771455765, "learning_rate": 0.0009976050792802457, "loss": 0.1013, "num_input_tokens_seen": 33950048, "step": 15700 }, { "epoch": 2.561990212071778, "grad_norm": 0.03752981126308441, "learning_rate": 0.000997598115799459, "loss": 0.0863, "num_input_tokens_seen": 33960992, "step": 15705 }, { "epoch": 2.5628058727569334, "grad_norm": 0.1627136617898941, "learning_rate": 0.0009975911422342152, "loss": 0.0564, "num_input_tokens_seen": 33972288, "step": 15710 }, { "epoch": 2.563621533442088, "grad_norm": 0.16130013763904572, "learning_rate": 0.0009975841585846558, "loss": 0.0638, "num_input_tokens_seen": 33983264, "step": 15715 }, { "epoch": 2.564437194127243, "grad_norm": 0.05317399650812149, "learning_rate": 0.000997577164850922, "loss": 0.1662, "num_input_tokens_seen": 33993152, "step": 15720 }, { "epoch": 2.565252854812398, "grad_norm": 0.14853844046592712, "learning_rate": 0.000997570161033156, "loss": 0.071, "num_input_tokens_seen": 34003296, "step": 15725 }, { "epoch": 2.566068515497553, "grad_norm": 0.07281967252492905, "learning_rate": 0.0009975631471314992, "loss": 0.1886, "num_input_tokens_seen": 34015168, "step": 15730 }, { "epoch": 2.566884176182708, "grad_norm": 0.053378649055957794, "learning_rate": 0.0009975561231460942, "loss": 0.0543, "num_input_tokens_seen": 34024128, "step": 15735 }, { "epoch": 2.567699836867863, "grad_norm": 0.29613426327705383, "learning_rate": 0.000997549089077083, "loss": 0.1518, "num_input_tokens_seen": 34033888, "step": 15740 }, { "epoch": 2.568515497553018, "grad_norm": 0.14543229341506958, "learning_rate": 0.0009975420449246084, "loss": 0.2044, "num_input_tokens_seen": 34044128, "step": 15745 }, { "epoch": 2.569331158238173, "grad_norm": 0.019933458417654037, "learning_rate": 0.0009975349906888131, "loss": 0.1099, "num_input_tokens_seen": 34055520, "step": 15750 }, { "epoch": 2.5701468189233276, "grad_norm": 0.06531205028295517, "learning_rate": 0.00099752792636984, "loss": 0.1278, "num_input_tokens_seen": 34066048, "step": 15755 }, { "epoch": 2.570962479608483, "grad_norm": 0.14804935455322266, "learning_rate": 0.0009975208519678324, "loss": 0.0681, "num_input_tokens_seen": 34077632, "step": 15760 }, { "epoch": 2.5717781402936377, "grad_norm": 0.12645001709461212, "learning_rate": 0.0009975137674829335, "loss": 0.2039, "num_input_tokens_seen": 34088384, "step": 15765 }, { "epoch": 2.572593800978793, "grad_norm": 0.0323776975274086, "learning_rate": 0.000997506672915287, "loss": 0.04, "num_input_tokens_seen": 34098880, "step": 15770 }, { "epoch": 2.573409461663948, "grad_norm": 0.025035852566361427, "learning_rate": 0.0009974995682650368, "loss": 0.1377, "num_input_tokens_seen": 34109952, "step": 15775 }, { "epoch": 2.5742251223491026, "grad_norm": 0.13843075931072235, "learning_rate": 0.0009974924535323265, "loss": 0.0768, "num_input_tokens_seen": 34120576, "step": 15780 }, { "epoch": 2.575040783034258, "grad_norm": 0.2659997045993805, "learning_rate": 0.0009974853287173006, "loss": 0.1793, "num_input_tokens_seen": 34131168, "step": 15785 }, { "epoch": 2.5758564437194127, "grad_norm": 0.045313864946365356, "learning_rate": 0.0009974781938201034, "loss": 0.1267, "num_input_tokens_seen": 34141984, "step": 15790 }, { "epoch": 2.576672104404568, "grad_norm": 0.23315565288066864, "learning_rate": 0.0009974710488408795, "loss": 0.1438, "num_input_tokens_seen": 34153056, "step": 15795 }, { "epoch": 2.5774877650897228, "grad_norm": 0.10043656826019287, "learning_rate": 0.0009974638937797736, "loss": 0.165, "num_input_tokens_seen": 34163424, "step": 15800 }, { "epoch": 2.5783034257748776, "grad_norm": 0.05951232835650444, "learning_rate": 0.000997456728636931, "loss": 0.2315, "num_input_tokens_seen": 34175712, "step": 15805 }, { "epoch": 2.5791190864600324, "grad_norm": 1.1597989797592163, "learning_rate": 0.0009974495534124967, "loss": 0.1146, "num_input_tokens_seen": 34186784, "step": 15810 }, { "epoch": 2.5799347471451877, "grad_norm": 0.058770764619112015, "learning_rate": 0.000997442368106616, "loss": 0.0759, "num_input_tokens_seen": 34196224, "step": 15815 }, { "epoch": 2.5807504078303425, "grad_norm": 0.23356138169765472, "learning_rate": 0.0009974351727194347, "loss": 0.1931, "num_input_tokens_seen": 34206720, "step": 15820 }, { "epoch": 2.5815660685154977, "grad_norm": 0.02992885187268257, "learning_rate": 0.0009974279672510986, "loss": 0.107, "num_input_tokens_seen": 34217120, "step": 15825 }, { "epoch": 2.5823817292006526, "grad_norm": 0.2092907577753067, "learning_rate": 0.0009974207517017537, "loss": 0.1395, "num_input_tokens_seen": 34226592, "step": 15830 }, { "epoch": 2.5831973898858074, "grad_norm": 0.046701934188604355, "learning_rate": 0.0009974135260715465, "loss": 0.0898, "num_input_tokens_seen": 34237440, "step": 15835 }, { "epoch": 2.5840130505709626, "grad_norm": 0.1012524962425232, "learning_rate": 0.0009974062903606229, "loss": 0.1047, "num_input_tokens_seen": 34248896, "step": 15840 }, { "epoch": 2.5848287112561175, "grad_norm": 0.19864198565483093, "learning_rate": 0.0009973990445691298, "loss": 0.1758, "num_input_tokens_seen": 34260512, "step": 15845 }, { "epoch": 2.5856443719412723, "grad_norm": 0.14162364602088928, "learning_rate": 0.0009973917886972143, "loss": 0.1472, "num_input_tokens_seen": 34271104, "step": 15850 }, { "epoch": 2.5864600326264275, "grad_norm": 0.1076699048280716, "learning_rate": 0.000997384522745023, "loss": 0.0957, "num_input_tokens_seen": 34281472, "step": 15855 }, { "epoch": 2.5872756933115824, "grad_norm": 0.05362573638558388, "learning_rate": 0.0009973772467127035, "loss": 0.1355, "num_input_tokens_seen": 34293216, "step": 15860 }, { "epoch": 2.588091353996737, "grad_norm": 0.028096288442611694, "learning_rate": 0.000997369960600403, "loss": 0.0984, "num_input_tokens_seen": 34304448, "step": 15865 }, { "epoch": 2.5889070146818924, "grad_norm": 0.06138193607330322, "learning_rate": 0.0009973626644082694, "loss": 0.0566, "num_input_tokens_seen": 34316352, "step": 15870 }, { "epoch": 2.5897226753670473, "grad_norm": 0.05653262510895729, "learning_rate": 0.0009973553581364503, "loss": 0.0578, "num_input_tokens_seen": 34326688, "step": 15875 }, { "epoch": 2.5905383360522025, "grad_norm": 0.10075034201145172, "learning_rate": 0.0009973480417850942, "loss": 0.1513, "num_input_tokens_seen": 34338848, "step": 15880 }, { "epoch": 2.5913539967373573, "grad_norm": 0.10138165205717087, "learning_rate": 0.0009973407153543489, "loss": 0.1257, "num_input_tokens_seen": 34349472, "step": 15885 }, { "epoch": 2.592169657422512, "grad_norm": 0.12078166007995605, "learning_rate": 0.0009973333788443632, "loss": 0.1139, "num_input_tokens_seen": 34359712, "step": 15890 }, { "epoch": 2.592985318107667, "grad_norm": 0.056835684925317764, "learning_rate": 0.0009973260322552855, "loss": 0.0601, "num_input_tokens_seen": 34371136, "step": 15895 }, { "epoch": 2.5938009787928222, "grad_norm": 0.2918012738227844, "learning_rate": 0.000997318675587265, "loss": 0.1126, "num_input_tokens_seen": 34381120, "step": 15900 }, { "epoch": 2.594616639477977, "grad_norm": 0.04708686098456383, "learning_rate": 0.0009973113088404507, "loss": 0.1877, "num_input_tokens_seen": 34392384, "step": 15905 }, { "epoch": 2.5954323001631323, "grad_norm": 0.024804405868053436, "learning_rate": 0.0009973039320149916, "loss": 0.128, "num_input_tokens_seen": 34402688, "step": 15910 }, { "epoch": 2.596247960848287, "grad_norm": 0.12013711035251617, "learning_rate": 0.0009972965451110376, "loss": 0.0896, "num_input_tokens_seen": 34413280, "step": 15915 }, { "epoch": 2.597063621533442, "grad_norm": 0.030978182330727577, "learning_rate": 0.0009972891481287382, "loss": 0.124, "num_input_tokens_seen": 34424224, "step": 15920 }, { "epoch": 2.597879282218597, "grad_norm": 0.03300139680504799, "learning_rate": 0.0009972817410682433, "loss": 0.0973, "num_input_tokens_seen": 34433888, "step": 15925 }, { "epoch": 2.598694942903752, "grad_norm": 0.009529628790915012, "learning_rate": 0.0009972743239297032, "loss": 0.0316, "num_input_tokens_seen": 34444576, "step": 15930 }, { "epoch": 2.5995106035889073, "grad_norm": 0.21190616488456726, "learning_rate": 0.000997266896713268, "loss": 0.123, "num_input_tokens_seen": 34455232, "step": 15935 }, { "epoch": 2.600326264274062, "grad_norm": 0.1390226036310196, "learning_rate": 0.0009972594594190884, "loss": 0.2094, "num_input_tokens_seen": 34465792, "step": 15940 }, { "epoch": 2.601141924959217, "grad_norm": 0.046548616141080856, "learning_rate": 0.0009972520120473149, "loss": 0.0458, "num_input_tokens_seen": 34477024, "step": 15945 }, { "epoch": 2.6019575856443717, "grad_norm": 0.053153183311223984, "learning_rate": 0.0009972445545980988, "loss": 0.1465, "num_input_tokens_seen": 34487808, "step": 15950 }, { "epoch": 2.602773246329527, "grad_norm": 0.016847344115376472, "learning_rate": 0.0009972370870715908, "loss": 0.0365, "num_input_tokens_seen": 34498848, "step": 15955 }, { "epoch": 2.603588907014682, "grad_norm": 0.05434371903538704, "learning_rate": 0.0009972296094679426, "loss": 0.1182, "num_input_tokens_seen": 34509664, "step": 15960 }, { "epoch": 2.604404567699837, "grad_norm": 0.02624763920903206, "learning_rate": 0.0009972221217873054, "loss": 0.0167, "num_input_tokens_seen": 34518720, "step": 15965 }, { "epoch": 2.605220228384992, "grad_norm": 0.03936131298542023, "learning_rate": 0.0009972146240298312, "loss": 0.0485, "num_input_tokens_seen": 34530272, "step": 15970 }, { "epoch": 2.6060358890701467, "grad_norm": 0.06856126338243484, "learning_rate": 0.000997207116195672, "loss": 0.0453, "num_input_tokens_seen": 34541152, "step": 15975 }, { "epoch": 2.6068515497553015, "grad_norm": 0.18589448928833008, "learning_rate": 0.0009971995982849795, "loss": 0.1929, "num_input_tokens_seen": 34551872, "step": 15980 }, { "epoch": 2.607667210440457, "grad_norm": 0.1637170910835266, "learning_rate": 0.0009971920702979066, "loss": 0.0866, "num_input_tokens_seen": 34563168, "step": 15985 }, { "epoch": 2.6084828711256116, "grad_norm": 0.25298258662223816, "learning_rate": 0.000997184532234606, "loss": 0.2414, "num_input_tokens_seen": 34572832, "step": 15990 }, { "epoch": 2.609298531810767, "grad_norm": 0.10075201094150543, "learning_rate": 0.0009971769840952296, "loss": 0.0439, "num_input_tokens_seen": 34584704, "step": 15995 }, { "epoch": 2.6101141924959217, "grad_norm": 0.03244736045598984, "learning_rate": 0.0009971694258799312, "loss": 0.1003, "num_input_tokens_seen": 34594912, "step": 16000 }, { "epoch": 2.6109298531810765, "grad_norm": 0.10081294924020767, "learning_rate": 0.0009971618575888637, "loss": 0.1658, "num_input_tokens_seen": 34604480, "step": 16005 }, { "epoch": 2.6117455138662318, "grad_norm": 0.010015531443059444, "learning_rate": 0.0009971542792221802, "loss": 0.1819, "num_input_tokens_seen": 34615072, "step": 16010 }, { "epoch": 2.6125611745513866, "grad_norm": 0.15907056629657745, "learning_rate": 0.000997146690780035, "loss": 0.104, "num_input_tokens_seen": 34626016, "step": 16015 }, { "epoch": 2.613376835236542, "grad_norm": 0.03764641657471657, "learning_rate": 0.000997139092262581, "loss": 0.0564, "num_input_tokens_seen": 34637600, "step": 16020 }, { "epoch": 2.6141924959216967, "grad_norm": 0.03706960752606392, "learning_rate": 0.0009971314836699728, "loss": 0.1023, "num_input_tokens_seen": 34648736, "step": 16025 }, { "epoch": 2.6150081566068515, "grad_norm": 0.03933669254183769, "learning_rate": 0.0009971238650023644, "loss": 0.0428, "num_input_tokens_seen": 34658528, "step": 16030 }, { "epoch": 2.6158238172920063, "grad_norm": 0.07986725121736526, "learning_rate": 0.0009971162362599102, "loss": 0.0671, "num_input_tokens_seen": 34669088, "step": 16035 }, { "epoch": 2.6166394779771616, "grad_norm": 0.29692548513412476, "learning_rate": 0.000997108597442765, "loss": 0.0939, "num_input_tokens_seen": 34681632, "step": 16040 }, { "epoch": 2.6174551386623164, "grad_norm": 0.10401139408349991, "learning_rate": 0.000997100948551083, "loss": 0.0722, "num_input_tokens_seen": 34692768, "step": 16045 }, { "epoch": 2.6182707993474716, "grad_norm": 0.04141692817211151, "learning_rate": 0.0009970932895850201, "loss": 0.0579, "num_input_tokens_seen": 34702656, "step": 16050 }, { "epoch": 2.6190864600326265, "grad_norm": 0.017853064462542534, "learning_rate": 0.000997085620544731, "loss": 0.0688, "num_input_tokens_seen": 34713248, "step": 16055 }, { "epoch": 2.6199021207177813, "grad_norm": 0.018851248547434807, "learning_rate": 0.0009970779414303712, "loss": 0.2701, "num_input_tokens_seen": 34723808, "step": 16060 }, { "epoch": 2.6207177814029365, "grad_norm": 0.06940672546625137, "learning_rate": 0.0009970702522420962, "loss": 0.0288, "num_input_tokens_seen": 34735616, "step": 16065 }, { "epoch": 2.6215334420880914, "grad_norm": 0.2243514209985733, "learning_rate": 0.000997062552980062, "loss": 0.3053, "num_input_tokens_seen": 34746048, "step": 16070 }, { "epoch": 2.622349102773246, "grad_norm": 0.1178874745965004, "learning_rate": 0.0009970548436444248, "loss": 0.07, "num_input_tokens_seen": 34755488, "step": 16075 }, { "epoch": 2.6231647634584014, "grad_norm": 0.03382722660899162, "learning_rate": 0.0009970471242353406, "loss": 0.1779, "num_input_tokens_seen": 34766048, "step": 16080 }, { "epoch": 2.6239804241435563, "grad_norm": 0.017402131110429764, "learning_rate": 0.0009970393947529657, "loss": 0.1642, "num_input_tokens_seen": 34776992, "step": 16085 }, { "epoch": 2.624796084828711, "grad_norm": 0.1997496634721756, "learning_rate": 0.0009970316551974568, "loss": 0.1137, "num_input_tokens_seen": 34788640, "step": 16090 }, { "epoch": 2.6256117455138663, "grad_norm": 0.032988984137773514, "learning_rate": 0.0009970239055689712, "loss": 0.1022, "num_input_tokens_seen": 34797536, "step": 16095 }, { "epoch": 2.626427406199021, "grad_norm": 0.063567616045475, "learning_rate": 0.0009970161458676655, "loss": 0.0633, "num_input_tokens_seen": 34809152, "step": 16100 }, { "epoch": 2.6272430668841764, "grad_norm": 0.11862125992774963, "learning_rate": 0.000997008376093697, "loss": 0.1626, "num_input_tokens_seen": 34820160, "step": 16105 }, { "epoch": 2.6280587275693312, "grad_norm": 0.1909906566143036, "learning_rate": 0.0009970005962472233, "loss": 0.1546, "num_input_tokens_seen": 34833312, "step": 16110 }, { "epoch": 2.628874388254486, "grad_norm": 0.22565732896327972, "learning_rate": 0.0009969928063284022, "loss": 0.1844, "num_input_tokens_seen": 34842368, "step": 16115 }, { "epoch": 2.629690048939641, "grad_norm": 0.04123775288462639, "learning_rate": 0.0009969850063373913, "loss": 0.197, "num_input_tokens_seen": 34852320, "step": 16120 }, { "epoch": 2.630505709624796, "grad_norm": 0.038814183324575424, "learning_rate": 0.0009969771962743488, "loss": 0.0386, "num_input_tokens_seen": 34863680, "step": 16125 }, { "epoch": 2.631321370309951, "grad_norm": 0.007476178463548422, "learning_rate": 0.0009969693761394326, "loss": 0.1201, "num_input_tokens_seen": 34874880, "step": 16130 }, { "epoch": 2.632137030995106, "grad_norm": 0.11543486267328262, "learning_rate": 0.000996961545932802, "loss": 0.1336, "num_input_tokens_seen": 34884192, "step": 16135 }, { "epoch": 2.632952691680261, "grad_norm": 0.038836341351270676, "learning_rate": 0.0009969537056546151, "loss": 0.1078, "num_input_tokens_seen": 34894688, "step": 16140 }, { "epoch": 2.633768352365416, "grad_norm": 0.062395110726356506, "learning_rate": 0.000996945855305031, "loss": 0.1237, "num_input_tokens_seen": 34904256, "step": 16145 }, { "epoch": 2.634584013050571, "grad_norm": 0.06816502660512924, "learning_rate": 0.0009969379948842085, "loss": 0.0695, "num_input_tokens_seen": 34916864, "step": 16150 }, { "epoch": 2.635399673735726, "grad_norm": 0.028660116717219353, "learning_rate": 0.0009969301243923073, "loss": 0.0581, "num_input_tokens_seen": 34927200, "step": 16155 }, { "epoch": 2.636215334420881, "grad_norm": 0.054395247250795364, "learning_rate": 0.0009969222438294867, "loss": 0.0342, "num_input_tokens_seen": 34938656, "step": 16160 }, { "epoch": 2.637030995106036, "grad_norm": 0.12486924231052399, "learning_rate": 0.0009969143531959063, "loss": 0.0679, "num_input_tokens_seen": 34949600, "step": 16165 }, { "epoch": 2.637846655791191, "grad_norm": 0.017010482028126717, "learning_rate": 0.0009969064524917265, "loss": 0.0674, "num_input_tokens_seen": 34961696, "step": 16170 }, { "epoch": 2.6386623164763456, "grad_norm": 0.009399576112627983, "learning_rate": 0.000996898541717107, "loss": 0.0415, "num_input_tokens_seen": 34972320, "step": 16175 }, { "epoch": 2.639477977161501, "grad_norm": 0.2002403438091278, "learning_rate": 0.0009968906208722077, "loss": 0.0695, "num_input_tokens_seen": 34983744, "step": 16180 }, { "epoch": 2.6402936378466557, "grad_norm": 0.0805182009935379, "learning_rate": 0.00099688268995719, "loss": 0.1237, "num_input_tokens_seen": 34994880, "step": 16185 }, { "epoch": 2.641109298531811, "grad_norm": 0.14909473061561584, "learning_rate": 0.0009968747489722141, "loss": 0.0621, "num_input_tokens_seen": 35004928, "step": 16190 }, { "epoch": 2.641924959216966, "grad_norm": 0.2769220471382141, "learning_rate": 0.0009968667979174412, "loss": 0.1966, "num_input_tokens_seen": 35015136, "step": 16195 }, { "epoch": 2.6427406199021206, "grad_norm": 0.2111338973045349, "learning_rate": 0.0009968588367930324, "loss": 0.1392, "num_input_tokens_seen": 35024736, "step": 16200 }, { "epoch": 2.6435562805872754, "grad_norm": 0.003953300416469574, "learning_rate": 0.0009968508655991489, "loss": 0.2273, "num_input_tokens_seen": 35035872, "step": 16205 }, { "epoch": 2.6443719412724307, "grad_norm": 0.03729906305670738, "learning_rate": 0.0009968428843359523, "loss": 0.0513, "num_input_tokens_seen": 35047424, "step": 16210 }, { "epoch": 2.6451876019575855, "grad_norm": 0.2112385332584381, "learning_rate": 0.0009968348930036043, "loss": 0.097, "num_input_tokens_seen": 35057696, "step": 16215 }, { "epoch": 2.6460032626427408, "grad_norm": 0.12490873783826828, "learning_rate": 0.000996826891602267, "loss": 0.0654, "num_input_tokens_seen": 35069440, "step": 16220 }, { "epoch": 2.6468189233278956, "grad_norm": 0.03835887461900711, "learning_rate": 0.0009968188801321024, "loss": 0.1038, "num_input_tokens_seen": 35080064, "step": 16225 }, { "epoch": 2.6476345840130504, "grad_norm": 0.12459404021501541, "learning_rate": 0.000996810858593273, "loss": 0.1034, "num_input_tokens_seen": 35090944, "step": 16230 }, { "epoch": 2.6484502446982057, "grad_norm": 0.21367542445659637, "learning_rate": 0.000996802826985941, "loss": 0.1437, "num_input_tokens_seen": 35100896, "step": 16235 }, { "epoch": 2.6492659053833605, "grad_norm": 0.03609883412718773, "learning_rate": 0.0009967947853102698, "loss": 0.1104, "num_input_tokens_seen": 35111488, "step": 16240 }, { "epoch": 2.6500815660685157, "grad_norm": 0.010214082896709442, "learning_rate": 0.000996786733566422, "loss": 0.1245, "num_input_tokens_seen": 35123360, "step": 16245 }, { "epoch": 2.6508972267536706, "grad_norm": 0.11889711022377014, "learning_rate": 0.0009967786717545609, "loss": 0.1321, "num_input_tokens_seen": 35133952, "step": 16250 }, { "epoch": 2.6517128874388254, "grad_norm": 0.4400777220726013, "learning_rate": 0.0009967705998748496, "loss": 0.1365, "num_input_tokens_seen": 35144800, "step": 16255 }, { "epoch": 2.65252854812398, "grad_norm": 0.023491906002163887, "learning_rate": 0.000996762517927452, "loss": 0.0422, "num_input_tokens_seen": 35154624, "step": 16260 }, { "epoch": 2.6533442088091355, "grad_norm": 0.2003888189792633, "learning_rate": 0.0009967544259125317, "loss": 0.1334, "num_input_tokens_seen": 35163648, "step": 16265 }, { "epoch": 2.6541598694942903, "grad_norm": 0.19598814845085144, "learning_rate": 0.000996746323830253, "loss": 0.2184, "num_input_tokens_seen": 35174976, "step": 16270 }, { "epoch": 2.6549755301794455, "grad_norm": 0.050365347415208817, "learning_rate": 0.0009967382116807797, "loss": 0.1016, "num_input_tokens_seen": 35186432, "step": 16275 }, { "epoch": 2.6557911908646004, "grad_norm": 0.05750131234526634, "learning_rate": 0.0009967300894642764, "loss": 0.0348, "num_input_tokens_seen": 35196896, "step": 16280 }, { "epoch": 2.656606851549755, "grad_norm": 0.04523392394185066, "learning_rate": 0.0009967219571809076, "loss": 0.0938, "num_input_tokens_seen": 35207360, "step": 16285 }, { "epoch": 2.6574225122349104, "grad_norm": 0.0062296329997479916, "learning_rate": 0.0009967138148308384, "loss": 0.0612, "num_input_tokens_seen": 35217664, "step": 16290 }, { "epoch": 2.6582381729200653, "grad_norm": 0.0448344424366951, "learning_rate": 0.0009967056624142336, "loss": 0.0749, "num_input_tokens_seen": 35228128, "step": 16295 }, { "epoch": 2.65905383360522, "grad_norm": 0.046069227159023285, "learning_rate": 0.0009966974999312584, "loss": 0.1693, "num_input_tokens_seen": 35237728, "step": 16300 }, { "epoch": 2.6598694942903753, "grad_norm": 0.04713843762874603, "learning_rate": 0.000996689327382078, "loss": 0.0631, "num_input_tokens_seen": 35247968, "step": 16305 }, { "epoch": 2.66068515497553, "grad_norm": 0.088753342628479, "learning_rate": 0.0009966811447668586, "loss": 0.1018, "num_input_tokens_seen": 35258496, "step": 16310 }, { "epoch": 2.661500815660685, "grad_norm": 0.23398029804229736, "learning_rate": 0.0009966729520857658, "loss": 0.1089, "num_input_tokens_seen": 35268448, "step": 16315 }, { "epoch": 2.6623164763458402, "grad_norm": 0.02539239078760147, "learning_rate": 0.0009966647493389654, "loss": 0.2314, "num_input_tokens_seen": 35279680, "step": 16320 }, { "epoch": 2.663132137030995, "grad_norm": 0.05267626419663429, "learning_rate": 0.0009966565365266238, "loss": 0.1049, "num_input_tokens_seen": 35290080, "step": 16325 }, { "epoch": 2.6639477977161503, "grad_norm": 0.01566062681376934, "learning_rate": 0.0009966483136489073, "loss": 0.0362, "num_input_tokens_seen": 35301760, "step": 16330 }, { "epoch": 2.664763458401305, "grad_norm": 0.06919416785240173, "learning_rate": 0.0009966400807059827, "loss": 0.0646, "num_input_tokens_seen": 35313376, "step": 16335 }, { "epoch": 2.66557911908646, "grad_norm": 0.04618493840098381, "learning_rate": 0.000996631837698017, "loss": 0.0782, "num_input_tokens_seen": 35323968, "step": 16340 }, { "epoch": 2.6663947797716148, "grad_norm": 0.2246595174074173, "learning_rate": 0.000996623584625177, "loss": 0.3272, "num_input_tokens_seen": 35334592, "step": 16345 }, { "epoch": 2.66721044045677, "grad_norm": 0.02475031651556492, "learning_rate": 0.00099661532148763, "loss": 0.0636, "num_input_tokens_seen": 35346208, "step": 16350 }, { "epoch": 2.668026101141925, "grad_norm": 0.1650330275297165, "learning_rate": 0.0009966070482855436, "loss": 0.1049, "num_input_tokens_seen": 35357696, "step": 16355 }, { "epoch": 2.66884176182708, "grad_norm": 0.0800071507692337, "learning_rate": 0.0009965987650190852, "loss": 0.2077, "num_input_tokens_seen": 35368640, "step": 16360 }, { "epoch": 2.669657422512235, "grad_norm": 0.03208388015627861, "learning_rate": 0.000996590471688423, "loss": 0.1451, "num_input_tokens_seen": 35380480, "step": 16365 }, { "epoch": 2.6704730831973897, "grad_norm": 0.15314123034477234, "learning_rate": 0.000996582168293725, "loss": 0.0715, "num_input_tokens_seen": 35393344, "step": 16370 }, { "epoch": 2.671288743882545, "grad_norm": 0.20598310232162476, "learning_rate": 0.0009965738548351592, "loss": 0.1603, "num_input_tokens_seen": 35402656, "step": 16375 }, { "epoch": 2.6721044045677, "grad_norm": 0.06572859734296799, "learning_rate": 0.0009965655313128945, "loss": 0.0563, "num_input_tokens_seen": 35413920, "step": 16380 }, { "epoch": 2.672920065252855, "grad_norm": 0.10475694388151169, "learning_rate": 0.0009965571977270994, "loss": 0.0779, "num_input_tokens_seen": 35422688, "step": 16385 }, { "epoch": 2.67373572593801, "grad_norm": 0.0855659693479538, "learning_rate": 0.0009965488540779426, "loss": 0.1929, "num_input_tokens_seen": 35432896, "step": 16390 }, { "epoch": 2.6745513866231647, "grad_norm": 0.0473497100174427, "learning_rate": 0.0009965405003655933, "loss": 0.052, "num_input_tokens_seen": 35443328, "step": 16395 }, { "epoch": 2.6753670473083195, "grad_norm": 0.040079183876514435, "learning_rate": 0.000996532136590221, "loss": 0.1367, "num_input_tokens_seen": 35452992, "step": 16400 }, { "epoch": 2.676182707993475, "grad_norm": 0.1019391268491745, "learning_rate": 0.000996523762751995, "loss": 0.15, "num_input_tokens_seen": 35464544, "step": 16405 }, { "epoch": 2.6769983686786296, "grad_norm": 0.038980767130851746, "learning_rate": 0.000996515378851085, "loss": 0.1345, "num_input_tokens_seen": 35475488, "step": 16410 }, { "epoch": 2.677814029363785, "grad_norm": 0.12611663341522217, "learning_rate": 0.0009965069848876609, "loss": 0.0944, "num_input_tokens_seen": 35486272, "step": 16415 }, { "epoch": 2.6786296900489397, "grad_norm": 0.2920730412006378, "learning_rate": 0.000996498580861893, "loss": 0.1352, "num_input_tokens_seen": 35497312, "step": 16420 }, { "epoch": 2.6794453507340945, "grad_norm": 0.2523801624774933, "learning_rate": 0.0009964901667739517, "loss": 0.2546, "num_input_tokens_seen": 35506624, "step": 16425 }, { "epoch": 2.6802610114192493, "grad_norm": 0.4290613532066345, "learning_rate": 0.000996481742624007, "loss": 0.2409, "num_input_tokens_seen": 35516352, "step": 16430 }, { "epoch": 2.6810766721044046, "grad_norm": 0.029356911778450012, "learning_rate": 0.00099647330841223, "loss": 0.0954, "num_input_tokens_seen": 35527392, "step": 16435 }, { "epoch": 2.6818923327895594, "grad_norm": 0.10720787197351456, "learning_rate": 0.0009964648641387918, "loss": 0.2365, "num_input_tokens_seen": 35537888, "step": 16440 }, { "epoch": 2.6827079934747147, "grad_norm": 0.026292763650417328, "learning_rate": 0.000996456409803863, "loss": 0.1337, "num_input_tokens_seen": 35547680, "step": 16445 }, { "epoch": 2.6835236541598695, "grad_norm": 0.030141742900013924, "learning_rate": 0.0009964479454076156, "loss": 0.1958, "num_input_tokens_seen": 35557920, "step": 16450 }, { "epoch": 2.6843393148450243, "grad_norm": 0.09774786978960037, "learning_rate": 0.0009964394709502207, "loss": 0.1538, "num_input_tokens_seen": 35568064, "step": 16455 }, { "epoch": 2.6851549755301796, "grad_norm": 0.03949353098869324, "learning_rate": 0.0009964309864318502, "loss": 0.0572, "num_input_tokens_seen": 35579200, "step": 16460 }, { "epoch": 2.6859706362153344, "grad_norm": 0.0355185829102993, "learning_rate": 0.0009964224918526758, "loss": 0.1612, "num_input_tokens_seen": 35588928, "step": 16465 }, { "epoch": 2.6867862969004896, "grad_norm": 0.03025023639202118, "learning_rate": 0.0009964139872128699, "loss": 0.0876, "num_input_tokens_seen": 35600576, "step": 16470 }, { "epoch": 2.6876019575856445, "grad_norm": 0.23995541036128998, "learning_rate": 0.000996405472512605, "loss": 0.1761, "num_input_tokens_seen": 35611296, "step": 16475 }, { "epoch": 2.6884176182707993, "grad_norm": 0.26141855120658875, "learning_rate": 0.0009963969477520531, "loss": 0.1881, "num_input_tokens_seen": 35622592, "step": 16480 }, { "epoch": 2.689233278955954, "grad_norm": 0.244488924741745, "learning_rate": 0.0009963884129313876, "loss": 0.1601, "num_input_tokens_seen": 35632704, "step": 16485 }, { "epoch": 2.6900489396411094, "grad_norm": 0.0766143873333931, "learning_rate": 0.0009963798680507811, "loss": 0.0869, "num_input_tokens_seen": 35644000, "step": 16490 }, { "epoch": 2.690864600326264, "grad_norm": 0.02519366890192032, "learning_rate": 0.0009963713131104068, "loss": 0.0949, "num_input_tokens_seen": 35653632, "step": 16495 }, { "epoch": 2.6916802610114194, "grad_norm": 0.07979767769575119, "learning_rate": 0.0009963627481104384, "loss": 0.1554, "num_input_tokens_seen": 35664704, "step": 16500 }, { "epoch": 2.6924959216965743, "grad_norm": 0.1658993363380432, "learning_rate": 0.000996354173051049, "loss": 0.1786, "num_input_tokens_seen": 35674944, "step": 16505 }, { "epoch": 2.693311582381729, "grad_norm": 0.12059544771909714, "learning_rate": 0.0009963455879324129, "loss": 0.0858, "num_input_tokens_seen": 35685856, "step": 16510 }, { "epoch": 2.6941272430668843, "grad_norm": 0.1469067484140396, "learning_rate": 0.0009963369927547035, "loss": 0.092, "num_input_tokens_seen": 35697376, "step": 16515 }, { "epoch": 2.694942903752039, "grad_norm": 0.12039193511009216, "learning_rate": 0.0009963283875180952, "loss": 0.0749, "num_input_tokens_seen": 35709184, "step": 16520 }, { "epoch": 2.695758564437194, "grad_norm": 0.03307841345667839, "learning_rate": 0.0009963197722227628, "loss": 0.0792, "num_input_tokens_seen": 35719168, "step": 16525 }, { "epoch": 2.6965742251223492, "grad_norm": 0.13452056050300598, "learning_rate": 0.0009963111468688805, "loss": 0.0751, "num_input_tokens_seen": 35729184, "step": 16530 }, { "epoch": 2.697389885807504, "grad_norm": 0.024676907807588577, "learning_rate": 0.000996302511456623, "loss": 0.0837, "num_input_tokens_seen": 35739968, "step": 16535 }, { "epoch": 2.698205546492659, "grad_norm": 0.12530513107776642, "learning_rate": 0.0009962938659861657, "loss": 0.142, "num_input_tokens_seen": 35752384, "step": 16540 }, { "epoch": 2.699021207177814, "grad_norm": 0.20160333812236786, "learning_rate": 0.0009962852104576836, "loss": 0.0889, "num_input_tokens_seen": 35762976, "step": 16545 }, { "epoch": 2.699836867862969, "grad_norm": 0.02842654101550579, "learning_rate": 0.0009962765448713522, "loss": 0.0224, "num_input_tokens_seen": 35773536, "step": 16550 }, { "epoch": 2.700652528548124, "grad_norm": 0.010374743491411209, "learning_rate": 0.000996267869227347, "loss": 0.2177, "num_input_tokens_seen": 35784480, "step": 16555 }, { "epoch": 2.701468189233279, "grad_norm": 0.04730985313653946, "learning_rate": 0.0009962591835258436, "loss": 0.0501, "num_input_tokens_seen": 35794720, "step": 16560 }, { "epoch": 2.702283849918434, "grad_norm": 0.14314627647399902, "learning_rate": 0.0009962504877670186, "loss": 0.1492, "num_input_tokens_seen": 35805280, "step": 16565 }, { "epoch": 2.7030995106035887, "grad_norm": 0.01593198999762535, "learning_rate": 0.0009962417819510479, "loss": 0.0565, "num_input_tokens_seen": 35814752, "step": 16570 }, { "epoch": 2.703915171288744, "grad_norm": 0.022026842460036278, "learning_rate": 0.0009962330660781078, "loss": 0.11, "num_input_tokens_seen": 35825344, "step": 16575 }, { "epoch": 2.7047308319738987, "grad_norm": 0.07531365007162094, "learning_rate": 0.0009962243401483752, "loss": 0.313, "num_input_tokens_seen": 35835840, "step": 16580 }, { "epoch": 2.705546492659054, "grad_norm": 0.0568210706114769, "learning_rate": 0.000996215604162027, "loss": 0.1396, "num_input_tokens_seen": 35845856, "step": 16585 }, { "epoch": 2.706362153344209, "grad_norm": 0.038479603826999664, "learning_rate": 0.0009962068581192399, "loss": 0.1486, "num_input_tokens_seen": 35856928, "step": 16590 }, { "epoch": 2.7071778140293636, "grad_norm": 0.03605350852012634, "learning_rate": 0.0009961981020201913, "loss": 0.0991, "num_input_tokens_seen": 35867232, "step": 16595 }, { "epoch": 2.707993474714519, "grad_norm": 0.12278129905462265, "learning_rate": 0.0009961893358650586, "loss": 0.1502, "num_input_tokens_seen": 35878272, "step": 16600 }, { "epoch": 2.7088091353996737, "grad_norm": 0.031820762902498245, "learning_rate": 0.00099618055965402, "loss": 0.1481, "num_input_tokens_seen": 35889088, "step": 16605 }, { "epoch": 2.709624796084829, "grad_norm": 0.15299761295318604, "learning_rate": 0.0009961717733872524, "loss": 0.1214, "num_input_tokens_seen": 35899328, "step": 16610 }, { "epoch": 2.710440456769984, "grad_norm": 0.08476471900939941, "learning_rate": 0.0009961629770649347, "loss": 0.1204, "num_input_tokens_seen": 35911008, "step": 16615 }, { "epoch": 2.7112561174551386, "grad_norm": 0.1937631070613861, "learning_rate": 0.0009961541706872447, "loss": 0.2569, "num_input_tokens_seen": 35921440, "step": 16620 }, { "epoch": 2.7120717781402934, "grad_norm": 0.03174687549471855, "learning_rate": 0.000996145354254361, "loss": 0.0654, "num_input_tokens_seen": 35933792, "step": 16625 }, { "epoch": 2.7128874388254487, "grad_norm": 0.021128924563527107, "learning_rate": 0.0009961365277664624, "loss": 0.2715, "num_input_tokens_seen": 35945920, "step": 16630 }, { "epoch": 2.7137030995106035, "grad_norm": 0.06190233677625656, "learning_rate": 0.0009961276912237276, "loss": 0.1118, "num_input_tokens_seen": 35957536, "step": 16635 }, { "epoch": 2.7145187601957588, "grad_norm": 0.25115782022476196, "learning_rate": 0.0009961188446263357, "loss": 0.2473, "num_input_tokens_seen": 35968512, "step": 16640 }, { "epoch": 2.7153344208809136, "grad_norm": 0.08620987087488174, "learning_rate": 0.0009961099879744661, "loss": 0.0696, "num_input_tokens_seen": 35977760, "step": 16645 }, { "epoch": 2.7161500815660684, "grad_norm": 0.16655907034873962, "learning_rate": 0.0009961011212682982, "loss": 0.1499, "num_input_tokens_seen": 35989056, "step": 16650 }, { "epoch": 2.7169657422512232, "grad_norm": 0.038287725299596786, "learning_rate": 0.0009960922445080118, "loss": 0.1075, "num_input_tokens_seen": 36000096, "step": 16655 }, { "epoch": 2.7177814029363785, "grad_norm": 0.021945785731077194, "learning_rate": 0.0009960833576937867, "loss": 0.1108, "num_input_tokens_seen": 36010624, "step": 16660 }, { "epoch": 2.7185970636215333, "grad_norm": 0.0757167786359787, "learning_rate": 0.000996074460825803, "loss": 0.081, "num_input_tokens_seen": 36022144, "step": 16665 }, { "epoch": 2.7194127243066886, "grad_norm": 0.15707460045814514, "learning_rate": 0.0009960655539042412, "loss": 0.0851, "num_input_tokens_seen": 36033216, "step": 16670 }, { "epoch": 2.7202283849918434, "grad_norm": 0.08880306780338287, "learning_rate": 0.0009960566369292814, "loss": 0.0704, "num_input_tokens_seen": 36044224, "step": 16675 }, { "epoch": 2.721044045676998, "grad_norm": 0.04351628199219704, "learning_rate": 0.0009960477099011048, "loss": 0.1569, "num_input_tokens_seen": 36056768, "step": 16680 }, { "epoch": 2.7218597063621535, "grad_norm": 0.010717559605836868, "learning_rate": 0.000996038772819892, "loss": 0.1071, "num_input_tokens_seen": 36067584, "step": 16685 }, { "epoch": 2.7226753670473083, "grad_norm": 0.02620554156601429, "learning_rate": 0.0009960298256858238, "loss": 0.0415, "num_input_tokens_seen": 36078528, "step": 16690 }, { "epoch": 2.7234910277324635, "grad_norm": 0.02566135860979557, "learning_rate": 0.0009960208684990824, "loss": 0.0877, "num_input_tokens_seen": 36090400, "step": 16695 }, { "epoch": 2.7243066884176184, "grad_norm": 0.21687543392181396, "learning_rate": 0.0009960119012598489, "loss": 0.0928, "num_input_tokens_seen": 36101792, "step": 16700 }, { "epoch": 2.725122349102773, "grad_norm": 0.19889047741889954, "learning_rate": 0.0009960029239683046, "loss": 0.3088, "num_input_tokens_seen": 36112160, "step": 16705 }, { "epoch": 2.725938009787928, "grad_norm": 0.14541514217853546, "learning_rate": 0.000995993936624632, "loss": 0.1052, "num_input_tokens_seen": 36123232, "step": 16710 }, { "epoch": 2.7267536704730833, "grad_norm": 0.12091077119112015, "learning_rate": 0.000995984939229013, "loss": 0.0878, "num_input_tokens_seen": 36135232, "step": 16715 }, { "epoch": 2.727569331158238, "grad_norm": 0.02517981454730034, "learning_rate": 0.0009959759317816302, "loss": 0.0899, "num_input_tokens_seen": 36147104, "step": 16720 }, { "epoch": 2.7283849918433933, "grad_norm": 0.03287290409207344, "learning_rate": 0.0009959669142826659, "loss": 0.1489, "num_input_tokens_seen": 36158144, "step": 16725 }, { "epoch": 2.729200652528548, "grad_norm": 0.04140019416809082, "learning_rate": 0.0009959578867323028, "loss": 0.1108, "num_input_tokens_seen": 36168416, "step": 16730 }, { "epoch": 2.730016313213703, "grad_norm": 0.07255889475345612, "learning_rate": 0.000995948849130724, "loss": 0.111, "num_input_tokens_seen": 36179488, "step": 16735 }, { "epoch": 2.7308319738988582, "grad_norm": 0.04503436014056206, "learning_rate": 0.0009959398014781128, "loss": 0.1488, "num_input_tokens_seen": 36190336, "step": 16740 }, { "epoch": 2.731647634584013, "grad_norm": 0.08020366728305817, "learning_rate": 0.000995930743774652, "loss": 0.1268, "num_input_tokens_seen": 36201280, "step": 16745 }, { "epoch": 2.732463295269168, "grad_norm": 0.029173359274864197, "learning_rate": 0.0009959216760205257, "loss": 0.1225, "num_input_tokens_seen": 36211904, "step": 16750 }, { "epoch": 2.733278955954323, "grad_norm": 0.14357851445674896, "learning_rate": 0.0009959125982159176, "loss": 0.2048, "num_input_tokens_seen": 36222912, "step": 16755 }, { "epoch": 2.734094616639478, "grad_norm": 0.17278997600078583, "learning_rate": 0.0009959035103610115, "loss": 0.1939, "num_input_tokens_seen": 36234176, "step": 16760 }, { "epoch": 2.7349102773246328, "grad_norm": 0.09782871603965759, "learning_rate": 0.0009958944124559919, "loss": 0.0862, "num_input_tokens_seen": 36244480, "step": 16765 }, { "epoch": 2.735725938009788, "grad_norm": 0.16363048553466797, "learning_rate": 0.0009958853045010426, "loss": 0.1488, "num_input_tokens_seen": 36255520, "step": 16770 }, { "epoch": 2.736541598694943, "grad_norm": 0.050788093358278275, "learning_rate": 0.0009958761864963487, "loss": 0.0555, "num_input_tokens_seen": 36265952, "step": 16775 }, { "epoch": 2.737357259380098, "grad_norm": 0.08968115597963333, "learning_rate": 0.0009958670584420948, "loss": 0.1237, "num_input_tokens_seen": 36277248, "step": 16780 }, { "epoch": 2.738172920065253, "grad_norm": 0.06076105311512947, "learning_rate": 0.000995857920338466, "loss": 0.0688, "num_input_tokens_seen": 36287424, "step": 16785 }, { "epoch": 2.7389885807504077, "grad_norm": 0.19070397317409515, "learning_rate": 0.0009958487721856474, "loss": 0.1682, "num_input_tokens_seen": 36297632, "step": 16790 }, { "epoch": 2.7398042414355626, "grad_norm": 0.002982828998938203, "learning_rate": 0.0009958396139838242, "loss": 0.0469, "num_input_tokens_seen": 36307008, "step": 16795 }, { "epoch": 2.740619902120718, "grad_norm": 0.015070038847625256, "learning_rate": 0.0009958304457331822, "loss": 0.0126, "num_input_tokens_seen": 36317952, "step": 16800 }, { "epoch": 2.7414355628058726, "grad_norm": 0.19135338068008423, "learning_rate": 0.0009958212674339075, "loss": 0.059, "num_input_tokens_seen": 36328128, "step": 16805 }, { "epoch": 2.742251223491028, "grad_norm": 0.045779090374708176, "learning_rate": 0.0009958120790861855, "loss": 0.1217, "num_input_tokens_seen": 36337440, "step": 16810 }, { "epoch": 2.7430668841761827, "grad_norm": 0.15114933252334595, "learning_rate": 0.000995802880690203, "loss": 0.1546, "num_input_tokens_seen": 36347776, "step": 16815 }, { "epoch": 2.7438825448613375, "grad_norm": 0.014411961659789085, "learning_rate": 0.000995793672246146, "loss": 0.1026, "num_input_tokens_seen": 36359360, "step": 16820 }, { "epoch": 2.744698205546493, "grad_norm": 0.011926224455237389, "learning_rate": 0.0009957844537542013, "loss": 0.0833, "num_input_tokens_seen": 36370560, "step": 16825 }, { "epoch": 2.7455138662316476, "grad_norm": 0.16889113187789917, "learning_rate": 0.0009957752252145557, "loss": 0.1278, "num_input_tokens_seen": 36381024, "step": 16830 }, { "epoch": 2.746329526916803, "grad_norm": 0.042039863765239716, "learning_rate": 0.0009957659866273963, "loss": 0.0866, "num_input_tokens_seen": 36392224, "step": 16835 }, { "epoch": 2.7471451876019577, "grad_norm": 0.013748877681791782, "learning_rate": 0.0009957567379929103, "loss": 0.0833, "num_input_tokens_seen": 36402880, "step": 16840 }, { "epoch": 2.7479608482871125, "grad_norm": 0.05248915031552315, "learning_rate": 0.0009957474793112848, "loss": 0.0354, "num_input_tokens_seen": 36412288, "step": 16845 }, { "epoch": 2.7487765089722673, "grad_norm": 0.030665088444948196, "learning_rate": 0.0009957382105827079, "loss": 0.0658, "num_input_tokens_seen": 36422976, "step": 16850 }, { "epoch": 2.7495921696574226, "grad_norm": 0.009233402088284492, "learning_rate": 0.0009957289318073674, "loss": 0.0312, "num_input_tokens_seen": 36433984, "step": 16855 }, { "epoch": 2.7504078303425774, "grad_norm": 0.18588995933532715, "learning_rate": 0.000995719642985451, "loss": 0.1046, "num_input_tokens_seen": 36444640, "step": 16860 }, { "epoch": 2.7512234910277327, "grad_norm": 0.04793168231844902, "learning_rate": 0.0009957103441171472, "loss": 0.0486, "num_input_tokens_seen": 36456256, "step": 16865 }, { "epoch": 2.7520391517128875, "grad_norm": 0.0442626029253006, "learning_rate": 0.0009957010352026447, "loss": 0.0761, "num_input_tokens_seen": 36467456, "step": 16870 }, { "epoch": 2.7528548123980423, "grad_norm": 0.07995634526014328, "learning_rate": 0.0009956917162421317, "loss": 0.1783, "num_input_tokens_seen": 36477472, "step": 16875 }, { "epoch": 2.753670473083197, "grad_norm": 0.025483250617980957, "learning_rate": 0.0009956823872357972, "loss": 0.222, "num_input_tokens_seen": 36489728, "step": 16880 }, { "epoch": 2.7544861337683524, "grad_norm": 0.07462822645902634, "learning_rate": 0.0009956730481838303, "loss": 0.0635, "num_input_tokens_seen": 36500384, "step": 16885 }, { "epoch": 2.755301794453507, "grad_norm": 0.029625002294778824, "learning_rate": 0.0009956636990864202, "loss": 0.0823, "num_input_tokens_seen": 36512192, "step": 16890 }, { "epoch": 2.7561174551386625, "grad_norm": 0.04722673445940018, "learning_rate": 0.0009956543399437569, "loss": 0.0857, "num_input_tokens_seen": 36521600, "step": 16895 }, { "epoch": 2.7569331158238173, "grad_norm": 0.009356287308037281, "learning_rate": 0.0009956449707560291, "loss": 0.0197, "num_input_tokens_seen": 36532192, "step": 16900 }, { "epoch": 2.757748776508972, "grad_norm": 0.023361343890428543, "learning_rate": 0.0009956355915234274, "loss": 0.1314, "num_input_tokens_seen": 36542816, "step": 16905 }, { "epoch": 2.7585644371941274, "grad_norm": 0.12797385454177856, "learning_rate": 0.0009956262022461416, "loss": 0.223, "num_input_tokens_seen": 36553344, "step": 16910 }, { "epoch": 2.759380097879282, "grad_norm": 0.18771253526210785, "learning_rate": 0.0009956168029243621, "loss": 0.3599, "num_input_tokens_seen": 36564320, "step": 16915 }, { "epoch": 2.7601957585644374, "grad_norm": 0.039339229464530945, "learning_rate": 0.0009956073935582794, "loss": 0.0463, "num_input_tokens_seen": 36575744, "step": 16920 }, { "epoch": 2.7610114192495923, "grad_norm": 0.16050797700881958, "learning_rate": 0.000995597974148084, "loss": 0.1364, "num_input_tokens_seen": 36587328, "step": 16925 }, { "epoch": 2.761827079934747, "grad_norm": 0.04854540526866913, "learning_rate": 0.0009955885446939672, "loss": 0.1264, "num_input_tokens_seen": 36598304, "step": 16930 }, { "epoch": 2.762642740619902, "grad_norm": 0.03333837911486626, "learning_rate": 0.0009955791051961195, "loss": 0.0639, "num_input_tokens_seen": 36609856, "step": 16935 }, { "epoch": 2.763458401305057, "grad_norm": 0.015216774307191372, "learning_rate": 0.000995569655654733, "loss": 0.0753, "num_input_tokens_seen": 36619648, "step": 16940 }, { "epoch": 2.764274061990212, "grad_norm": 0.04915167763829231, "learning_rate": 0.0009955601960699983, "loss": 0.0694, "num_input_tokens_seen": 36629440, "step": 16945 }, { "epoch": 2.7650897226753672, "grad_norm": 0.07696164399385452, "learning_rate": 0.0009955507264421079, "loss": 0.0829, "num_input_tokens_seen": 36640224, "step": 16950 }, { "epoch": 2.765905383360522, "grad_norm": 0.17016223073005676, "learning_rate": 0.0009955412467712531, "loss": 0.1458, "num_input_tokens_seen": 36652288, "step": 16955 }, { "epoch": 2.766721044045677, "grad_norm": 0.07087530940771103, "learning_rate": 0.0009955317570576265, "loss": 0.024, "num_input_tokens_seen": 36663776, "step": 16960 }, { "epoch": 2.767536704730832, "grad_norm": 0.015933789312839508, "learning_rate": 0.0009955222573014202, "loss": 0.0209, "num_input_tokens_seen": 36674560, "step": 16965 }, { "epoch": 2.768352365415987, "grad_norm": 0.05921601876616478, "learning_rate": 0.0009955127475028266, "loss": 0.0991, "num_input_tokens_seen": 36685664, "step": 16970 }, { "epoch": 2.7691680261011418, "grad_norm": 0.03298629820346832, "learning_rate": 0.0009955032276620388, "loss": 0.0534, "num_input_tokens_seen": 36697568, "step": 16975 }, { "epoch": 2.769983686786297, "grad_norm": 0.08991827815771103, "learning_rate": 0.0009954936977792492, "loss": 0.0566, "num_input_tokens_seen": 36708288, "step": 16980 }, { "epoch": 2.770799347471452, "grad_norm": 0.04772263765335083, "learning_rate": 0.0009954841578546515, "loss": 0.0423, "num_input_tokens_seen": 36718976, "step": 16985 }, { "epoch": 2.7716150081566067, "grad_norm": 0.32339712977409363, "learning_rate": 0.0009954746078884387, "loss": 0.1995, "num_input_tokens_seen": 36730688, "step": 16990 }, { "epoch": 2.772430668841762, "grad_norm": 0.10802032053470612, "learning_rate": 0.0009954650478808042, "loss": 0.1608, "num_input_tokens_seen": 36741120, "step": 16995 }, { "epoch": 2.7732463295269167, "grad_norm": 0.006154247093945742, "learning_rate": 0.0009954554778319423, "loss": 0.0105, "num_input_tokens_seen": 36751904, "step": 17000 }, { "epoch": 2.774061990212072, "grad_norm": 0.0152150709182024, "learning_rate": 0.0009954458977420465, "loss": 0.0958, "num_input_tokens_seen": 36761952, "step": 17005 }, { "epoch": 2.774877650897227, "grad_norm": 0.0315382294356823, "learning_rate": 0.000995436307611311, "loss": 0.1436, "num_input_tokens_seen": 36772800, "step": 17010 }, { "epoch": 2.7756933115823816, "grad_norm": 0.13253070414066315, "learning_rate": 0.0009954267074399302, "loss": 0.1868, "num_input_tokens_seen": 36783168, "step": 17015 }, { "epoch": 2.7765089722675365, "grad_norm": 0.052694886922836304, "learning_rate": 0.0009954170972280988, "loss": 0.0201, "num_input_tokens_seen": 36794464, "step": 17020 }, { "epoch": 2.7773246329526917, "grad_norm": 0.011267283000051975, "learning_rate": 0.0009954074769760112, "loss": 0.0306, "num_input_tokens_seen": 36805344, "step": 17025 }, { "epoch": 2.7781402936378465, "grad_norm": 0.031975969672203064, "learning_rate": 0.0009953978466838629, "loss": 0.1541, "num_input_tokens_seen": 36816192, "step": 17030 }, { "epoch": 2.778955954323002, "grad_norm": 0.20794735848903656, "learning_rate": 0.0009953882063518486, "loss": 0.0352, "num_input_tokens_seen": 36827968, "step": 17035 }, { "epoch": 2.7797716150081566, "grad_norm": 0.013200036250054836, "learning_rate": 0.000995378555980164, "loss": 0.1362, "num_input_tokens_seen": 36839392, "step": 17040 }, { "epoch": 2.7805872756933114, "grad_norm": 0.3909376859664917, "learning_rate": 0.0009953688955690045, "loss": 0.2137, "num_input_tokens_seen": 36850208, "step": 17045 }, { "epoch": 2.7814029363784667, "grad_norm": 0.09196203947067261, "learning_rate": 0.0009953592251185658, "loss": 0.136, "num_input_tokens_seen": 36861504, "step": 17050 }, { "epoch": 2.7822185970636215, "grad_norm": 0.03299575299024582, "learning_rate": 0.000995349544629044, "loss": 0.0893, "num_input_tokens_seen": 36871616, "step": 17055 }, { "epoch": 2.7830342577487768, "grad_norm": 0.0684279352426529, "learning_rate": 0.0009953398541006353, "loss": 0.0477, "num_input_tokens_seen": 36882816, "step": 17060 }, { "epoch": 2.7838499184339316, "grad_norm": 0.06969677656888962, "learning_rate": 0.0009953301535335361, "loss": 0.211, "num_input_tokens_seen": 36894208, "step": 17065 }, { "epoch": 2.7846655791190864, "grad_norm": 0.05764150246977806, "learning_rate": 0.000995320442927943, "loss": 0.2221, "num_input_tokens_seen": 36906560, "step": 17070 }, { "epoch": 2.7854812398042412, "grad_norm": 0.03223137930035591, "learning_rate": 0.0009953107222840528, "loss": 0.0646, "num_input_tokens_seen": 36916672, "step": 17075 }, { "epoch": 2.7862969004893965, "grad_norm": 0.1956624835729599, "learning_rate": 0.0009953009916020624, "loss": 0.1274, "num_input_tokens_seen": 36927776, "step": 17080 }, { "epoch": 2.7871125611745513, "grad_norm": 0.042754679918289185, "learning_rate": 0.0009952912508821691, "loss": 0.1977, "num_input_tokens_seen": 36939392, "step": 17085 }, { "epoch": 2.7879282218597066, "grad_norm": 0.061387669295072556, "learning_rate": 0.0009952815001245702, "loss": 0.0669, "num_input_tokens_seen": 36951168, "step": 17090 }, { "epoch": 2.7887438825448614, "grad_norm": 0.03364640846848488, "learning_rate": 0.0009952717393294636, "loss": 0.0622, "num_input_tokens_seen": 36962112, "step": 17095 }, { "epoch": 2.789559543230016, "grad_norm": 0.1411299854516983, "learning_rate": 0.0009952619684970468, "loss": 0.1288, "num_input_tokens_seen": 36973184, "step": 17100 }, { "epoch": 2.790375203915171, "grad_norm": 0.02755509875714779, "learning_rate": 0.0009952521876275178, "loss": 0.1578, "num_input_tokens_seen": 36983232, "step": 17105 }, { "epoch": 2.7911908646003263, "grad_norm": 0.043589744716882706, "learning_rate": 0.0009952423967210752, "loss": 0.1919, "num_input_tokens_seen": 36993824, "step": 17110 }, { "epoch": 2.792006525285481, "grad_norm": 0.032452911138534546, "learning_rate": 0.0009952325957779168, "loss": 0.0787, "num_input_tokens_seen": 37005248, "step": 17115 }, { "epoch": 2.7928221859706364, "grad_norm": 0.11344970762729645, "learning_rate": 0.0009952227847982418, "loss": 0.1194, "num_input_tokens_seen": 37015808, "step": 17120 }, { "epoch": 2.793637846655791, "grad_norm": 0.010948172770440578, "learning_rate": 0.000995212963782249, "loss": 0.0596, "num_input_tokens_seen": 37026176, "step": 17125 }, { "epoch": 2.794453507340946, "grad_norm": 0.05834111198782921, "learning_rate": 0.000995203132730137, "loss": 0.1682, "num_input_tokens_seen": 37036864, "step": 17130 }, { "epoch": 2.7952691680261013, "grad_norm": 0.12609679996967316, "learning_rate": 0.0009951932916421053, "loss": 0.2157, "num_input_tokens_seen": 37046688, "step": 17135 }, { "epoch": 2.796084828711256, "grad_norm": 0.017100023105740547, "learning_rate": 0.0009951834405183535, "loss": 0.1031, "num_input_tokens_seen": 37056992, "step": 17140 }, { "epoch": 2.7969004893964113, "grad_norm": 0.17199130356311798, "learning_rate": 0.0009951735793590811, "loss": 0.0784, "num_input_tokens_seen": 37068064, "step": 17145 }, { "epoch": 2.797716150081566, "grad_norm": 0.0340568870306015, "learning_rate": 0.0009951637081644879, "loss": 0.2099, "num_input_tokens_seen": 37078752, "step": 17150 }, { "epoch": 2.798531810766721, "grad_norm": 0.10646383464336395, "learning_rate": 0.000995153826934774, "loss": 0.0641, "num_input_tokens_seen": 37089984, "step": 17155 }, { "epoch": 2.799347471451876, "grad_norm": 0.17747409641742706, "learning_rate": 0.0009951439356701394, "loss": 0.1234, "num_input_tokens_seen": 37100320, "step": 17160 }, { "epoch": 2.800163132137031, "grad_norm": 0.2767079770565033, "learning_rate": 0.0009951340343707852, "loss": 0.3425, "num_input_tokens_seen": 37111648, "step": 17165 }, { "epoch": 2.800978792822186, "grad_norm": 0.02862684801220894, "learning_rate": 0.0009951241230369114, "loss": 0.0344, "num_input_tokens_seen": 37121376, "step": 17170 }, { "epoch": 2.801794453507341, "grad_norm": 0.07905431091785431, "learning_rate": 0.0009951142016687193, "loss": 0.0706, "num_input_tokens_seen": 37131008, "step": 17175 }, { "epoch": 2.802610114192496, "grad_norm": 0.06610142439603806, "learning_rate": 0.0009951042702664099, "loss": 0.2177, "num_input_tokens_seen": 37141664, "step": 17180 }, { "epoch": 2.8034257748776508, "grad_norm": 0.041321273893117905, "learning_rate": 0.0009950943288301842, "loss": 0.055, "num_input_tokens_seen": 37153344, "step": 17185 }, { "epoch": 2.804241435562806, "grad_norm": 0.1373293399810791, "learning_rate": 0.0009950843773602438, "loss": 0.2467, "num_input_tokens_seen": 37163840, "step": 17190 }, { "epoch": 2.805057096247961, "grad_norm": 0.08510863035917282, "learning_rate": 0.0009950744158567905, "loss": 0.0857, "num_input_tokens_seen": 37175008, "step": 17195 }, { "epoch": 2.8058727569331157, "grad_norm": 0.039745964109897614, "learning_rate": 0.0009950644443200262, "loss": 0.102, "num_input_tokens_seen": 37185888, "step": 17200 }, { "epoch": 2.806688417618271, "grad_norm": 0.03634534031152725, "learning_rate": 0.0009950544627501529, "loss": 0.1042, "num_input_tokens_seen": 37196864, "step": 17205 }, { "epoch": 2.8075040783034257, "grad_norm": 0.004250263329595327, "learning_rate": 0.0009950444711473727, "loss": 0.1475, "num_input_tokens_seen": 37208608, "step": 17210 }, { "epoch": 2.8083197389885806, "grad_norm": 0.07612774521112442, "learning_rate": 0.0009950344695118885, "loss": 0.1043, "num_input_tokens_seen": 37218464, "step": 17215 }, { "epoch": 2.809135399673736, "grad_norm": 0.05913073569536209, "learning_rate": 0.0009950244578439027, "loss": 0.108, "num_input_tokens_seen": 37229536, "step": 17220 }, { "epoch": 2.8099510603588906, "grad_norm": 0.06495174020528793, "learning_rate": 0.0009950144361436182, "loss": 0.2674, "num_input_tokens_seen": 37239744, "step": 17225 }, { "epoch": 2.810766721044046, "grad_norm": 0.04763927310705185, "learning_rate": 0.0009950044044112383, "loss": 0.0752, "num_input_tokens_seen": 37250912, "step": 17230 }, { "epoch": 2.8115823817292007, "grad_norm": 0.23130439221858978, "learning_rate": 0.000994994362646966, "loss": 0.1877, "num_input_tokens_seen": 37261920, "step": 17235 }, { "epoch": 2.8123980424143555, "grad_norm": 0.084737628698349, "learning_rate": 0.0009949843108510053, "loss": 0.0475, "num_input_tokens_seen": 37273184, "step": 17240 }, { "epoch": 2.8132137030995104, "grad_norm": 0.030352629721164703, "learning_rate": 0.0009949742490235594, "loss": 0.0357, "num_input_tokens_seen": 37284992, "step": 17245 }, { "epoch": 2.8140293637846656, "grad_norm": 0.08658528327941895, "learning_rate": 0.0009949641771648324, "loss": 0.0821, "num_input_tokens_seen": 37295872, "step": 17250 }, { "epoch": 2.8148450244698204, "grad_norm": 0.10219324380159378, "learning_rate": 0.0009949540952750285, "loss": 0.0533, "num_input_tokens_seen": 37305696, "step": 17255 }, { "epoch": 2.8156606851549757, "grad_norm": 0.015360584482550621, "learning_rate": 0.000994944003354352, "loss": 0.1204, "num_input_tokens_seen": 37315712, "step": 17260 }, { "epoch": 2.8164763458401305, "grad_norm": 0.11603693664073944, "learning_rate": 0.0009949339014030075, "loss": 0.1377, "num_input_tokens_seen": 37325856, "step": 17265 }, { "epoch": 2.8172920065252853, "grad_norm": 0.09709656238555908, "learning_rate": 0.0009949237894211994, "loss": 0.0453, "num_input_tokens_seen": 37336160, "step": 17270 }, { "epoch": 2.8181076672104406, "grad_norm": 0.07709289342164993, "learning_rate": 0.000994913667409133, "loss": 0.024, "num_input_tokens_seen": 37346912, "step": 17275 }, { "epoch": 2.8189233278955954, "grad_norm": 0.015501154586672783, "learning_rate": 0.0009949035353670132, "loss": 0.0947, "num_input_tokens_seen": 37357184, "step": 17280 }, { "epoch": 2.8197389885807507, "grad_norm": 0.15367379784584045, "learning_rate": 0.0009948933932950456, "loss": 0.0817, "num_input_tokens_seen": 37367232, "step": 17285 }, { "epoch": 2.8205546492659055, "grad_norm": 0.10089415311813354, "learning_rate": 0.0009948832411934352, "loss": 0.0574, "num_input_tokens_seen": 37378848, "step": 17290 }, { "epoch": 2.8213703099510603, "grad_norm": 0.2953617572784424, "learning_rate": 0.0009948730790623884, "loss": 0.2016, "num_input_tokens_seen": 37389760, "step": 17295 }, { "epoch": 2.822185970636215, "grad_norm": 0.13659532368183136, "learning_rate": 0.0009948629069021107, "loss": 0.1575, "num_input_tokens_seen": 37400512, "step": 17300 }, { "epoch": 2.8230016313213704, "grad_norm": 0.04431614279747009, "learning_rate": 0.0009948527247128085, "loss": 0.1449, "num_input_tokens_seen": 37409408, "step": 17305 }, { "epoch": 2.823817292006525, "grad_norm": 0.022543810307979584, "learning_rate": 0.0009948425324946882, "loss": 0.1717, "num_input_tokens_seen": 37419456, "step": 17310 }, { "epoch": 2.8246329526916805, "grad_norm": 0.16338996589183807, "learning_rate": 0.0009948323302479561, "loss": 0.0756, "num_input_tokens_seen": 37430336, "step": 17315 }, { "epoch": 2.8254486133768353, "grad_norm": 0.06958018243312836, "learning_rate": 0.000994822117972819, "loss": 0.0902, "num_input_tokens_seen": 37441856, "step": 17320 }, { "epoch": 2.82626427406199, "grad_norm": 0.08969177305698395, "learning_rate": 0.000994811895669484, "loss": 0.0669, "num_input_tokens_seen": 37451808, "step": 17325 }, { "epoch": 2.827079934747145, "grad_norm": 0.04363901913166046, "learning_rate": 0.0009948016633381583, "loss": 0.1059, "num_input_tokens_seen": 37462784, "step": 17330 }, { "epoch": 2.8278955954323, "grad_norm": 0.018658151850104332, "learning_rate": 0.0009947914209790492, "loss": 0.0434, "num_input_tokens_seen": 37474592, "step": 17335 }, { "epoch": 2.828711256117455, "grad_norm": 0.01596796326339245, "learning_rate": 0.0009947811685923642, "loss": 0.1768, "num_input_tokens_seen": 37484000, "step": 17340 }, { "epoch": 2.8295269168026103, "grad_norm": 0.061537813395261765, "learning_rate": 0.0009947709061783113, "loss": 0.1124, "num_input_tokens_seen": 37493856, "step": 17345 }, { "epoch": 2.830342577487765, "grad_norm": 0.06127791479229927, "learning_rate": 0.000994760633737098, "loss": 0.0803, "num_input_tokens_seen": 37504736, "step": 17350 }, { "epoch": 2.83115823817292, "grad_norm": 0.06189500913023949, "learning_rate": 0.0009947503512689332, "loss": 0.0975, "num_input_tokens_seen": 37515744, "step": 17355 }, { "epoch": 2.831973898858075, "grad_norm": 0.14593607187271118, "learning_rate": 0.0009947400587740245, "loss": 0.1168, "num_input_tokens_seen": 37526240, "step": 17360 }, { "epoch": 2.83278955954323, "grad_norm": 0.11315637826919556, "learning_rate": 0.0009947297562525811, "loss": 0.131, "num_input_tokens_seen": 37537088, "step": 17365 }, { "epoch": 2.8336052202283852, "grad_norm": 0.006786394864320755, "learning_rate": 0.0009947194437048116, "loss": 0.1604, "num_input_tokens_seen": 37548352, "step": 17370 }, { "epoch": 2.83442088091354, "grad_norm": 0.16215111315250397, "learning_rate": 0.000994709121130925, "loss": 0.1263, "num_input_tokens_seen": 37560128, "step": 17375 }, { "epoch": 2.835236541598695, "grad_norm": 0.05200238898396492, "learning_rate": 0.0009946987885311304, "loss": 0.0564, "num_input_tokens_seen": 37571264, "step": 17380 }, { "epoch": 2.8360522022838497, "grad_norm": 0.049134548753499985, "learning_rate": 0.0009946884459056374, "loss": 0.0917, "num_input_tokens_seen": 37580992, "step": 17385 }, { "epoch": 2.836867862969005, "grad_norm": 0.08593729138374329, "learning_rate": 0.0009946780932546552, "loss": 0.1033, "num_input_tokens_seen": 37591488, "step": 17390 }, { "epoch": 2.8376835236541598, "grad_norm": 0.2163233906030655, "learning_rate": 0.0009946677305783943, "loss": 0.1685, "num_input_tokens_seen": 37603200, "step": 17395 }, { "epoch": 2.838499184339315, "grad_norm": 0.05750100314617157, "learning_rate": 0.000994657357877064, "loss": 0.0956, "num_input_tokens_seen": 37614944, "step": 17400 }, { "epoch": 2.83931484502447, "grad_norm": 0.02084900252521038, "learning_rate": 0.0009946469751508748, "loss": 0.0773, "num_input_tokens_seen": 37624960, "step": 17405 }, { "epoch": 2.8401305057096247, "grad_norm": 0.08458512276411057, "learning_rate": 0.0009946365824000374, "loss": 0.1109, "num_input_tokens_seen": 37636352, "step": 17410 }, { "epoch": 2.84094616639478, "grad_norm": 0.045834679156541824, "learning_rate": 0.000994626179624762, "loss": 0.1966, "num_input_tokens_seen": 37647872, "step": 17415 }, { "epoch": 2.8417618270799347, "grad_norm": 0.14066869020462036, "learning_rate": 0.0009946157668252597, "loss": 0.098, "num_input_tokens_seen": 37659040, "step": 17420 }, { "epoch": 2.8425774877650896, "grad_norm": 0.08382485061883926, "learning_rate": 0.0009946053440017413, "loss": 0.2024, "num_input_tokens_seen": 37669088, "step": 17425 }, { "epoch": 2.843393148450245, "grad_norm": 0.0480208620429039, "learning_rate": 0.000994594911154418, "loss": 0.0692, "num_input_tokens_seen": 37680768, "step": 17430 }, { "epoch": 2.8442088091353996, "grad_norm": 0.15396052598953247, "learning_rate": 0.0009945844682835018, "loss": 0.2609, "num_input_tokens_seen": 37692416, "step": 17435 }, { "epoch": 2.8450244698205545, "grad_norm": 0.3378278315067291, "learning_rate": 0.0009945740153892036, "loss": 0.2769, "num_input_tokens_seen": 37702080, "step": 17440 }, { "epoch": 2.8458401305057097, "grad_norm": 0.04631584882736206, "learning_rate": 0.0009945635524717359, "loss": 0.0574, "num_input_tokens_seen": 37712480, "step": 17445 }, { "epoch": 2.8466557911908645, "grad_norm": 0.01898629404604435, "learning_rate": 0.00099455307953131, "loss": 0.0857, "num_input_tokens_seen": 37724032, "step": 17450 }, { "epoch": 2.84747145187602, "grad_norm": 0.0465041846036911, "learning_rate": 0.0009945425965681388, "loss": 0.0883, "num_input_tokens_seen": 37734272, "step": 17455 }, { "epoch": 2.8482871125611746, "grad_norm": 0.027010783553123474, "learning_rate": 0.0009945321035824343, "loss": 0.1646, "num_input_tokens_seen": 37745760, "step": 17460 }, { "epoch": 2.8491027732463294, "grad_norm": 0.1783338189125061, "learning_rate": 0.0009945216005744096, "loss": 0.3157, "num_input_tokens_seen": 37756160, "step": 17465 }, { "epoch": 2.8499184339314843, "grad_norm": 0.024316715076565742, "learning_rate": 0.0009945110875442774, "loss": 0.0998, "num_input_tokens_seen": 37768224, "step": 17470 }, { "epoch": 2.8507340946166395, "grad_norm": 0.02121208980679512, "learning_rate": 0.0009945005644922504, "loss": 0.125, "num_input_tokens_seen": 37778880, "step": 17475 }, { "epoch": 2.8515497553017943, "grad_norm": 0.0659647062420845, "learning_rate": 0.0009944900314185422, "loss": 0.1498, "num_input_tokens_seen": 37790944, "step": 17480 }, { "epoch": 2.8523654159869496, "grad_norm": 0.02814595028758049, "learning_rate": 0.0009944794883233663, "loss": 0.1876, "num_input_tokens_seen": 37801792, "step": 17485 }, { "epoch": 2.8531810766721044, "grad_norm": 0.053324826061725616, "learning_rate": 0.0009944689352069363, "loss": 0.1417, "num_input_tokens_seen": 37813504, "step": 17490 }, { "epoch": 2.8539967373572592, "grad_norm": 0.10937617719173431, "learning_rate": 0.000994458372069466, "loss": 0.0979, "num_input_tokens_seen": 37823136, "step": 17495 }, { "epoch": 2.8548123980424145, "grad_norm": 0.04137161374092102, "learning_rate": 0.0009944477989111695, "loss": 0.0794, "num_input_tokens_seen": 37833568, "step": 17500 }, { "epoch": 2.8556280587275693, "grad_norm": 0.03246806561946869, "learning_rate": 0.0009944372157322612, "loss": 0.0449, "num_input_tokens_seen": 37845120, "step": 17505 }, { "epoch": 2.8564437194127246, "grad_norm": 0.21855475008487701, "learning_rate": 0.0009944266225329552, "loss": 0.2053, "num_input_tokens_seen": 37854368, "step": 17510 }, { "epoch": 2.8572593800978794, "grad_norm": 0.21136105060577393, "learning_rate": 0.0009944160193134668, "loss": 0.1399, "num_input_tokens_seen": 37865568, "step": 17515 }, { "epoch": 2.858075040783034, "grad_norm": 0.11021832376718521, "learning_rate": 0.0009944054060740104, "loss": 0.0635, "num_input_tokens_seen": 37876128, "step": 17520 }, { "epoch": 2.858890701468189, "grad_norm": 0.32767796516418457, "learning_rate": 0.0009943947828148013, "loss": 0.1833, "num_input_tokens_seen": 37887040, "step": 17525 }, { "epoch": 2.8597063621533443, "grad_norm": 0.0583861842751503, "learning_rate": 0.0009943841495360546, "loss": 0.1344, "num_input_tokens_seen": 37897088, "step": 17530 }, { "epoch": 2.860522022838499, "grad_norm": 0.10598666220903397, "learning_rate": 0.0009943735062379862, "loss": 0.0413, "num_input_tokens_seen": 37908096, "step": 17535 }, { "epoch": 2.8613376835236544, "grad_norm": 0.10326528549194336, "learning_rate": 0.0009943628529208114, "loss": 0.0641, "num_input_tokens_seen": 37918432, "step": 17540 }, { "epoch": 2.862153344208809, "grad_norm": 0.07112540304660797, "learning_rate": 0.0009943521895847461, "loss": 0.114, "num_input_tokens_seen": 37928320, "step": 17545 }, { "epoch": 2.862969004893964, "grad_norm": 0.25001704692840576, "learning_rate": 0.0009943415162300066, "loss": 0.1949, "num_input_tokens_seen": 37939520, "step": 17550 }, { "epoch": 2.863784665579119, "grad_norm": 0.05746942013502121, "learning_rate": 0.0009943308328568094, "loss": 0.1526, "num_input_tokens_seen": 37949728, "step": 17555 }, { "epoch": 2.864600326264274, "grad_norm": 0.12497828155755997, "learning_rate": 0.0009943201394653706, "loss": 0.0897, "num_input_tokens_seen": 37961248, "step": 17560 }, { "epoch": 2.865415986949429, "grad_norm": 0.04214569926261902, "learning_rate": 0.0009943094360559072, "loss": 0.1186, "num_input_tokens_seen": 37972320, "step": 17565 }, { "epoch": 2.866231647634584, "grad_norm": 0.01726198010146618, "learning_rate": 0.0009942987226286358, "loss": 0.082, "num_input_tokens_seen": 37983520, "step": 17570 }, { "epoch": 2.867047308319739, "grad_norm": 0.006213213782757521, "learning_rate": 0.0009942879991837739, "loss": 0.0446, "num_input_tokens_seen": 37995104, "step": 17575 }, { "epoch": 2.867862969004894, "grad_norm": 0.07860016822814941, "learning_rate": 0.0009942772657215385, "loss": 0.0319, "num_input_tokens_seen": 38006240, "step": 17580 }, { "epoch": 2.868678629690049, "grad_norm": 0.19524259865283966, "learning_rate": 0.0009942665222421475, "loss": 0.2527, "num_input_tokens_seen": 38017504, "step": 17585 }, { "epoch": 2.869494290375204, "grad_norm": 0.021658936515450478, "learning_rate": 0.0009942557687458182, "loss": 0.0806, "num_input_tokens_seen": 38029248, "step": 17590 }, { "epoch": 2.870309951060359, "grad_norm": 0.0713447779417038, "learning_rate": 0.0009942450052327688, "loss": 0.0292, "num_input_tokens_seen": 38039424, "step": 17595 }, { "epoch": 2.871125611745514, "grad_norm": 0.09926056861877441, "learning_rate": 0.0009942342317032172, "loss": 0.0932, "num_input_tokens_seen": 38048608, "step": 17600 }, { "epoch": 2.8719412724306688, "grad_norm": 0.023149937391281128, "learning_rate": 0.000994223448157382, "loss": 0.0098, "num_input_tokens_seen": 38060000, "step": 17605 }, { "epoch": 2.8727569331158236, "grad_norm": 0.07217232882976532, "learning_rate": 0.000994212654595482, "loss": 0.1114, "num_input_tokens_seen": 38070848, "step": 17610 }, { "epoch": 2.873572593800979, "grad_norm": 0.20313020050525665, "learning_rate": 0.0009942018510177351, "loss": 0.1052, "num_input_tokens_seen": 38082016, "step": 17615 }, { "epoch": 2.8743882544861337, "grad_norm": 0.15015803277492523, "learning_rate": 0.000994191037424361, "loss": 0.1398, "num_input_tokens_seen": 38092672, "step": 17620 }, { "epoch": 2.875203915171289, "grad_norm": 0.0243154838681221, "learning_rate": 0.0009941802138155786, "loss": 0.1759, "num_input_tokens_seen": 38103328, "step": 17625 }, { "epoch": 2.8760195758564437, "grad_norm": 0.03959393873810768, "learning_rate": 0.0009941693801916074, "loss": 0.2552, "num_input_tokens_seen": 38114464, "step": 17630 }, { "epoch": 2.8768352365415986, "grad_norm": 0.13363416492938995, "learning_rate": 0.0009941585365526666, "loss": 0.2813, "num_input_tokens_seen": 38124800, "step": 17635 }, { "epoch": 2.877650897226754, "grad_norm": 0.0363851822912693, "learning_rate": 0.0009941476828989762, "loss": 0.1221, "num_input_tokens_seen": 38135456, "step": 17640 }, { "epoch": 2.8784665579119086, "grad_norm": 0.0372467003762722, "learning_rate": 0.0009941368192307562, "loss": 0.0719, "num_input_tokens_seen": 38146432, "step": 17645 }, { "epoch": 2.8792822185970635, "grad_norm": 0.06116746738553047, "learning_rate": 0.0009941259455482267, "loss": 0.1476, "num_input_tokens_seen": 38157952, "step": 17650 }, { "epoch": 2.8800978792822187, "grad_norm": 0.028371773660182953, "learning_rate": 0.0009941150618516079, "loss": 0.1191, "num_input_tokens_seen": 38168224, "step": 17655 }, { "epoch": 2.8809135399673735, "grad_norm": 0.10169167071580887, "learning_rate": 0.0009941041681411206, "loss": 0.1522, "num_input_tokens_seen": 38179104, "step": 17660 }, { "epoch": 2.8817292006525284, "grad_norm": 0.022912686690688133, "learning_rate": 0.0009940932644169858, "loss": 0.0693, "num_input_tokens_seen": 38189536, "step": 17665 }, { "epoch": 2.8825448613376836, "grad_norm": 0.10834155231714249, "learning_rate": 0.000994082350679424, "loss": 0.1066, "num_input_tokens_seen": 38200480, "step": 17670 }, { "epoch": 2.8833605220228384, "grad_norm": 0.021890873089432716, "learning_rate": 0.0009940714269286565, "loss": 0.168, "num_input_tokens_seen": 38211872, "step": 17675 }, { "epoch": 2.8841761827079937, "grad_norm": 0.05110299587249756, "learning_rate": 0.000994060493164905, "loss": 0.0401, "num_input_tokens_seen": 38223680, "step": 17680 }, { "epoch": 2.8849918433931485, "grad_norm": 0.036153409630060196, "learning_rate": 0.0009940495493883906, "loss": 0.0998, "num_input_tokens_seen": 38234464, "step": 17685 }, { "epoch": 2.8858075040783033, "grad_norm": 0.16665154695510864, "learning_rate": 0.0009940385955993353, "loss": 0.0915, "num_input_tokens_seen": 38245952, "step": 17690 }, { "epoch": 2.886623164763458, "grad_norm": 0.18631958961486816, "learning_rate": 0.0009940276317979611, "loss": 0.1663, "num_input_tokens_seen": 38256640, "step": 17695 }, { "epoch": 2.8874388254486134, "grad_norm": 0.028646018356084824, "learning_rate": 0.0009940166579844906, "loss": 0.0803, "num_input_tokens_seen": 38267136, "step": 17700 }, { "epoch": 2.8882544861337682, "grad_norm": 0.2977273166179657, "learning_rate": 0.0009940056741591455, "loss": 0.1029, "num_input_tokens_seen": 38277056, "step": 17705 }, { "epoch": 2.8890701468189235, "grad_norm": 0.01933087781071663, "learning_rate": 0.0009939946803221487, "loss": 0.1755, "num_input_tokens_seen": 38287936, "step": 17710 }, { "epoch": 2.8898858075040783, "grad_norm": 0.09155084192752838, "learning_rate": 0.000993983676473723, "loss": 0.1827, "num_input_tokens_seen": 38299040, "step": 17715 }, { "epoch": 2.890701468189233, "grad_norm": 0.04351414740085602, "learning_rate": 0.0009939726626140917, "loss": 0.0792, "num_input_tokens_seen": 38309696, "step": 17720 }, { "epoch": 2.8915171288743884, "grad_norm": 0.15587954223155975, "learning_rate": 0.0009939616387434776, "loss": 0.0669, "num_input_tokens_seen": 38321568, "step": 17725 }, { "epoch": 2.892332789559543, "grad_norm": 0.08067210018634796, "learning_rate": 0.0009939506048621044, "loss": 0.0425, "num_input_tokens_seen": 38331392, "step": 17730 }, { "epoch": 2.8931484502446985, "grad_norm": 0.3965483605861664, "learning_rate": 0.0009939395609701953, "loss": 0.1558, "num_input_tokens_seen": 38340800, "step": 17735 }, { "epoch": 2.8939641109298533, "grad_norm": 0.06495457887649536, "learning_rate": 0.0009939285070679745, "loss": 0.0168, "num_input_tokens_seen": 38351296, "step": 17740 }, { "epoch": 2.894779771615008, "grad_norm": 0.10801254212856293, "learning_rate": 0.000993917443155666, "loss": 0.0849, "num_input_tokens_seen": 38362304, "step": 17745 }, { "epoch": 2.895595432300163, "grad_norm": 0.006482311058789492, "learning_rate": 0.0009939063692334937, "loss": 0.1352, "num_input_tokens_seen": 38372064, "step": 17750 }, { "epoch": 2.896411092985318, "grad_norm": 0.11562133580446243, "learning_rate": 0.0009938952853016825, "loss": 0.1764, "num_input_tokens_seen": 38382272, "step": 17755 }, { "epoch": 2.897226753670473, "grad_norm": 0.0724702775478363, "learning_rate": 0.0009938841913604568, "loss": 0.0763, "num_input_tokens_seen": 38392384, "step": 17760 }, { "epoch": 2.8980424143556283, "grad_norm": 0.029041165485978127, "learning_rate": 0.0009938730874100412, "loss": 0.0394, "num_input_tokens_seen": 38403200, "step": 17765 }, { "epoch": 2.898858075040783, "grad_norm": 0.025574803352355957, "learning_rate": 0.0009938619734506612, "loss": 0.2273, "num_input_tokens_seen": 38413408, "step": 17770 }, { "epoch": 2.899673735725938, "grad_norm": 0.012726670131087303, "learning_rate": 0.0009938508494825417, "loss": 0.1231, "num_input_tokens_seen": 38423904, "step": 17775 }, { "epoch": 2.9004893964110927, "grad_norm": 0.023426564410328865, "learning_rate": 0.0009938397155059083, "loss": 0.1429, "num_input_tokens_seen": 38435584, "step": 17780 }, { "epoch": 2.901305057096248, "grad_norm": 0.1236015111207962, "learning_rate": 0.0009938285715209866, "loss": 0.1968, "num_input_tokens_seen": 38446272, "step": 17785 }, { "epoch": 2.902120717781403, "grad_norm": 0.11476442217826843, "learning_rate": 0.0009938174175280023, "loss": 0.1609, "num_input_tokens_seen": 38457312, "step": 17790 }, { "epoch": 2.902936378466558, "grad_norm": 0.20464342832565308, "learning_rate": 0.0009938062535271817, "loss": 0.1929, "num_input_tokens_seen": 38467616, "step": 17795 }, { "epoch": 2.903752039151713, "grad_norm": 0.18224021792411804, "learning_rate": 0.0009937950795187508, "loss": 0.1864, "num_input_tokens_seen": 38478528, "step": 17800 }, { "epoch": 2.9045676998368677, "grad_norm": 0.07501034438610077, "learning_rate": 0.0009937838955029362, "loss": 0.1231, "num_input_tokens_seen": 38490208, "step": 17805 }, { "epoch": 2.905383360522023, "grad_norm": 0.020777180790901184, "learning_rate": 0.0009937727014799646, "loss": 0.0777, "num_input_tokens_seen": 38500672, "step": 17810 }, { "epoch": 2.9061990212071778, "grad_norm": 0.11070007085800171, "learning_rate": 0.0009937614974500628, "loss": 0.1563, "num_input_tokens_seen": 38511296, "step": 17815 }, { "epoch": 2.907014681892333, "grad_norm": 0.031207676976919174, "learning_rate": 0.000993750283413458, "loss": 0.1357, "num_input_tokens_seen": 38521344, "step": 17820 }, { "epoch": 2.907830342577488, "grad_norm": 0.012583895586431026, "learning_rate": 0.0009937390593703773, "loss": 0.0796, "num_input_tokens_seen": 38531296, "step": 17825 }, { "epoch": 2.9086460032626427, "grad_norm": 0.10093329101800919, "learning_rate": 0.000993727825321048, "loss": 0.0785, "num_input_tokens_seen": 38542912, "step": 17830 }, { "epoch": 2.9094616639477975, "grad_norm": 0.13077743351459503, "learning_rate": 0.0009937165812656983, "loss": 0.0848, "num_input_tokens_seen": 38554848, "step": 17835 }, { "epoch": 2.9102773246329527, "grad_norm": 0.021879076957702637, "learning_rate": 0.0009937053272045554, "loss": 0.0666, "num_input_tokens_seen": 38565280, "step": 17840 }, { "epoch": 2.9110929853181076, "grad_norm": 0.035172972828149796, "learning_rate": 0.000993694063137848, "loss": 0.1404, "num_input_tokens_seen": 38575168, "step": 17845 }, { "epoch": 2.911908646003263, "grad_norm": 0.27714836597442627, "learning_rate": 0.000993682789065804, "loss": 0.1276, "num_input_tokens_seen": 38586624, "step": 17850 }, { "epoch": 2.9127243066884176, "grad_norm": 0.010750222019851208, "learning_rate": 0.0009936715049886522, "loss": 0.1634, "num_input_tokens_seen": 38596896, "step": 17855 }, { "epoch": 2.9135399673735725, "grad_norm": 0.08408083766698837, "learning_rate": 0.0009936602109066209, "loss": 0.0561, "num_input_tokens_seen": 38609152, "step": 17860 }, { "epoch": 2.9143556280587277, "grad_norm": 0.07528946548700333, "learning_rate": 0.0009936489068199392, "loss": 0.071, "num_input_tokens_seen": 38619776, "step": 17865 }, { "epoch": 2.9151712887438825, "grad_norm": 0.05108056589961052, "learning_rate": 0.0009936375927288362, "loss": 0.1126, "num_input_tokens_seen": 38630752, "step": 17870 }, { "epoch": 2.9159869494290374, "grad_norm": 0.08132991939783096, "learning_rate": 0.000993626268633541, "loss": 0.1087, "num_input_tokens_seen": 38642112, "step": 17875 }, { "epoch": 2.9168026101141926, "grad_norm": 0.031875815242528915, "learning_rate": 0.0009936149345342834, "loss": 0.0846, "num_input_tokens_seen": 38652096, "step": 17880 }, { "epoch": 2.9176182707993474, "grad_norm": 0.08545435965061188, "learning_rate": 0.000993603590431293, "loss": 0.0701, "num_input_tokens_seen": 38663456, "step": 17885 }, { "epoch": 2.9184339314845023, "grad_norm": 0.015088371001183987, "learning_rate": 0.0009935922363247995, "loss": 0.0486, "num_input_tokens_seen": 38674368, "step": 17890 }, { "epoch": 2.9192495921696575, "grad_norm": 0.0254862941801548, "learning_rate": 0.0009935808722150333, "loss": 0.0544, "num_input_tokens_seen": 38685888, "step": 17895 }, { "epoch": 2.9200652528548123, "grad_norm": 0.017295166850090027, "learning_rate": 0.0009935694981022245, "loss": 0.0302, "num_input_tokens_seen": 38697408, "step": 17900 }, { "epoch": 2.9208809135399676, "grad_norm": 0.06552346795797348, "learning_rate": 0.0009935581139866039, "loss": 0.1045, "num_input_tokens_seen": 38707616, "step": 17905 }, { "epoch": 2.9216965742251224, "grad_norm": 0.02178419567644596, "learning_rate": 0.0009935467198684015, "loss": 0.1506, "num_input_tokens_seen": 38719840, "step": 17910 }, { "epoch": 2.9225122349102772, "grad_norm": 0.0072307041846215725, "learning_rate": 0.0009935353157478493, "loss": 0.1003, "num_input_tokens_seen": 38732064, "step": 17915 }, { "epoch": 2.923327895595432, "grad_norm": 0.09286753833293915, "learning_rate": 0.0009935239016251776, "loss": 0.0853, "num_input_tokens_seen": 38743808, "step": 17920 }, { "epoch": 2.9241435562805873, "grad_norm": 0.023326946422457695, "learning_rate": 0.0009935124775006178, "loss": 0.0571, "num_input_tokens_seen": 38755264, "step": 17925 }, { "epoch": 2.924959216965742, "grad_norm": 0.32947757840156555, "learning_rate": 0.0009935010433744017, "loss": 0.1625, "num_input_tokens_seen": 38766944, "step": 17930 }, { "epoch": 2.9257748776508974, "grad_norm": 0.06605410575866699, "learning_rate": 0.000993489599246761, "loss": 0.1863, "num_input_tokens_seen": 38778304, "step": 17935 }, { "epoch": 2.926590538336052, "grad_norm": 0.13709475100040436, "learning_rate": 0.0009934781451179273, "loss": 0.0641, "num_input_tokens_seen": 38790816, "step": 17940 }, { "epoch": 2.927406199021207, "grad_norm": 0.2378200739622116, "learning_rate": 0.000993466680988133, "loss": 0.1579, "num_input_tokens_seen": 38801088, "step": 17945 }, { "epoch": 2.9282218597063623, "grad_norm": 0.036661747843027115, "learning_rate": 0.0009934552068576105, "loss": 0.0297, "num_input_tokens_seen": 38811296, "step": 17950 }, { "epoch": 2.929037520391517, "grad_norm": 0.018623948097229004, "learning_rate": 0.0009934437227265924, "loss": 0.1375, "num_input_tokens_seen": 38822048, "step": 17955 }, { "epoch": 2.9298531810766724, "grad_norm": 0.15882422029972076, "learning_rate": 0.0009934322285953111, "loss": 0.0939, "num_input_tokens_seen": 38831968, "step": 17960 }, { "epoch": 2.930668841761827, "grad_norm": 0.0032852909062057734, "learning_rate": 0.0009934207244639997, "loss": 0.1332, "num_input_tokens_seen": 38841440, "step": 17965 }, { "epoch": 2.931484502446982, "grad_norm": 0.2479127198457718, "learning_rate": 0.0009934092103328915, "loss": 0.1153, "num_input_tokens_seen": 38851168, "step": 17970 }, { "epoch": 2.932300163132137, "grad_norm": 0.010026328265666962, "learning_rate": 0.0009933976862022196, "loss": 0.1458, "num_input_tokens_seen": 38862848, "step": 17975 }, { "epoch": 2.933115823817292, "grad_norm": 0.014436143450438976, "learning_rate": 0.0009933861520722176, "loss": 0.066, "num_input_tokens_seen": 38874176, "step": 17980 }, { "epoch": 2.933931484502447, "grad_norm": 0.028659898787736893, "learning_rate": 0.0009933746079431195, "loss": 0.1446, "num_input_tokens_seen": 38885056, "step": 17985 }, { "epoch": 2.934747145187602, "grad_norm": 0.02407947927713394, "learning_rate": 0.000993363053815159, "loss": 0.0667, "num_input_tokens_seen": 38895968, "step": 17990 }, { "epoch": 2.935562805872757, "grad_norm": 0.1150076687335968, "learning_rate": 0.0009933514896885705, "loss": 0.2417, "num_input_tokens_seen": 38907584, "step": 17995 }, { "epoch": 2.936378466557912, "grad_norm": 0.15142081677913666, "learning_rate": 0.000993339915563588, "loss": 0.1271, "num_input_tokens_seen": 38919296, "step": 18000 }, { "epoch": 2.9371941272430666, "grad_norm": 0.36399489641189575, "learning_rate": 0.0009933283314404462, "loss": 0.1575, "num_input_tokens_seen": 38930016, "step": 18005 }, { "epoch": 2.938009787928222, "grad_norm": 0.17013728618621826, "learning_rate": 0.0009933167373193802, "loss": 0.0984, "num_input_tokens_seen": 38941056, "step": 18010 }, { "epoch": 2.9388254486133767, "grad_norm": 0.1846058964729309, "learning_rate": 0.0009933051332006245, "loss": 0.1026, "num_input_tokens_seen": 38952480, "step": 18015 }, { "epoch": 2.939641109298532, "grad_norm": 0.006210431456565857, "learning_rate": 0.0009932935190844145, "loss": 0.038, "num_input_tokens_seen": 38962272, "step": 18020 }, { "epoch": 2.9404567699836868, "grad_norm": 0.243395134806633, "learning_rate": 0.0009932818949709855, "loss": 0.2279, "num_input_tokens_seen": 38971712, "step": 18025 }, { "epoch": 2.9412724306688416, "grad_norm": 0.24012702703475952, "learning_rate": 0.0009932702608605733, "loss": 0.1315, "num_input_tokens_seen": 38982688, "step": 18030 }, { "epoch": 2.942088091353997, "grad_norm": 0.024320699274539948, "learning_rate": 0.0009932586167534134, "loss": 0.138, "num_input_tokens_seen": 38993152, "step": 18035 }, { "epoch": 2.9429037520391517, "grad_norm": 0.23949933052062988, "learning_rate": 0.0009932469626497418, "loss": 0.0934, "num_input_tokens_seen": 39004640, "step": 18040 }, { "epoch": 2.943719412724307, "grad_norm": 0.1403769850730896, "learning_rate": 0.000993235298549795, "loss": 0.1009, "num_input_tokens_seen": 39015040, "step": 18045 }, { "epoch": 2.9445350734094617, "grad_norm": 0.42424431443214417, "learning_rate": 0.0009932236244538089, "loss": 0.217, "num_input_tokens_seen": 39025792, "step": 18050 }, { "epoch": 2.9453507340946166, "grad_norm": 0.2443961650133133, "learning_rate": 0.0009932119403620206, "loss": 0.1784, "num_input_tokens_seen": 39036512, "step": 18055 }, { "epoch": 2.9461663947797714, "grad_norm": 0.1767474114894867, "learning_rate": 0.0009932002462746665, "loss": 0.1867, "num_input_tokens_seen": 39048224, "step": 18060 }, { "epoch": 2.9469820554649266, "grad_norm": 0.1322534829378128, "learning_rate": 0.0009931885421919837, "loss": 0.0662, "num_input_tokens_seen": 39060288, "step": 18065 }, { "epoch": 2.9477977161500815, "grad_norm": 0.07639496773481369, "learning_rate": 0.0009931768281142095, "loss": 0.0996, "num_input_tokens_seen": 39071360, "step": 18070 }, { "epoch": 2.9486133768352367, "grad_norm": 0.22929255664348602, "learning_rate": 0.0009931651040415812, "loss": 0.2103, "num_input_tokens_seen": 39082336, "step": 18075 }, { "epoch": 2.9494290375203915, "grad_norm": 0.10765232890844345, "learning_rate": 0.0009931533699743364, "loss": 0.1251, "num_input_tokens_seen": 39093984, "step": 18080 }, { "epoch": 2.9502446982055464, "grad_norm": 0.08604505658149719, "learning_rate": 0.000993141625912713, "loss": 0.0533, "num_input_tokens_seen": 39104992, "step": 18085 }, { "epoch": 2.9510603588907016, "grad_norm": 0.42062729597091675, "learning_rate": 0.0009931298718569492, "loss": 0.1695, "num_input_tokens_seen": 39115840, "step": 18090 }, { "epoch": 2.9518760195758564, "grad_norm": 0.1661776304244995, "learning_rate": 0.0009931181078072827, "loss": 0.1415, "num_input_tokens_seen": 39126784, "step": 18095 }, { "epoch": 2.9526916802610113, "grad_norm": 0.030810121446847916, "learning_rate": 0.0009931063337639521, "loss": 0.0517, "num_input_tokens_seen": 39136448, "step": 18100 }, { "epoch": 2.9535073409461665, "grad_norm": 0.12106618285179138, "learning_rate": 0.0009930945497271964, "loss": 0.0673, "num_input_tokens_seen": 39146688, "step": 18105 }, { "epoch": 2.9543230016313213, "grad_norm": 0.07541876286268234, "learning_rate": 0.0009930827556972539, "loss": 0.233, "num_input_tokens_seen": 39157568, "step": 18110 }, { "epoch": 2.955138662316476, "grad_norm": 0.25600165128707886, "learning_rate": 0.0009930709516743639, "loss": 0.162, "num_input_tokens_seen": 39168960, "step": 18115 }, { "epoch": 2.9559543230016314, "grad_norm": 0.02245187573134899, "learning_rate": 0.0009930591376587654, "loss": 0.2354, "num_input_tokens_seen": 39178848, "step": 18120 }, { "epoch": 2.9567699836867862, "grad_norm": 0.21825523674488068, "learning_rate": 0.0009930473136506982, "loss": 0.1749, "num_input_tokens_seen": 39190432, "step": 18125 }, { "epoch": 2.9575856443719415, "grad_norm": 0.10119245946407318, "learning_rate": 0.0009930354796504018, "loss": 0.0423, "num_input_tokens_seen": 39200896, "step": 18130 }, { "epoch": 2.9584013050570963, "grad_norm": 0.021194253116846085, "learning_rate": 0.0009930236356581158, "loss": 0.2261, "num_input_tokens_seen": 39211872, "step": 18135 }, { "epoch": 2.959216965742251, "grad_norm": 0.06642644107341766, "learning_rate": 0.0009930117816740803, "loss": 0.0689, "num_input_tokens_seen": 39221472, "step": 18140 }, { "epoch": 2.960032626427406, "grad_norm": 0.014469392597675323, "learning_rate": 0.0009929999176985355, "loss": 0.1492, "num_input_tokens_seen": 39232832, "step": 18145 }, { "epoch": 2.960848287112561, "grad_norm": 0.15131227672100067, "learning_rate": 0.0009929880437317222, "loss": 0.0935, "num_input_tokens_seen": 39243264, "step": 18150 }, { "epoch": 2.961663947797716, "grad_norm": 0.16809213161468506, "learning_rate": 0.0009929761597738808, "loss": 0.3091, "num_input_tokens_seen": 39254784, "step": 18155 }, { "epoch": 2.9624796084828713, "grad_norm": 0.07613056898117065, "learning_rate": 0.000992964265825252, "loss": 0.1164, "num_input_tokens_seen": 39264608, "step": 18160 }, { "epoch": 2.963295269168026, "grad_norm": 0.1471426635980606, "learning_rate": 0.0009929523618860772, "loss": 0.1588, "num_input_tokens_seen": 39274880, "step": 18165 }, { "epoch": 2.964110929853181, "grad_norm": 0.18869291245937347, "learning_rate": 0.000992940447956597, "loss": 0.2073, "num_input_tokens_seen": 39285696, "step": 18170 }, { "epoch": 2.964926590538336, "grad_norm": 0.0686211809515953, "learning_rate": 0.000992928524037054, "loss": 0.0567, "num_input_tokens_seen": 39297728, "step": 18175 }, { "epoch": 2.965742251223491, "grad_norm": 0.09575016796588898, "learning_rate": 0.0009929165901276884, "loss": 0.1175, "num_input_tokens_seen": 39309248, "step": 18180 }, { "epoch": 2.9665579119086463, "grad_norm": 0.03930651396512985, "learning_rate": 0.000992904646228743, "loss": 0.0464, "num_input_tokens_seen": 39318848, "step": 18185 }, { "epoch": 2.967373572593801, "grad_norm": 0.018661662936210632, "learning_rate": 0.00099289269234046, "loss": 0.0403, "num_input_tokens_seen": 39329504, "step": 18190 }, { "epoch": 2.968189233278956, "grad_norm": 0.17350107431411743, "learning_rate": 0.000992880728463081, "loss": 0.1852, "num_input_tokens_seen": 39339744, "step": 18195 }, { "epoch": 2.9690048939641107, "grad_norm": 0.3067820072174072, "learning_rate": 0.0009928687545968486, "loss": 0.1869, "num_input_tokens_seen": 39349600, "step": 18200 }, { "epoch": 2.969820554649266, "grad_norm": 0.24880094826221466, "learning_rate": 0.0009928567707420059, "loss": 0.213, "num_input_tokens_seen": 39360480, "step": 18205 }, { "epoch": 2.970636215334421, "grad_norm": 0.05344460904598236, "learning_rate": 0.0009928447768987956, "loss": 0.1125, "num_input_tokens_seen": 39371168, "step": 18210 }, { "epoch": 2.971451876019576, "grad_norm": 0.06595193594694138, "learning_rate": 0.0009928327730674604, "loss": 0.0714, "num_input_tokens_seen": 39382272, "step": 18215 }, { "epoch": 2.972267536704731, "grad_norm": 0.07218914479017258, "learning_rate": 0.000992820759248244, "loss": 0.2494, "num_input_tokens_seen": 39392544, "step": 18220 }, { "epoch": 2.9730831973898857, "grad_norm": 0.23639288544654846, "learning_rate": 0.00099280873544139, "loss": 0.2001, "num_input_tokens_seen": 39401760, "step": 18225 }, { "epoch": 2.9738988580750405, "grad_norm": 0.1368018090724945, "learning_rate": 0.0009927967016471414, "loss": 0.1813, "num_input_tokens_seen": 39411904, "step": 18230 }, { "epoch": 2.9747145187601958, "grad_norm": 0.029314961284399033, "learning_rate": 0.0009927846578657426, "loss": 0.1154, "num_input_tokens_seen": 39422368, "step": 18235 }, { "epoch": 2.9755301794453506, "grad_norm": 0.07671564072370529, "learning_rate": 0.0009927726040974377, "loss": 0.1119, "num_input_tokens_seen": 39432672, "step": 18240 }, { "epoch": 2.976345840130506, "grad_norm": 0.01595219224691391, "learning_rate": 0.0009927605403424707, "loss": 0.0523, "num_input_tokens_seen": 39442944, "step": 18245 }, { "epoch": 2.9771615008156607, "grad_norm": 0.26665130257606506, "learning_rate": 0.0009927484666010862, "loss": 0.3434, "num_input_tokens_seen": 39453920, "step": 18250 }, { "epoch": 2.9779771615008155, "grad_norm": 0.01717739924788475, "learning_rate": 0.000992736382873529, "loss": 0.0542, "num_input_tokens_seen": 39464768, "step": 18255 }, { "epoch": 2.9787928221859707, "grad_norm": 0.08836905658245087, "learning_rate": 0.000992724289160044, "loss": 0.123, "num_input_tokens_seen": 39476544, "step": 18260 }, { "epoch": 2.9796084828711256, "grad_norm": 0.18800336122512817, "learning_rate": 0.000992712185460876, "loss": 0.0685, "num_input_tokens_seen": 39487008, "step": 18265 }, { "epoch": 2.980424143556281, "grad_norm": 0.21052786707878113, "learning_rate": 0.0009927000717762707, "loss": 0.2213, "num_input_tokens_seen": 39497408, "step": 18270 }, { "epoch": 2.9812398042414356, "grad_norm": 0.0330355167388916, "learning_rate": 0.0009926879481064734, "loss": 0.0982, "num_input_tokens_seen": 39506400, "step": 18275 }, { "epoch": 2.9820554649265905, "grad_norm": 0.04376707971096039, "learning_rate": 0.0009926758144517297, "loss": 0.1352, "num_input_tokens_seen": 39517376, "step": 18280 }, { "epoch": 2.9828711256117453, "grad_norm": 0.03611741214990616, "learning_rate": 0.000992663670812286, "loss": 0.0615, "num_input_tokens_seen": 39528096, "step": 18285 }, { "epoch": 2.9836867862969005, "grad_norm": 0.10370776802301407, "learning_rate": 0.0009926515171883874, "loss": 0.0593, "num_input_tokens_seen": 39539552, "step": 18290 }, { "epoch": 2.9845024469820554, "grad_norm": 0.006939777173101902, "learning_rate": 0.0009926393535802812, "loss": 0.0796, "num_input_tokens_seen": 39550560, "step": 18295 }, { "epoch": 2.9853181076672106, "grad_norm": 0.042476359754800797, "learning_rate": 0.0009926271799882134, "loss": 0.1941, "num_input_tokens_seen": 39561728, "step": 18300 }, { "epoch": 2.9861337683523654, "grad_norm": 0.042153820395469666, "learning_rate": 0.000992614996412431, "loss": 0.1872, "num_input_tokens_seen": 39572608, "step": 18305 }, { "epoch": 2.9869494290375203, "grad_norm": 0.1687641739845276, "learning_rate": 0.0009926028028531808, "loss": 0.1653, "num_input_tokens_seen": 39583008, "step": 18310 }, { "epoch": 2.9877650897226755, "grad_norm": 0.09225862473249435, "learning_rate": 0.0009925905993107098, "loss": 0.0366, "num_input_tokens_seen": 39594688, "step": 18315 }, { "epoch": 2.9885807504078303, "grad_norm": 0.3248561918735504, "learning_rate": 0.0009925783857852653, "loss": 0.1379, "num_input_tokens_seen": 39605920, "step": 18320 }, { "epoch": 2.9893964110929856, "grad_norm": 0.019485827535390854, "learning_rate": 0.0009925661622770953, "loss": 0.2786, "num_input_tokens_seen": 39617088, "step": 18325 }, { "epoch": 2.9902120717781404, "grad_norm": 0.09611544013023376, "learning_rate": 0.0009925539287864466, "loss": 0.1963, "num_input_tokens_seen": 39627232, "step": 18330 }, { "epoch": 2.9910277324632952, "grad_norm": 0.09955920279026031, "learning_rate": 0.000992541685313568, "loss": 0.0978, "num_input_tokens_seen": 39637600, "step": 18335 }, { "epoch": 2.99184339314845, "grad_norm": 0.017093650996685028, "learning_rate": 0.0009925294318587075, "loss": 0.0625, "num_input_tokens_seen": 39648192, "step": 18340 }, { "epoch": 2.9926590538336053, "grad_norm": 0.02634618431329727, "learning_rate": 0.000992517168422113, "loss": 0.1107, "num_input_tokens_seen": 39658464, "step": 18345 }, { "epoch": 2.99347471451876, "grad_norm": 0.043731238692998886, "learning_rate": 0.0009925048950040333, "loss": 0.175, "num_input_tokens_seen": 39668800, "step": 18350 }, { "epoch": 2.9942903752039154, "grad_norm": 0.027921607717871666, "learning_rate": 0.000992492611604717, "loss": 0.108, "num_input_tokens_seen": 39680224, "step": 18355 }, { "epoch": 2.99510603588907, "grad_norm": 0.07438381016254425, "learning_rate": 0.0009924803182244134, "loss": 0.0632, "num_input_tokens_seen": 39691584, "step": 18360 }, { "epoch": 2.995921696574225, "grad_norm": 0.13787518441677094, "learning_rate": 0.0009924680148633714, "loss": 0.0801, "num_input_tokens_seen": 39702816, "step": 18365 }, { "epoch": 2.99673735725938, "grad_norm": 0.009611738845705986, "learning_rate": 0.0009924557015218401, "loss": 0.0202, "num_input_tokens_seen": 39713504, "step": 18370 }, { "epoch": 2.997553017944535, "grad_norm": 0.1483236849308014, "learning_rate": 0.0009924433782000695, "loss": 0.0886, "num_input_tokens_seen": 39724320, "step": 18375 }, { "epoch": 2.99836867862969, "grad_norm": 0.015144513919949532, "learning_rate": 0.000992431044898309, "loss": 0.0932, "num_input_tokens_seen": 39735648, "step": 18380 }, { "epoch": 2.999184339314845, "grad_norm": 0.2547537684440613, "learning_rate": 0.0009924187016168086, "loss": 0.1138, "num_input_tokens_seen": 39745408, "step": 18385 }, { "epoch": 3.0, "grad_norm": 0.01212351769208908, "learning_rate": 0.0009924063483558187, "loss": 0.0511, "num_input_tokens_seen": 39755376, "step": 18390 }, { "epoch": 3.0, "eval_loss": 0.12335383147001266, "eval_runtime": 103.2218, "eval_samples_per_second": 26.399, "eval_steps_per_second": 6.607, "num_input_tokens_seen": 39755376, "step": 18390 }, { "epoch": 3.000815660685155, "grad_norm": 0.014516279101371765, "learning_rate": 0.0009923939851155896, "loss": 0.0607, "num_input_tokens_seen": 39765936, "step": 18395 }, { "epoch": 3.00163132137031, "grad_norm": 0.01939479261636734, "learning_rate": 0.0009923816118963715, "loss": 0.1182, "num_input_tokens_seen": 39777104, "step": 18400 }, { "epoch": 3.002446982055465, "grad_norm": 0.276183158159256, "learning_rate": 0.0009923692286984156, "loss": 0.3429, "num_input_tokens_seen": 39787280, "step": 18405 }, { "epoch": 3.0032626427406197, "grad_norm": 0.021763058379292488, "learning_rate": 0.0009923568355219726, "loss": 0.1081, "num_input_tokens_seen": 39797328, "step": 18410 }, { "epoch": 3.004078303425775, "grad_norm": 0.033986590802669525, "learning_rate": 0.0009923444323672937, "loss": 0.0204, "num_input_tokens_seen": 39809424, "step": 18415 }, { "epoch": 3.00489396411093, "grad_norm": 0.12651905417442322, "learning_rate": 0.0009923320192346302, "loss": 0.162, "num_input_tokens_seen": 39820464, "step": 18420 }, { "epoch": 3.0057096247960846, "grad_norm": 0.07450538873672485, "learning_rate": 0.000992319596124234, "loss": 0.0964, "num_input_tokens_seen": 39831152, "step": 18425 }, { "epoch": 3.00652528548124, "grad_norm": 0.13451939821243286, "learning_rate": 0.0009923071630363563, "loss": 0.1113, "num_input_tokens_seen": 39842480, "step": 18430 }, { "epoch": 3.0073409461663947, "grad_norm": 0.014959607273340225, "learning_rate": 0.0009922947199712496, "loss": 0.0608, "num_input_tokens_seen": 39852240, "step": 18435 }, { "epoch": 3.00815660685155, "grad_norm": 0.38718149065971375, "learning_rate": 0.0009922822669291658, "loss": 0.2369, "num_input_tokens_seen": 39863440, "step": 18440 }, { "epoch": 3.0089722675367048, "grad_norm": 0.020501941442489624, "learning_rate": 0.0009922698039103574, "loss": 0.0728, "num_input_tokens_seen": 39874032, "step": 18445 }, { "epoch": 3.0097879282218596, "grad_norm": 0.07462462037801743, "learning_rate": 0.000992257330915077, "loss": 0.0733, "num_input_tokens_seen": 39885392, "step": 18450 }, { "epoch": 3.010603588907015, "grad_norm": 0.030257759615778923, "learning_rate": 0.0009922448479435773, "loss": 0.2009, "num_input_tokens_seen": 39896272, "step": 18455 }, { "epoch": 3.0114192495921697, "grad_norm": 0.07138212770223618, "learning_rate": 0.0009922323549961113, "loss": 0.0512, "num_input_tokens_seen": 39907664, "step": 18460 }, { "epoch": 3.0122349102773245, "grad_norm": 0.035266272723674774, "learning_rate": 0.000992219852072932, "loss": 0.0572, "num_input_tokens_seen": 39918992, "step": 18465 }, { "epoch": 3.0130505709624797, "grad_norm": 0.13974876701831818, "learning_rate": 0.0009922073391742932, "loss": 0.1032, "num_input_tokens_seen": 39929424, "step": 18470 }, { "epoch": 3.0138662316476346, "grad_norm": 0.23525859415531158, "learning_rate": 0.0009921948163004483, "loss": 0.1119, "num_input_tokens_seen": 39939856, "step": 18475 }, { "epoch": 3.0146818923327894, "grad_norm": 0.06922031193971634, "learning_rate": 0.000992182283451651, "loss": 0.0607, "num_input_tokens_seen": 39949392, "step": 18480 }, { "epoch": 3.0154975530179446, "grad_norm": 0.007163001224398613, "learning_rate": 0.0009921697406281554, "loss": 0.1979, "num_input_tokens_seen": 39959056, "step": 18485 }, { "epoch": 3.0163132137030995, "grad_norm": 0.008242499083280563, "learning_rate": 0.0009921571878302154, "loss": 0.0617, "num_input_tokens_seen": 39969360, "step": 18490 }, { "epoch": 3.0171288743882543, "grad_norm": 0.04228367283940315, "learning_rate": 0.0009921446250580857, "loss": 0.0403, "num_input_tokens_seen": 39979664, "step": 18495 }, { "epoch": 3.0179445350734095, "grad_norm": 0.03732677921652794, "learning_rate": 0.000992132052312021, "loss": 0.2385, "num_input_tokens_seen": 39990480, "step": 18500 }, { "epoch": 3.0187601957585644, "grad_norm": 0.12380467355251312, "learning_rate": 0.000992119469592276, "loss": 0.1255, "num_input_tokens_seen": 40000976, "step": 18505 }, { "epoch": 3.0195758564437196, "grad_norm": 0.18119105696678162, "learning_rate": 0.0009921068768991056, "loss": 0.0736, "num_input_tokens_seen": 40011472, "step": 18510 }, { "epoch": 3.0203915171288744, "grad_norm": 0.12756581604480743, "learning_rate": 0.0009920942742327648, "loss": 0.0755, "num_input_tokens_seen": 40022096, "step": 18515 }, { "epoch": 3.0212071778140293, "grad_norm": 0.03765145689249039, "learning_rate": 0.0009920816615935095, "loss": 0.1434, "num_input_tokens_seen": 40032848, "step": 18520 }, { "epoch": 3.0220228384991845, "grad_norm": 0.009554882533848286, "learning_rate": 0.000992069038981595, "loss": 0.0725, "num_input_tokens_seen": 40043856, "step": 18525 }, { "epoch": 3.0228384991843393, "grad_norm": 0.04768422991037369, "learning_rate": 0.0009920564063972772, "loss": 0.0575, "num_input_tokens_seen": 40053648, "step": 18530 }, { "epoch": 3.023654159869494, "grad_norm": 0.24854423105716705, "learning_rate": 0.0009920437638408122, "loss": 0.0433, "num_input_tokens_seen": 40064944, "step": 18535 }, { "epoch": 3.0244698205546494, "grad_norm": 0.20471757650375366, "learning_rate": 0.000992031111312456, "loss": 0.1138, "num_input_tokens_seen": 40075536, "step": 18540 }, { "epoch": 3.0252854812398042, "grad_norm": 0.04740242287516594, "learning_rate": 0.0009920184488124654, "loss": 0.2227, "num_input_tokens_seen": 40085712, "step": 18545 }, { "epoch": 3.026101141924959, "grad_norm": 0.01797325722873211, "learning_rate": 0.0009920057763410962, "loss": 0.0672, "num_input_tokens_seen": 40096688, "step": 18550 }, { "epoch": 3.0269168026101143, "grad_norm": 0.07381663471460342, "learning_rate": 0.0009919930938986064, "loss": 0.1917, "num_input_tokens_seen": 40107440, "step": 18555 }, { "epoch": 3.027732463295269, "grad_norm": 0.21422551572322845, "learning_rate": 0.000991980401485252, "loss": 0.103, "num_input_tokens_seen": 40119216, "step": 18560 }, { "epoch": 3.028548123980424, "grad_norm": 0.21496577560901642, "learning_rate": 0.000991967699101291, "loss": 0.109, "num_input_tokens_seen": 40130672, "step": 18565 }, { "epoch": 3.029363784665579, "grad_norm": 0.04606503248214722, "learning_rate": 0.00099195498674698, "loss": 0.0676, "num_input_tokens_seen": 40142032, "step": 18570 }, { "epoch": 3.030179445350734, "grad_norm": 0.00870492309331894, "learning_rate": 0.0009919422644225776, "loss": 0.11, "num_input_tokens_seen": 40152528, "step": 18575 }, { "epoch": 3.0309951060358893, "grad_norm": 0.023363051936030388, "learning_rate": 0.0009919295321283409, "loss": 0.1298, "num_input_tokens_seen": 40162064, "step": 18580 }, { "epoch": 3.031810766721044, "grad_norm": 0.08522692322731018, "learning_rate": 0.0009919167898645282, "loss": 0.116, "num_input_tokens_seen": 40173872, "step": 18585 }, { "epoch": 3.032626427406199, "grad_norm": 0.014564625918865204, "learning_rate": 0.0009919040376313976, "loss": 0.0727, "num_input_tokens_seen": 40184976, "step": 18590 }, { "epoch": 3.033442088091354, "grad_norm": 0.09863422065973282, "learning_rate": 0.0009918912754292078, "loss": 0.1579, "num_input_tokens_seen": 40196720, "step": 18595 }, { "epoch": 3.034257748776509, "grad_norm": 0.168905109167099, "learning_rate": 0.0009918785032582173, "loss": 0.0404, "num_input_tokens_seen": 40207344, "step": 18600 }, { "epoch": 3.035073409461664, "grad_norm": 0.07370392233133316, "learning_rate": 0.000991865721118685, "loss": 0.0626, "num_input_tokens_seen": 40219632, "step": 18605 }, { "epoch": 3.035889070146819, "grad_norm": 0.12834832072257996, "learning_rate": 0.0009918529290108696, "loss": 0.0315, "num_input_tokens_seen": 40230896, "step": 18610 }, { "epoch": 3.036704730831974, "grad_norm": 0.18769440054893494, "learning_rate": 0.000991840126935031, "loss": 0.0714, "num_input_tokens_seen": 40241264, "step": 18615 }, { "epoch": 3.0375203915171287, "grad_norm": 0.029713381081819534, "learning_rate": 0.000991827314891428, "loss": 0.1725, "num_input_tokens_seen": 40252272, "step": 18620 }, { "epoch": 3.038336052202284, "grad_norm": 0.25868767499923706, "learning_rate": 0.0009918144928803205, "loss": 0.1248, "num_input_tokens_seen": 40262768, "step": 18625 }, { "epoch": 3.039151712887439, "grad_norm": 0.02136482112109661, "learning_rate": 0.0009918016609019686, "loss": 0.0209, "num_input_tokens_seen": 40273552, "step": 18630 }, { "epoch": 3.0399673735725936, "grad_norm": 0.19516494870185852, "learning_rate": 0.0009917888189566323, "loss": 0.2173, "num_input_tokens_seen": 40283664, "step": 18635 }, { "epoch": 3.040783034257749, "grad_norm": 0.005416174419224262, "learning_rate": 0.0009917759670445712, "loss": 0.0397, "num_input_tokens_seen": 40293904, "step": 18640 }, { "epoch": 3.0415986949429037, "grad_norm": 0.0065343305468559265, "learning_rate": 0.0009917631051660468, "loss": 0.0655, "num_input_tokens_seen": 40304912, "step": 18645 }, { "epoch": 3.0424143556280585, "grad_norm": 0.034539107233285904, "learning_rate": 0.0009917502333213189, "loss": 0.1223, "num_input_tokens_seen": 40315856, "step": 18650 }, { "epoch": 3.0432300163132138, "grad_norm": 0.008732376620173454, "learning_rate": 0.0009917373515106486, "loss": 0.0421, "num_input_tokens_seen": 40325936, "step": 18655 }, { "epoch": 3.0440456769983686, "grad_norm": 0.1251506805419922, "learning_rate": 0.0009917244597342973, "loss": 0.0945, "num_input_tokens_seen": 40336880, "step": 18660 }, { "epoch": 3.044861337683524, "grad_norm": 0.009798316285014153, "learning_rate": 0.000991711557992526, "loss": 0.0674, "num_input_tokens_seen": 40347568, "step": 18665 }, { "epoch": 3.0456769983686787, "grad_norm": 0.015984924510121346, "learning_rate": 0.000991698646285596, "loss": 0.0855, "num_input_tokens_seen": 40358736, "step": 18670 }, { "epoch": 3.0464926590538335, "grad_norm": 0.37127813696861267, "learning_rate": 0.0009916857246137693, "loss": 0.2148, "num_input_tokens_seen": 40370480, "step": 18675 }, { "epoch": 3.0473083197389887, "grad_norm": 0.16755881905555725, "learning_rate": 0.0009916727929773078, "loss": 0.0697, "num_input_tokens_seen": 40380336, "step": 18680 }, { "epoch": 3.0481239804241436, "grad_norm": 0.04090609401464462, "learning_rate": 0.0009916598513764732, "loss": 0.122, "num_input_tokens_seen": 40391824, "step": 18685 }, { "epoch": 3.0489396411092984, "grad_norm": 0.05720209330320358, "learning_rate": 0.0009916468998115281, "loss": 0.1412, "num_input_tokens_seen": 40403088, "step": 18690 }, { "epoch": 3.0497553017944536, "grad_norm": 0.1086057499051094, "learning_rate": 0.000991633938282735, "loss": 0.1968, "num_input_tokens_seen": 40414480, "step": 18695 }, { "epoch": 3.0505709624796085, "grad_norm": 0.032246749848127365, "learning_rate": 0.0009916209667903562, "loss": 0.0715, "num_input_tokens_seen": 40425968, "step": 18700 }, { "epoch": 3.0513866231647633, "grad_norm": 0.03548622503876686, "learning_rate": 0.0009916079853346548, "loss": 0.0328, "num_input_tokens_seen": 40436912, "step": 18705 }, { "epoch": 3.0522022838499185, "grad_norm": 0.06208382919430733, "learning_rate": 0.0009915949939158942, "loss": 0.0605, "num_input_tokens_seen": 40447472, "step": 18710 }, { "epoch": 3.0530179445350734, "grad_norm": 0.010785914957523346, "learning_rate": 0.0009915819925343373, "loss": 0.1707, "num_input_tokens_seen": 40458192, "step": 18715 }, { "epoch": 3.053833605220228, "grad_norm": 0.01436684001237154, "learning_rate": 0.0009915689811902477, "loss": 0.0577, "num_input_tokens_seen": 40468016, "step": 18720 }, { "epoch": 3.0546492659053834, "grad_norm": 0.2570911943912506, "learning_rate": 0.000991555959883889, "loss": 0.0871, "num_input_tokens_seen": 40478448, "step": 18725 }, { "epoch": 3.0554649265905383, "grad_norm": 0.012195185758173466, "learning_rate": 0.0009915429286155254, "loss": 0.0189, "num_input_tokens_seen": 40489488, "step": 18730 }, { "epoch": 3.0562805872756935, "grad_norm": 0.007231024093925953, "learning_rate": 0.0009915298873854207, "loss": 0.0101, "num_input_tokens_seen": 40499888, "step": 18735 }, { "epoch": 3.0570962479608483, "grad_norm": 0.054103195667266846, "learning_rate": 0.0009915168361938392, "loss": 0.1771, "num_input_tokens_seen": 40509744, "step": 18740 }, { "epoch": 3.057911908646003, "grad_norm": 0.22565001249313354, "learning_rate": 0.0009915037750410456, "loss": 0.2337, "num_input_tokens_seen": 40520176, "step": 18745 }, { "epoch": 3.0587275693311584, "grad_norm": 0.1788310408592224, "learning_rate": 0.0009914907039273045, "loss": 0.1327, "num_input_tokens_seen": 40530896, "step": 18750 }, { "epoch": 3.0595432300163132, "grad_norm": 0.03100595623254776, "learning_rate": 0.0009914776228528805, "loss": 0.3058, "num_input_tokens_seen": 40541456, "step": 18755 }, { "epoch": 3.060358890701468, "grad_norm": 0.3577895164489746, "learning_rate": 0.0009914645318180393, "loss": 0.1918, "num_input_tokens_seen": 40552912, "step": 18760 }, { "epoch": 3.0611745513866233, "grad_norm": 0.12759797275066376, "learning_rate": 0.0009914514308230458, "loss": 0.0771, "num_input_tokens_seen": 40563824, "step": 18765 }, { "epoch": 3.061990212071778, "grad_norm": 0.08027143776416779, "learning_rate": 0.0009914383198681657, "loss": 0.044, "num_input_tokens_seen": 40575120, "step": 18770 }, { "epoch": 3.062805872756933, "grad_norm": 0.20178310573101044, "learning_rate": 0.0009914251989536645, "loss": 0.0979, "num_input_tokens_seen": 40586448, "step": 18775 }, { "epoch": 3.063621533442088, "grad_norm": 0.043073415756225586, "learning_rate": 0.0009914120680798082, "loss": 0.0983, "num_input_tokens_seen": 40597936, "step": 18780 }, { "epoch": 3.064437194127243, "grad_norm": 0.14184482395648956, "learning_rate": 0.000991398927246863, "loss": 0.0433, "num_input_tokens_seen": 40607984, "step": 18785 }, { "epoch": 3.065252854812398, "grad_norm": 0.07433011382818222, "learning_rate": 0.000991385776455095, "loss": 0.0627, "num_input_tokens_seen": 40618928, "step": 18790 }, { "epoch": 3.066068515497553, "grad_norm": 0.030221259221434593, "learning_rate": 0.0009913726157047712, "loss": 0.1432, "num_input_tokens_seen": 40628432, "step": 18795 }, { "epoch": 3.066884176182708, "grad_norm": 0.015028982423245907, "learning_rate": 0.0009913594449961576, "loss": 0.0488, "num_input_tokens_seen": 40639920, "step": 18800 }, { "epoch": 3.067699836867863, "grad_norm": 0.08033467084169388, "learning_rate": 0.0009913462643295217, "loss": 0.1466, "num_input_tokens_seen": 40651120, "step": 18805 }, { "epoch": 3.068515497553018, "grad_norm": 0.1094004362821579, "learning_rate": 0.0009913330737051304, "loss": 0.221, "num_input_tokens_seen": 40661008, "step": 18810 }, { "epoch": 3.069331158238173, "grad_norm": 0.1347908228635788, "learning_rate": 0.0009913198731232513, "loss": 0.123, "num_input_tokens_seen": 40671920, "step": 18815 }, { "epoch": 3.070146818923328, "grad_norm": 0.04396280273795128, "learning_rate": 0.0009913066625841513, "loss": 0.0832, "num_input_tokens_seen": 40681552, "step": 18820 }, { "epoch": 3.070962479608483, "grad_norm": 0.01838274672627449, "learning_rate": 0.0009912934420880988, "loss": 0.0323, "num_input_tokens_seen": 40692368, "step": 18825 }, { "epoch": 3.0717781402936377, "grad_norm": 0.007258755154907703, "learning_rate": 0.0009912802116353613, "loss": 0.0615, "num_input_tokens_seen": 40701392, "step": 18830 }, { "epoch": 3.072593800978793, "grad_norm": 0.10661440342664719, "learning_rate": 0.0009912669712262073, "loss": 0.1358, "num_input_tokens_seen": 40712272, "step": 18835 }, { "epoch": 3.073409461663948, "grad_norm": 0.10035596787929535, "learning_rate": 0.0009912537208609047, "loss": 0.0314, "num_input_tokens_seen": 40724496, "step": 18840 }, { "epoch": 3.0742251223491026, "grad_norm": 0.009967050515115261, "learning_rate": 0.0009912404605397222, "loss": 0.1524, "num_input_tokens_seen": 40734928, "step": 18845 }, { "epoch": 3.075040783034258, "grad_norm": 0.13334870338439941, "learning_rate": 0.0009912271902629288, "loss": 0.0507, "num_input_tokens_seen": 40744240, "step": 18850 }, { "epoch": 3.0758564437194127, "grad_norm": 0.13278226554393768, "learning_rate": 0.000991213910030793, "loss": 0.0939, "num_input_tokens_seen": 40754096, "step": 18855 }, { "epoch": 3.0766721044045675, "grad_norm": 0.22139577567577362, "learning_rate": 0.0009912006198435843, "loss": 0.1393, "num_input_tokens_seen": 40765328, "step": 18860 }, { "epoch": 3.0774877650897228, "grad_norm": 0.00508854491636157, "learning_rate": 0.000991187319701572, "loss": 0.0157, "num_input_tokens_seen": 40776304, "step": 18865 }, { "epoch": 3.0783034257748776, "grad_norm": 0.31696730852127075, "learning_rate": 0.0009911740096050252, "loss": 0.1645, "num_input_tokens_seen": 40787376, "step": 18870 }, { "epoch": 3.0791190864600324, "grad_norm": 0.08231733739376068, "learning_rate": 0.0009911606895542143, "loss": 0.0805, "num_input_tokens_seen": 40798032, "step": 18875 }, { "epoch": 3.0799347471451877, "grad_norm": 0.2622213363647461, "learning_rate": 0.0009911473595494089, "loss": 0.109, "num_input_tokens_seen": 40809456, "step": 18880 }, { "epoch": 3.0807504078303425, "grad_norm": 0.026339467614889145, "learning_rate": 0.0009911340195908791, "loss": 0.0945, "num_input_tokens_seen": 40819184, "step": 18885 }, { "epoch": 3.0815660685154977, "grad_norm": 0.022269627079367638, "learning_rate": 0.0009911206696788955, "loss": 0.096, "num_input_tokens_seen": 40831088, "step": 18890 }, { "epoch": 3.0823817292006526, "grad_norm": 0.13229554891586304, "learning_rate": 0.0009911073098137285, "loss": 0.113, "num_input_tokens_seen": 40842160, "step": 18895 }, { "epoch": 3.0831973898858074, "grad_norm": 0.15041932463645935, "learning_rate": 0.0009910939399956488, "loss": 0.2426, "num_input_tokens_seen": 40852304, "step": 18900 }, { "epoch": 3.0840130505709626, "grad_norm": 0.10796932131052017, "learning_rate": 0.0009910805602249273, "loss": 0.0974, "num_input_tokens_seen": 40864816, "step": 18905 }, { "epoch": 3.0848287112561175, "grad_norm": 0.30564913153648376, "learning_rate": 0.0009910671705018353, "loss": 0.151, "num_input_tokens_seen": 40875632, "step": 18910 }, { "epoch": 3.0856443719412723, "grad_norm": 0.08066358417272568, "learning_rate": 0.000991053770826644, "loss": 0.0565, "num_input_tokens_seen": 40886896, "step": 18915 }, { "epoch": 3.0864600326264275, "grad_norm": 0.07252980023622513, "learning_rate": 0.0009910403611996252, "loss": 0.2728, "num_input_tokens_seen": 40897744, "step": 18920 }, { "epoch": 3.0872756933115824, "grad_norm": 0.046622633934020996, "learning_rate": 0.0009910269416210508, "loss": 0.1146, "num_input_tokens_seen": 40907184, "step": 18925 }, { "epoch": 3.088091353996737, "grad_norm": 0.13025681674480438, "learning_rate": 0.0009910135120911924, "loss": 0.0651, "num_input_tokens_seen": 40918160, "step": 18930 }, { "epoch": 3.0889070146818924, "grad_norm": 0.06960994750261307, "learning_rate": 0.0009910000726103222, "loss": 0.2516, "num_input_tokens_seen": 40927856, "step": 18935 }, { "epoch": 3.0897226753670473, "grad_norm": 0.1453385353088379, "learning_rate": 0.0009909866231787125, "loss": 0.0677, "num_input_tokens_seen": 40939088, "step": 18940 }, { "epoch": 3.090538336052202, "grad_norm": 0.07813244313001633, "learning_rate": 0.0009909731637966362, "loss": 0.0854, "num_input_tokens_seen": 40949008, "step": 18945 }, { "epoch": 3.0913539967373573, "grad_norm": 0.1851913183927536, "learning_rate": 0.0009909596944643658, "loss": 0.1805, "num_input_tokens_seen": 40959984, "step": 18950 }, { "epoch": 3.092169657422512, "grad_norm": 0.04275398701429367, "learning_rate": 0.0009909462151821745, "loss": 0.1174, "num_input_tokens_seen": 40971024, "step": 18955 }, { "epoch": 3.0929853181076674, "grad_norm": 0.0939917117357254, "learning_rate": 0.0009909327259503351, "loss": 0.0721, "num_input_tokens_seen": 40984112, "step": 18960 }, { "epoch": 3.0938009787928222, "grad_norm": 0.006666467059403658, "learning_rate": 0.0009909192267691215, "loss": 0.1091, "num_input_tokens_seen": 40995504, "step": 18965 }, { "epoch": 3.094616639477977, "grad_norm": 0.008724762126803398, "learning_rate": 0.000990905717638807, "loss": 0.0256, "num_input_tokens_seen": 41006960, "step": 18970 }, { "epoch": 3.0954323001631323, "grad_norm": 0.02470196783542633, "learning_rate": 0.000990892198559665, "loss": 0.1204, "num_input_tokens_seen": 41017104, "step": 18975 }, { "epoch": 3.096247960848287, "grad_norm": 0.21092841029167175, "learning_rate": 0.0009908786695319702, "loss": 0.114, "num_input_tokens_seen": 41028368, "step": 18980 }, { "epoch": 3.097063621533442, "grad_norm": 0.04528717324137688, "learning_rate": 0.0009908651305559964, "loss": 0.0845, "num_input_tokens_seen": 41038960, "step": 18985 }, { "epoch": 3.097879282218597, "grad_norm": 0.10174711793661118, "learning_rate": 0.000990851581632018, "loss": 0.0807, "num_input_tokens_seen": 41049008, "step": 18990 }, { "epoch": 3.098694942903752, "grad_norm": 0.42217883467674255, "learning_rate": 0.0009908380227603094, "loss": 0.2884, "num_input_tokens_seen": 41060400, "step": 18995 }, { "epoch": 3.099510603588907, "grad_norm": 0.020816409960389137, "learning_rate": 0.000990824453941146, "loss": 0.0787, "num_input_tokens_seen": 41072112, "step": 19000 }, { "epoch": 3.100326264274062, "grad_norm": 0.035774942487478256, "learning_rate": 0.000990810875174802, "loss": 0.0627, "num_input_tokens_seen": 41083664, "step": 19005 }, { "epoch": 3.101141924959217, "grad_norm": 0.015757689252495766, "learning_rate": 0.0009907972864615531, "loss": 0.1523, "num_input_tokens_seen": 41093232, "step": 19010 }, { "epoch": 3.1019575856443717, "grad_norm": 0.12344611436128616, "learning_rate": 0.0009907836878016746, "loss": 0.1165, "num_input_tokens_seen": 41105328, "step": 19015 }, { "epoch": 3.102773246329527, "grad_norm": 0.08110906928777695, "learning_rate": 0.000990770079195442, "loss": 0.0464, "num_input_tokens_seen": 41115504, "step": 19020 }, { "epoch": 3.103588907014682, "grad_norm": 0.07154972851276398, "learning_rate": 0.0009907564606431315, "loss": 0.1205, "num_input_tokens_seen": 41126480, "step": 19025 }, { "epoch": 3.104404567699837, "grad_norm": 0.007094620727002621, "learning_rate": 0.0009907428321450182, "loss": 0.0812, "num_input_tokens_seen": 41138512, "step": 19030 }, { "epoch": 3.105220228384992, "grad_norm": 0.02809945121407509, "learning_rate": 0.0009907291937013792, "loss": 0.0625, "num_input_tokens_seen": 41150224, "step": 19035 }, { "epoch": 3.1060358890701467, "grad_norm": 0.06788235157728195, "learning_rate": 0.0009907155453124906, "loss": 0.0248, "num_input_tokens_seen": 41161392, "step": 19040 }, { "epoch": 3.106851549755302, "grad_norm": 0.01055830903351307, "learning_rate": 0.0009907018869786289, "loss": 0.0953, "num_input_tokens_seen": 41172592, "step": 19045 }, { "epoch": 3.107667210440457, "grad_norm": 0.07226168364286423, "learning_rate": 0.0009906882187000708, "loss": 0.1012, "num_input_tokens_seen": 41183792, "step": 19050 }, { "epoch": 3.1084828711256116, "grad_norm": 0.008402747102081776, "learning_rate": 0.0009906745404770936, "loss": 0.0609, "num_input_tokens_seen": 41192880, "step": 19055 }, { "epoch": 3.109298531810767, "grad_norm": 0.015082316473126411, "learning_rate": 0.0009906608523099743, "loss": 0.0394, "num_input_tokens_seen": 41202768, "step": 19060 }, { "epoch": 3.1101141924959217, "grad_norm": 0.0037719886749982834, "learning_rate": 0.0009906471541989905, "loss": 0.0603, "num_input_tokens_seen": 41213552, "step": 19065 }, { "epoch": 3.1109298531810765, "grad_norm": 0.2002812772989273, "learning_rate": 0.0009906334461444195, "loss": 0.1401, "num_input_tokens_seen": 41223920, "step": 19070 }, { "epoch": 3.1117455138662318, "grad_norm": 0.05566996708512306, "learning_rate": 0.0009906197281465395, "loss": 0.0523, "num_input_tokens_seen": 41235440, "step": 19075 }, { "epoch": 3.1125611745513866, "grad_norm": 0.09809504449367523, "learning_rate": 0.0009906060002056283, "loss": 0.0989, "num_input_tokens_seen": 41247184, "step": 19080 }, { "epoch": 3.1133768352365414, "grad_norm": 0.05056000500917435, "learning_rate": 0.000990592262321964, "loss": 0.2402, "num_input_tokens_seen": 41257552, "step": 19085 }, { "epoch": 3.1141924959216967, "grad_norm": 0.22058925032615662, "learning_rate": 0.0009905785144958253, "loss": 0.2654, "num_input_tokens_seen": 41268496, "step": 19090 }, { "epoch": 3.1150081566068515, "grad_norm": 0.2546690106391907, "learning_rate": 0.0009905647567274905, "loss": 0.1255, "num_input_tokens_seen": 41278640, "step": 19095 }, { "epoch": 3.1158238172920063, "grad_norm": 0.11807727813720703, "learning_rate": 0.0009905509890172385, "loss": 0.0706, "num_input_tokens_seen": 41288976, "step": 19100 }, { "epoch": 3.1166394779771616, "grad_norm": 0.06996791809797287, "learning_rate": 0.0009905372113653487, "loss": 0.1316, "num_input_tokens_seen": 41300912, "step": 19105 }, { "epoch": 3.1174551386623164, "grad_norm": 0.1596715748310089, "learning_rate": 0.0009905234237721, "loss": 0.3419, "num_input_tokens_seen": 41311088, "step": 19110 }, { "epoch": 3.1182707993474716, "grad_norm": 0.03004053235054016, "learning_rate": 0.0009905096262377716, "loss": 0.0519, "num_input_tokens_seen": 41321488, "step": 19115 }, { "epoch": 3.1190864600326265, "grad_norm": 0.16573455929756165, "learning_rate": 0.0009904958187626433, "loss": 0.1674, "num_input_tokens_seen": 41333008, "step": 19120 }, { "epoch": 3.1199021207177813, "grad_norm": 0.10211943089962006, "learning_rate": 0.0009904820013469952, "loss": 0.2155, "num_input_tokens_seen": 41343920, "step": 19125 }, { "epoch": 3.1207177814029365, "grad_norm": 0.08475879579782486, "learning_rate": 0.0009904681739911073, "loss": 0.0721, "num_input_tokens_seen": 41353488, "step": 19130 }, { "epoch": 3.1215334420880914, "grad_norm": 0.2866770923137665, "learning_rate": 0.0009904543366952593, "loss": 0.1097, "num_input_tokens_seen": 41364112, "step": 19135 }, { "epoch": 3.122349102773246, "grad_norm": 0.033991675823926926, "learning_rate": 0.0009904404894597323, "loss": 0.0394, "num_input_tokens_seen": 41375664, "step": 19140 }, { "epoch": 3.1231647634584014, "grad_norm": 0.009995969012379646, "learning_rate": 0.0009904266322848063, "loss": 0.043, "num_input_tokens_seen": 41387440, "step": 19145 }, { "epoch": 3.1239804241435563, "grad_norm": 0.05611064285039902, "learning_rate": 0.0009904127651707627, "loss": 0.0443, "num_input_tokens_seen": 41399664, "step": 19150 }, { "epoch": 3.124796084828711, "grad_norm": 0.07509331405162811, "learning_rate": 0.000990398888117882, "loss": 0.2621, "num_input_tokens_seen": 41409296, "step": 19155 }, { "epoch": 3.1256117455138663, "grad_norm": 0.14326560497283936, "learning_rate": 0.0009903850011264458, "loss": 0.1059, "num_input_tokens_seen": 41421008, "step": 19160 }, { "epoch": 3.126427406199021, "grad_norm": 0.1420290321111679, "learning_rate": 0.0009903711041967357, "loss": 0.1972, "num_input_tokens_seen": 41431984, "step": 19165 }, { "epoch": 3.1272430668841764, "grad_norm": 0.018591681495308876, "learning_rate": 0.000990357197329033, "loss": 0.0265, "num_input_tokens_seen": 41442768, "step": 19170 }, { "epoch": 3.1280587275693312, "grad_norm": 0.017070859670639038, "learning_rate": 0.0009903432805236194, "loss": 0.05, "num_input_tokens_seen": 41452240, "step": 19175 }, { "epoch": 3.128874388254486, "grad_norm": 0.22679705917835236, "learning_rate": 0.0009903293537807773, "loss": 0.2145, "num_input_tokens_seen": 41463696, "step": 19180 }, { "epoch": 3.1296900489396413, "grad_norm": 0.10408644378185272, "learning_rate": 0.0009903154171007889, "loss": 0.0406, "num_input_tokens_seen": 41474608, "step": 19185 }, { "epoch": 3.130505709624796, "grad_norm": 0.00605523819103837, "learning_rate": 0.0009903014704839366, "loss": 0.067, "num_input_tokens_seen": 41485968, "step": 19190 }, { "epoch": 3.131321370309951, "grad_norm": 0.11108432710170746, "learning_rate": 0.000990287513930503, "loss": 0.0651, "num_input_tokens_seen": 41496688, "step": 19195 }, { "epoch": 3.132137030995106, "grad_norm": 0.004489239305257797, "learning_rate": 0.000990273547440771, "loss": 0.0742, "num_input_tokens_seen": 41507824, "step": 19200 }, { "epoch": 3.132952691680261, "grad_norm": 0.0032085098791867495, "learning_rate": 0.0009902595710150233, "loss": 0.0714, "num_input_tokens_seen": 41519440, "step": 19205 }, { "epoch": 3.133768352365416, "grad_norm": 0.004664691165089607, "learning_rate": 0.0009902455846535437, "loss": 0.0315, "num_input_tokens_seen": 41530768, "step": 19210 }, { "epoch": 3.134584013050571, "grad_norm": 0.2081509232521057, "learning_rate": 0.0009902315883566152, "loss": 0.2124, "num_input_tokens_seen": 41541808, "step": 19215 }, { "epoch": 3.135399673735726, "grad_norm": 0.055930253118276596, "learning_rate": 0.000990217582124522, "loss": 0.0733, "num_input_tokens_seen": 41552400, "step": 19220 }, { "epoch": 3.1362153344208807, "grad_norm": 0.023356657475233078, "learning_rate": 0.0009902035659575474, "loss": 0.0448, "num_input_tokens_seen": 41562416, "step": 19225 }, { "epoch": 3.137030995106036, "grad_norm": 0.010270596481859684, "learning_rate": 0.0009901895398559757, "loss": 0.0487, "num_input_tokens_seen": 41574000, "step": 19230 }, { "epoch": 3.137846655791191, "grad_norm": 0.06142498180270195, "learning_rate": 0.0009901755038200912, "loss": 0.1271, "num_input_tokens_seen": 41584400, "step": 19235 }, { "epoch": 3.1386623164763456, "grad_norm": 0.059650782495737076, "learning_rate": 0.0009901614578501782, "loss": 0.1603, "num_input_tokens_seen": 41596656, "step": 19240 }, { "epoch": 3.139477977161501, "grad_norm": 0.056524645537137985, "learning_rate": 0.0009901474019465215, "loss": 0.0678, "num_input_tokens_seen": 41606960, "step": 19245 }, { "epoch": 3.1402936378466557, "grad_norm": 0.038970183581113815, "learning_rate": 0.0009901333361094057, "loss": 0.0453, "num_input_tokens_seen": 41617936, "step": 19250 }, { "epoch": 3.141109298531811, "grad_norm": 0.3043142855167389, "learning_rate": 0.0009901192603391162, "loss": 0.1262, "num_input_tokens_seen": 41628720, "step": 19255 }, { "epoch": 3.141924959216966, "grad_norm": 0.2813945710659027, "learning_rate": 0.0009901051746359381, "loss": 0.0738, "num_input_tokens_seen": 41639408, "step": 19260 }, { "epoch": 3.1427406199021206, "grad_norm": 0.024280589073896408, "learning_rate": 0.0009900910790001571, "loss": 0.0133, "num_input_tokens_seen": 41650128, "step": 19265 }, { "epoch": 3.143556280587276, "grad_norm": 0.25504744052886963, "learning_rate": 0.0009900769734320586, "loss": 0.1879, "num_input_tokens_seen": 41661168, "step": 19270 }, { "epoch": 3.1443719412724307, "grad_norm": 0.08303016424179077, "learning_rate": 0.0009900628579319283, "loss": 0.1217, "num_input_tokens_seen": 41672656, "step": 19275 }, { "epoch": 3.1451876019575855, "grad_norm": 0.04942861944437027, "learning_rate": 0.0009900487325000527, "loss": 0.1128, "num_input_tokens_seen": 41684176, "step": 19280 }, { "epoch": 3.1460032626427408, "grad_norm": 0.40202391147613525, "learning_rate": 0.0009900345971367178, "loss": 0.1924, "num_input_tokens_seen": 41694896, "step": 19285 }, { "epoch": 3.1468189233278956, "grad_norm": 0.013745547272264957, "learning_rate": 0.00099002045184221, "loss": 0.08, "num_input_tokens_seen": 41705840, "step": 19290 }, { "epoch": 3.1476345840130504, "grad_norm": 0.23511438071727753, "learning_rate": 0.0009900062966168163, "loss": 0.1777, "num_input_tokens_seen": 41717104, "step": 19295 }, { "epoch": 3.1484502446982057, "grad_norm": 0.5526800155639648, "learning_rate": 0.0009899921314608232, "loss": 0.1843, "num_input_tokens_seen": 41727632, "step": 19300 }, { "epoch": 3.1492659053833605, "grad_norm": 0.022193720564246178, "learning_rate": 0.0009899779563745182, "loss": 0.0536, "num_input_tokens_seen": 41738608, "step": 19305 }, { "epoch": 3.1500815660685153, "grad_norm": 0.4144640564918518, "learning_rate": 0.0009899637713581882, "loss": 0.0486, "num_input_tokens_seen": 41749072, "step": 19310 }, { "epoch": 3.1508972267536706, "grad_norm": 0.060778357088565826, "learning_rate": 0.0009899495764121207, "loss": 0.2802, "num_input_tokens_seen": 41760400, "step": 19315 }, { "epoch": 3.1517128874388254, "grad_norm": 0.17989078164100647, "learning_rate": 0.0009899353715366037, "loss": 0.1911, "num_input_tokens_seen": 41771184, "step": 19320 }, { "epoch": 3.15252854812398, "grad_norm": 0.025782205164432526, "learning_rate": 0.0009899211567319247, "loss": 0.0811, "num_input_tokens_seen": 41782896, "step": 19325 }, { "epoch": 3.1533442088091355, "grad_norm": 0.016936376690864563, "learning_rate": 0.000989906931998372, "loss": 0.1149, "num_input_tokens_seen": 41793616, "step": 19330 }, { "epoch": 3.1541598694942903, "grad_norm": 0.035956088453531265, "learning_rate": 0.000989892697336234, "loss": 0.0452, "num_input_tokens_seen": 41806000, "step": 19335 }, { "epoch": 3.1549755301794455, "grad_norm": 0.06265423446893692, "learning_rate": 0.0009898784527457988, "loss": 0.0659, "num_input_tokens_seen": 41816016, "step": 19340 }, { "epoch": 3.1557911908646004, "grad_norm": 0.048211682587862015, "learning_rate": 0.0009898641982273553, "loss": 0.0343, "num_input_tokens_seen": 41826320, "step": 19345 }, { "epoch": 3.156606851549755, "grad_norm": 0.10827599465847015, "learning_rate": 0.0009898499337811925, "loss": 0.1373, "num_input_tokens_seen": 41837328, "step": 19350 }, { "epoch": 3.1574225122349104, "grad_norm": 0.012927313335239887, "learning_rate": 0.0009898356594075992, "loss": 0.1189, "num_input_tokens_seen": 41847856, "step": 19355 }, { "epoch": 3.1582381729200653, "grad_norm": 0.08092676848173141, "learning_rate": 0.0009898213751068652, "loss": 0.0205, "num_input_tokens_seen": 41858288, "step": 19360 }, { "epoch": 3.15905383360522, "grad_norm": 0.12177547812461853, "learning_rate": 0.0009898070808792795, "loss": 0.1304, "num_input_tokens_seen": 41868496, "step": 19365 }, { "epoch": 3.1598694942903753, "grad_norm": 0.064247727394104, "learning_rate": 0.0009897927767251319, "loss": 0.0914, "num_input_tokens_seen": 41879312, "step": 19370 }, { "epoch": 3.16068515497553, "grad_norm": 0.05906981602311134, "learning_rate": 0.0009897784626447122, "loss": 0.0883, "num_input_tokens_seen": 41890736, "step": 19375 }, { "epoch": 3.161500815660685, "grad_norm": 0.010267877951264381, "learning_rate": 0.0009897641386383106, "loss": 0.0268, "num_input_tokens_seen": 41902704, "step": 19380 }, { "epoch": 3.1623164763458402, "grad_norm": 0.006765549536794424, "learning_rate": 0.0009897498047062177, "loss": 0.1427, "num_input_tokens_seen": 41913200, "step": 19385 }, { "epoch": 3.163132137030995, "grad_norm": 0.035863492637872696, "learning_rate": 0.0009897354608487234, "loss": 0.1432, "num_input_tokens_seen": 41923792, "step": 19390 }, { "epoch": 3.1639477977161503, "grad_norm": 0.004194718785583973, "learning_rate": 0.000989721107066119, "loss": 0.0884, "num_input_tokens_seen": 41934960, "step": 19395 }, { "epoch": 3.164763458401305, "grad_norm": 0.010995978489518166, "learning_rate": 0.000989706743358695, "loss": 0.0438, "num_input_tokens_seen": 41945904, "step": 19400 }, { "epoch": 3.16557911908646, "grad_norm": 0.28057172894477844, "learning_rate": 0.0009896923697267426, "loss": 0.252, "num_input_tokens_seen": 41957488, "step": 19405 }, { "epoch": 3.166394779771615, "grad_norm": 0.09755899012088776, "learning_rate": 0.0009896779861705532, "loss": 0.1373, "num_input_tokens_seen": 41968784, "step": 19410 }, { "epoch": 3.16721044045677, "grad_norm": 0.04553823918104172, "learning_rate": 0.000989663592690418, "loss": 0.2561, "num_input_tokens_seen": 41980048, "step": 19415 }, { "epoch": 3.168026101141925, "grad_norm": 0.05321956053376198, "learning_rate": 0.0009896491892866291, "loss": 0.0508, "num_input_tokens_seen": 41989904, "step": 19420 }, { "epoch": 3.16884176182708, "grad_norm": 0.24264544248580933, "learning_rate": 0.0009896347759594782, "loss": 0.2203, "num_input_tokens_seen": 42000432, "step": 19425 }, { "epoch": 3.169657422512235, "grad_norm": 0.10809178650379181, "learning_rate": 0.0009896203527092573, "loss": 0.1427, "num_input_tokens_seen": 42010576, "step": 19430 }, { "epoch": 3.1704730831973897, "grad_norm": 0.046370748430490494, "learning_rate": 0.000989605919536259, "loss": 0.0368, "num_input_tokens_seen": 42023536, "step": 19435 }, { "epoch": 3.171288743882545, "grad_norm": 0.02008720114827156, "learning_rate": 0.0009895914764407755, "loss": 0.1091, "num_input_tokens_seen": 42035792, "step": 19440 }, { "epoch": 3.1721044045677, "grad_norm": 0.12499675899744034, "learning_rate": 0.0009895770234230996, "loss": 0.052, "num_input_tokens_seen": 42045968, "step": 19445 }, { "epoch": 3.1729200652528546, "grad_norm": 0.02297365851700306, "learning_rate": 0.0009895625604835244, "loss": 0.1416, "num_input_tokens_seen": 42057616, "step": 19450 }, { "epoch": 3.17373572593801, "grad_norm": 0.02101576328277588, "learning_rate": 0.0009895480876223428, "loss": 0.0367, "num_input_tokens_seen": 42067536, "step": 19455 }, { "epoch": 3.1745513866231647, "grad_norm": 0.05291339010000229, "learning_rate": 0.000989533604839848, "loss": 0.101, "num_input_tokens_seen": 42079024, "step": 19460 }, { "epoch": 3.1753670473083195, "grad_norm": 0.03308214247226715, "learning_rate": 0.0009895191121363338, "loss": 0.0384, "num_input_tokens_seen": 42090864, "step": 19465 }, { "epoch": 3.176182707993475, "grad_norm": 0.007731168996542692, "learning_rate": 0.0009895046095120938, "loss": 0.026, "num_input_tokens_seen": 42101776, "step": 19470 }, { "epoch": 3.1769983686786296, "grad_norm": 0.016055205836892128, "learning_rate": 0.0009894900969674221, "loss": 0.1852, "num_input_tokens_seen": 42112752, "step": 19475 }, { "epoch": 3.177814029363785, "grad_norm": 0.02115645818412304, "learning_rate": 0.0009894755745026124, "loss": 0.2081, "num_input_tokens_seen": 42125072, "step": 19480 }, { "epoch": 3.1786296900489397, "grad_norm": 0.0125564094632864, "learning_rate": 0.0009894610421179594, "loss": 0.0768, "num_input_tokens_seen": 42136368, "step": 19485 }, { "epoch": 3.1794453507340945, "grad_norm": 0.00938443560153246, "learning_rate": 0.0009894464998137572, "loss": 0.0911, "num_input_tokens_seen": 42146576, "step": 19490 }, { "epoch": 3.1802610114192498, "grad_norm": 0.2912297546863556, "learning_rate": 0.000989431947590301, "loss": 0.0781, "num_input_tokens_seen": 42157680, "step": 19495 }, { "epoch": 3.1810766721044046, "grad_norm": 0.08204010128974915, "learning_rate": 0.0009894173854478854, "loss": 0.0977, "num_input_tokens_seen": 42168720, "step": 19500 }, { "epoch": 3.1818923327895594, "grad_norm": 0.041957221925258636, "learning_rate": 0.0009894028133868055, "loss": 0.1634, "num_input_tokens_seen": 42179376, "step": 19505 }, { "epoch": 3.1827079934747147, "grad_norm": 0.2277340292930603, "learning_rate": 0.000989388231407357, "loss": 0.0766, "num_input_tokens_seen": 42190416, "step": 19510 }, { "epoch": 3.1835236541598695, "grad_norm": 0.1018163338303566, "learning_rate": 0.000989373639509835, "loss": 0.0571, "num_input_tokens_seen": 42200752, "step": 19515 }, { "epoch": 3.1843393148450243, "grad_norm": 0.023954367265105247, "learning_rate": 0.0009893590376945354, "loss": 0.0406, "num_input_tokens_seen": 42210352, "step": 19520 }, { "epoch": 3.1851549755301796, "grad_norm": 0.18598498404026031, "learning_rate": 0.000989344425961754, "loss": 0.2032, "num_input_tokens_seen": 42222256, "step": 19525 }, { "epoch": 3.1859706362153344, "grad_norm": 0.2285570353269577, "learning_rate": 0.000989329804311787, "loss": 0.1581, "num_input_tokens_seen": 42234320, "step": 19530 }, { "epoch": 3.186786296900489, "grad_norm": 0.03166361153125763, "learning_rate": 0.000989315172744931, "loss": 0.231, "num_input_tokens_seen": 42244688, "step": 19535 }, { "epoch": 3.1876019575856445, "grad_norm": 0.253466933965683, "learning_rate": 0.0009893005312614823, "loss": 0.1098, "num_input_tokens_seen": 42254736, "step": 19540 }, { "epoch": 3.1884176182707993, "grad_norm": 0.016121881082654, "learning_rate": 0.0009892858798617374, "loss": 0.073, "num_input_tokens_seen": 42266992, "step": 19545 }, { "epoch": 3.189233278955954, "grad_norm": 0.10315223783254623, "learning_rate": 0.0009892712185459935, "loss": 0.1024, "num_input_tokens_seen": 42277968, "step": 19550 }, { "epoch": 3.1900489396411094, "grad_norm": 0.2475394308567047, "learning_rate": 0.0009892565473145476, "loss": 0.1473, "num_input_tokens_seen": 42289680, "step": 19555 }, { "epoch": 3.190864600326264, "grad_norm": 0.037393637001514435, "learning_rate": 0.0009892418661676973, "loss": 0.127, "num_input_tokens_seen": 42299696, "step": 19560 }, { "epoch": 3.1916802610114194, "grad_norm": 0.019417136907577515, "learning_rate": 0.0009892271751057399, "loss": 0.0779, "num_input_tokens_seen": 42309968, "step": 19565 }, { "epoch": 3.1924959216965743, "grad_norm": 0.23461590707302094, "learning_rate": 0.000989212474128973, "loss": 0.1245, "num_input_tokens_seen": 42319952, "step": 19570 }, { "epoch": 3.193311582381729, "grad_norm": 0.2393941581249237, "learning_rate": 0.0009891977632376949, "loss": 0.1614, "num_input_tokens_seen": 42330704, "step": 19575 }, { "epoch": 3.1941272430668843, "grad_norm": 0.11234183609485626, "learning_rate": 0.0009891830424322034, "loss": 0.1503, "num_input_tokens_seen": 42340944, "step": 19580 }, { "epoch": 3.194942903752039, "grad_norm": 0.00959884561598301, "learning_rate": 0.000989168311712797, "loss": 0.0682, "num_input_tokens_seen": 42351696, "step": 19585 }, { "epoch": 3.195758564437194, "grad_norm": 0.04411447048187256, "learning_rate": 0.0009891535710797744, "loss": 0.0312, "num_input_tokens_seen": 42363728, "step": 19590 }, { "epoch": 3.1965742251223492, "grad_norm": 0.1372920125722885, "learning_rate": 0.0009891388205334338, "loss": 0.3343, "num_input_tokens_seen": 42374992, "step": 19595 }, { "epoch": 3.197389885807504, "grad_norm": 0.0299091674387455, "learning_rate": 0.0009891240600740747, "loss": 0.0688, "num_input_tokens_seen": 42385232, "step": 19600 }, { "epoch": 3.198205546492659, "grad_norm": 0.012968046590685844, "learning_rate": 0.000989109289701996, "loss": 0.0465, "num_input_tokens_seen": 42396176, "step": 19605 }, { "epoch": 3.199021207177814, "grad_norm": 0.2268250286579132, "learning_rate": 0.000989094509417497, "loss": 0.1662, "num_input_tokens_seen": 42407632, "step": 19610 }, { "epoch": 3.199836867862969, "grad_norm": 0.025382978841662407, "learning_rate": 0.0009890797192208774, "loss": 0.0664, "num_input_tokens_seen": 42417776, "step": 19615 }, { "epoch": 3.200652528548124, "grad_norm": 0.07793654501438141, "learning_rate": 0.0009890649191124368, "loss": 0.1867, "num_input_tokens_seen": 42428400, "step": 19620 }, { "epoch": 3.201468189233279, "grad_norm": 0.11532403528690338, "learning_rate": 0.000989050109092475, "loss": 0.0966, "num_input_tokens_seen": 42440176, "step": 19625 }, { "epoch": 3.202283849918434, "grad_norm": 0.15272283554077148, "learning_rate": 0.0009890352891612927, "loss": 0.1024, "num_input_tokens_seen": 42451632, "step": 19630 }, { "epoch": 3.203099510603589, "grad_norm": 0.06868071109056473, "learning_rate": 0.0009890204593191896, "loss": 0.1599, "num_input_tokens_seen": 42462672, "step": 19635 }, { "epoch": 3.203915171288744, "grad_norm": 0.17394308745861053, "learning_rate": 0.0009890056195664668, "loss": 0.0878, "num_input_tokens_seen": 42473040, "step": 19640 }, { "epoch": 3.2047308319738987, "grad_norm": 0.060149114578962326, "learning_rate": 0.0009889907699034246, "loss": 0.045, "num_input_tokens_seen": 42484816, "step": 19645 }, { "epoch": 3.205546492659054, "grad_norm": 0.015076788142323494, "learning_rate": 0.000988975910330364, "loss": 0.0669, "num_input_tokens_seen": 42495216, "step": 19650 }, { "epoch": 3.206362153344209, "grad_norm": 0.088878333568573, "learning_rate": 0.0009889610408475864, "loss": 0.19, "num_input_tokens_seen": 42506160, "step": 19655 }, { "epoch": 3.2071778140293636, "grad_norm": 0.15855666995048523, "learning_rate": 0.000988946161455393, "loss": 0.1424, "num_input_tokens_seen": 42516336, "step": 19660 }, { "epoch": 3.207993474714519, "grad_norm": 0.02387774921953678, "learning_rate": 0.0009889312721540855, "loss": 0.077, "num_input_tokens_seen": 42525776, "step": 19665 }, { "epoch": 3.2088091353996737, "grad_norm": 0.05558675155043602, "learning_rate": 0.0009889163729439653, "loss": 0.1243, "num_input_tokens_seen": 42536624, "step": 19670 }, { "epoch": 3.2096247960848285, "grad_norm": 0.0066003985702991486, "learning_rate": 0.0009889014638253346, "loss": 0.0481, "num_input_tokens_seen": 42547216, "step": 19675 }, { "epoch": 3.210440456769984, "grad_norm": 0.16620665788650513, "learning_rate": 0.0009888865447984956, "loss": 0.0817, "num_input_tokens_seen": 42557328, "step": 19680 }, { "epoch": 3.2112561174551386, "grad_norm": 0.05774753913283348, "learning_rate": 0.0009888716158637505, "loss": 0.068, "num_input_tokens_seen": 42568016, "step": 19685 }, { "epoch": 3.2120717781402934, "grad_norm": 0.02686813473701477, "learning_rate": 0.000988856677021402, "loss": 0.0304, "num_input_tokens_seen": 42580240, "step": 19690 }, { "epoch": 3.2128874388254487, "grad_norm": 0.018097640946507454, "learning_rate": 0.0009888417282717529, "loss": 0.1011, "num_input_tokens_seen": 42590928, "step": 19695 }, { "epoch": 3.2137030995106035, "grad_norm": 0.002314778044819832, "learning_rate": 0.000988826769615106, "loss": 0.1185, "num_input_tokens_seen": 42602384, "step": 19700 }, { "epoch": 3.2145187601957588, "grad_norm": 0.0739186555147171, "learning_rate": 0.0009888118010517642, "loss": 0.2168, "num_input_tokens_seen": 42612240, "step": 19705 }, { "epoch": 3.2153344208809136, "grad_norm": 0.054093651473522186, "learning_rate": 0.0009887968225820315, "loss": 0.0316, "num_input_tokens_seen": 42624336, "step": 19710 }, { "epoch": 3.2161500815660684, "grad_norm": 0.07303130626678467, "learning_rate": 0.0009887818342062106, "loss": 0.1457, "num_input_tokens_seen": 42634672, "step": 19715 }, { "epoch": 3.2169657422512237, "grad_norm": 0.07583710551261902, "learning_rate": 0.0009887668359246063, "loss": 0.0942, "num_input_tokens_seen": 42646032, "step": 19720 }, { "epoch": 3.2177814029363785, "grad_norm": 0.1961059272289276, "learning_rate": 0.0009887518277375217, "loss": 0.0917, "num_input_tokens_seen": 42656848, "step": 19725 }, { "epoch": 3.2185970636215333, "grad_norm": 0.01787407509982586, "learning_rate": 0.0009887368096452617, "loss": 0.036, "num_input_tokens_seen": 42666800, "step": 19730 }, { "epoch": 3.2194127243066886, "grad_norm": 0.1734134703874588, "learning_rate": 0.0009887217816481298, "loss": 0.0505, "num_input_tokens_seen": 42678032, "step": 19735 }, { "epoch": 3.2202283849918434, "grad_norm": 0.13812491297721863, "learning_rate": 0.0009887067437464312, "loss": 0.1628, "num_input_tokens_seen": 42687984, "step": 19740 }, { "epoch": 3.221044045676998, "grad_norm": 0.17805027961730957, "learning_rate": 0.0009886916959404703, "loss": 0.1175, "num_input_tokens_seen": 42698896, "step": 19745 }, { "epoch": 3.2218597063621535, "grad_norm": 0.012442238628864288, "learning_rate": 0.0009886766382305526, "loss": 0.0697, "num_input_tokens_seen": 42709040, "step": 19750 }, { "epoch": 3.2226753670473083, "grad_norm": 0.2859947085380554, "learning_rate": 0.0009886615706169825, "loss": 0.0914, "num_input_tokens_seen": 42719088, "step": 19755 }, { "epoch": 3.223491027732463, "grad_norm": 0.037967607378959656, "learning_rate": 0.0009886464931000661, "loss": 0.0984, "num_input_tokens_seen": 42729616, "step": 19760 }, { "epoch": 3.2243066884176184, "grad_norm": 0.006067187059670687, "learning_rate": 0.0009886314056801084, "loss": 0.0457, "num_input_tokens_seen": 42740592, "step": 19765 }, { "epoch": 3.225122349102773, "grad_norm": 0.2507992386817932, "learning_rate": 0.0009886163083574154, "loss": 0.1842, "num_input_tokens_seen": 42751376, "step": 19770 }, { "epoch": 3.225938009787928, "grad_norm": 0.006261878181248903, "learning_rate": 0.000988601201132293, "loss": 0.141, "num_input_tokens_seen": 42763312, "step": 19775 }, { "epoch": 3.2267536704730833, "grad_norm": 0.004393375013023615, "learning_rate": 0.0009885860840050478, "loss": 0.0486, "num_input_tokens_seen": 42774896, "step": 19780 }, { "epoch": 3.227569331158238, "grad_norm": 0.041983719915151596, "learning_rate": 0.0009885709569759852, "loss": 0.107, "num_input_tokens_seen": 42786992, "step": 19785 }, { "epoch": 3.2283849918433933, "grad_norm": 0.03765320032835007, "learning_rate": 0.0009885558200454128, "loss": 0.0924, "num_input_tokens_seen": 42797776, "step": 19790 }, { "epoch": 3.229200652528548, "grad_norm": 0.06044596806168556, "learning_rate": 0.0009885406732136367, "loss": 0.1155, "num_input_tokens_seen": 42808848, "step": 19795 }, { "epoch": 3.230016313213703, "grad_norm": 0.1069084033370018, "learning_rate": 0.0009885255164809644, "loss": 0.0783, "num_input_tokens_seen": 42820784, "step": 19800 }, { "epoch": 3.2308319738988582, "grad_norm": 0.14403347671031952, "learning_rate": 0.0009885103498477026, "loss": 0.0549, "num_input_tokens_seen": 42832208, "step": 19805 }, { "epoch": 3.231647634584013, "grad_norm": 0.10448440164327621, "learning_rate": 0.0009884951733141586, "loss": 0.2749, "num_input_tokens_seen": 42843312, "step": 19810 }, { "epoch": 3.232463295269168, "grad_norm": 0.24592241644859314, "learning_rate": 0.0009884799868806406, "loss": 0.1802, "num_input_tokens_seen": 42854736, "step": 19815 }, { "epoch": 3.233278955954323, "grad_norm": 0.046691689640283585, "learning_rate": 0.000988464790547456, "loss": 0.0578, "num_input_tokens_seen": 42864656, "step": 19820 }, { "epoch": 3.234094616639478, "grad_norm": 0.053699150681495667, "learning_rate": 0.0009884495843149124, "loss": 0.1522, "num_input_tokens_seen": 42875344, "step": 19825 }, { "epoch": 3.2349102773246328, "grad_norm": 0.04752179607748985, "learning_rate": 0.0009884343681833185, "loss": 0.1695, "num_input_tokens_seen": 42885712, "step": 19830 }, { "epoch": 3.235725938009788, "grad_norm": 0.03858195245265961, "learning_rate": 0.0009884191421529825, "loss": 0.1487, "num_input_tokens_seen": 42898352, "step": 19835 }, { "epoch": 3.236541598694943, "grad_norm": 0.10333125293254852, "learning_rate": 0.000988403906224213, "loss": 0.0636, "num_input_tokens_seen": 42908496, "step": 19840 }, { "epoch": 3.237357259380098, "grad_norm": 0.18015356361865997, "learning_rate": 0.0009883886603973188, "loss": 0.1176, "num_input_tokens_seen": 42919280, "step": 19845 }, { "epoch": 3.238172920065253, "grad_norm": 0.07732725888490677, "learning_rate": 0.0009883734046726086, "loss": 0.1254, "num_input_tokens_seen": 42930160, "step": 19850 }, { "epoch": 3.2389885807504077, "grad_norm": 0.09112097322940826, "learning_rate": 0.0009883581390503922, "loss": 0.0903, "num_input_tokens_seen": 42941616, "step": 19855 }, { "epoch": 3.239804241435563, "grad_norm": 0.16242296993732452, "learning_rate": 0.0009883428635309784, "loss": 0.1832, "num_input_tokens_seen": 42952240, "step": 19860 }, { "epoch": 3.240619902120718, "grad_norm": 0.14074952900409698, "learning_rate": 0.0009883275781146768, "loss": 0.1466, "num_input_tokens_seen": 42962672, "step": 19865 }, { "epoch": 3.2414355628058726, "grad_norm": 0.14547309279441833, "learning_rate": 0.0009883122828017977, "loss": 0.0943, "num_input_tokens_seen": 42974800, "step": 19870 }, { "epoch": 3.242251223491028, "grad_norm": 0.13024355471134186, "learning_rate": 0.0009882969775926505, "loss": 0.0502, "num_input_tokens_seen": 42985648, "step": 19875 }, { "epoch": 3.2430668841761827, "grad_norm": 0.022221842780709267, "learning_rate": 0.0009882816624875454, "loss": 0.019, "num_input_tokens_seen": 42995824, "step": 19880 }, { "epoch": 3.2438825448613375, "grad_norm": 0.04413722828030586, "learning_rate": 0.0009882663374867933, "loss": 0.0398, "num_input_tokens_seen": 43007248, "step": 19885 }, { "epoch": 3.244698205546493, "grad_norm": 0.022984053939580917, "learning_rate": 0.0009882510025907042, "loss": 0.0813, "num_input_tokens_seen": 43018512, "step": 19890 }, { "epoch": 3.2455138662316476, "grad_norm": 0.08174191415309906, "learning_rate": 0.0009882356577995894, "loss": 0.1128, "num_input_tokens_seen": 43028368, "step": 19895 }, { "epoch": 3.2463295269168024, "grad_norm": 0.11336220800876617, "learning_rate": 0.0009882203031137595, "loss": 0.042, "num_input_tokens_seen": 43039120, "step": 19900 }, { "epoch": 3.2471451876019577, "grad_norm": 0.181237131357193, "learning_rate": 0.000988204938533526, "loss": 0.1334, "num_input_tokens_seen": 43049936, "step": 19905 }, { "epoch": 3.2479608482871125, "grad_norm": 0.14684025943279266, "learning_rate": 0.0009881895640591997, "loss": 0.0507, "num_input_tokens_seen": 43061808, "step": 19910 }, { "epoch": 3.2487765089722673, "grad_norm": 0.028737680986523628, "learning_rate": 0.0009881741796910928, "loss": 0.078, "num_input_tokens_seen": 43073456, "step": 19915 }, { "epoch": 3.2495921696574226, "grad_norm": 0.17885787785053253, "learning_rate": 0.0009881587854295168, "loss": 0.2179, "num_input_tokens_seen": 43084688, "step": 19920 }, { "epoch": 3.2504078303425774, "grad_norm": 0.038553569465875626, "learning_rate": 0.0009881433812747838, "loss": 0.1366, "num_input_tokens_seen": 43095504, "step": 19925 }, { "epoch": 3.2512234910277327, "grad_norm": 0.02528173290193081, "learning_rate": 0.000988127967227206, "loss": 0.0567, "num_input_tokens_seen": 43105648, "step": 19930 }, { "epoch": 3.2520391517128875, "grad_norm": 0.021175552159547806, "learning_rate": 0.0009881125432870956, "loss": 0.1676, "num_input_tokens_seen": 43117264, "step": 19935 }, { "epoch": 3.2528548123980423, "grad_norm": 0.07172536849975586, "learning_rate": 0.0009880971094547652, "loss": 0.0924, "num_input_tokens_seen": 43128016, "step": 19940 }, { "epoch": 3.2536704730831976, "grad_norm": 0.14829133450984955, "learning_rate": 0.0009880816657305278, "loss": 0.0931, "num_input_tokens_seen": 43139920, "step": 19945 }, { "epoch": 3.2544861337683524, "grad_norm": 0.08503178507089615, "learning_rate": 0.0009880662121146964, "loss": 0.1716, "num_input_tokens_seen": 43149808, "step": 19950 }, { "epoch": 3.255301794453507, "grad_norm": 0.06452854722738266, "learning_rate": 0.0009880507486075838, "loss": 0.0955, "num_input_tokens_seen": 43160944, "step": 19955 }, { "epoch": 3.2561174551386625, "grad_norm": 0.06388487666845322, "learning_rate": 0.0009880352752095038, "loss": 0.035, "num_input_tokens_seen": 43170032, "step": 19960 }, { "epoch": 3.2569331158238173, "grad_norm": 0.14313308894634247, "learning_rate": 0.0009880197919207698, "loss": 0.0736, "num_input_tokens_seen": 43179920, "step": 19965 }, { "epoch": 3.257748776508972, "grad_norm": 0.0758235901594162, "learning_rate": 0.0009880042987416957, "loss": 0.0517, "num_input_tokens_seen": 43190736, "step": 19970 }, { "epoch": 3.2585644371941274, "grad_norm": 0.0018469083588570356, "learning_rate": 0.0009879887956725953, "loss": 0.3477, "num_input_tokens_seen": 43201648, "step": 19975 }, { "epoch": 3.259380097879282, "grad_norm": 0.09347787499427795, "learning_rate": 0.0009879732827137828, "loss": 0.1401, "num_input_tokens_seen": 43211984, "step": 19980 }, { "epoch": 3.2601957585644374, "grad_norm": 0.16566364467144012, "learning_rate": 0.0009879577598655728, "loss": 0.3083, "num_input_tokens_seen": 43223120, "step": 19985 }, { "epoch": 3.2610114192495923, "grad_norm": 0.10149620473384857, "learning_rate": 0.0009879422271282798, "loss": 0.1296, "num_input_tokens_seen": 43233840, "step": 19990 }, { "epoch": 3.261827079934747, "grad_norm": 0.07267143577337265, "learning_rate": 0.0009879266845022187, "loss": 0.1318, "num_input_tokens_seen": 43245200, "step": 19995 }, { "epoch": 3.262642740619902, "grad_norm": 0.1777501404285431, "learning_rate": 0.0009879111319877041, "loss": 0.112, "num_input_tokens_seen": 43256944, "step": 20000 }, { "epoch": 3.263458401305057, "grad_norm": 0.04019446298480034, "learning_rate": 0.0009878955695850516, "loss": 0.1542, "num_input_tokens_seen": 43268432, "step": 20005 }, { "epoch": 3.264274061990212, "grad_norm": 0.07024483382701874, "learning_rate": 0.0009878799972945762, "loss": 0.1052, "num_input_tokens_seen": 43279792, "step": 20010 }, { "epoch": 3.2650897226753672, "grad_norm": 0.11250631511211395, "learning_rate": 0.000987864415116594, "loss": 0.169, "num_input_tokens_seen": 43290288, "step": 20015 }, { "epoch": 3.265905383360522, "grad_norm": 0.1379203498363495, "learning_rate": 0.0009878488230514206, "loss": 0.1573, "num_input_tokens_seen": 43299920, "step": 20020 }, { "epoch": 3.266721044045677, "grad_norm": 0.17190445959568024, "learning_rate": 0.0009878332210993717, "loss": 0.0863, "num_input_tokens_seen": 43311248, "step": 20025 }, { "epoch": 3.267536704730832, "grad_norm": 0.28690385818481445, "learning_rate": 0.0009878176092607638, "loss": 0.1576, "num_input_tokens_seen": 43320976, "step": 20030 }, { "epoch": 3.268352365415987, "grad_norm": 0.08246587961912155, "learning_rate": 0.0009878019875359132, "loss": 0.0845, "num_input_tokens_seen": 43331440, "step": 20035 }, { "epoch": 3.2691680261011418, "grad_norm": 0.039607934653759, "learning_rate": 0.0009877863559251366, "loss": 0.1099, "num_input_tokens_seen": 43343024, "step": 20040 }, { "epoch": 3.269983686786297, "grad_norm": 0.017975594848394394, "learning_rate": 0.0009877707144287505, "loss": 0.0499, "num_input_tokens_seen": 43353840, "step": 20045 }, { "epoch": 3.270799347471452, "grad_norm": 0.0691947340965271, "learning_rate": 0.0009877550630470722, "loss": 0.049, "num_input_tokens_seen": 43364656, "step": 20050 }, { "epoch": 3.2716150081566067, "grad_norm": 0.3025503158569336, "learning_rate": 0.000987739401780419, "loss": 0.24, "num_input_tokens_seen": 43375920, "step": 20055 }, { "epoch": 3.272430668841762, "grad_norm": 0.022506562992930412, "learning_rate": 0.0009877237306291076, "loss": 0.155, "num_input_tokens_seen": 43386608, "step": 20060 }, { "epoch": 3.2732463295269167, "grad_norm": 0.12372944504022598, "learning_rate": 0.0009877080495934564, "loss": 0.0375, "num_input_tokens_seen": 43397840, "step": 20065 }, { "epoch": 3.274061990212072, "grad_norm": 0.03336038067936897, "learning_rate": 0.0009876923586737828, "loss": 0.1941, "num_input_tokens_seen": 43409136, "step": 20070 }, { "epoch": 3.274877650897227, "grad_norm": 0.05050639435648918, "learning_rate": 0.000987676657870405, "loss": 0.0498, "num_input_tokens_seen": 43418864, "step": 20075 }, { "epoch": 3.2756933115823816, "grad_norm": 0.0558999739587307, "learning_rate": 0.0009876609471836408, "loss": 0.1609, "num_input_tokens_seen": 43430032, "step": 20080 }, { "epoch": 3.2765089722675365, "grad_norm": 0.02081811986863613, "learning_rate": 0.000987645226613809, "loss": 0.1736, "num_input_tokens_seen": 43441712, "step": 20085 }, { "epoch": 3.2773246329526917, "grad_norm": 0.09108427911996841, "learning_rate": 0.0009876294961612283, "loss": 0.0837, "num_input_tokens_seen": 43453200, "step": 20090 }, { "epoch": 3.2781402936378465, "grad_norm": 0.059019628912210464, "learning_rate": 0.0009876137558262168, "loss": 0.1473, "num_input_tokens_seen": 43464368, "step": 20095 }, { "epoch": 3.278955954323002, "grad_norm": 0.049325328320264816, "learning_rate": 0.0009875980056090943, "loss": 0.1024, "num_input_tokens_seen": 43475280, "step": 20100 }, { "epoch": 3.2797716150081566, "grad_norm": 0.12276256829500198, "learning_rate": 0.0009875822455101795, "loss": 0.0626, "num_input_tokens_seen": 43485904, "step": 20105 }, { "epoch": 3.2805872756933114, "grad_norm": 0.2100994735956192, "learning_rate": 0.000987566475529792, "loss": 0.123, "num_input_tokens_seen": 43496720, "step": 20110 }, { "epoch": 3.2814029363784667, "grad_norm": 0.018480392172932625, "learning_rate": 0.0009875506956682513, "loss": 0.0791, "num_input_tokens_seen": 43508080, "step": 20115 }, { "epoch": 3.2822185970636215, "grad_norm": 0.06958062946796417, "learning_rate": 0.0009875349059258773, "loss": 0.0787, "num_input_tokens_seen": 43519280, "step": 20120 }, { "epoch": 3.2830342577487763, "grad_norm": 0.22425442934036255, "learning_rate": 0.00098751910630299, "loss": 0.1221, "num_input_tokens_seen": 43529552, "step": 20125 }, { "epoch": 3.2838499184339316, "grad_norm": 0.014040003530681133, "learning_rate": 0.0009875032967999096, "loss": 0.1182, "num_input_tokens_seen": 43541008, "step": 20130 }, { "epoch": 3.2846655791190864, "grad_norm": 0.2088811844587326, "learning_rate": 0.0009874874774169562, "loss": 0.1187, "num_input_tokens_seen": 43551152, "step": 20135 }, { "epoch": 3.2854812398042412, "grad_norm": 0.1166122779250145, "learning_rate": 0.0009874716481544509, "loss": 0.226, "num_input_tokens_seen": 43562672, "step": 20140 }, { "epoch": 3.2862969004893965, "grad_norm": 0.1853681206703186, "learning_rate": 0.0009874558090127142, "loss": 0.1362, "num_input_tokens_seen": 43572944, "step": 20145 }, { "epoch": 3.2871125611745513, "grad_norm": 0.07401765882968903, "learning_rate": 0.0009874399599920669, "loss": 0.0824, "num_input_tokens_seen": 43585072, "step": 20150 }, { "epoch": 3.2879282218597066, "grad_norm": 0.04394443705677986, "learning_rate": 0.0009874241010928307, "loss": 0.0714, "num_input_tokens_seen": 43595920, "step": 20155 }, { "epoch": 3.2887438825448614, "grad_norm": 0.10552657395601273, "learning_rate": 0.0009874082323153266, "loss": 0.0889, "num_input_tokens_seen": 43605232, "step": 20160 }, { "epoch": 3.289559543230016, "grad_norm": 0.041619252413511276, "learning_rate": 0.0009873923536598765, "loss": 0.0367, "num_input_tokens_seen": 43616624, "step": 20165 }, { "epoch": 3.2903752039151715, "grad_norm": 0.16440622508525848, "learning_rate": 0.000987376465126802, "loss": 0.0673, "num_input_tokens_seen": 43627504, "step": 20170 }, { "epoch": 3.2911908646003263, "grad_norm": 0.016031792387366295, "learning_rate": 0.0009873605667164252, "loss": 0.1093, "num_input_tokens_seen": 43637776, "step": 20175 }, { "epoch": 3.292006525285481, "grad_norm": 0.24192920327186584, "learning_rate": 0.0009873446584290682, "loss": 0.1444, "num_input_tokens_seen": 43648784, "step": 20180 }, { "epoch": 3.2928221859706364, "grad_norm": 0.01155361719429493, "learning_rate": 0.0009873287402650535, "loss": 0.0349, "num_input_tokens_seen": 43658800, "step": 20185 }, { "epoch": 3.293637846655791, "grad_norm": 0.017669612541794777, "learning_rate": 0.0009873128122247035, "loss": 0.1177, "num_input_tokens_seen": 43669936, "step": 20190 }, { "epoch": 3.294453507340946, "grad_norm": 0.04354847967624664, "learning_rate": 0.0009872968743083414, "loss": 0.0945, "num_input_tokens_seen": 43680976, "step": 20195 }, { "epoch": 3.2952691680261013, "grad_norm": 0.05074286088347435, "learning_rate": 0.0009872809265162898, "loss": 0.1438, "num_input_tokens_seen": 43692112, "step": 20200 }, { "epoch": 3.296084828711256, "grad_norm": 0.024301722645759583, "learning_rate": 0.000987264968848872, "loss": 0.1897, "num_input_tokens_seen": 43702224, "step": 20205 }, { "epoch": 3.2969004893964113, "grad_norm": 0.11771446466445923, "learning_rate": 0.0009872490013064117, "loss": 0.1187, "num_input_tokens_seen": 43712720, "step": 20210 }, { "epoch": 3.297716150081566, "grad_norm": 0.07622315734624863, "learning_rate": 0.000987233023889232, "loss": 0.2055, "num_input_tokens_seen": 43724176, "step": 20215 }, { "epoch": 3.298531810766721, "grad_norm": 0.06392424553632736, "learning_rate": 0.000987217036597657, "loss": 0.0286, "num_input_tokens_seen": 43734032, "step": 20220 }, { "epoch": 3.299347471451876, "grad_norm": 0.10581985116004944, "learning_rate": 0.000987201039432011, "loss": 0.1012, "num_input_tokens_seen": 43744304, "step": 20225 }, { "epoch": 3.300163132137031, "grad_norm": 0.012859735637903214, "learning_rate": 0.0009871850323926177, "loss": 0.115, "num_input_tokens_seen": 43755888, "step": 20230 }, { "epoch": 3.300978792822186, "grad_norm": 0.03773471340537071, "learning_rate": 0.0009871690154798017, "loss": 0.1641, "num_input_tokens_seen": 43767056, "step": 20235 }, { "epoch": 3.301794453507341, "grad_norm": 0.03229673206806183, "learning_rate": 0.0009871529886938874, "loss": 0.15, "num_input_tokens_seen": 43778480, "step": 20240 }, { "epoch": 3.302610114192496, "grad_norm": 0.11208148300647736, "learning_rate": 0.0009871369520352, "loss": 0.093, "num_input_tokens_seen": 43788624, "step": 20245 }, { "epoch": 3.3034257748776508, "grad_norm": 0.07075408846139908, "learning_rate": 0.0009871209055040643, "loss": 0.1083, "num_input_tokens_seen": 43799216, "step": 20250 }, { "epoch": 3.304241435562806, "grad_norm": 0.008634911850094795, "learning_rate": 0.0009871048491008052, "loss": 0.1022, "num_input_tokens_seen": 43810480, "step": 20255 }, { "epoch": 3.305057096247961, "grad_norm": 0.028035888448357582, "learning_rate": 0.0009870887828257486, "loss": 0.0718, "num_input_tokens_seen": 43820688, "step": 20260 }, { "epoch": 3.3058727569331157, "grad_norm": 0.05893208459019661, "learning_rate": 0.00098707270667922, "loss": 0.1905, "num_input_tokens_seen": 43830256, "step": 20265 }, { "epoch": 3.306688417618271, "grad_norm": 0.11259466409683228, "learning_rate": 0.000987056620661545, "loss": 0.1289, "num_input_tokens_seen": 43841360, "step": 20270 }, { "epoch": 3.3075040783034257, "grad_norm": 0.23528766632080078, "learning_rate": 0.0009870405247730497, "loss": 0.0641, "num_input_tokens_seen": 43852848, "step": 20275 }, { "epoch": 3.3083197389885806, "grad_norm": 0.0631113052368164, "learning_rate": 0.0009870244190140602, "loss": 0.0684, "num_input_tokens_seen": 43863600, "step": 20280 }, { "epoch": 3.309135399673736, "grad_norm": 0.018527382984757423, "learning_rate": 0.000987008303384903, "loss": 0.1043, "num_input_tokens_seen": 43874512, "step": 20285 }, { "epoch": 3.3099510603588906, "grad_norm": 0.07803243398666382, "learning_rate": 0.000986992177885905, "loss": 0.0825, "num_input_tokens_seen": 43885808, "step": 20290 }, { "epoch": 3.310766721044046, "grad_norm": 0.054975420236587524, "learning_rate": 0.0009869760425173927, "loss": 0.0348, "num_input_tokens_seen": 43896816, "step": 20295 }, { "epoch": 3.3115823817292007, "grad_norm": 0.0050786943174898624, "learning_rate": 0.000986959897279693, "loss": 0.0825, "num_input_tokens_seen": 43908368, "step": 20300 }, { "epoch": 3.3123980424143555, "grad_norm": 0.2176738679409027, "learning_rate": 0.0009869437421731332, "loss": 0.1334, "num_input_tokens_seen": 43918992, "step": 20305 }, { "epoch": 3.3132137030995104, "grad_norm": 0.1001739352941513, "learning_rate": 0.0009869275771980405, "loss": 0.0686, "num_input_tokens_seen": 43930256, "step": 20310 }, { "epoch": 3.3140293637846656, "grad_norm": 0.14070114493370056, "learning_rate": 0.000986911402354743, "loss": 0.3225, "num_input_tokens_seen": 43941168, "step": 20315 }, { "epoch": 3.3148450244698204, "grad_norm": 0.1104494109749794, "learning_rate": 0.0009868952176435683, "loss": 0.1142, "num_input_tokens_seen": 43952368, "step": 20320 }, { "epoch": 3.3156606851549757, "grad_norm": 0.02809176966547966, "learning_rate": 0.0009868790230648443, "loss": 0.0501, "num_input_tokens_seen": 43963632, "step": 20325 }, { "epoch": 3.3164763458401305, "grad_norm": 0.0439760759472847, "learning_rate": 0.0009868628186188993, "loss": 0.091, "num_input_tokens_seen": 43975024, "step": 20330 }, { "epoch": 3.3172920065252853, "grad_norm": 0.012558380141854286, "learning_rate": 0.0009868466043060616, "loss": 0.0471, "num_input_tokens_seen": 43986608, "step": 20335 }, { "epoch": 3.3181076672104406, "grad_norm": 0.016795523464679718, "learning_rate": 0.00098683038012666, "loss": 0.0691, "num_input_tokens_seen": 43997968, "step": 20340 }, { "epoch": 3.3189233278955954, "grad_norm": 0.05473247915506363, "learning_rate": 0.0009868141460810226, "loss": 0.1192, "num_input_tokens_seen": 44010032, "step": 20345 }, { "epoch": 3.3197389885807502, "grad_norm": 0.03609495982527733, "learning_rate": 0.0009867979021694795, "loss": 0.0272, "num_input_tokens_seen": 44021104, "step": 20350 }, { "epoch": 3.3205546492659055, "grad_norm": 0.101466603577137, "learning_rate": 0.0009867816483923593, "loss": 0.0828, "num_input_tokens_seen": 44031952, "step": 20355 }, { "epoch": 3.3213703099510603, "grad_norm": 0.34413954615592957, "learning_rate": 0.0009867653847499913, "loss": 0.1781, "num_input_tokens_seen": 44042704, "step": 20360 }, { "epoch": 3.322185970636215, "grad_norm": 0.04273676499724388, "learning_rate": 0.0009867491112427055, "loss": 0.0603, "num_input_tokens_seen": 44052592, "step": 20365 }, { "epoch": 3.3230016313213704, "grad_norm": 0.35218381881713867, "learning_rate": 0.0009867328278708313, "loss": 0.276, "num_input_tokens_seen": 44065328, "step": 20370 }, { "epoch": 3.323817292006525, "grad_norm": 0.229897141456604, "learning_rate": 0.0009867165346346988, "loss": 0.0778, "num_input_tokens_seen": 44077552, "step": 20375 }, { "epoch": 3.3246329526916805, "grad_norm": 0.04176846519112587, "learning_rate": 0.0009867002315346383, "loss": 0.1624, "num_input_tokens_seen": 44088752, "step": 20380 }, { "epoch": 3.3254486133768353, "grad_norm": 0.17083647847175598, "learning_rate": 0.0009866839185709805, "loss": 0.0944, "num_input_tokens_seen": 44099376, "step": 20385 }, { "epoch": 3.32626427406199, "grad_norm": 0.1816277652978897, "learning_rate": 0.0009866675957440553, "loss": 0.1169, "num_input_tokens_seen": 44109488, "step": 20390 }, { "epoch": 3.3270799347471454, "grad_norm": 0.06937110424041748, "learning_rate": 0.0009866512630541942, "loss": 0.1128, "num_input_tokens_seen": 44119568, "step": 20395 }, { "epoch": 3.3278955954323, "grad_norm": 0.1718575358390808, "learning_rate": 0.0009866349205017277, "loss": 0.0818, "num_input_tokens_seen": 44130000, "step": 20400 }, { "epoch": 3.328711256117455, "grad_norm": 0.1907864212989807, "learning_rate": 0.0009866185680869873, "loss": 0.0938, "num_input_tokens_seen": 44140208, "step": 20405 }, { "epoch": 3.3295269168026103, "grad_norm": 0.01826365478336811, "learning_rate": 0.0009866022058103042, "loss": 0.0715, "num_input_tokens_seen": 44150160, "step": 20410 }, { "epoch": 3.330342577487765, "grad_norm": 0.018938470631837845, "learning_rate": 0.0009865858336720102, "loss": 0.0622, "num_input_tokens_seen": 44161008, "step": 20415 }, { "epoch": 3.33115823817292, "grad_norm": 0.1174740418791771, "learning_rate": 0.000986569451672437, "loss": 0.1543, "num_input_tokens_seen": 44170768, "step": 20420 }, { "epoch": 3.331973898858075, "grad_norm": 0.24229373037815094, "learning_rate": 0.0009865530598119163, "loss": 0.1628, "num_input_tokens_seen": 44180080, "step": 20425 }, { "epoch": 3.33278955954323, "grad_norm": 0.00437184190377593, "learning_rate": 0.000986536658090781, "loss": 0.1034, "num_input_tokens_seen": 44191216, "step": 20430 }, { "epoch": 3.3336052202283852, "grad_norm": 0.035282671451568604, "learning_rate": 0.0009865202465093631, "loss": 0.1143, "num_input_tokens_seen": 44201584, "step": 20435 }, { "epoch": 3.33442088091354, "grad_norm": 0.0404328852891922, "learning_rate": 0.000986503825067995, "loss": 0.1257, "num_input_tokens_seen": 44213840, "step": 20440 }, { "epoch": 3.335236541598695, "grad_norm": 0.1707407385110855, "learning_rate": 0.0009864873937670098, "loss": 0.145, "num_input_tokens_seen": 44224944, "step": 20445 }, { "epoch": 3.3360522022838497, "grad_norm": 0.0483018197119236, "learning_rate": 0.0009864709526067404, "loss": 0.0343, "num_input_tokens_seen": 44235536, "step": 20450 }, { "epoch": 3.336867862969005, "grad_norm": 0.069381944835186, "learning_rate": 0.0009864545015875199, "loss": 0.0415, "num_input_tokens_seen": 44245776, "step": 20455 }, { "epoch": 3.3376835236541598, "grad_norm": 0.012071680277585983, "learning_rate": 0.000986438040709682, "loss": 0.1051, "num_input_tokens_seen": 44255472, "step": 20460 }, { "epoch": 3.338499184339315, "grad_norm": 0.2652641534805298, "learning_rate": 0.00098642156997356, "loss": 0.2206, "num_input_tokens_seen": 44266000, "step": 20465 }, { "epoch": 3.33931484502447, "grad_norm": 0.04244496300816536, "learning_rate": 0.0009864050893794878, "loss": 0.2189, "num_input_tokens_seen": 44276624, "step": 20470 }, { "epoch": 3.3401305057096247, "grad_norm": 0.11729129403829575, "learning_rate": 0.0009863885989277994, "loss": 0.0754, "num_input_tokens_seen": 44287472, "step": 20475 }, { "epoch": 3.34094616639478, "grad_norm": 0.04826750606298447, "learning_rate": 0.0009863720986188291, "loss": 0.1146, "num_input_tokens_seen": 44296944, "step": 20480 }, { "epoch": 3.3417618270799347, "grad_norm": 0.2376585453748703, "learning_rate": 0.0009863555884529114, "loss": 0.1758, "num_input_tokens_seen": 44307952, "step": 20485 }, { "epoch": 3.3425774877650896, "grad_norm": 0.08156166970729828, "learning_rate": 0.0009863390684303804, "loss": 0.1008, "num_input_tokens_seen": 44318640, "step": 20490 }, { "epoch": 3.343393148450245, "grad_norm": 0.12889395654201508, "learning_rate": 0.0009863225385515714, "loss": 0.0658, "num_input_tokens_seen": 44329008, "step": 20495 }, { "epoch": 3.3442088091353996, "grad_norm": 0.0919957235455513, "learning_rate": 0.000986305998816819, "loss": 0.1266, "num_input_tokens_seen": 44340304, "step": 20500 }, { "epoch": 3.3450244698205545, "grad_norm": 0.1220528781414032, "learning_rate": 0.000986289449226459, "loss": 0.1175, "num_input_tokens_seen": 44351664, "step": 20505 }, { "epoch": 3.3458401305057097, "grad_norm": 0.020937541499733925, "learning_rate": 0.000986272889780826, "loss": 0.0989, "num_input_tokens_seen": 44362448, "step": 20510 }, { "epoch": 3.3466557911908645, "grad_norm": 0.01079709641635418, "learning_rate": 0.000986256320480256, "loss": 0.0785, "num_input_tokens_seen": 44373712, "step": 20515 }, { "epoch": 3.34747145187602, "grad_norm": 0.09133608639240265, "learning_rate": 0.0009862397413250852, "loss": 0.0963, "num_input_tokens_seen": 44384496, "step": 20520 }, { "epoch": 3.3482871125611746, "grad_norm": 0.11672952771186829, "learning_rate": 0.0009862231523156489, "loss": 0.1958, "num_input_tokens_seen": 44395952, "step": 20525 }, { "epoch": 3.3491027732463294, "grad_norm": 0.08248498290777206, "learning_rate": 0.0009862065534522837, "loss": 0.1382, "num_input_tokens_seen": 44407792, "step": 20530 }, { "epoch": 3.3499184339314847, "grad_norm": 0.08188489824533463, "learning_rate": 0.000986189944735326, "loss": 0.1545, "num_input_tokens_seen": 44419568, "step": 20535 }, { "epoch": 3.3507340946166395, "grad_norm": 0.08767145872116089, "learning_rate": 0.000986173326165112, "loss": 0.1524, "num_input_tokens_seen": 44429872, "step": 20540 }, { "epoch": 3.3515497553017943, "grad_norm": 0.21928314864635468, "learning_rate": 0.000986156697741979, "loss": 0.3474, "num_input_tokens_seen": 44442160, "step": 20545 }, { "epoch": 3.3523654159869496, "grad_norm": 0.01372506469488144, "learning_rate": 0.0009861400594662637, "loss": 0.2471, "num_input_tokens_seen": 44452336, "step": 20550 }, { "epoch": 3.3531810766721044, "grad_norm": 0.1195288896560669, "learning_rate": 0.0009861234113383035, "loss": 0.1279, "num_input_tokens_seen": 44464112, "step": 20555 }, { "epoch": 3.3539967373572592, "grad_norm": 0.01491206232458353, "learning_rate": 0.0009861067533584356, "loss": 0.1224, "num_input_tokens_seen": 44475024, "step": 20560 }, { "epoch": 3.3548123980424145, "grad_norm": 0.02331310696899891, "learning_rate": 0.0009860900855269976, "loss": 0.0526, "num_input_tokens_seen": 44486128, "step": 20565 }, { "epoch": 3.3556280587275693, "grad_norm": 0.10006996989250183, "learning_rate": 0.0009860734078443276, "loss": 0.1613, "num_input_tokens_seen": 44495568, "step": 20570 }, { "epoch": 3.356443719412724, "grad_norm": 0.06204470619559288, "learning_rate": 0.0009860567203107632, "loss": 0.1667, "num_input_tokens_seen": 44506064, "step": 20575 }, { "epoch": 3.3572593800978794, "grad_norm": 0.029366256669163704, "learning_rate": 0.0009860400229266427, "loss": 0.0747, "num_input_tokens_seen": 44517808, "step": 20580 }, { "epoch": 3.358075040783034, "grad_norm": 0.04264573007822037, "learning_rate": 0.0009860233156923047, "loss": 0.0994, "num_input_tokens_seen": 44529104, "step": 20585 }, { "epoch": 3.358890701468189, "grad_norm": 0.056957364082336426, "learning_rate": 0.0009860065986080876, "loss": 0.195, "num_input_tokens_seen": 44540432, "step": 20590 }, { "epoch": 3.3597063621533443, "grad_norm": 0.06419949233531952, "learning_rate": 0.00098598987167433, "loss": 0.0677, "num_input_tokens_seen": 44550128, "step": 20595 }, { "epoch": 3.360522022838499, "grad_norm": 0.11103334277868271, "learning_rate": 0.0009859731348913713, "loss": 0.0612, "num_input_tokens_seen": 44560880, "step": 20600 }, { "epoch": 3.3613376835236544, "grad_norm": 0.054705556482076645, "learning_rate": 0.0009859563882595507, "loss": 0.1947, "num_input_tokens_seen": 44571216, "step": 20605 }, { "epoch": 3.362153344208809, "grad_norm": 0.16497164964675903, "learning_rate": 0.0009859396317792074, "loss": 0.2826, "num_input_tokens_seen": 44581776, "step": 20610 }, { "epoch": 3.362969004893964, "grad_norm": 0.016342537477612495, "learning_rate": 0.0009859228654506807, "loss": 0.0585, "num_input_tokens_seen": 44591600, "step": 20615 }, { "epoch": 3.3637846655791193, "grad_norm": 0.029464807361364365, "learning_rate": 0.0009859060892743108, "loss": 0.0535, "num_input_tokens_seen": 44602544, "step": 20620 }, { "epoch": 3.364600326264274, "grad_norm": 0.15060758590698242, "learning_rate": 0.0009858893032504378, "loss": 0.1205, "num_input_tokens_seen": 44613584, "step": 20625 }, { "epoch": 3.365415986949429, "grad_norm": 0.014383463189005852, "learning_rate": 0.0009858725073794016, "loss": 0.1241, "num_input_tokens_seen": 44623248, "step": 20630 }, { "epoch": 3.366231647634584, "grad_norm": 0.020124254748225212, "learning_rate": 0.0009858557016615423, "loss": 0.0502, "num_input_tokens_seen": 44633232, "step": 20635 }, { "epoch": 3.367047308319739, "grad_norm": 0.16115230321884155, "learning_rate": 0.0009858388860972012, "loss": 0.1436, "num_input_tokens_seen": 44644016, "step": 20640 }, { "epoch": 3.367862969004894, "grad_norm": 0.013520710170269012, "learning_rate": 0.0009858220606867188, "loss": 0.022, "num_input_tokens_seen": 44654672, "step": 20645 }, { "epoch": 3.368678629690049, "grad_norm": 0.009846985340118408, "learning_rate": 0.000985805225430436, "loss": 0.0319, "num_input_tokens_seen": 44666768, "step": 20650 }, { "epoch": 3.369494290375204, "grad_norm": 0.089094378054142, "learning_rate": 0.0009857883803286937, "loss": 0.0989, "num_input_tokens_seen": 44677520, "step": 20655 }, { "epoch": 3.370309951060359, "grad_norm": 0.1536937952041626, "learning_rate": 0.0009857715253818338, "loss": 0.0803, "num_input_tokens_seen": 44688080, "step": 20660 }, { "epoch": 3.371125611745514, "grad_norm": 0.07965698093175888, "learning_rate": 0.000985754660590198, "loss": 0.0663, "num_input_tokens_seen": 44698288, "step": 20665 }, { "epoch": 3.3719412724306688, "grad_norm": 0.028182541951537132, "learning_rate": 0.0009857377859541275, "loss": 0.1655, "num_input_tokens_seen": 44710160, "step": 20670 }, { "epoch": 3.3727569331158236, "grad_norm": 0.10902436077594757, "learning_rate": 0.0009857209014739645, "loss": 0.0584, "num_input_tokens_seen": 44720592, "step": 20675 }, { "epoch": 3.373572593800979, "grad_norm": 0.07185492664575577, "learning_rate": 0.0009857040071500512, "loss": 0.171, "num_input_tokens_seen": 44731024, "step": 20680 }, { "epoch": 3.3743882544861337, "grad_norm": 0.051205482333898544, "learning_rate": 0.0009856871029827303, "loss": 0.1979, "num_input_tokens_seen": 44742352, "step": 20685 }, { "epoch": 3.375203915171289, "grad_norm": 0.02235202118754387, "learning_rate": 0.0009856701889723438, "loss": 0.0427, "num_input_tokens_seen": 44751856, "step": 20690 }, { "epoch": 3.3760195758564437, "grad_norm": 0.10820963233709335, "learning_rate": 0.0009856532651192351, "loss": 0.1165, "num_input_tokens_seen": 44763728, "step": 20695 }, { "epoch": 3.3768352365415986, "grad_norm": 0.11374247819185257, "learning_rate": 0.0009856363314237468, "loss": 0.1476, "num_input_tokens_seen": 44775440, "step": 20700 }, { "epoch": 3.377650897226754, "grad_norm": 0.014920140616595745, "learning_rate": 0.0009856193878862221, "loss": 0.1563, "num_input_tokens_seen": 44787472, "step": 20705 }, { "epoch": 3.3784665579119086, "grad_norm": 0.1235361248254776, "learning_rate": 0.0009856024345070045, "loss": 0.158, "num_input_tokens_seen": 44799056, "step": 20710 }, { "epoch": 3.3792822185970635, "grad_norm": 0.00849025510251522, "learning_rate": 0.0009855854712864376, "loss": 0.0113, "num_input_tokens_seen": 44810672, "step": 20715 }, { "epoch": 3.3800978792822187, "grad_norm": 0.2491769641637802, "learning_rate": 0.000985568498224865, "loss": 0.246, "num_input_tokens_seen": 44822224, "step": 20720 }, { "epoch": 3.3809135399673735, "grad_norm": 0.03148525208234787, "learning_rate": 0.0009855515153226308, "loss": 0.1343, "num_input_tokens_seen": 44832496, "step": 20725 }, { "epoch": 3.3817292006525284, "grad_norm": 0.17702309787273407, "learning_rate": 0.0009855345225800792, "loss": 0.0784, "num_input_tokens_seen": 44840464, "step": 20730 }, { "epoch": 3.3825448613376836, "grad_norm": 0.11792438477277756, "learning_rate": 0.0009855175199975546, "loss": 0.0817, "num_input_tokens_seen": 44850768, "step": 20735 }, { "epoch": 3.3833605220228384, "grad_norm": 0.046677011996507645, "learning_rate": 0.0009855005075754015, "loss": 0.1386, "num_input_tokens_seen": 44861904, "step": 20740 }, { "epoch": 3.3841761827079937, "grad_norm": 0.09238780289888382, "learning_rate": 0.0009854834853139647, "loss": 0.2265, "num_input_tokens_seen": 44871984, "step": 20745 }, { "epoch": 3.3849918433931485, "grad_norm": 0.09034372121095657, "learning_rate": 0.0009854664532135892, "loss": 0.226, "num_input_tokens_seen": 44882960, "step": 20750 }, { "epoch": 3.3858075040783033, "grad_norm": 0.16702663898468018, "learning_rate": 0.0009854494112746203, "loss": 0.0946, "num_input_tokens_seen": 44894640, "step": 20755 }, { "epoch": 3.3866231647634586, "grad_norm": 0.055394161492586136, "learning_rate": 0.000985432359497403, "loss": 0.0685, "num_input_tokens_seen": 44906128, "step": 20760 }, { "epoch": 3.3874388254486134, "grad_norm": 0.013766895048320293, "learning_rate": 0.0009854152978822834, "loss": 0.0934, "num_input_tokens_seen": 44915824, "step": 20765 }, { "epoch": 3.3882544861337682, "grad_norm": 0.2671952545642853, "learning_rate": 0.0009853982264296068, "loss": 0.0708, "num_input_tokens_seen": 44925840, "step": 20770 }, { "epoch": 3.3890701468189235, "grad_norm": 0.012830116786062717, "learning_rate": 0.0009853811451397195, "loss": 0.0483, "num_input_tokens_seen": 44936592, "step": 20775 }, { "epoch": 3.3898858075040783, "grad_norm": 0.01821967586874962, "learning_rate": 0.0009853640540129674, "loss": 0.205, "num_input_tokens_seen": 44947600, "step": 20780 }, { "epoch": 3.390701468189233, "grad_norm": 0.11865301430225372, "learning_rate": 0.0009853469530496971, "loss": 0.1086, "num_input_tokens_seen": 44957968, "step": 20785 }, { "epoch": 3.3915171288743884, "grad_norm": 0.21843115985393524, "learning_rate": 0.000985329842250255, "loss": 0.0617, "num_input_tokens_seen": 44970032, "step": 20790 }, { "epoch": 3.392332789559543, "grad_norm": 0.004521653056144714, "learning_rate": 0.000985312721614988, "loss": 0.0306, "num_input_tokens_seen": 44980400, "step": 20795 }, { "epoch": 3.393148450244698, "grad_norm": 0.17764700949192047, "learning_rate": 0.0009852955911442431, "loss": 0.1299, "num_input_tokens_seen": 44990480, "step": 20800 }, { "epoch": 3.3939641109298533, "grad_norm": 0.04476391151547432, "learning_rate": 0.0009852784508383673, "loss": 0.0808, "num_input_tokens_seen": 45001584, "step": 20805 }, { "epoch": 3.394779771615008, "grad_norm": 0.32288724184036255, "learning_rate": 0.0009852613006977081, "loss": 0.2153, "num_input_tokens_seen": 45012720, "step": 20810 }, { "epoch": 3.395595432300163, "grad_norm": 0.012930216267704964, "learning_rate": 0.0009852441407226132, "loss": 0.0305, "num_input_tokens_seen": 45022832, "step": 20815 }, { "epoch": 3.396411092985318, "grad_norm": 0.09052237868309021, "learning_rate": 0.00098522697091343, "loss": 0.1351, "num_input_tokens_seen": 45033648, "step": 20820 }, { "epoch": 3.397226753670473, "grad_norm": 0.02524031139910221, "learning_rate": 0.0009852097912705067, "loss": 0.1472, "num_input_tokens_seen": 45044592, "step": 20825 }, { "epoch": 3.3980424143556283, "grad_norm": 0.015985824167728424, "learning_rate": 0.0009851926017941917, "loss": 0.0751, "num_input_tokens_seen": 45055056, "step": 20830 }, { "epoch": 3.398858075040783, "grad_norm": 0.023439688608050346, "learning_rate": 0.0009851754024848328, "loss": 0.0788, "num_input_tokens_seen": 45065840, "step": 20835 }, { "epoch": 3.399673735725938, "grad_norm": 0.07412150502204895, "learning_rate": 0.0009851581933427792, "loss": 0.186, "num_input_tokens_seen": 45077200, "step": 20840 }, { "epoch": 3.400489396411093, "grad_norm": 0.01010909117758274, "learning_rate": 0.000985140974368379, "loss": 0.1162, "num_input_tokens_seen": 45088432, "step": 20845 }, { "epoch": 3.401305057096248, "grad_norm": 0.0620625801384449, "learning_rate": 0.0009851237455619818, "loss": 0.0669, "num_input_tokens_seen": 45099248, "step": 20850 }, { "epoch": 3.402120717781403, "grad_norm": 0.054616160690784454, "learning_rate": 0.0009851065069239361, "loss": 0.0995, "num_input_tokens_seen": 45109968, "step": 20855 }, { "epoch": 3.402936378466558, "grad_norm": 0.053330112248659134, "learning_rate": 0.0009850892584545921, "loss": 0.0316, "num_input_tokens_seen": 45121840, "step": 20860 }, { "epoch": 3.403752039151713, "grad_norm": 0.12266937643289566, "learning_rate": 0.0009850720001542985, "loss": 0.1227, "num_input_tokens_seen": 45132720, "step": 20865 }, { "epoch": 3.4045676998368677, "grad_norm": 0.02327810972929001, "learning_rate": 0.0009850547320234058, "loss": 0.0596, "num_input_tokens_seen": 45143472, "step": 20870 }, { "epoch": 3.405383360522023, "grad_norm": 0.006088678725063801, "learning_rate": 0.0009850374540622633, "loss": 0.026, "num_input_tokens_seen": 45153744, "step": 20875 }, { "epoch": 3.4061990212071778, "grad_norm": 0.005732911638915539, "learning_rate": 0.0009850201662712217, "loss": 0.1386, "num_input_tokens_seen": 45163920, "step": 20880 }, { "epoch": 3.407014681892333, "grad_norm": 0.013120281510055065, "learning_rate": 0.0009850028686506313, "loss": 0.0312, "num_input_tokens_seen": 45174928, "step": 20885 }, { "epoch": 3.407830342577488, "grad_norm": 0.014896417036652565, "learning_rate": 0.000984985561200842, "loss": 0.0457, "num_input_tokens_seen": 45185488, "step": 20890 }, { "epoch": 3.4086460032626427, "grad_norm": 0.14158938825130463, "learning_rate": 0.0009849682439222055, "loss": 0.0753, "num_input_tokens_seen": 45197200, "step": 20895 }, { "epoch": 3.4094616639477975, "grad_norm": 0.2206645905971527, "learning_rate": 0.000984950916815072, "loss": 0.0672, "num_input_tokens_seen": 45207600, "step": 20900 }, { "epoch": 3.4102773246329527, "grad_norm": 0.015935998409986496, "learning_rate": 0.0009849335798797932, "loss": 0.0726, "num_input_tokens_seen": 45218800, "step": 20905 }, { "epoch": 3.4110929853181076, "grad_norm": 0.006892753764986992, "learning_rate": 0.0009849162331167201, "loss": 0.1614, "num_input_tokens_seen": 45230224, "step": 20910 }, { "epoch": 3.411908646003263, "grad_norm": 0.35190969705581665, "learning_rate": 0.0009848988765262044, "loss": 0.1044, "num_input_tokens_seen": 45241840, "step": 20915 }, { "epoch": 3.4127243066884176, "grad_norm": 0.3081585764884949, "learning_rate": 0.0009848815101085977, "loss": 0.1927, "num_input_tokens_seen": 45254032, "step": 20920 }, { "epoch": 3.4135399673735725, "grad_norm": 0.008788962848484516, "learning_rate": 0.0009848641338642524, "loss": 0.1713, "num_input_tokens_seen": 45263696, "step": 20925 }, { "epoch": 3.4143556280587277, "grad_norm": 0.004804656840860844, "learning_rate": 0.00098484674779352, "loss": 0.1061, "num_input_tokens_seen": 45275120, "step": 20930 }, { "epoch": 3.4151712887438825, "grad_norm": 0.09571245312690735, "learning_rate": 0.0009848293518967533, "loss": 0.0516, "num_input_tokens_seen": 45285744, "step": 20935 }, { "epoch": 3.4159869494290374, "grad_norm": 0.05200956016778946, "learning_rate": 0.0009848119461743049, "loss": 0.3109, "num_input_tokens_seen": 45295760, "step": 20940 }, { "epoch": 3.4168026101141926, "grad_norm": 0.16579431295394897, "learning_rate": 0.000984794530626527, "loss": 0.0964, "num_input_tokens_seen": 45306288, "step": 20945 }, { "epoch": 3.4176182707993474, "grad_norm": 0.11425749212503433, "learning_rate": 0.0009847771052537732, "loss": 0.074, "num_input_tokens_seen": 45318352, "step": 20950 }, { "epoch": 3.4184339314845023, "grad_norm": 0.057450488209724426, "learning_rate": 0.0009847596700563966, "loss": 0.0443, "num_input_tokens_seen": 45328368, "step": 20955 }, { "epoch": 3.4192495921696575, "grad_norm": 0.09544433653354645, "learning_rate": 0.00098474222503475, "loss": 0.0842, "num_input_tokens_seen": 45339920, "step": 20960 }, { "epoch": 3.4200652528548123, "grad_norm": 0.08446510136127472, "learning_rate": 0.0009847247701891874, "loss": 0.0987, "num_input_tokens_seen": 45351632, "step": 20965 }, { "epoch": 3.4208809135399676, "grad_norm": 0.03102685697376728, "learning_rate": 0.0009847073055200624, "loss": 0.0527, "num_input_tokens_seen": 45362224, "step": 20970 }, { "epoch": 3.4216965742251224, "grad_norm": 0.030437711626291275, "learning_rate": 0.0009846898310277288, "loss": 0.1066, "num_input_tokens_seen": 45373488, "step": 20975 }, { "epoch": 3.4225122349102772, "grad_norm": 0.04457832872867584, "learning_rate": 0.000984672346712541, "loss": 0.0861, "num_input_tokens_seen": 45384560, "step": 20980 }, { "epoch": 3.4233278955954325, "grad_norm": 0.04948972165584564, "learning_rate": 0.0009846548525748533, "loss": 0.07, "num_input_tokens_seen": 45394256, "step": 20985 }, { "epoch": 3.4241435562805873, "grad_norm": 0.0155490068718791, "learning_rate": 0.0009846373486150201, "loss": 0.1207, "num_input_tokens_seen": 45405936, "step": 20990 }, { "epoch": 3.424959216965742, "grad_norm": 0.054467808455228806, "learning_rate": 0.0009846198348333964, "loss": 0.1241, "num_input_tokens_seen": 45415696, "step": 20995 }, { "epoch": 3.4257748776508974, "grad_norm": 0.023017987608909607, "learning_rate": 0.0009846023112303369, "loss": 0.1182, "num_input_tokens_seen": 45426800, "step": 21000 }, { "epoch": 3.426590538336052, "grad_norm": 0.036964334547519684, "learning_rate": 0.0009845847778061968, "loss": 0.0828, "num_input_tokens_seen": 45437168, "step": 21005 }, { "epoch": 3.427406199021207, "grad_norm": 0.23528705537319183, "learning_rate": 0.0009845672345613313, "loss": 0.2005, "num_input_tokens_seen": 45447888, "step": 21010 }, { "epoch": 3.4282218597063623, "grad_norm": 0.22169376909732819, "learning_rate": 0.0009845496814960962, "loss": 0.1643, "num_input_tokens_seen": 45459568, "step": 21015 }, { "epoch": 3.429037520391517, "grad_norm": 0.00393277732655406, "learning_rate": 0.0009845321186108468, "loss": 0.083, "num_input_tokens_seen": 45470768, "step": 21020 }, { "epoch": 3.429853181076672, "grad_norm": 0.11375081539154053, "learning_rate": 0.0009845145459059397, "loss": 0.0762, "num_input_tokens_seen": 45481456, "step": 21025 }, { "epoch": 3.430668841761827, "grad_norm": 0.007324701175093651, "learning_rate": 0.0009844969633817306, "loss": 0.1264, "num_input_tokens_seen": 45492592, "step": 21030 }, { "epoch": 3.431484502446982, "grad_norm": 0.031020818278193474, "learning_rate": 0.000984479371038576, "loss": 0.1417, "num_input_tokens_seen": 45503792, "step": 21035 }, { "epoch": 3.432300163132137, "grad_norm": 0.015361804515123367, "learning_rate": 0.0009844617688768323, "loss": 0.0588, "num_input_tokens_seen": 45514800, "step": 21040 }, { "epoch": 3.433115823817292, "grad_norm": 0.04145984724164009, "learning_rate": 0.000984444156896856, "loss": 0.1141, "num_input_tokens_seen": 45525168, "step": 21045 }, { "epoch": 3.433931484502447, "grad_norm": 0.05199075862765312, "learning_rate": 0.0009844265350990047, "loss": 0.1007, "num_input_tokens_seen": 45536080, "step": 21050 }, { "epoch": 3.434747145187602, "grad_norm": 0.19810503721237183, "learning_rate": 0.000984408903483635, "loss": 0.1237, "num_input_tokens_seen": 45545808, "step": 21055 }, { "epoch": 3.435562805872757, "grad_norm": 0.009147719480097294, "learning_rate": 0.0009843912620511042, "loss": 0.1346, "num_input_tokens_seen": 45557232, "step": 21060 }, { "epoch": 3.436378466557912, "grad_norm": 0.0113412756472826, "learning_rate": 0.00098437361080177, "loss": 0.0346, "num_input_tokens_seen": 45568336, "step": 21065 }, { "epoch": 3.437194127243067, "grad_norm": 0.014199744910001755, "learning_rate": 0.0009843559497359903, "loss": 0.1464, "num_input_tokens_seen": 45578544, "step": 21070 }, { "epoch": 3.438009787928222, "grad_norm": 0.03397858887910843, "learning_rate": 0.0009843382788541227, "loss": 0.0896, "num_input_tokens_seen": 45590608, "step": 21075 }, { "epoch": 3.4388254486133767, "grad_norm": 0.20899049937725067, "learning_rate": 0.0009843205981565253, "loss": 0.1218, "num_input_tokens_seen": 45600048, "step": 21080 }, { "epoch": 3.439641109298532, "grad_norm": 0.07323971390724182, "learning_rate": 0.0009843029076435567, "loss": 0.1632, "num_input_tokens_seen": 45608944, "step": 21085 }, { "epoch": 3.4404567699836868, "grad_norm": 0.022303447127342224, "learning_rate": 0.0009842852073155754, "loss": 0.1182, "num_input_tokens_seen": 45618832, "step": 21090 }, { "epoch": 3.4412724306688416, "grad_norm": 0.23006023466587067, "learning_rate": 0.00098426749717294, "loss": 0.1579, "num_input_tokens_seen": 45628144, "step": 21095 }, { "epoch": 3.442088091353997, "grad_norm": 0.0528857558965683, "learning_rate": 0.0009842497772160092, "loss": 0.1934, "num_input_tokens_seen": 45638480, "step": 21100 }, { "epoch": 3.4429037520391517, "grad_norm": 0.057266563177108765, "learning_rate": 0.0009842320474451427, "loss": 0.081, "num_input_tokens_seen": 45649648, "step": 21105 }, { "epoch": 3.443719412724307, "grad_norm": 0.041887782514095306, "learning_rate": 0.0009842143078606991, "loss": 0.0751, "num_input_tokens_seen": 45661168, "step": 21110 }, { "epoch": 3.4445350734094617, "grad_norm": 0.0473744235932827, "learning_rate": 0.0009841965584630385, "loss": 0.115, "num_input_tokens_seen": 45672432, "step": 21115 }, { "epoch": 3.4453507340946166, "grad_norm": 0.1000686064362526, "learning_rate": 0.0009841787992525203, "loss": 0.2541, "num_input_tokens_seen": 45683472, "step": 21120 }, { "epoch": 3.4461663947797714, "grad_norm": 0.029857605695724487, "learning_rate": 0.0009841610302295048, "loss": 0.0619, "num_input_tokens_seen": 45694704, "step": 21125 }, { "epoch": 3.4469820554649266, "grad_norm": 0.2285618633031845, "learning_rate": 0.0009841432513943516, "loss": 0.1447, "num_input_tokens_seen": 45705168, "step": 21130 }, { "epoch": 3.4477977161500815, "grad_norm": 0.09792362153530121, "learning_rate": 0.0009841254627474213, "loss": 0.1114, "num_input_tokens_seen": 45715792, "step": 21135 }, { "epoch": 3.4486133768352367, "grad_norm": 0.054127782583236694, "learning_rate": 0.000984107664289074, "loss": 0.0916, "num_input_tokens_seen": 45726576, "step": 21140 }, { "epoch": 3.4494290375203915, "grad_norm": 0.011056124232709408, "learning_rate": 0.0009840898560196712, "loss": 0.0446, "num_input_tokens_seen": 45737520, "step": 21145 }, { "epoch": 3.4502446982055464, "grad_norm": 0.1722433865070343, "learning_rate": 0.000984072037939573, "loss": 0.14, "num_input_tokens_seen": 45748176, "step": 21150 }, { "epoch": 3.4510603588907016, "grad_norm": 0.014301082119345665, "learning_rate": 0.000984054210049141, "loss": 0.0681, "num_input_tokens_seen": 45758864, "step": 21155 }, { "epoch": 3.4518760195758564, "grad_norm": 0.017598386853933334, "learning_rate": 0.0009840363723487365, "loss": 0.2171, "num_input_tokens_seen": 45769744, "step": 21160 }, { "epoch": 3.4526916802610113, "grad_norm": 0.21228720247745514, "learning_rate": 0.0009840185248387208, "loss": 0.2491, "num_input_tokens_seen": 45780400, "step": 21165 }, { "epoch": 3.4535073409461665, "grad_norm": 0.11573278903961182, "learning_rate": 0.0009840006675194558, "loss": 0.1264, "num_input_tokens_seen": 45790416, "step": 21170 }, { "epoch": 3.4543230016313213, "grad_norm": 0.05527227371931076, "learning_rate": 0.000983982800391303, "loss": 0.1217, "num_input_tokens_seen": 45801392, "step": 21175 }, { "epoch": 3.455138662316476, "grad_norm": 0.06245320290327072, "learning_rate": 0.0009839649234546248, "loss": 0.0428, "num_input_tokens_seen": 45812464, "step": 21180 }, { "epoch": 3.4559543230016314, "grad_norm": 0.05715889483690262, "learning_rate": 0.0009839470367097836, "loss": 0.1378, "num_input_tokens_seen": 45823056, "step": 21185 }, { "epoch": 3.4567699836867862, "grad_norm": 0.02741180546581745, "learning_rate": 0.0009839291401571417, "loss": 0.0969, "num_input_tokens_seen": 45832336, "step": 21190 }, { "epoch": 3.4575856443719415, "grad_norm": 0.009015440940856934, "learning_rate": 0.0009839112337970619, "loss": 0.0465, "num_input_tokens_seen": 45842864, "step": 21195 }, { "epoch": 3.4584013050570963, "grad_norm": 0.22681792080402374, "learning_rate": 0.0009838933176299072, "loss": 0.2291, "num_input_tokens_seen": 45854192, "step": 21200 }, { "epoch": 3.459216965742251, "grad_norm": 0.03709195926785469, "learning_rate": 0.0009838753916560404, "loss": 0.1055, "num_input_tokens_seen": 45865296, "step": 21205 }, { "epoch": 3.4600326264274064, "grad_norm": 0.0438421331346035, "learning_rate": 0.000983857455875825, "loss": 0.0986, "num_input_tokens_seen": 45876336, "step": 21210 }, { "epoch": 3.460848287112561, "grad_norm": 0.056209295988082886, "learning_rate": 0.0009838395102896244, "loss": 0.0809, "num_input_tokens_seen": 45886768, "step": 21215 }, { "epoch": 3.461663947797716, "grad_norm": 0.07024645805358887, "learning_rate": 0.0009838215548978024, "loss": 0.1117, "num_input_tokens_seen": 45896688, "step": 21220 }, { "epoch": 3.4624796084828713, "grad_norm": 0.011782780289649963, "learning_rate": 0.0009838035897007226, "loss": 0.0833, "num_input_tokens_seen": 45908496, "step": 21225 }, { "epoch": 3.463295269168026, "grad_norm": 0.007954503409564495, "learning_rate": 0.0009837856146987496, "loss": 0.0861, "num_input_tokens_seen": 45920144, "step": 21230 }, { "epoch": 3.464110929853181, "grad_norm": 0.01540245022624731, "learning_rate": 0.0009837676298922473, "loss": 0.0652, "num_input_tokens_seen": 45930480, "step": 21235 }, { "epoch": 3.464926590538336, "grad_norm": 0.008227720856666565, "learning_rate": 0.0009837496352815803, "loss": 0.1849, "num_input_tokens_seen": 45942416, "step": 21240 }, { "epoch": 3.465742251223491, "grad_norm": 0.025327688083052635, "learning_rate": 0.000983731630867113, "loss": 0.1903, "num_input_tokens_seen": 45953616, "step": 21245 }, { "epoch": 3.466557911908646, "grad_norm": 0.022125469520688057, "learning_rate": 0.0009837136166492109, "loss": 0.1485, "num_input_tokens_seen": 45965584, "step": 21250 }, { "epoch": 3.467373572593801, "grad_norm": 0.2036154568195343, "learning_rate": 0.0009836955926282385, "loss": 0.1585, "num_input_tokens_seen": 45977424, "step": 21255 }, { "epoch": 3.468189233278956, "grad_norm": 0.061766836792230606, "learning_rate": 0.0009836775588045613, "loss": 0.0947, "num_input_tokens_seen": 45988752, "step": 21260 }, { "epoch": 3.4690048939641107, "grad_norm": 0.08904801309108734, "learning_rate": 0.0009836595151785448, "loss": 0.1983, "num_input_tokens_seen": 45999184, "step": 21265 }, { "epoch": 3.469820554649266, "grad_norm": 0.04244118183851242, "learning_rate": 0.0009836414617505548, "loss": 0.0285, "num_input_tokens_seen": 46010544, "step": 21270 }, { "epoch": 3.470636215334421, "grad_norm": 0.07451055198907852, "learning_rate": 0.000983623398520957, "loss": 0.0926, "num_input_tokens_seen": 46021360, "step": 21275 }, { "epoch": 3.471451876019576, "grad_norm": 0.15908612310886383, "learning_rate": 0.0009836053254901173, "loss": 0.1483, "num_input_tokens_seen": 46030640, "step": 21280 }, { "epoch": 3.472267536704731, "grad_norm": 0.15909342467784882, "learning_rate": 0.0009835872426584024, "loss": 0.0829, "num_input_tokens_seen": 46041200, "step": 21285 }, { "epoch": 3.4730831973898857, "grad_norm": 0.2042553573846817, "learning_rate": 0.0009835691500261784, "loss": 0.1192, "num_input_tokens_seen": 46052208, "step": 21290 }, { "epoch": 3.473898858075041, "grad_norm": 0.06767347455024719, "learning_rate": 0.0009835510475938124, "loss": 0.0646, "num_input_tokens_seen": 46063536, "step": 21295 }, { "epoch": 3.4747145187601958, "grad_norm": 0.017927464097738266, "learning_rate": 0.0009835329353616708, "loss": 0.0989, "num_input_tokens_seen": 46075024, "step": 21300 }, { "epoch": 3.4755301794453506, "grad_norm": 0.23534156382083893, "learning_rate": 0.000983514813330121, "loss": 0.1022, "num_input_tokens_seen": 46084432, "step": 21305 }, { "epoch": 3.476345840130506, "grad_norm": 0.1075979694724083, "learning_rate": 0.00098349668149953, "loss": 0.0794, "num_input_tokens_seen": 46095504, "step": 21310 }, { "epoch": 3.4771615008156607, "grad_norm": 0.02730988711118698, "learning_rate": 0.0009834785398702653, "loss": 0.024, "num_input_tokens_seen": 46105520, "step": 21315 }, { "epoch": 3.4779771615008155, "grad_norm": 0.158670112490654, "learning_rate": 0.0009834603884426947, "loss": 0.1295, "num_input_tokens_seen": 46115344, "step": 21320 }, { "epoch": 3.4787928221859707, "grad_norm": 0.008138585835695267, "learning_rate": 0.000983442227217186, "loss": 0.1815, "num_input_tokens_seen": 46124784, "step": 21325 }, { "epoch": 3.4796084828711256, "grad_norm": 0.12669169902801514, "learning_rate": 0.0009834240561941072, "loss": 0.1283, "num_input_tokens_seen": 46135856, "step": 21330 }, { "epoch": 3.480424143556281, "grad_norm": 0.06928084045648575, "learning_rate": 0.000983405875373827, "loss": 0.0421, "num_input_tokens_seen": 46146960, "step": 21335 }, { "epoch": 3.4812398042414356, "grad_norm": 0.05004847049713135, "learning_rate": 0.0009833876847567132, "loss": 0.0972, "num_input_tokens_seen": 46158544, "step": 21340 }, { "epoch": 3.4820554649265905, "grad_norm": 0.2124181091785431, "learning_rate": 0.0009833694843431346, "loss": 0.109, "num_input_tokens_seen": 46168336, "step": 21345 }, { "epoch": 3.4828711256117453, "grad_norm": 0.20705543458461761, "learning_rate": 0.0009833512741334604, "loss": 0.2405, "num_input_tokens_seen": 46178128, "step": 21350 }, { "epoch": 3.4836867862969005, "grad_norm": 0.13747872412204742, "learning_rate": 0.0009833330541280595, "loss": 0.0734, "num_input_tokens_seen": 46188144, "step": 21355 }, { "epoch": 3.4845024469820554, "grad_norm": 0.05297991633415222, "learning_rate": 0.0009833148243273012, "loss": 0.0458, "num_input_tokens_seen": 46198768, "step": 21360 }, { "epoch": 3.4853181076672106, "grad_norm": 0.11282984167337418, "learning_rate": 0.0009832965847315547, "loss": 0.1525, "num_input_tokens_seen": 46209360, "step": 21365 }, { "epoch": 3.4861337683523654, "grad_norm": 0.06479454785585403, "learning_rate": 0.00098327833534119, "loss": 0.0563, "num_input_tokens_seen": 46219792, "step": 21370 }, { "epoch": 3.4869494290375203, "grad_norm": 0.03795412927865982, "learning_rate": 0.0009832600761565764, "loss": 0.0519, "num_input_tokens_seen": 46229424, "step": 21375 }, { "epoch": 3.4877650897226755, "grad_norm": 0.017168574035167694, "learning_rate": 0.0009832418071780845, "loss": 0.1306, "num_input_tokens_seen": 46240656, "step": 21380 }, { "epoch": 3.4885807504078303, "grad_norm": 0.05710841342806816, "learning_rate": 0.0009832235284060842, "loss": 0.1015, "num_input_tokens_seen": 46251312, "step": 21385 }, { "epoch": 3.489396411092985, "grad_norm": 0.1470293253660202, "learning_rate": 0.0009832052398409464, "loss": 0.0598, "num_input_tokens_seen": 46262832, "step": 21390 }, { "epoch": 3.4902120717781404, "grad_norm": 0.015464311465620995, "learning_rate": 0.000983186941483041, "loss": 0.0205, "num_input_tokens_seen": 46273616, "step": 21395 }, { "epoch": 3.4910277324632952, "grad_norm": 0.0574021190404892, "learning_rate": 0.0009831686333327397, "loss": 0.154, "num_input_tokens_seen": 46285456, "step": 21400 }, { "epoch": 3.49184339314845, "grad_norm": 0.04269903153181076, "learning_rate": 0.0009831503153904127, "loss": 0.0562, "num_input_tokens_seen": 46296848, "step": 21405 }, { "epoch": 3.4926590538336053, "grad_norm": 0.5759614706039429, "learning_rate": 0.000983131987656432, "loss": 0.2359, "num_input_tokens_seen": 46308400, "step": 21410 }, { "epoch": 3.49347471451876, "grad_norm": 0.02763795293867588, "learning_rate": 0.0009831136501311684, "loss": 0.0369, "num_input_tokens_seen": 46319312, "step": 21415 }, { "epoch": 3.4942903752039154, "grad_norm": 0.26870211958885193, "learning_rate": 0.000983095302814994, "loss": 0.2007, "num_input_tokens_seen": 46331184, "step": 21420 }, { "epoch": 3.49510603588907, "grad_norm": 0.10838611423969269, "learning_rate": 0.0009830769457082804, "loss": 0.0913, "num_input_tokens_seen": 46341264, "step": 21425 }, { "epoch": 3.495921696574225, "grad_norm": 0.325128972530365, "learning_rate": 0.0009830585788113994, "loss": 0.1205, "num_input_tokens_seen": 46352432, "step": 21430 }, { "epoch": 3.4967373572593803, "grad_norm": 0.09117142111063004, "learning_rate": 0.0009830402021247238, "loss": 0.0678, "num_input_tokens_seen": 46363280, "step": 21435 }, { "epoch": 3.497553017944535, "grad_norm": 0.09155073761940002, "learning_rate": 0.0009830218156486256, "loss": 0.2447, "num_input_tokens_seen": 46372848, "step": 21440 }, { "epoch": 3.49836867862969, "grad_norm": 0.022856619209051132, "learning_rate": 0.0009830034193834777, "loss": 0.0627, "num_input_tokens_seen": 46384976, "step": 21445 }, { "epoch": 3.499184339314845, "grad_norm": 0.4519825279712677, "learning_rate": 0.0009829850133296527, "loss": 0.1388, "num_input_tokens_seen": 46396560, "step": 21450 }, { "epoch": 3.5, "grad_norm": 0.06419207900762558, "learning_rate": 0.0009829665974875237, "loss": 0.1412, "num_input_tokens_seen": 46407472, "step": 21455 }, { "epoch": 3.500815660685155, "grad_norm": 0.19768834114074707, "learning_rate": 0.0009829481718574638, "loss": 0.1633, "num_input_tokens_seen": 46417872, "step": 21460 }, { "epoch": 3.50163132137031, "grad_norm": 0.04964315891265869, "learning_rate": 0.0009829297364398466, "loss": 0.0879, "num_input_tokens_seen": 46428432, "step": 21465 }, { "epoch": 3.502446982055465, "grad_norm": 0.020760485902428627, "learning_rate": 0.0009829112912350456, "loss": 0.0781, "num_input_tokens_seen": 46439856, "step": 21470 }, { "epoch": 3.50326264274062, "grad_norm": 0.020568421110510826, "learning_rate": 0.000982892836243435, "loss": 0.0753, "num_input_tokens_seen": 46451376, "step": 21475 }, { "epoch": 3.504078303425775, "grad_norm": 0.04842434450984001, "learning_rate": 0.000982874371465388, "loss": 0.0795, "num_input_tokens_seen": 46461456, "step": 21480 }, { "epoch": 3.50489396411093, "grad_norm": 0.09125185012817383, "learning_rate": 0.0009828558969012795, "loss": 0.0588, "num_input_tokens_seen": 46471600, "step": 21485 }, { "epoch": 3.5057096247960846, "grad_norm": 0.02235431969165802, "learning_rate": 0.0009828374125514837, "loss": 0.1059, "num_input_tokens_seen": 46482224, "step": 21490 }, { "epoch": 3.50652528548124, "grad_norm": 0.22898629307746887, "learning_rate": 0.0009828189184163752, "loss": 0.382, "num_input_tokens_seen": 46492496, "step": 21495 }, { "epoch": 3.5073409461663947, "grad_norm": 0.15465903282165527, "learning_rate": 0.0009828004144963288, "loss": 0.1664, "num_input_tokens_seen": 46503120, "step": 21500 }, { "epoch": 3.50815660685155, "grad_norm": 0.03542419150471687, "learning_rate": 0.0009827819007917195, "loss": 0.2168, "num_input_tokens_seen": 46513680, "step": 21505 }, { "epoch": 3.5089722675367048, "grad_norm": 0.127181276679039, "learning_rate": 0.0009827633773029228, "loss": 0.0798, "num_input_tokens_seen": 46523632, "step": 21510 }, { "epoch": 3.5097879282218596, "grad_norm": 0.039647456258535385, "learning_rate": 0.0009827448440303135, "loss": 0.1077, "num_input_tokens_seen": 46534352, "step": 21515 }, { "epoch": 3.5106035889070144, "grad_norm": 0.024184754118323326, "learning_rate": 0.0009827263009742678, "loss": 0.1848, "num_input_tokens_seen": 46547056, "step": 21520 }, { "epoch": 3.5114192495921697, "grad_norm": 0.054916176944971085, "learning_rate": 0.000982707748135161, "loss": 0.0938, "num_input_tokens_seen": 46557648, "step": 21525 }, { "epoch": 3.5122349102773245, "grad_norm": 0.25888592004776, "learning_rate": 0.0009826891855133693, "loss": 0.1358, "num_input_tokens_seen": 46567888, "step": 21530 }, { "epoch": 3.5130505709624797, "grad_norm": 0.25864580273628235, "learning_rate": 0.000982670613109269, "loss": 0.1325, "num_input_tokens_seen": 46578096, "step": 21535 }, { "epoch": 3.5138662316476346, "grad_norm": 0.1897091567516327, "learning_rate": 0.0009826520309232365, "loss": 0.2476, "num_input_tokens_seen": 46589104, "step": 21540 }, { "epoch": 3.5146818923327894, "grad_norm": 0.10447441786527634, "learning_rate": 0.0009826334389556482, "loss": 0.0694, "num_input_tokens_seen": 46600080, "step": 21545 }, { "epoch": 3.5154975530179446, "grad_norm": 0.023850787431001663, "learning_rate": 0.000982614837206881, "loss": 0.1558, "num_input_tokens_seen": 46610096, "step": 21550 }, { "epoch": 3.5163132137030995, "grad_norm": 0.036812786012887955, "learning_rate": 0.000982596225677312, "loss": 0.0632, "num_input_tokens_seen": 46620688, "step": 21555 }, { "epoch": 3.5171288743882547, "grad_norm": 0.16952641308307648, "learning_rate": 0.0009825776043673182, "loss": 0.0788, "num_input_tokens_seen": 46631824, "step": 21560 }, { "epoch": 3.5179445350734095, "grad_norm": 0.024134181439876556, "learning_rate": 0.000982558973277277, "loss": 0.1133, "num_input_tokens_seen": 46642832, "step": 21565 }, { "epoch": 3.5187601957585644, "grad_norm": 0.1285693347454071, "learning_rate": 0.0009825403324075662, "loss": 0.0894, "num_input_tokens_seen": 46653456, "step": 21570 }, { "epoch": 3.519575856443719, "grad_norm": 0.009325760416686535, "learning_rate": 0.0009825216817585633, "loss": 0.1061, "num_input_tokens_seen": 46665136, "step": 21575 }, { "epoch": 3.5203915171288744, "grad_norm": 0.1657373160123825, "learning_rate": 0.0009825030213306463, "loss": 0.1963, "num_input_tokens_seen": 46674544, "step": 21580 }, { "epoch": 3.5212071778140293, "grad_norm": 0.10782121121883392, "learning_rate": 0.0009824843511241936, "loss": 0.117, "num_input_tokens_seen": 46685264, "step": 21585 }, { "epoch": 3.5220228384991845, "grad_norm": 0.11278649419546127, "learning_rate": 0.0009824656711395834, "loss": 0.2149, "num_input_tokens_seen": 46696528, "step": 21590 }, { "epoch": 3.5228384991843393, "grad_norm": 0.11115585267543793, "learning_rate": 0.0009824469813771945, "loss": 0.1136, "num_input_tokens_seen": 46708176, "step": 21595 }, { "epoch": 3.523654159869494, "grad_norm": 0.030352916568517685, "learning_rate": 0.0009824282818374052, "loss": 0.0661, "num_input_tokens_seen": 46717328, "step": 21600 }, { "epoch": 3.5244698205546494, "grad_norm": 0.07619695365428925, "learning_rate": 0.000982409572520595, "loss": 0.2937, "num_input_tokens_seen": 46727568, "step": 21605 }, { "epoch": 3.5252854812398042, "grad_norm": 0.08703344315290451, "learning_rate": 0.0009823908534271426, "loss": 0.2221, "num_input_tokens_seen": 46738928, "step": 21610 }, { "epoch": 3.5261011419249595, "grad_norm": 0.07479031383991241, "learning_rate": 0.0009823721245574278, "loss": 0.1035, "num_input_tokens_seen": 46748752, "step": 21615 }, { "epoch": 3.5269168026101143, "grad_norm": 0.04100106284022331, "learning_rate": 0.0009823533859118299, "loss": 0.1089, "num_input_tokens_seen": 46760240, "step": 21620 }, { "epoch": 3.527732463295269, "grad_norm": 0.08685865998268127, "learning_rate": 0.0009823346374907287, "loss": 0.1158, "num_input_tokens_seen": 46769936, "step": 21625 }, { "epoch": 3.528548123980424, "grad_norm": 0.03426363319158554, "learning_rate": 0.000982315879294504, "loss": 0.0956, "num_input_tokens_seen": 46779888, "step": 21630 }, { "epoch": 3.529363784665579, "grad_norm": 0.1270504593849182, "learning_rate": 0.0009822971113235366, "loss": 0.1797, "num_input_tokens_seen": 46791504, "step": 21635 }, { "epoch": 3.530179445350734, "grad_norm": 0.10011230409145355, "learning_rate": 0.0009822783335782061, "loss": 0.1347, "num_input_tokens_seen": 46802096, "step": 21640 }, { "epoch": 3.5309951060358893, "grad_norm": 0.11784350126981735, "learning_rate": 0.0009822595460588935, "loss": 0.1285, "num_input_tokens_seen": 46813616, "step": 21645 }, { "epoch": 3.531810766721044, "grad_norm": 0.23490601778030396, "learning_rate": 0.0009822407487659792, "loss": 0.1382, "num_input_tokens_seen": 46824816, "step": 21650 }, { "epoch": 3.532626427406199, "grad_norm": 0.14736691117286682, "learning_rate": 0.0009822219416998445, "loss": 0.2027, "num_input_tokens_seen": 46835536, "step": 21655 }, { "epoch": 3.5334420880913537, "grad_norm": 0.20881640911102295, "learning_rate": 0.0009822031248608704, "loss": 0.2451, "num_input_tokens_seen": 46847280, "step": 21660 }, { "epoch": 3.534257748776509, "grad_norm": 0.06078454852104187, "learning_rate": 0.0009821842982494383, "loss": 0.085, "num_input_tokens_seen": 46859152, "step": 21665 }, { "epoch": 3.535073409461664, "grad_norm": 0.03408531844615936, "learning_rate": 0.0009821654618659297, "loss": 0.0769, "num_input_tokens_seen": 46869584, "step": 21670 }, { "epoch": 3.535889070146819, "grad_norm": 0.06222820654511452, "learning_rate": 0.0009821466157107263, "loss": 0.208, "num_input_tokens_seen": 46880432, "step": 21675 }, { "epoch": 3.536704730831974, "grad_norm": 0.09785876423120499, "learning_rate": 0.0009821277597842101, "loss": 0.0608, "num_input_tokens_seen": 46890768, "step": 21680 }, { "epoch": 3.5375203915171287, "grad_norm": 0.010254275985062122, "learning_rate": 0.0009821088940867632, "loss": 0.0897, "num_input_tokens_seen": 46902352, "step": 21685 }, { "epoch": 3.538336052202284, "grad_norm": 0.07269848138093948, "learning_rate": 0.0009820900186187681, "loss": 0.1248, "num_input_tokens_seen": 46912816, "step": 21690 }, { "epoch": 3.539151712887439, "grad_norm": 0.06444855034351349, "learning_rate": 0.0009820711333806068, "loss": 0.0661, "num_input_tokens_seen": 46923888, "step": 21695 }, { "epoch": 3.539967373572594, "grad_norm": 0.04911039397120476, "learning_rate": 0.000982052238372663, "loss": 0.0567, "num_input_tokens_seen": 46934832, "step": 21700 }, { "epoch": 3.540783034257749, "grad_norm": 0.19150716066360474, "learning_rate": 0.0009820333335953187, "loss": 0.1368, "num_input_tokens_seen": 46944784, "step": 21705 }, { "epoch": 3.5415986949429037, "grad_norm": 0.18624483048915863, "learning_rate": 0.0009820144190489574, "loss": 0.1731, "num_input_tokens_seen": 46955376, "step": 21710 }, { "epoch": 3.5424143556280585, "grad_norm": 0.010764437727630138, "learning_rate": 0.0009819954947339624, "loss": 0.1629, "num_input_tokens_seen": 46964944, "step": 21715 }, { "epoch": 3.5432300163132138, "grad_norm": 0.07276313006877899, "learning_rate": 0.0009819765606507173, "loss": 0.0411, "num_input_tokens_seen": 46974960, "step": 21720 }, { "epoch": 3.5440456769983686, "grad_norm": 0.09438583999872208, "learning_rate": 0.0009819576167996058, "loss": 0.1368, "num_input_tokens_seen": 46986416, "step": 21725 }, { "epoch": 3.544861337683524, "grad_norm": 0.005585776641964912, "learning_rate": 0.000981938663181012, "loss": 0.1781, "num_input_tokens_seen": 46996240, "step": 21730 }, { "epoch": 3.5456769983686787, "grad_norm": 0.08937297016382217, "learning_rate": 0.0009819196997953195, "loss": 0.1255, "num_input_tokens_seen": 47007472, "step": 21735 }, { "epoch": 3.5464926590538335, "grad_norm": 0.026922032237052917, "learning_rate": 0.000981900726642913, "loss": 0.0574, "num_input_tokens_seen": 47018128, "step": 21740 }, { "epoch": 3.5473083197389887, "grad_norm": 0.017074687406420708, "learning_rate": 0.0009818817437241768, "loss": 0.0855, "num_input_tokens_seen": 47029456, "step": 21745 }, { "epoch": 3.5481239804241436, "grad_norm": 0.03096526488661766, "learning_rate": 0.000981862751039496, "loss": 0.1789, "num_input_tokens_seen": 47039792, "step": 21750 }, { "epoch": 3.5489396411092984, "grad_norm": 0.06783930957317352, "learning_rate": 0.000981843748589255, "loss": 0.0915, "num_input_tokens_seen": 47050032, "step": 21755 }, { "epoch": 3.5497553017944536, "grad_norm": 0.08572400361299515, "learning_rate": 0.0009818247363738396, "loss": 0.1358, "num_input_tokens_seen": 47060336, "step": 21760 }, { "epoch": 3.5505709624796085, "grad_norm": 0.08011411875486374, "learning_rate": 0.0009818057143936344, "loss": 0.104, "num_input_tokens_seen": 47070160, "step": 21765 }, { "epoch": 3.5513866231647633, "grad_norm": 0.029326729476451874, "learning_rate": 0.000981786682649025, "loss": 0.0554, "num_input_tokens_seen": 47081008, "step": 21770 }, { "epoch": 3.5522022838499185, "grad_norm": 0.008124127052724361, "learning_rate": 0.0009817676411403976, "loss": 0.1112, "num_input_tokens_seen": 47091088, "step": 21775 }, { "epoch": 3.5530179445350734, "grad_norm": 0.10163454711437225, "learning_rate": 0.0009817485898681378, "loss": 0.0836, "num_input_tokens_seen": 47102384, "step": 21780 }, { "epoch": 3.5538336052202286, "grad_norm": 0.04621696472167969, "learning_rate": 0.0009817295288326315, "loss": 0.0513, "num_input_tokens_seen": 47114064, "step": 21785 }, { "epoch": 3.5546492659053834, "grad_norm": 0.09239888191223145, "learning_rate": 0.0009817104580342653, "loss": 0.103, "num_input_tokens_seen": 47123824, "step": 21790 }, { "epoch": 3.5554649265905383, "grad_norm": 0.017387012019753456, "learning_rate": 0.0009816913774734254, "loss": 0.048, "num_input_tokens_seen": 47136432, "step": 21795 }, { "epoch": 3.556280587275693, "grad_norm": 0.00779850734397769, "learning_rate": 0.0009816722871504987, "loss": 0.025, "num_input_tokens_seen": 47148496, "step": 21800 }, { "epoch": 3.5570962479608483, "grad_norm": 0.00785167794674635, "learning_rate": 0.0009816531870658722, "loss": 0.0417, "num_input_tokens_seen": 47159600, "step": 21805 }, { "epoch": 3.557911908646003, "grad_norm": 0.03706509619951248, "learning_rate": 0.0009816340772199328, "loss": 0.1361, "num_input_tokens_seen": 47170224, "step": 21810 }, { "epoch": 3.5587275693311584, "grad_norm": 0.04010167345404625, "learning_rate": 0.0009816149576130678, "loss": 0.0308, "num_input_tokens_seen": 47181360, "step": 21815 }, { "epoch": 3.5595432300163132, "grad_norm": 0.04599921405315399, "learning_rate": 0.0009815958282456648, "loss": 0.2459, "num_input_tokens_seen": 47192176, "step": 21820 }, { "epoch": 3.560358890701468, "grad_norm": 0.037334144115448, "learning_rate": 0.0009815766891181112, "loss": 0.1097, "num_input_tokens_seen": 47201616, "step": 21825 }, { "epoch": 3.5611745513866233, "grad_norm": 0.10492201894521713, "learning_rate": 0.0009815575402307953, "loss": 0.0899, "num_input_tokens_seen": 47211280, "step": 21830 }, { "epoch": 3.561990212071778, "grad_norm": 0.0777861624956131, "learning_rate": 0.0009815383815841047, "loss": 0.0296, "num_input_tokens_seen": 47221968, "step": 21835 }, { "epoch": 3.5628058727569334, "grad_norm": 0.08785879611968994, "learning_rate": 0.0009815192131784282, "loss": 0.3332, "num_input_tokens_seen": 47233136, "step": 21840 }, { "epoch": 3.563621533442088, "grad_norm": 0.009797224774956703, "learning_rate": 0.0009815000350141539, "loss": 0.1732, "num_input_tokens_seen": 47242672, "step": 21845 }, { "epoch": 3.564437194127243, "grad_norm": 0.02497190050780773, "learning_rate": 0.0009814808470916705, "loss": 0.1779, "num_input_tokens_seen": 47253552, "step": 21850 }, { "epoch": 3.565252854812398, "grad_norm": 0.08281011134386063, "learning_rate": 0.0009814616494113668, "loss": 0.1092, "num_input_tokens_seen": 47265680, "step": 21855 }, { "epoch": 3.566068515497553, "grad_norm": 0.03524525463581085, "learning_rate": 0.0009814424419736323, "loss": 0.0513, "num_input_tokens_seen": 47277200, "step": 21860 }, { "epoch": 3.566884176182708, "grad_norm": 0.022290315479040146, "learning_rate": 0.0009814232247788556, "loss": 0.1073, "num_input_tokens_seen": 47288240, "step": 21865 }, { "epoch": 3.567699836867863, "grad_norm": 0.03501790761947632, "learning_rate": 0.0009814039978274269, "loss": 0.0556, "num_input_tokens_seen": 47297808, "step": 21870 }, { "epoch": 3.568515497553018, "grad_norm": 0.06423972547054291, "learning_rate": 0.0009813847611197352, "loss": 0.1121, "num_input_tokens_seen": 47308304, "step": 21875 }, { "epoch": 3.569331158238173, "grad_norm": 0.16512851417064667, "learning_rate": 0.0009813655146561709, "loss": 0.0808, "num_input_tokens_seen": 47320080, "step": 21880 }, { "epoch": 3.5701468189233276, "grad_norm": 0.1547977477312088, "learning_rate": 0.0009813462584371236, "loss": 0.1394, "num_input_tokens_seen": 47331952, "step": 21885 }, { "epoch": 3.570962479608483, "grad_norm": 0.10016020387411118, "learning_rate": 0.0009813269924629838, "loss": 0.0572, "num_input_tokens_seen": 47342864, "step": 21890 }, { "epoch": 3.5717781402936377, "grad_norm": 0.12822580337524414, "learning_rate": 0.000981307716734142, "loss": 0.0958, "num_input_tokens_seen": 47353296, "step": 21895 }, { "epoch": 3.572593800978793, "grad_norm": 0.14780253171920776, "learning_rate": 0.0009812884312509883, "loss": 0.132, "num_input_tokens_seen": 47364720, "step": 21900 }, { "epoch": 3.573409461663948, "grad_norm": 0.0791921615600586, "learning_rate": 0.0009812691360139144, "loss": 0.0664, "num_input_tokens_seen": 47375920, "step": 21905 }, { "epoch": 3.5742251223491026, "grad_norm": 0.005884220823645592, "learning_rate": 0.000981249831023311, "loss": 0.1976, "num_input_tokens_seen": 47388144, "step": 21910 }, { "epoch": 3.575040783034258, "grad_norm": 0.052293986082077026, "learning_rate": 0.000981230516279569, "loss": 0.0787, "num_input_tokens_seen": 47398640, "step": 21915 }, { "epoch": 3.5758564437194127, "grad_norm": 0.047704145312309265, "learning_rate": 0.0009812111917830801, "loss": 0.0788, "num_input_tokens_seen": 47410000, "step": 21920 }, { "epoch": 3.576672104404568, "grad_norm": 0.12634558975696564, "learning_rate": 0.000981191857534236, "loss": 0.1064, "num_input_tokens_seen": 47419824, "step": 21925 }, { "epoch": 3.5774877650897228, "grad_norm": 0.27708667516708374, "learning_rate": 0.0009811725135334287, "loss": 0.2807, "num_input_tokens_seen": 47431120, "step": 21930 }, { "epoch": 3.5783034257748776, "grad_norm": 0.05823507532477379, "learning_rate": 0.0009811531597810497, "loss": 0.1329, "num_input_tokens_seen": 47441232, "step": 21935 }, { "epoch": 3.5791190864600324, "grad_norm": 0.034099794924259186, "learning_rate": 0.0009811337962774916, "loss": 0.1188, "num_input_tokens_seen": 47451504, "step": 21940 }, { "epoch": 3.5799347471451877, "grad_norm": 0.1668100655078888, "learning_rate": 0.0009811144230231468, "loss": 0.1736, "num_input_tokens_seen": 47463472, "step": 21945 }, { "epoch": 3.5807504078303425, "grad_norm": 0.013046424835920334, "learning_rate": 0.0009810950400184078, "loss": 0.145, "num_input_tokens_seen": 47474416, "step": 21950 }, { "epoch": 3.5815660685154977, "grad_norm": 0.05839097872376442, "learning_rate": 0.0009810756472636677, "loss": 0.1073, "num_input_tokens_seen": 47486000, "step": 21955 }, { "epoch": 3.5823817292006526, "grad_norm": 0.026946526020765305, "learning_rate": 0.000981056244759319, "loss": 0.0967, "num_input_tokens_seen": 47496464, "step": 21960 }, { "epoch": 3.5831973898858074, "grad_norm": 0.02627391740679741, "learning_rate": 0.0009810368325057555, "loss": 0.0605, "num_input_tokens_seen": 47506800, "step": 21965 }, { "epoch": 3.5840130505709626, "grad_norm": 0.01618942618370056, "learning_rate": 0.0009810174105033703, "loss": 0.1714, "num_input_tokens_seen": 47517008, "step": 21970 }, { "epoch": 3.5848287112561175, "grad_norm": 0.05231276527047157, "learning_rate": 0.000980997978752557, "loss": 0.1563, "num_input_tokens_seen": 47528176, "step": 21975 }, { "epoch": 3.5856443719412723, "grad_norm": 0.08935698866844177, "learning_rate": 0.0009809785372537094, "loss": 0.1748, "num_input_tokens_seen": 47539248, "step": 21980 }, { "epoch": 3.5864600326264275, "grad_norm": 0.015321357175707817, "learning_rate": 0.0009809590860072217, "loss": 0.0311, "num_input_tokens_seen": 47549712, "step": 21985 }, { "epoch": 3.5872756933115824, "grad_norm": 0.08399423211812973, "learning_rate": 0.0009809396250134881, "loss": 0.0744, "num_input_tokens_seen": 47560432, "step": 21990 }, { "epoch": 3.588091353996737, "grad_norm": 0.06792002171278, "learning_rate": 0.0009809201542729028, "loss": 0.0799, "num_input_tokens_seen": 47572112, "step": 21995 }, { "epoch": 3.5889070146818924, "grad_norm": 0.008914710953831673, "learning_rate": 0.0009809006737858603, "loss": 0.1498, "num_input_tokens_seen": 47583184, "step": 22000 }, { "epoch": 3.5897226753670473, "grad_norm": 0.004703295882791281, "learning_rate": 0.0009808811835527557, "loss": 0.1542, "num_input_tokens_seen": 47594544, "step": 22005 }, { "epoch": 3.5905383360522025, "grad_norm": 0.029907135292887688, "learning_rate": 0.000980861683573984, "loss": 0.0412, "num_input_tokens_seen": 47605136, "step": 22010 }, { "epoch": 3.5913539967373573, "grad_norm": 0.23002728819847107, "learning_rate": 0.00098084217384994, "loss": 0.137, "num_input_tokens_seen": 47615440, "step": 22015 }, { "epoch": 3.592169657422512, "grad_norm": 0.06879785656929016, "learning_rate": 0.0009808226543810198, "loss": 0.05, "num_input_tokens_seen": 47626128, "step": 22020 }, { "epoch": 3.592985318107667, "grad_norm": 0.10355670005083084, "learning_rate": 0.0009808031251676182, "loss": 0.1418, "num_input_tokens_seen": 47636784, "step": 22025 }, { "epoch": 3.5938009787928222, "grad_norm": 0.020710989832878113, "learning_rate": 0.0009807835862101313, "loss": 0.1934, "num_input_tokens_seen": 47648624, "step": 22030 }, { "epoch": 3.594616639477977, "grad_norm": 0.21865439414978027, "learning_rate": 0.0009807640375089552, "loss": 0.1058, "num_input_tokens_seen": 47659344, "step": 22035 }, { "epoch": 3.5954323001631323, "grad_norm": 0.047853246331214905, "learning_rate": 0.000980744479064486, "loss": 0.1016, "num_input_tokens_seen": 47670064, "step": 22040 }, { "epoch": 3.596247960848287, "grad_norm": 0.013760424219071865, "learning_rate": 0.00098072491087712, "loss": 0.1097, "num_input_tokens_seen": 47681264, "step": 22045 }, { "epoch": 3.597063621533442, "grad_norm": 0.17822307348251343, "learning_rate": 0.0009807053329472539, "loss": 0.3183, "num_input_tokens_seen": 47692304, "step": 22050 }, { "epoch": 3.597879282218597, "grad_norm": 0.15463876724243164, "learning_rate": 0.0009806857452752844, "loss": 0.1162, "num_input_tokens_seen": 47703600, "step": 22055 }, { "epoch": 3.598694942903752, "grad_norm": 0.025757692754268646, "learning_rate": 0.0009806661478616084, "loss": 0.0322, "num_input_tokens_seen": 47713520, "step": 22060 }, { "epoch": 3.5995106035889073, "grad_norm": 0.026838814839720726, "learning_rate": 0.000980646540706623, "loss": 0.0998, "num_input_tokens_seen": 47723600, "step": 22065 }, { "epoch": 3.600326264274062, "grad_norm": 0.021702522411942482, "learning_rate": 0.0009806269238107261, "loss": 0.1855, "num_input_tokens_seen": 47733808, "step": 22070 }, { "epoch": 3.601141924959217, "grad_norm": 0.06848857551813126, "learning_rate": 0.0009806072971743148, "loss": 0.0631, "num_input_tokens_seen": 47745296, "step": 22075 }, { "epoch": 3.6019575856443717, "grad_norm": 0.03980998322367668, "learning_rate": 0.000980587660797787, "loss": 0.0783, "num_input_tokens_seen": 47754736, "step": 22080 }, { "epoch": 3.602773246329527, "grad_norm": 0.01532980240881443, "learning_rate": 0.00098056801468154, "loss": 0.0586, "num_input_tokens_seen": 47766160, "step": 22085 }, { "epoch": 3.603588907014682, "grad_norm": 0.010872176848351955, "learning_rate": 0.0009805483588259732, "loss": 0.0167, "num_input_tokens_seen": 47777904, "step": 22090 }, { "epoch": 3.604404567699837, "grad_norm": 0.04009169712662697, "learning_rate": 0.000980528693231484, "loss": 0.1523, "num_input_tokens_seen": 47789328, "step": 22095 }, { "epoch": 3.605220228384992, "grad_norm": 0.010094402357935905, "learning_rate": 0.0009805090178984712, "loss": 0.0779, "num_input_tokens_seen": 47799472, "step": 22100 }, { "epoch": 3.6060358890701467, "grad_norm": 0.06072307005524635, "learning_rate": 0.0009804893328273336, "loss": 0.1104, "num_input_tokens_seen": 47811248, "step": 22105 }, { "epoch": 3.6068515497553015, "grad_norm": 0.05516495928168297, "learning_rate": 0.0009804696380184704, "loss": 0.0625, "num_input_tokens_seen": 47823152, "step": 22110 }, { "epoch": 3.607667210440457, "grad_norm": 0.2074514776468277, "learning_rate": 0.0009804499334722801, "loss": 0.0722, "num_input_tokens_seen": 47834448, "step": 22115 }, { "epoch": 3.6084828711256116, "grad_norm": 0.01784053072333336, "learning_rate": 0.0009804302191891625, "loss": 0.1881, "num_input_tokens_seen": 47845360, "step": 22120 }, { "epoch": 3.609298531810767, "grad_norm": 0.005364630371332169, "learning_rate": 0.0009804104951695173, "loss": 0.0771, "num_input_tokens_seen": 47854736, "step": 22125 }, { "epoch": 3.6101141924959217, "grad_norm": 0.2547522187232971, "learning_rate": 0.0009803907614137435, "loss": 0.1743, "num_input_tokens_seen": 47866864, "step": 22130 }, { "epoch": 3.6109298531810765, "grad_norm": 0.0751950666308403, "learning_rate": 0.0009803710179222419, "loss": 0.1087, "num_input_tokens_seen": 47877456, "step": 22135 }, { "epoch": 3.6117455138662318, "grad_norm": 0.002935384400188923, "learning_rate": 0.000980351264695412, "loss": 0.1391, "num_input_tokens_seen": 47889392, "step": 22140 }, { "epoch": 3.6125611745513866, "grad_norm": 0.026649711653590202, "learning_rate": 0.0009803315017336545, "loss": 0.0165, "num_input_tokens_seen": 47900016, "step": 22145 }, { "epoch": 3.613376835236542, "grad_norm": 0.020435314625501633, "learning_rate": 0.0009803117290373697, "loss": 0.185, "num_input_tokens_seen": 47910416, "step": 22150 }, { "epoch": 3.6141924959216967, "grad_norm": 0.15744924545288086, "learning_rate": 0.0009802919466069585, "loss": 0.1078, "num_input_tokens_seen": 47921584, "step": 22155 }, { "epoch": 3.6150081566068515, "grad_norm": 0.16279421746730804, "learning_rate": 0.0009802721544428215, "loss": 0.2165, "num_input_tokens_seen": 47932016, "step": 22160 }, { "epoch": 3.6158238172920063, "grad_norm": 0.0843036100268364, "learning_rate": 0.0009802523525453601, "loss": 0.2297, "num_input_tokens_seen": 47942864, "step": 22165 }, { "epoch": 3.6166394779771616, "grad_norm": 0.18664637207984924, "learning_rate": 0.0009802325409149757, "loss": 0.0993, "num_input_tokens_seen": 47953968, "step": 22170 }, { "epoch": 3.6174551386623164, "grad_norm": 0.00876756850630045, "learning_rate": 0.0009802127195520697, "loss": 0.0728, "num_input_tokens_seen": 47964688, "step": 22175 }, { "epoch": 3.6182707993474716, "grad_norm": 0.015477425418794155, "learning_rate": 0.0009801928884570434, "loss": 0.0471, "num_input_tokens_seen": 47974992, "step": 22180 }, { "epoch": 3.6190864600326265, "grad_norm": 0.07113178819417953, "learning_rate": 0.0009801730476302992, "loss": 0.1995, "num_input_tokens_seen": 47984688, "step": 22185 }, { "epoch": 3.6199021207177813, "grad_norm": 0.13852275907993317, "learning_rate": 0.000980153197072239, "loss": 0.1148, "num_input_tokens_seen": 47995792, "step": 22190 }, { "epoch": 3.6207177814029365, "grad_norm": 0.09939780831336975, "learning_rate": 0.0009801333367832651, "loss": 0.1039, "num_input_tokens_seen": 48005424, "step": 22195 }, { "epoch": 3.6215334420880914, "grad_norm": 0.07844390720129013, "learning_rate": 0.0009801134667637803, "loss": 0.0971, "num_input_tokens_seen": 48016368, "step": 22200 }, { "epoch": 3.622349102773246, "grad_norm": 0.16363392770290375, "learning_rate": 0.0009800935870141868, "loss": 0.0741, "num_input_tokens_seen": 48027568, "step": 22205 }, { "epoch": 3.6231647634584014, "grad_norm": 0.08116459101438522, "learning_rate": 0.0009800736975348878, "loss": 0.1015, "num_input_tokens_seen": 48039056, "step": 22210 }, { "epoch": 3.6239804241435563, "grad_norm": 0.025155600160360336, "learning_rate": 0.0009800537983262862, "loss": 0.0714, "num_input_tokens_seen": 48049936, "step": 22215 }, { "epoch": 3.624796084828711, "grad_norm": 0.11358506232500076, "learning_rate": 0.0009800338893887857, "loss": 0.0955, "num_input_tokens_seen": 48061232, "step": 22220 }, { "epoch": 3.6256117455138663, "grad_norm": 0.16867463290691376, "learning_rate": 0.000980013970722789, "loss": 0.0756, "num_input_tokens_seen": 48072336, "step": 22225 }, { "epoch": 3.626427406199021, "grad_norm": 0.12615631520748138, "learning_rate": 0.0009799940423287005, "loss": 0.188, "num_input_tokens_seen": 48083792, "step": 22230 }, { "epoch": 3.6272430668841764, "grad_norm": 0.027677416801452637, "learning_rate": 0.000979974104206924, "loss": 0.0424, "num_input_tokens_seen": 48094512, "step": 22235 }, { "epoch": 3.6280587275693312, "grad_norm": 0.038241248577833176, "learning_rate": 0.0009799541563578632, "loss": 0.1908, "num_input_tokens_seen": 48106416, "step": 22240 }, { "epoch": 3.628874388254486, "grad_norm": 0.04931412637233734, "learning_rate": 0.0009799341987819224, "loss": 0.1333, "num_input_tokens_seen": 48117840, "step": 22245 }, { "epoch": 3.629690048939641, "grad_norm": 0.028640341013669968, "learning_rate": 0.0009799142314795065, "loss": 0.0577, "num_input_tokens_seen": 48127888, "step": 22250 }, { "epoch": 3.630505709624796, "grad_norm": 0.03567443788051605, "learning_rate": 0.0009798942544510198, "loss": 0.0432, "num_input_tokens_seen": 48137200, "step": 22255 }, { "epoch": 3.631321370309951, "grad_norm": 0.06819088757038116, "learning_rate": 0.000979874267696867, "loss": 0.057, "num_input_tokens_seen": 48147632, "step": 22260 }, { "epoch": 3.632137030995106, "grad_norm": 0.1777115911245346, "learning_rate": 0.0009798542712174537, "loss": 0.1266, "num_input_tokens_seen": 48158064, "step": 22265 }, { "epoch": 3.632952691680261, "grad_norm": 0.013221224769949913, "learning_rate": 0.0009798342650131845, "loss": 0.0276, "num_input_tokens_seen": 48168176, "step": 22270 }, { "epoch": 3.633768352365416, "grad_norm": 0.09064479172229767, "learning_rate": 0.0009798142490844656, "loss": 0.2523, "num_input_tokens_seen": 48179472, "step": 22275 }, { "epoch": 3.634584013050571, "grad_norm": 0.09441931545734406, "learning_rate": 0.0009797942234317022, "loss": 0.1527, "num_input_tokens_seen": 48189232, "step": 22280 }, { "epoch": 3.635399673735726, "grad_norm": 0.1492157280445099, "learning_rate": 0.0009797741880553, "loss": 0.3185, "num_input_tokens_seen": 48200496, "step": 22285 }, { "epoch": 3.636215334420881, "grad_norm": 0.12546101212501526, "learning_rate": 0.0009797541429556653, "loss": 0.2057, "num_input_tokens_seen": 48211280, "step": 22290 }, { "epoch": 3.637030995106036, "grad_norm": 0.027052100747823715, "learning_rate": 0.0009797340881332044, "loss": 0.0607, "num_input_tokens_seen": 48221872, "step": 22295 }, { "epoch": 3.637846655791191, "grad_norm": 0.04728303104639053, "learning_rate": 0.0009797140235883236, "loss": 0.1707, "num_input_tokens_seen": 48233616, "step": 22300 }, { "epoch": 3.6386623164763456, "grad_norm": 0.10751637816429138, "learning_rate": 0.0009796939493214294, "loss": 0.1243, "num_input_tokens_seen": 48244976, "step": 22305 }, { "epoch": 3.639477977161501, "grad_norm": 0.17382705211639404, "learning_rate": 0.000979673865332929, "loss": 0.1327, "num_input_tokens_seen": 48256016, "step": 22310 }, { "epoch": 3.6402936378466557, "grad_norm": 0.0284186452627182, "learning_rate": 0.0009796537716232289, "loss": 0.0715, "num_input_tokens_seen": 48267632, "step": 22315 }, { "epoch": 3.641109298531811, "grad_norm": 0.028015002608299255, "learning_rate": 0.000979633668192737, "loss": 0.1001, "num_input_tokens_seen": 48278544, "step": 22320 }, { "epoch": 3.641924959216966, "grad_norm": 0.026279503479599953, "learning_rate": 0.0009796135550418602, "loss": 0.0556, "num_input_tokens_seen": 48289616, "step": 22325 }, { "epoch": 3.6427406199021206, "grad_norm": 0.07529424875974655, "learning_rate": 0.0009795934321710062, "loss": 0.1005, "num_input_tokens_seen": 48300400, "step": 22330 }, { "epoch": 3.6435562805872754, "grad_norm": 0.0067391046322882175, "learning_rate": 0.0009795732995805829, "loss": 0.1522, "num_input_tokens_seen": 48310448, "step": 22335 }, { "epoch": 3.6443719412724307, "grad_norm": 0.24121522903442383, "learning_rate": 0.0009795531572709983, "loss": 0.3409, "num_input_tokens_seen": 48320688, "step": 22340 }, { "epoch": 3.6451876019575855, "grad_norm": 0.06484576314687729, "learning_rate": 0.0009795330052426608, "loss": 0.0733, "num_input_tokens_seen": 48331376, "step": 22345 }, { "epoch": 3.6460032626427408, "grad_norm": 0.05939403548836708, "learning_rate": 0.0009795128434959785, "loss": 0.1592, "num_input_tokens_seen": 48342288, "step": 22350 }, { "epoch": 3.6468189233278956, "grad_norm": 0.06702617555856705, "learning_rate": 0.00097949267203136, "loss": 0.1425, "num_input_tokens_seen": 48351440, "step": 22355 }, { "epoch": 3.6476345840130504, "grad_norm": 0.04426341503858566, "learning_rate": 0.0009794724908492143, "loss": 0.0851, "num_input_tokens_seen": 48363632, "step": 22360 }, { "epoch": 3.6484502446982057, "grad_norm": 0.0624653585255146, "learning_rate": 0.0009794522999499503, "loss": 0.0316, "num_input_tokens_seen": 48374512, "step": 22365 }, { "epoch": 3.6492659053833605, "grad_norm": 0.10191851109266281, "learning_rate": 0.0009794320993339772, "loss": 0.0799, "num_input_tokens_seen": 48385232, "step": 22370 }, { "epoch": 3.6500815660685157, "grad_norm": 0.2434154897928238, "learning_rate": 0.0009794118890017046, "loss": 0.1618, "num_input_tokens_seen": 48396304, "step": 22375 }, { "epoch": 3.6508972267536706, "grad_norm": 0.1698484867811203, "learning_rate": 0.0009793916689535417, "loss": 0.0629, "num_input_tokens_seen": 48406384, "step": 22380 }, { "epoch": 3.6517128874388254, "grad_norm": 0.12525101006031036, "learning_rate": 0.0009793714391898984, "loss": 0.1334, "num_input_tokens_seen": 48416944, "step": 22385 }, { "epoch": 3.65252854812398, "grad_norm": 0.14747369289398193, "learning_rate": 0.000979351199711185, "loss": 0.0558, "num_input_tokens_seen": 48426928, "step": 22390 }, { "epoch": 3.6533442088091355, "grad_norm": 0.14484144747257233, "learning_rate": 0.0009793309505178112, "loss": 0.0951, "num_input_tokens_seen": 48437264, "step": 22395 }, { "epoch": 3.6541598694942903, "grad_norm": 0.03382772579789162, "learning_rate": 0.000979310691610188, "loss": 0.0677, "num_input_tokens_seen": 48446160, "step": 22400 }, { "epoch": 3.6549755301794455, "grad_norm": 0.06506810337305069, "learning_rate": 0.0009792904229887253, "loss": 0.102, "num_input_tokens_seen": 48456944, "step": 22405 }, { "epoch": 3.6557911908646004, "grad_norm": 0.2006702721118927, "learning_rate": 0.0009792701446538342, "loss": 0.2902, "num_input_tokens_seen": 48468464, "step": 22410 }, { "epoch": 3.656606851549755, "grad_norm": 0.04025622457265854, "learning_rate": 0.0009792498566059255, "loss": 0.1454, "num_input_tokens_seen": 48478864, "step": 22415 }, { "epoch": 3.6574225122349104, "grad_norm": 0.04909409210085869, "learning_rate": 0.0009792295588454106, "loss": 0.0857, "num_input_tokens_seen": 48489456, "step": 22420 }, { "epoch": 3.6582381729200653, "grad_norm": 0.2315702885389328, "learning_rate": 0.0009792092513727006, "loss": 0.1617, "num_input_tokens_seen": 48500496, "step": 22425 }, { "epoch": 3.65905383360522, "grad_norm": 0.059401609003543854, "learning_rate": 0.0009791889341882075, "loss": 0.0676, "num_input_tokens_seen": 48511888, "step": 22430 }, { "epoch": 3.6598694942903753, "grad_norm": 0.1892523616552353, "learning_rate": 0.0009791686072923424, "loss": 0.1063, "num_input_tokens_seen": 48522160, "step": 22435 }, { "epoch": 3.66068515497553, "grad_norm": 0.006050454918295145, "learning_rate": 0.0009791482706855178, "loss": 0.0272, "num_input_tokens_seen": 48532272, "step": 22440 }, { "epoch": 3.661500815660685, "grad_norm": 0.0315130352973938, "learning_rate": 0.0009791279243681456, "loss": 0.1565, "num_input_tokens_seen": 48543120, "step": 22445 }, { "epoch": 3.6623164763458402, "grad_norm": 0.015598422847688198, "learning_rate": 0.0009791075683406383, "loss": 0.0948, "num_input_tokens_seen": 48553648, "step": 22450 }, { "epoch": 3.663132137030995, "grad_norm": 0.08075518906116486, "learning_rate": 0.0009790872026034082, "loss": 0.1476, "num_input_tokens_seen": 48564240, "step": 22455 }, { "epoch": 3.6639477977161503, "grad_norm": 0.035409845411777496, "learning_rate": 0.0009790668271568684, "loss": 0.0687, "num_input_tokens_seen": 48574320, "step": 22460 }, { "epoch": 3.664763458401305, "grad_norm": 0.25173941254615784, "learning_rate": 0.0009790464420014312, "loss": 0.0941, "num_input_tokens_seen": 48584784, "step": 22465 }, { "epoch": 3.66557911908646, "grad_norm": 0.056600235402584076, "learning_rate": 0.0009790260471375105, "loss": 0.0999, "num_input_tokens_seen": 48595440, "step": 22470 }, { "epoch": 3.6663947797716148, "grad_norm": 0.12601543962955475, "learning_rate": 0.0009790056425655193, "loss": 0.0929, "num_input_tokens_seen": 48607920, "step": 22475 }, { "epoch": 3.66721044045677, "grad_norm": 0.044781044125556946, "learning_rate": 0.0009789852282858708, "loss": 0.1691, "num_input_tokens_seen": 48619696, "step": 22480 }, { "epoch": 3.668026101141925, "grad_norm": 0.05876341462135315, "learning_rate": 0.0009789648042989793, "loss": 0.1129, "num_input_tokens_seen": 48630544, "step": 22485 }, { "epoch": 3.66884176182708, "grad_norm": 0.007882521487772465, "learning_rate": 0.0009789443706052583, "loss": 0.1321, "num_input_tokens_seen": 48641072, "step": 22490 }, { "epoch": 3.669657422512235, "grad_norm": 0.010897364467382431, "learning_rate": 0.000978923927205122, "loss": 0.0425, "num_input_tokens_seen": 48651792, "step": 22495 }, { "epoch": 3.6704730831973897, "grad_norm": 0.04307050630450249, "learning_rate": 0.0009789034740989848, "loss": 0.1143, "num_input_tokens_seen": 48661296, "step": 22500 }, { "epoch": 3.671288743882545, "grad_norm": 0.4440848231315613, "learning_rate": 0.0009788830112872611, "loss": 0.099, "num_input_tokens_seen": 48671312, "step": 22505 }, { "epoch": 3.6721044045677, "grad_norm": 0.22522272169589996, "learning_rate": 0.0009788625387703658, "loss": 0.1167, "num_input_tokens_seen": 48681680, "step": 22510 }, { "epoch": 3.672920065252855, "grad_norm": 0.23165500164031982, "learning_rate": 0.0009788420565487136, "loss": 0.2092, "num_input_tokens_seen": 48692912, "step": 22515 }, { "epoch": 3.67373572593801, "grad_norm": 0.07002677768468857, "learning_rate": 0.0009788215646227196, "loss": 0.2015, "num_input_tokens_seen": 48704912, "step": 22520 }, { "epoch": 3.6745513866231647, "grad_norm": 0.09665162861347198, "learning_rate": 0.0009788010629927992, "loss": 0.0611, "num_input_tokens_seen": 48713232, "step": 22525 }, { "epoch": 3.6753670473083195, "grad_norm": 0.038766391575336456, "learning_rate": 0.000978780551659368, "loss": 0.0261, "num_input_tokens_seen": 48724976, "step": 22530 }, { "epoch": 3.676182707993475, "grad_norm": 0.05711549147963524, "learning_rate": 0.0009787600306228415, "loss": 0.2448, "num_input_tokens_seen": 48736624, "step": 22535 }, { "epoch": 3.6769983686786296, "grad_norm": 0.09710178524255753, "learning_rate": 0.0009787394998836355, "loss": 0.0471, "num_input_tokens_seen": 48747696, "step": 22540 }, { "epoch": 3.677814029363785, "grad_norm": 0.2320680022239685, "learning_rate": 0.0009787189594421663, "loss": 0.117, "num_input_tokens_seen": 48759632, "step": 22545 }, { "epoch": 3.6786296900489397, "grad_norm": 0.08304693549871445, "learning_rate": 0.00097869840929885, "loss": 0.1196, "num_input_tokens_seen": 48771152, "step": 22550 }, { "epoch": 3.6794453507340945, "grad_norm": 0.13316458463668823, "learning_rate": 0.0009786778494541033, "loss": 0.1519, "num_input_tokens_seen": 48783312, "step": 22555 }, { "epoch": 3.6802610114192493, "grad_norm": 0.009458227083086967, "learning_rate": 0.0009786572799083426, "loss": 0.0734, "num_input_tokens_seen": 48795728, "step": 22560 }, { "epoch": 3.6810766721044046, "grad_norm": 0.1085597574710846, "learning_rate": 0.000978636700661985, "loss": 0.1794, "num_input_tokens_seen": 48807152, "step": 22565 }, { "epoch": 3.6818923327895594, "grad_norm": 0.11425057798624039, "learning_rate": 0.0009786161117154475, "loss": 0.1324, "num_input_tokens_seen": 48817072, "step": 22570 }, { "epoch": 3.6827079934747147, "grad_norm": 0.011296875774860382, "learning_rate": 0.0009785955130691471, "loss": 0.0387, "num_input_tokens_seen": 48828080, "step": 22575 }, { "epoch": 3.6835236541598695, "grad_norm": 0.027069205418229103, "learning_rate": 0.0009785749047235017, "loss": 0.0839, "num_input_tokens_seen": 48838192, "step": 22580 }, { "epoch": 3.6843393148450243, "grad_norm": 0.043377745896577835, "learning_rate": 0.0009785542866789288, "loss": 0.04, "num_input_tokens_seen": 48849904, "step": 22585 }, { "epoch": 3.6851549755301796, "grad_norm": 0.017865043133497238, "learning_rate": 0.000978533658935846, "loss": 0.3016, "num_input_tokens_seen": 48861200, "step": 22590 }, { "epoch": 3.6859706362153344, "grad_norm": 0.11259305477142334, "learning_rate": 0.0009785130214946716, "loss": 0.09, "num_input_tokens_seen": 48871760, "step": 22595 }, { "epoch": 3.6867862969004896, "grad_norm": 0.023715078830718994, "learning_rate": 0.0009784923743558238, "loss": 0.0522, "num_input_tokens_seen": 48882128, "step": 22600 }, { "epoch": 3.6876019575856445, "grad_norm": 0.021904323250055313, "learning_rate": 0.000978471717519721, "loss": 0.0444, "num_input_tokens_seen": 48891824, "step": 22605 }, { "epoch": 3.6884176182707993, "grad_norm": 0.13719536364078522, "learning_rate": 0.0009784510509867818, "loss": 0.0905, "num_input_tokens_seen": 48903888, "step": 22610 }, { "epoch": 3.689233278955954, "grad_norm": 0.08701768517494202, "learning_rate": 0.0009784303747574254, "loss": 0.0619, "num_input_tokens_seen": 48914480, "step": 22615 }, { "epoch": 3.6900489396411094, "grad_norm": 0.13661521673202515, "learning_rate": 0.0009784096888320703, "loss": 0.1242, "num_input_tokens_seen": 48925680, "step": 22620 }, { "epoch": 3.690864600326264, "grad_norm": 0.009991122409701347, "learning_rate": 0.000978388993211136, "loss": 0.0959, "num_input_tokens_seen": 48937072, "step": 22625 }, { "epoch": 3.6916802610114194, "grad_norm": 0.08614683896303177, "learning_rate": 0.0009783682878950416, "loss": 0.0987, "num_input_tokens_seen": 48948240, "step": 22630 }, { "epoch": 3.6924959216965743, "grad_norm": 0.10593032091856003, "learning_rate": 0.0009783475728842074, "loss": 0.1475, "num_input_tokens_seen": 48958320, "step": 22635 }, { "epoch": 3.693311582381729, "grad_norm": 0.20387686789035797, "learning_rate": 0.0009783268481790527, "loss": 0.1945, "num_input_tokens_seen": 48969840, "step": 22640 }, { "epoch": 3.6941272430668843, "grad_norm": 0.059103433042764664, "learning_rate": 0.0009783061137799975, "loss": 0.124, "num_input_tokens_seen": 48981744, "step": 22645 }, { "epoch": 3.694942903752039, "grad_norm": 0.08534158766269684, "learning_rate": 0.000978285369687462, "loss": 0.1068, "num_input_tokens_seen": 48992368, "step": 22650 }, { "epoch": 3.695758564437194, "grad_norm": 0.025781484320759773, "learning_rate": 0.000978264615901867, "loss": 0.0279, "num_input_tokens_seen": 49003120, "step": 22655 }, { "epoch": 3.6965742251223492, "grad_norm": 0.19997680187225342, "learning_rate": 0.0009782438524236327, "loss": 0.1823, "num_input_tokens_seen": 49014256, "step": 22660 }, { "epoch": 3.697389885807504, "grad_norm": 0.03971175104379654, "learning_rate": 0.00097822307925318, "loss": 0.1205, "num_input_tokens_seen": 49025232, "step": 22665 }, { "epoch": 3.698205546492659, "grad_norm": 0.32710427045822144, "learning_rate": 0.00097820229639093, "loss": 0.1444, "num_input_tokens_seen": 49036176, "step": 22670 }, { "epoch": 3.699021207177814, "grad_norm": 0.1855972707271576, "learning_rate": 0.0009781815038373042, "loss": 0.1353, "num_input_tokens_seen": 49046832, "step": 22675 }, { "epoch": 3.699836867862969, "grad_norm": 0.028964513912796974, "learning_rate": 0.000978160701592723, "loss": 0.1412, "num_input_tokens_seen": 49057264, "step": 22680 }, { "epoch": 3.700652528548124, "grad_norm": 0.04488237202167511, "learning_rate": 0.000978139889657609, "loss": 0.0243, "num_input_tokens_seen": 49068592, "step": 22685 }, { "epoch": 3.701468189233279, "grad_norm": 0.00563571834936738, "learning_rate": 0.0009781190680323833, "loss": 0.1421, "num_input_tokens_seen": 49078640, "step": 22690 }, { "epoch": 3.702283849918434, "grad_norm": 0.10360507667064667, "learning_rate": 0.0009780982367174683, "loss": 0.0797, "num_input_tokens_seen": 49089616, "step": 22695 }, { "epoch": 3.7030995106035887, "grad_norm": 0.1571696400642395, "learning_rate": 0.000978077395713286, "loss": 0.1266, "num_input_tokens_seen": 49099312, "step": 22700 }, { "epoch": 3.703915171288744, "grad_norm": 0.04751509800553322, "learning_rate": 0.0009780565450202587, "loss": 0.1661, "num_input_tokens_seen": 49111056, "step": 22705 }, { "epoch": 3.7047308319738987, "grad_norm": 0.2686530351638794, "learning_rate": 0.0009780356846388091, "loss": 0.1756, "num_input_tokens_seen": 49122352, "step": 22710 }, { "epoch": 3.705546492659054, "grad_norm": 0.11259738355875015, "learning_rate": 0.00097801481456936, "loss": 0.0504, "num_input_tokens_seen": 49133136, "step": 22715 }, { "epoch": 3.706362153344209, "grad_norm": 0.1789553016424179, "learning_rate": 0.0009779939348123342, "loss": 0.0762, "num_input_tokens_seen": 49143952, "step": 22720 }, { "epoch": 3.7071778140293636, "grad_norm": 0.01377193909138441, "learning_rate": 0.000977973045368155, "loss": 0.1501, "num_input_tokens_seen": 49154864, "step": 22725 }, { "epoch": 3.707993474714519, "grad_norm": 0.1669149249792099, "learning_rate": 0.0009779521462372457, "loss": 0.1535, "num_input_tokens_seen": 49166192, "step": 22730 }, { "epoch": 3.7088091353996737, "grad_norm": 0.010271217674016953, "learning_rate": 0.0009779312374200298, "loss": 0.0722, "num_input_tokens_seen": 49177136, "step": 22735 }, { "epoch": 3.709624796084829, "grad_norm": 0.11398719996213913, "learning_rate": 0.0009779103189169309, "loss": 0.0827, "num_input_tokens_seen": 49187440, "step": 22740 }, { "epoch": 3.710440456769984, "grad_norm": 0.07981985062360764, "learning_rate": 0.0009778893907283733, "loss": 0.1428, "num_input_tokens_seen": 49199056, "step": 22745 }, { "epoch": 3.7112561174551386, "grad_norm": 0.25845813751220703, "learning_rate": 0.000977868452854781, "loss": 0.1377, "num_input_tokens_seen": 49208720, "step": 22750 }, { "epoch": 3.7120717781402934, "grad_norm": 0.15199466049671173, "learning_rate": 0.000977847505296578, "loss": 0.0882, "num_input_tokens_seen": 49219056, "step": 22755 }, { "epoch": 3.7128874388254487, "grad_norm": 0.14639122784137726, "learning_rate": 0.0009778265480541895, "loss": 0.1336, "num_input_tokens_seen": 49229232, "step": 22760 }, { "epoch": 3.7137030995106035, "grad_norm": 0.011554457247257233, "learning_rate": 0.0009778055811280396, "loss": 0.0255, "num_input_tokens_seen": 49241456, "step": 22765 }, { "epoch": 3.7145187601957588, "grad_norm": 0.009976423345506191, "learning_rate": 0.0009777846045185535, "loss": 0.1987, "num_input_tokens_seen": 49252144, "step": 22770 }, { "epoch": 3.7153344208809136, "grad_norm": 0.07433804869651794, "learning_rate": 0.0009777636182261562, "loss": 0.0468, "num_input_tokens_seen": 49262416, "step": 22775 }, { "epoch": 3.7161500815660684, "grad_norm": 0.02024008147418499, "learning_rate": 0.0009777426222512733, "loss": 0.0305, "num_input_tokens_seen": 49274224, "step": 22780 }, { "epoch": 3.7169657422512232, "grad_norm": 0.09279941767454147, "learning_rate": 0.0009777216165943298, "loss": 0.1684, "num_input_tokens_seen": 49284656, "step": 22785 }, { "epoch": 3.7177814029363785, "grad_norm": 0.07832145690917969, "learning_rate": 0.0009777006012557522, "loss": 0.1519, "num_input_tokens_seen": 49294896, "step": 22790 }, { "epoch": 3.7185970636215333, "grad_norm": 0.06239181011915207, "learning_rate": 0.0009776795762359654, "loss": 0.164, "num_input_tokens_seen": 49305200, "step": 22795 }, { "epoch": 3.7194127243066886, "grad_norm": 0.05114758387207985, "learning_rate": 0.0009776585415353963, "loss": 0.0504, "num_input_tokens_seen": 49316080, "step": 22800 }, { "epoch": 3.7202283849918434, "grad_norm": 0.017731616273522377, "learning_rate": 0.0009776374971544708, "loss": 0.0382, "num_input_tokens_seen": 49327024, "step": 22805 }, { "epoch": 3.721044045676998, "grad_norm": 0.06036202982068062, "learning_rate": 0.0009776164430936153, "loss": 0.0778, "num_input_tokens_seen": 49338224, "step": 22810 }, { "epoch": 3.7218597063621535, "grad_norm": 0.013572810217738152, "learning_rate": 0.000977595379353257, "loss": 0.0984, "num_input_tokens_seen": 49349936, "step": 22815 }, { "epoch": 3.7226753670473083, "grad_norm": 0.015775786712765694, "learning_rate": 0.0009775743059338223, "loss": 0.122, "num_input_tokens_seen": 49361200, "step": 22820 }, { "epoch": 3.7234910277324635, "grad_norm": 0.030214644968509674, "learning_rate": 0.0009775532228357385, "loss": 0.3579, "num_input_tokens_seen": 49372976, "step": 22825 }, { "epoch": 3.7243066884176184, "grad_norm": 0.041531722992658615, "learning_rate": 0.0009775321300594328, "loss": 0.0512, "num_input_tokens_seen": 49383824, "step": 22830 }, { "epoch": 3.725122349102773, "grad_norm": 0.08106119930744171, "learning_rate": 0.0009775110276053327, "loss": 0.1839, "num_input_tokens_seen": 49395504, "step": 22835 }, { "epoch": 3.725938009787928, "grad_norm": 0.020309995859861374, "learning_rate": 0.000977489915473866, "loss": 0.0665, "num_input_tokens_seen": 49404176, "step": 22840 }, { "epoch": 3.7267536704730833, "grad_norm": 0.04128960520029068, "learning_rate": 0.0009774687936654602, "loss": 0.0401, "num_input_tokens_seen": 49413776, "step": 22845 }, { "epoch": 3.727569331158238, "grad_norm": 0.024078309535980225, "learning_rate": 0.0009774476621805437, "loss": 0.1105, "num_input_tokens_seen": 49424848, "step": 22850 }, { "epoch": 3.7283849918433933, "grad_norm": 0.0830194428563118, "learning_rate": 0.0009774265210195446, "loss": 0.0731, "num_input_tokens_seen": 49436880, "step": 22855 }, { "epoch": 3.729200652528548, "grad_norm": 0.16092805564403534, "learning_rate": 0.0009774053701828913, "loss": 0.2289, "num_input_tokens_seen": 49448048, "step": 22860 }, { "epoch": 3.730016313213703, "grad_norm": 0.20131917297840118, "learning_rate": 0.0009773842096710127, "loss": 0.1552, "num_input_tokens_seen": 49458544, "step": 22865 }, { "epoch": 3.7308319738988582, "grad_norm": 0.21859797835350037, "learning_rate": 0.0009773630394843374, "loss": 0.1336, "num_input_tokens_seen": 49467344, "step": 22870 }, { "epoch": 3.731647634584013, "grad_norm": 0.18565644323825836, "learning_rate": 0.0009773418596232945, "loss": 0.1809, "num_input_tokens_seen": 49478064, "step": 22875 }, { "epoch": 3.732463295269168, "grad_norm": 0.051977213472127914, "learning_rate": 0.0009773206700883135, "loss": 0.1254, "num_input_tokens_seen": 49488464, "step": 22880 }, { "epoch": 3.733278955954323, "grad_norm": 0.027007022872567177, "learning_rate": 0.0009772994708798232, "loss": 0.1159, "num_input_tokens_seen": 49501264, "step": 22885 }, { "epoch": 3.734094616639478, "grad_norm": 0.19858905673027039, "learning_rate": 0.000977278261998254, "loss": 0.1456, "num_input_tokens_seen": 49511408, "step": 22890 }, { "epoch": 3.7349102773246328, "grad_norm": 0.09167278558015823, "learning_rate": 0.0009772570434440353, "loss": 0.1164, "num_input_tokens_seen": 49522896, "step": 22895 }, { "epoch": 3.735725938009788, "grad_norm": 0.03391212224960327, "learning_rate": 0.000977235815217597, "loss": 0.0852, "num_input_tokens_seen": 49535120, "step": 22900 }, { "epoch": 3.736541598694943, "grad_norm": 0.0225540641695261, "learning_rate": 0.0009772145773193695, "loss": 0.1699, "num_input_tokens_seen": 49546448, "step": 22905 }, { "epoch": 3.737357259380098, "grad_norm": 0.06987614184617996, "learning_rate": 0.0009771933297497831, "loss": 0.0708, "num_input_tokens_seen": 49557488, "step": 22910 }, { "epoch": 3.738172920065253, "grad_norm": 0.04545498266816139, "learning_rate": 0.0009771720725092687, "loss": 0.0383, "num_input_tokens_seen": 49567088, "step": 22915 }, { "epoch": 3.7389885807504077, "grad_norm": 0.015980003401637077, "learning_rate": 0.000977150805598257, "loss": 0.2011, "num_input_tokens_seen": 49578544, "step": 22920 }, { "epoch": 3.7398042414355626, "grad_norm": 0.12308619171380997, "learning_rate": 0.0009771295290171788, "loss": 0.1587, "num_input_tokens_seen": 49590000, "step": 22925 }, { "epoch": 3.740619902120718, "grad_norm": 0.022953316569328308, "learning_rate": 0.0009771082427664655, "loss": 0.3003, "num_input_tokens_seen": 49601328, "step": 22930 }, { "epoch": 3.7414355628058726, "grad_norm": 0.020980946719646454, "learning_rate": 0.0009770869468465483, "loss": 0.066, "num_input_tokens_seen": 49613264, "step": 22935 }, { "epoch": 3.742251223491028, "grad_norm": 0.08716975897550583, "learning_rate": 0.000977065641257859, "loss": 0.117, "num_input_tokens_seen": 49622352, "step": 22940 }, { "epoch": 3.7430668841761827, "grad_norm": 0.2300471067428589, "learning_rate": 0.000977044326000829, "loss": 0.3114, "num_input_tokens_seen": 49633264, "step": 22945 }, { "epoch": 3.7438825448613375, "grad_norm": 0.08867449313402176, "learning_rate": 0.0009770230010758907, "loss": 0.0752, "num_input_tokens_seen": 49644432, "step": 22950 }, { "epoch": 3.744698205546493, "grad_norm": 0.19246956706047058, "learning_rate": 0.0009770016664834762, "loss": 0.1718, "num_input_tokens_seen": 49656432, "step": 22955 }, { "epoch": 3.7455138662316476, "grad_norm": 0.021392393857240677, "learning_rate": 0.000976980322224018, "loss": 0.0801, "num_input_tokens_seen": 49667600, "step": 22960 }, { "epoch": 3.746329526916803, "grad_norm": 0.22876377403736115, "learning_rate": 0.0009769589682979481, "loss": 0.0842, "num_input_tokens_seen": 49678576, "step": 22965 }, { "epoch": 3.7471451876019577, "grad_norm": 0.12781397998332977, "learning_rate": 0.0009769376047056998, "loss": 0.1321, "num_input_tokens_seen": 49689296, "step": 22970 }, { "epoch": 3.7479608482871125, "grad_norm": 0.17140617966651917, "learning_rate": 0.0009769162314477058, "loss": 0.2144, "num_input_tokens_seen": 49700208, "step": 22975 }, { "epoch": 3.7487765089722673, "grad_norm": 0.013391259126365185, "learning_rate": 0.0009768948485243997, "loss": 0.0628, "num_input_tokens_seen": 49711696, "step": 22980 }, { "epoch": 3.7495921696574226, "grad_norm": 0.04102994501590729, "learning_rate": 0.0009768734559362142, "loss": 0.1631, "num_input_tokens_seen": 49721904, "step": 22985 }, { "epoch": 3.7504078303425774, "grad_norm": 0.08313404023647308, "learning_rate": 0.0009768520536835832, "loss": 0.2573, "num_input_tokens_seen": 49733328, "step": 22990 }, { "epoch": 3.7512234910277327, "grad_norm": 0.21866433322429657, "learning_rate": 0.0009768306417669405, "loss": 0.1553, "num_input_tokens_seen": 49744560, "step": 22995 }, { "epoch": 3.7520391517128875, "grad_norm": 0.027432316914200783, "learning_rate": 0.00097680922018672, "loss": 0.1966, "num_input_tokens_seen": 49756944, "step": 23000 }, { "epoch": 3.7528548123980423, "grad_norm": 0.03837420046329498, "learning_rate": 0.0009767877889433555, "loss": 0.0872, "num_input_tokens_seen": 49765776, "step": 23005 }, { "epoch": 3.753670473083197, "grad_norm": 0.09529531747102737, "learning_rate": 0.0009767663480372817, "loss": 0.0804, "num_input_tokens_seen": 49776400, "step": 23010 }, { "epoch": 3.7544861337683524, "grad_norm": 0.10651466995477676, "learning_rate": 0.0009767448974689332, "loss": 0.0706, "num_input_tokens_seen": 49787408, "step": 23015 }, { "epoch": 3.755301794453507, "grad_norm": 0.02113129198551178, "learning_rate": 0.0009767234372387444, "loss": 0.1721, "num_input_tokens_seen": 49797456, "step": 23020 }, { "epoch": 3.7561174551386625, "grad_norm": 0.14945995807647705, "learning_rate": 0.0009767019673471505, "loss": 0.1206, "num_input_tokens_seen": 49809328, "step": 23025 }, { "epoch": 3.7569331158238173, "grad_norm": 0.021181615069508553, "learning_rate": 0.0009766804877945864, "loss": 0.0364, "num_input_tokens_seen": 49819664, "step": 23030 }, { "epoch": 3.757748776508972, "grad_norm": 0.07378098368644714, "learning_rate": 0.0009766589985814875, "loss": 0.1234, "num_input_tokens_seen": 49831088, "step": 23035 }, { "epoch": 3.7585644371941274, "grad_norm": 0.011933263391256332, "learning_rate": 0.0009766374997082893, "loss": 0.138, "num_input_tokens_seen": 49842864, "step": 23040 }, { "epoch": 3.759380097879282, "grad_norm": 0.07167614996433258, "learning_rate": 0.0009766159911754277, "loss": 0.1442, "num_input_tokens_seen": 49852848, "step": 23045 }, { "epoch": 3.7601957585644374, "grad_norm": 0.012339459732174873, "learning_rate": 0.0009765944729833382, "loss": 0.0924, "num_input_tokens_seen": 49863088, "step": 23050 }, { "epoch": 3.7610114192495923, "grad_norm": 0.1635027825832367, "learning_rate": 0.0009765729451324573, "loss": 0.1175, "num_input_tokens_seen": 49873840, "step": 23055 }, { "epoch": 3.761827079934747, "grad_norm": 0.1554376184940338, "learning_rate": 0.000976551407623221, "loss": 0.1576, "num_input_tokens_seen": 49883888, "step": 23060 }, { "epoch": 3.762642740619902, "grad_norm": 0.011004294268786907, "learning_rate": 0.0009765298604560657, "loss": 0.1131, "num_input_tokens_seen": 49894448, "step": 23065 }, { "epoch": 3.763458401305057, "grad_norm": 0.014040950685739517, "learning_rate": 0.0009765083036314284, "loss": 0.1647, "num_input_tokens_seen": 49904400, "step": 23070 }, { "epoch": 3.764274061990212, "grad_norm": 0.017209839075803757, "learning_rate": 0.0009764867371497459, "loss": 0.0778, "num_input_tokens_seen": 49915184, "step": 23075 }, { "epoch": 3.7650897226753672, "grad_norm": 0.24092888832092285, "learning_rate": 0.000976465161011455, "loss": 0.0861, "num_input_tokens_seen": 49926128, "step": 23080 }, { "epoch": 3.765905383360522, "grad_norm": 0.10889220982789993, "learning_rate": 0.0009764435752169933, "loss": 0.0448, "num_input_tokens_seen": 49936432, "step": 23085 }, { "epoch": 3.766721044045677, "grad_norm": 0.0978965014219284, "learning_rate": 0.0009764219797667982, "loss": 0.0422, "num_input_tokens_seen": 49947760, "step": 23090 }, { "epoch": 3.767536704730832, "grad_norm": 0.3926611542701721, "learning_rate": 0.0009764003746613073, "loss": 0.0791, "num_input_tokens_seen": 49958704, "step": 23095 }, { "epoch": 3.768352365415987, "grad_norm": 0.2822864055633545, "learning_rate": 0.0009763787599009583, "loss": 0.1521, "num_input_tokens_seen": 49970064, "step": 23100 }, { "epoch": 3.7691680261011418, "grad_norm": 0.014437035657465458, "learning_rate": 0.0009763571354861895, "loss": 0.0534, "num_input_tokens_seen": 49980976, "step": 23105 }, { "epoch": 3.769983686786297, "grad_norm": 0.17819297313690186, "learning_rate": 0.0009763355014174391, "loss": 0.2183, "num_input_tokens_seen": 49991024, "step": 23110 }, { "epoch": 3.770799347471452, "grad_norm": 0.016189444810152054, "learning_rate": 0.0009763138576951454, "loss": 0.0935, "num_input_tokens_seen": 50001680, "step": 23115 }, { "epoch": 3.7716150081566067, "grad_norm": 0.03998810052871704, "learning_rate": 0.0009762922043197471, "loss": 0.0357, "num_input_tokens_seen": 50012112, "step": 23120 }, { "epoch": 3.772430668841762, "grad_norm": 0.02609841711819172, "learning_rate": 0.0009762705412916831, "loss": 0.0961, "num_input_tokens_seen": 50022768, "step": 23125 }, { "epoch": 3.7732463295269167, "grad_norm": 0.006209354382008314, "learning_rate": 0.0009762488686113924, "loss": 0.0296, "num_input_tokens_seen": 50034320, "step": 23130 }, { "epoch": 3.774061990212072, "grad_norm": 0.04003912955522537, "learning_rate": 0.0009762271862793143, "loss": 0.0711, "num_input_tokens_seen": 50044304, "step": 23135 }, { "epoch": 3.774877650897227, "grad_norm": 0.021071631461381912, "learning_rate": 0.000976205494295888, "loss": 0.1904, "num_input_tokens_seen": 50052976, "step": 23140 }, { "epoch": 3.7756933115823816, "grad_norm": 0.07991696894168854, "learning_rate": 0.0009761837926615533, "loss": 0.0411, "num_input_tokens_seen": 50064624, "step": 23145 }, { "epoch": 3.7765089722675365, "grad_norm": 0.0656970888376236, "learning_rate": 0.00097616208137675, "loss": 0.1568, "num_input_tokens_seen": 50075696, "step": 23150 }, { "epoch": 3.7773246329526917, "grad_norm": 0.22373802959918976, "learning_rate": 0.000976140360441918, "loss": 0.1426, "num_input_tokens_seen": 50086096, "step": 23155 }, { "epoch": 3.7781402936378465, "grad_norm": 0.034465014934539795, "learning_rate": 0.0009761186298574975, "loss": 0.1237, "num_input_tokens_seen": 50097520, "step": 23160 }, { "epoch": 3.778955954323002, "grad_norm": 0.041891466826200485, "learning_rate": 0.0009760968896239291, "loss": 0.0389, "num_input_tokens_seen": 50108880, "step": 23165 }, { "epoch": 3.7797716150081566, "grad_norm": 0.06839856505393982, "learning_rate": 0.0009760751397416532, "loss": 0.089, "num_input_tokens_seen": 50119504, "step": 23170 }, { "epoch": 3.7805872756933114, "grad_norm": 0.014309341087937355, "learning_rate": 0.0009760533802111107, "loss": 0.0488, "num_input_tokens_seen": 50130640, "step": 23175 }, { "epoch": 3.7814029363784667, "grad_norm": 0.15914183855056763, "learning_rate": 0.0009760316110327426, "loss": 0.0888, "num_input_tokens_seen": 50141616, "step": 23180 }, { "epoch": 3.7822185970636215, "grad_norm": 0.3128720223903656, "learning_rate": 0.00097600983220699, "loss": 0.1164, "num_input_tokens_seen": 50152336, "step": 23185 }, { "epoch": 3.7830342577487768, "grad_norm": 0.09936369210481644, "learning_rate": 0.0009759880437342941, "loss": 0.0577, "num_input_tokens_seen": 50163088, "step": 23190 }, { "epoch": 3.7838499184339316, "grad_norm": 0.2154925912618637, "learning_rate": 0.0009759662456150967, "loss": 0.1918, "num_input_tokens_seen": 50173648, "step": 23195 }, { "epoch": 3.7846655791190864, "grad_norm": 0.01144491694867611, "learning_rate": 0.0009759444378498397, "loss": 0.0797, "num_input_tokens_seen": 50184816, "step": 23200 }, { "epoch": 3.7854812398042412, "grad_norm": 0.014319537207484245, "learning_rate": 0.0009759226204389646, "loss": 0.0992, "num_input_tokens_seen": 50196560, "step": 23205 }, { "epoch": 3.7862969004893965, "grad_norm": 0.025292597711086273, "learning_rate": 0.0009759007933829141, "loss": 0.0594, "num_input_tokens_seen": 50207824, "step": 23210 }, { "epoch": 3.7871125611745513, "grad_norm": 0.08496753871440887, "learning_rate": 0.0009758789566821302, "loss": 0.1291, "num_input_tokens_seen": 50219216, "step": 23215 }, { "epoch": 3.7879282218597066, "grad_norm": 0.012534767389297485, "learning_rate": 0.0009758571103370556, "loss": 0.0247, "num_input_tokens_seen": 50229648, "step": 23220 }, { "epoch": 3.7887438825448614, "grad_norm": 0.048558078706264496, "learning_rate": 0.000975835254348133, "loss": 0.0374, "num_input_tokens_seen": 50240368, "step": 23225 }, { "epoch": 3.789559543230016, "grad_norm": 0.038768526166677475, "learning_rate": 0.0009758133887158053, "loss": 0.1518, "num_input_tokens_seen": 50251312, "step": 23230 }, { "epoch": 3.790375203915171, "grad_norm": 0.011934944428503513, "learning_rate": 0.0009757915134405155, "loss": 0.0362, "num_input_tokens_seen": 50262320, "step": 23235 }, { "epoch": 3.7911908646003263, "grad_norm": 0.02086496911942959, "learning_rate": 0.0009757696285227073, "loss": 0.1667, "num_input_tokens_seen": 50272080, "step": 23240 }, { "epoch": 3.792006525285481, "grad_norm": 0.01079615205526352, "learning_rate": 0.000975747733962824, "loss": 0.0395, "num_input_tokens_seen": 50283056, "step": 23245 }, { "epoch": 3.7928221859706364, "grad_norm": 0.005180465057492256, "learning_rate": 0.0009757258297613095, "loss": 0.2372, "num_input_tokens_seen": 50294800, "step": 23250 }, { "epoch": 3.793637846655791, "grad_norm": 0.11687880754470825, "learning_rate": 0.0009757039159186072, "loss": 0.1172, "num_input_tokens_seen": 50306512, "step": 23255 }, { "epoch": 3.794453507340946, "grad_norm": 0.12655872106552124, "learning_rate": 0.0009756819924351618, "loss": 0.1006, "num_input_tokens_seen": 50316912, "step": 23260 }, { "epoch": 3.7952691680261013, "grad_norm": 0.20242717862129211, "learning_rate": 0.0009756600593114174, "loss": 0.0794, "num_input_tokens_seen": 50328496, "step": 23265 }, { "epoch": 3.796084828711256, "grad_norm": 0.01801212504506111, "learning_rate": 0.0009756381165478183, "loss": 0.2224, "num_input_tokens_seen": 50338128, "step": 23270 }, { "epoch": 3.7969004893964113, "grad_norm": 0.08773397654294968, "learning_rate": 0.0009756161641448095, "loss": 0.1753, "num_input_tokens_seen": 50349328, "step": 23275 }, { "epoch": 3.797716150081566, "grad_norm": 0.18493768572807312, "learning_rate": 0.0009755942021028356, "loss": 0.2975, "num_input_tokens_seen": 50361232, "step": 23280 }, { "epoch": 3.798531810766721, "grad_norm": 0.2676745355129242, "learning_rate": 0.0009755722304223422, "loss": 0.2993, "num_input_tokens_seen": 50371632, "step": 23285 }, { "epoch": 3.799347471451876, "grad_norm": 0.08383552730083466, "learning_rate": 0.000975550249103774, "loss": 0.0929, "num_input_tokens_seen": 50381616, "step": 23290 }, { "epoch": 3.800163132137031, "grad_norm": 0.03349275887012482, "learning_rate": 0.0009755282581475768, "loss": 0.1189, "num_input_tokens_seen": 50392816, "step": 23295 }, { "epoch": 3.800978792822186, "grad_norm": 0.12421400845050812, "learning_rate": 0.0009755062575541962, "loss": 0.1084, "num_input_tokens_seen": 50403280, "step": 23300 }, { "epoch": 3.801794453507341, "grad_norm": 0.075412318110466, "learning_rate": 0.000975484247324078, "loss": 0.0887, "num_input_tokens_seen": 50414480, "step": 23305 }, { "epoch": 3.802610114192496, "grad_norm": 0.16242346167564392, "learning_rate": 0.0009754622274576684, "loss": 0.1853, "num_input_tokens_seen": 50424368, "step": 23310 }, { "epoch": 3.8034257748776508, "grad_norm": 0.29253003001213074, "learning_rate": 0.0009754401979554136, "loss": 0.2353, "num_input_tokens_seen": 50434608, "step": 23315 }, { "epoch": 3.804241435562806, "grad_norm": 0.03192012012004852, "learning_rate": 0.00097541815881776, "loss": 0.0414, "num_input_tokens_seen": 50445552, "step": 23320 }, { "epoch": 3.805057096247961, "grad_norm": 0.03622366860508919, "learning_rate": 0.0009753961100451544, "loss": 0.1547, "num_input_tokens_seen": 50456240, "step": 23325 }, { "epoch": 3.8058727569331157, "grad_norm": 0.007690200116485357, "learning_rate": 0.0009753740516380433, "loss": 0.1217, "num_input_tokens_seen": 50466160, "step": 23330 }, { "epoch": 3.806688417618271, "grad_norm": 0.01823308691382408, "learning_rate": 0.0009753519835968743, "loss": 0.1037, "num_input_tokens_seen": 50475024, "step": 23335 }, { "epoch": 3.8075040783034257, "grad_norm": 0.03313617780804634, "learning_rate": 0.0009753299059220941, "loss": 0.0663, "num_input_tokens_seen": 50487600, "step": 23340 }, { "epoch": 3.8083197389885806, "grad_norm": 0.08058440685272217, "learning_rate": 0.0009753078186141506, "loss": 0.1118, "num_input_tokens_seen": 50497488, "step": 23345 }, { "epoch": 3.809135399673736, "grad_norm": 0.0680818036198616, "learning_rate": 0.0009752857216734909, "loss": 0.0484, "num_input_tokens_seen": 50508336, "step": 23350 }, { "epoch": 3.8099510603588906, "grad_norm": 0.22939538955688477, "learning_rate": 0.0009752636151005633, "loss": 0.1434, "num_input_tokens_seen": 50518192, "step": 23355 }, { "epoch": 3.810766721044046, "grad_norm": 0.02211681194603443, "learning_rate": 0.0009752414988958156, "loss": 0.0673, "num_input_tokens_seen": 50529200, "step": 23360 }, { "epoch": 3.8115823817292007, "grad_norm": 0.2066287249326706, "learning_rate": 0.000975219373059696, "loss": 0.2095, "num_input_tokens_seen": 50539920, "step": 23365 }, { "epoch": 3.8123980424143555, "grad_norm": 0.07064596563577652, "learning_rate": 0.000975197237592653, "loss": 0.0655, "num_input_tokens_seen": 50550416, "step": 23370 }, { "epoch": 3.8132137030995104, "grad_norm": 0.01532050408422947, "learning_rate": 0.000975175092495135, "loss": 0.1925, "num_input_tokens_seen": 50561808, "step": 23375 }, { "epoch": 3.8140293637846656, "grad_norm": 0.02331993728876114, "learning_rate": 0.0009751529377675911, "loss": 0.1121, "num_input_tokens_seen": 50571792, "step": 23380 }, { "epoch": 3.8148450244698204, "grad_norm": 0.166970893740654, "learning_rate": 0.00097513077341047, "loss": 0.1895, "num_input_tokens_seen": 50583280, "step": 23385 }, { "epoch": 3.8156606851549757, "grad_norm": 0.20115573704242706, "learning_rate": 0.0009751085994242212, "loss": 0.18, "num_input_tokens_seen": 50593872, "step": 23390 }, { "epoch": 3.8164763458401305, "grad_norm": 0.0661921575665474, "learning_rate": 0.0009750864158092938, "loss": 0.1162, "num_input_tokens_seen": 50603600, "step": 23395 }, { "epoch": 3.8172920065252853, "grad_norm": 0.17341428995132446, "learning_rate": 0.0009750642225661375, "loss": 0.1361, "num_input_tokens_seen": 50614608, "step": 23400 }, { "epoch": 3.8181076672104406, "grad_norm": 0.05718767270445824, "learning_rate": 0.0009750420196952021, "loss": 0.1265, "num_input_tokens_seen": 50625552, "step": 23405 }, { "epoch": 3.8189233278955954, "grad_norm": 0.014998821541666985, "learning_rate": 0.0009750198071969376, "loss": 0.0404, "num_input_tokens_seen": 50636176, "step": 23410 }, { "epoch": 3.8197389885807507, "grad_norm": 0.016761574894189835, "learning_rate": 0.0009749975850717941, "loss": 0.0604, "num_input_tokens_seen": 50647408, "step": 23415 }, { "epoch": 3.8205546492659055, "grad_norm": 0.023346178233623505, "learning_rate": 0.0009749753533202218, "loss": 0.1211, "num_input_tokens_seen": 50658896, "step": 23420 }, { "epoch": 3.8213703099510603, "grad_norm": 0.09524130076169968, "learning_rate": 0.0009749531119426716, "loss": 0.0637, "num_input_tokens_seen": 50670160, "step": 23425 }, { "epoch": 3.822185970636215, "grad_norm": 0.04625507444143295, "learning_rate": 0.000974930860939594, "loss": 0.0956, "num_input_tokens_seen": 50680240, "step": 23430 }, { "epoch": 3.8230016313213704, "grad_norm": 0.10155382752418518, "learning_rate": 0.0009749086003114399, "loss": 0.0425, "num_input_tokens_seen": 50691376, "step": 23435 }, { "epoch": 3.823817292006525, "grad_norm": 0.05376620218157768, "learning_rate": 0.0009748863300586605, "loss": 0.0676, "num_input_tokens_seen": 50701808, "step": 23440 }, { "epoch": 3.8246329526916805, "grad_norm": 0.014472625218331814, "learning_rate": 0.0009748640501817074, "loss": 0.1025, "num_input_tokens_seen": 50712592, "step": 23445 }, { "epoch": 3.8254486133768353, "grad_norm": 0.022697031497955322, "learning_rate": 0.0009748417606810319, "loss": 0.0574, "num_input_tokens_seen": 50723088, "step": 23450 }, { "epoch": 3.82626427406199, "grad_norm": 0.25425323843955994, "learning_rate": 0.0009748194615570857, "loss": 0.217, "num_input_tokens_seen": 50733328, "step": 23455 }, { "epoch": 3.827079934747145, "grad_norm": 0.10280811786651611, "learning_rate": 0.0009747971528103207, "loss": 0.1424, "num_input_tokens_seen": 50744976, "step": 23460 }, { "epoch": 3.8278955954323, "grad_norm": 0.12097106873989105, "learning_rate": 0.0009747748344411891, "loss": 0.1551, "num_input_tokens_seen": 50755952, "step": 23465 }, { "epoch": 3.828711256117455, "grad_norm": 0.15204603970050812, "learning_rate": 0.0009747525064501433, "loss": 0.0997, "num_input_tokens_seen": 50766480, "step": 23470 }, { "epoch": 3.8295269168026103, "grad_norm": 0.027186892926692963, "learning_rate": 0.0009747301688376355, "loss": 0.1228, "num_input_tokens_seen": 50777808, "step": 23475 }, { "epoch": 3.830342577487765, "grad_norm": 0.03366658836603165, "learning_rate": 0.0009747078216041187, "loss": 0.0903, "num_input_tokens_seen": 50789552, "step": 23480 }, { "epoch": 3.83115823817292, "grad_norm": 0.25274983048439026, "learning_rate": 0.0009746854647500457, "loss": 0.1051, "num_input_tokens_seen": 50799920, "step": 23485 }, { "epoch": 3.831973898858075, "grad_norm": 0.006881623528897762, "learning_rate": 0.0009746630982758695, "loss": 0.0453, "num_input_tokens_seen": 50811792, "step": 23490 }, { "epoch": 3.83278955954323, "grad_norm": 0.002175088506191969, "learning_rate": 0.0009746407221820435, "loss": 0.0442, "num_input_tokens_seen": 50822960, "step": 23495 }, { "epoch": 3.8336052202283852, "grad_norm": 0.37563708424568176, "learning_rate": 0.0009746183364690212, "loss": 0.1254, "num_input_tokens_seen": 50833328, "step": 23500 }, { "epoch": 3.83442088091354, "grad_norm": 0.058402448892593384, "learning_rate": 0.0009745959411372561, "loss": 0.0492, "num_input_tokens_seen": 50844144, "step": 23505 }, { "epoch": 3.835236541598695, "grad_norm": 0.23560450971126556, "learning_rate": 0.0009745735361872023, "loss": 0.1266, "num_input_tokens_seen": 50855440, "step": 23510 }, { "epoch": 3.8360522022838497, "grad_norm": 0.13727842271327972, "learning_rate": 0.0009745511216193137, "loss": 0.1043, "num_input_tokens_seen": 50866288, "step": 23515 }, { "epoch": 3.836867862969005, "grad_norm": 0.031131109222769737, "learning_rate": 0.0009745286974340445, "loss": 0.1187, "num_input_tokens_seen": 50877360, "step": 23520 }, { "epoch": 3.8376835236541598, "grad_norm": 0.0038565329741686583, "learning_rate": 0.0009745062636318495, "loss": 0.0595, "num_input_tokens_seen": 50889424, "step": 23525 }, { "epoch": 3.838499184339315, "grad_norm": 0.13137976825237274, "learning_rate": 0.0009744838202131829, "loss": 0.1308, "num_input_tokens_seen": 50900560, "step": 23530 }, { "epoch": 3.83931484502447, "grad_norm": 0.0036349627189338207, "learning_rate": 0.0009744613671784999, "loss": 0.0695, "num_input_tokens_seen": 50910256, "step": 23535 }, { "epoch": 3.8401305057096247, "grad_norm": 0.07140156626701355, "learning_rate": 0.0009744389045282554, "loss": 0.1406, "num_input_tokens_seen": 50920208, "step": 23540 }, { "epoch": 3.84094616639478, "grad_norm": 0.06446415930986404, "learning_rate": 0.0009744164322629046, "loss": 0.0901, "num_input_tokens_seen": 50930832, "step": 23545 }, { "epoch": 3.8417618270799347, "grad_norm": 0.055119890719652176, "learning_rate": 0.0009743939503829027, "loss": 0.0645, "num_input_tokens_seen": 50942064, "step": 23550 }, { "epoch": 3.8425774877650896, "grad_norm": 0.18346649408340454, "learning_rate": 0.0009743714588887059, "loss": 0.377, "num_input_tokens_seen": 50953008, "step": 23555 }, { "epoch": 3.843393148450245, "grad_norm": 0.22450843453407288, "learning_rate": 0.0009743489577807696, "loss": 0.1515, "num_input_tokens_seen": 50964720, "step": 23560 }, { "epoch": 3.8442088091353996, "grad_norm": 0.015807831659913063, "learning_rate": 0.0009743264470595499, "loss": 0.1297, "num_input_tokens_seen": 50975088, "step": 23565 }, { "epoch": 3.8450244698205545, "grad_norm": 0.10449165105819702, "learning_rate": 0.0009743039267255031, "loss": 0.0935, "num_input_tokens_seen": 50984912, "step": 23570 }, { "epoch": 3.8458401305057097, "grad_norm": 0.005697912070900202, "learning_rate": 0.0009742813967790855, "loss": 0.0575, "num_input_tokens_seen": 50996304, "step": 23575 }, { "epoch": 3.8466557911908645, "grad_norm": 0.06416153907775879, "learning_rate": 0.0009742588572207538, "loss": 0.0658, "num_input_tokens_seen": 51007376, "step": 23580 }, { "epoch": 3.84747145187602, "grad_norm": 0.10796103626489639, "learning_rate": 0.0009742363080509647, "loss": 0.1991, "num_input_tokens_seen": 51018736, "step": 23585 }, { "epoch": 3.8482871125611746, "grad_norm": 0.012265535071492195, "learning_rate": 0.000974213749270175, "loss": 0.0476, "num_input_tokens_seen": 51029648, "step": 23590 }, { "epoch": 3.8491027732463294, "grad_norm": 0.0124428141862154, "learning_rate": 0.0009741911808788422, "loss": 0.1514, "num_input_tokens_seen": 51041104, "step": 23595 }, { "epoch": 3.8499184339314843, "grad_norm": 0.005789014510810375, "learning_rate": 0.0009741686028774236, "loss": 0.1404, "num_input_tokens_seen": 51052432, "step": 23600 }, { "epoch": 3.8507340946166395, "grad_norm": 0.05271648243069649, "learning_rate": 0.0009741460152663768, "loss": 0.0728, "num_input_tokens_seen": 51063440, "step": 23605 }, { "epoch": 3.8515497553017943, "grad_norm": 0.008030619472265244, "learning_rate": 0.0009741234180461593, "loss": 0.1906, "num_input_tokens_seen": 51074864, "step": 23610 }, { "epoch": 3.8523654159869496, "grad_norm": 0.1799931675195694, "learning_rate": 0.0009741008112172293, "loss": 0.1197, "num_input_tokens_seen": 51084848, "step": 23615 }, { "epoch": 3.8531810766721044, "grad_norm": 0.24197715520858765, "learning_rate": 0.0009740781947800452, "loss": 0.2887, "num_input_tokens_seen": 51095312, "step": 23620 }, { "epoch": 3.8539967373572592, "grad_norm": 0.06715114414691925, "learning_rate": 0.0009740555687350648, "loss": 0.078, "num_input_tokens_seen": 51106800, "step": 23625 }, { "epoch": 3.8548123980424145, "grad_norm": 0.05898895114660263, "learning_rate": 0.0009740329330827471, "loss": 0.1036, "num_input_tokens_seen": 51117648, "step": 23630 }, { "epoch": 3.8556280587275693, "grad_norm": 0.13618339598178864, "learning_rate": 0.0009740102878235505, "loss": 0.0797, "num_input_tokens_seen": 51129200, "step": 23635 }, { "epoch": 3.8564437194127246, "grad_norm": 0.22078992426395416, "learning_rate": 0.0009739876329579343, "loss": 0.1101, "num_input_tokens_seen": 51140080, "step": 23640 }, { "epoch": 3.8572593800978794, "grad_norm": 0.25214922428131104, "learning_rate": 0.0009739649684863572, "loss": 0.1807, "num_input_tokens_seen": 51151440, "step": 23645 }, { "epoch": 3.858075040783034, "grad_norm": 0.032653022557497025, "learning_rate": 0.0009739422944092789, "loss": 0.0547, "num_input_tokens_seen": 51162896, "step": 23650 }, { "epoch": 3.858890701468189, "grad_norm": 0.06532101333141327, "learning_rate": 0.0009739196107271586, "loss": 0.109, "num_input_tokens_seen": 51173232, "step": 23655 }, { "epoch": 3.8597063621533443, "grad_norm": 0.09505137801170349, "learning_rate": 0.0009738969174404562, "loss": 0.0568, "num_input_tokens_seen": 51184336, "step": 23660 }, { "epoch": 3.860522022838499, "grad_norm": 0.023910559713840485, "learning_rate": 0.0009738742145496318, "loss": 0.18, "num_input_tokens_seen": 51195280, "step": 23665 }, { "epoch": 3.8613376835236544, "grad_norm": 0.05344879627227783, "learning_rate": 0.000973851502055145, "loss": 0.0418, "num_input_tokens_seen": 51204976, "step": 23670 }, { "epoch": 3.862153344208809, "grad_norm": 0.16817040741443634, "learning_rate": 0.0009738287799574565, "loss": 0.1508, "num_input_tokens_seen": 51217328, "step": 23675 }, { "epoch": 3.862969004893964, "grad_norm": 0.07454685121774673, "learning_rate": 0.0009738060482570268, "loss": 0.0351, "num_input_tokens_seen": 51228400, "step": 23680 }, { "epoch": 3.863784665579119, "grad_norm": 0.1946832686662674, "learning_rate": 0.0009737833069543163, "loss": 0.1932, "num_input_tokens_seen": 51238640, "step": 23685 }, { "epoch": 3.864600326264274, "grad_norm": 0.2734370231628418, "learning_rate": 0.0009737605560497862, "loss": 0.2889, "num_input_tokens_seen": 51249136, "step": 23690 }, { "epoch": 3.865415986949429, "grad_norm": 0.07125987857580185, "learning_rate": 0.0009737377955438973, "loss": 0.051, "num_input_tokens_seen": 51260112, "step": 23695 }, { "epoch": 3.866231647634584, "grad_norm": 0.03408673778176308, "learning_rate": 0.000973715025437111, "loss": 0.1153, "num_input_tokens_seen": 51269616, "step": 23700 }, { "epoch": 3.867047308319739, "grad_norm": 0.06401803344488144, "learning_rate": 0.0009736922457298889, "loss": 0.131, "num_input_tokens_seen": 51280624, "step": 23705 }, { "epoch": 3.867862969004894, "grad_norm": 0.03508564084768295, "learning_rate": 0.0009736694564226924, "loss": 0.13, "num_input_tokens_seen": 51291952, "step": 23710 }, { "epoch": 3.868678629690049, "grad_norm": 0.23152831196784973, "learning_rate": 0.0009736466575159835, "loss": 0.1823, "num_input_tokens_seen": 51302672, "step": 23715 }, { "epoch": 3.869494290375204, "grad_norm": 0.008906074799597263, "learning_rate": 0.0009736238490102243, "loss": 0.1114, "num_input_tokens_seen": 51312848, "step": 23720 }, { "epoch": 3.870309951060359, "grad_norm": 0.008829626254737377, "learning_rate": 0.0009736010309058769, "loss": 0.0331, "num_input_tokens_seen": 51323376, "step": 23725 }, { "epoch": 3.871125611745514, "grad_norm": 0.11298642307519913, "learning_rate": 0.0009735782032034038, "loss": 0.1043, "num_input_tokens_seen": 51334192, "step": 23730 }, { "epoch": 3.8719412724306688, "grad_norm": 0.1311386078596115, "learning_rate": 0.0009735553659032674, "loss": 0.1297, "num_input_tokens_seen": 51345072, "step": 23735 }, { "epoch": 3.8727569331158236, "grad_norm": 0.09266921132802963, "learning_rate": 0.000973532519005931, "loss": 0.049, "num_input_tokens_seen": 51355280, "step": 23740 }, { "epoch": 3.873572593800979, "grad_norm": 0.015555300749838352, "learning_rate": 0.0009735096625118574, "loss": 0.0823, "num_input_tokens_seen": 51366608, "step": 23745 }, { "epoch": 3.8743882544861337, "grad_norm": 0.04085804149508476, "learning_rate": 0.0009734867964215099, "loss": 0.0795, "num_input_tokens_seen": 51377488, "step": 23750 }, { "epoch": 3.875203915171289, "grad_norm": 0.16222117841243744, "learning_rate": 0.0009734639207353516, "loss": 0.1495, "num_input_tokens_seen": 51389168, "step": 23755 }, { "epoch": 3.8760195758564437, "grad_norm": 0.06792500615119934, "learning_rate": 0.0009734410354538464, "loss": 0.205, "num_input_tokens_seen": 51400368, "step": 23760 }, { "epoch": 3.8768352365415986, "grad_norm": 0.10864797234535217, "learning_rate": 0.0009734181405774581, "loss": 0.0554, "num_input_tokens_seen": 51410288, "step": 23765 }, { "epoch": 3.877650897226754, "grad_norm": 0.013939526863396168, "learning_rate": 0.0009733952361066505, "loss": 0.0239, "num_input_tokens_seen": 51420432, "step": 23770 }, { "epoch": 3.8784665579119086, "grad_norm": 0.07833394408226013, "learning_rate": 0.0009733723220418877, "loss": 0.3111, "num_input_tokens_seen": 51429968, "step": 23775 }, { "epoch": 3.8792822185970635, "grad_norm": 0.007237799931317568, "learning_rate": 0.0009733493983836345, "loss": 0.0471, "num_input_tokens_seen": 51439312, "step": 23780 }, { "epoch": 3.8800978792822187, "grad_norm": 0.09092199802398682, "learning_rate": 0.0009733264651323553, "loss": 0.1733, "num_input_tokens_seen": 51449808, "step": 23785 }, { "epoch": 3.8809135399673735, "grad_norm": 0.015112137421965599, "learning_rate": 0.0009733035222885149, "loss": 0.0582, "num_input_tokens_seen": 51460496, "step": 23790 }, { "epoch": 3.8817292006525284, "grad_norm": 0.0755823478102684, "learning_rate": 0.000973280569852578, "loss": 0.1515, "num_input_tokens_seen": 51469424, "step": 23795 }, { "epoch": 3.8825448613376836, "grad_norm": 0.14067342877388, "learning_rate": 0.00097325760782501, "loss": 0.0405, "num_input_tokens_seen": 51480656, "step": 23800 }, { "epoch": 3.8833605220228384, "grad_norm": 0.03899535536766052, "learning_rate": 0.0009732346362062763, "loss": 0.1319, "num_input_tokens_seen": 51491696, "step": 23805 }, { "epoch": 3.8841761827079937, "grad_norm": 0.0993603765964508, "learning_rate": 0.0009732116549968421, "loss": 0.0957, "num_input_tokens_seen": 51501584, "step": 23810 }, { "epoch": 3.8849918433931485, "grad_norm": 0.19096675515174866, "learning_rate": 0.0009731886641971737, "loss": 0.1519, "num_input_tokens_seen": 51512080, "step": 23815 }, { "epoch": 3.8858075040783033, "grad_norm": 0.24470771849155426, "learning_rate": 0.0009731656638077367, "loss": 0.1327, "num_input_tokens_seen": 51522640, "step": 23820 }, { "epoch": 3.886623164763458, "grad_norm": 0.16861572861671448, "learning_rate": 0.0009731426538289971, "loss": 0.0593, "num_input_tokens_seen": 51534288, "step": 23825 }, { "epoch": 3.8874388254486134, "grad_norm": 0.10015416890382767, "learning_rate": 0.0009731196342614214, "loss": 0.0965, "num_input_tokens_seen": 51544720, "step": 23830 }, { "epoch": 3.8882544861337682, "grad_norm": 0.16817638278007507, "learning_rate": 0.0009730966051054763, "loss": 0.1047, "num_input_tokens_seen": 51555376, "step": 23835 }, { "epoch": 3.8890701468189235, "grad_norm": 0.10419394820928574, "learning_rate": 0.0009730735663616281, "loss": 0.2183, "num_input_tokens_seen": 51567568, "step": 23840 }, { "epoch": 3.8898858075040783, "grad_norm": 0.15064406394958496, "learning_rate": 0.0009730505180303441, "loss": 0.201, "num_input_tokens_seen": 51578384, "step": 23845 }, { "epoch": 3.890701468189233, "grad_norm": 0.22134579718112946, "learning_rate": 0.0009730274601120913, "loss": 0.1304, "num_input_tokens_seen": 51588656, "step": 23850 }, { "epoch": 3.8915171288743884, "grad_norm": 0.1771828681230545, "learning_rate": 0.0009730043926073369, "loss": 0.1495, "num_input_tokens_seen": 51600112, "step": 23855 }, { "epoch": 3.892332789559543, "grad_norm": 0.08342130482196808, "learning_rate": 0.0009729813155165484, "loss": 0.171, "num_input_tokens_seen": 51610864, "step": 23860 }, { "epoch": 3.8931484502446985, "grad_norm": 0.046608809381723404, "learning_rate": 0.0009729582288401934, "loss": 0.1579, "num_input_tokens_seen": 51621616, "step": 23865 }, { "epoch": 3.8939641109298533, "grad_norm": 0.15302029252052307, "learning_rate": 0.0009729351325787402, "loss": 0.1561, "num_input_tokens_seen": 51633264, "step": 23870 }, { "epoch": 3.894779771615008, "grad_norm": 0.04558643698692322, "learning_rate": 0.0009729120267326564, "loss": 0.0339, "num_input_tokens_seen": 51643056, "step": 23875 }, { "epoch": 3.895595432300163, "grad_norm": 0.032037895172834396, "learning_rate": 0.0009728889113024103, "loss": 0.0536, "num_input_tokens_seen": 51653200, "step": 23880 }, { "epoch": 3.896411092985318, "grad_norm": 0.022089485079050064, "learning_rate": 0.0009728657862884707, "loss": 0.102, "num_input_tokens_seen": 51664048, "step": 23885 }, { "epoch": 3.897226753670473, "grad_norm": 0.044861339032649994, "learning_rate": 0.0009728426516913061, "loss": 0.0744, "num_input_tokens_seen": 51674448, "step": 23890 }, { "epoch": 3.8980424143556283, "grad_norm": 0.04996919259428978, "learning_rate": 0.0009728195075113851, "loss": 0.075, "num_input_tokens_seen": 51683440, "step": 23895 }, { "epoch": 3.898858075040783, "grad_norm": 0.03168988600373268, "learning_rate": 0.000972796353749177, "loss": 0.0557, "num_input_tokens_seen": 51693552, "step": 23900 }, { "epoch": 3.899673735725938, "grad_norm": 0.06277398020029068, "learning_rate": 0.0009727731904051513, "loss": 0.0248, "num_input_tokens_seen": 51704048, "step": 23905 }, { "epoch": 3.9004893964110927, "grad_norm": 0.413730651140213, "learning_rate": 0.0009727500174797769, "loss": 0.2979, "num_input_tokens_seen": 51713264, "step": 23910 }, { "epoch": 3.901305057096248, "grad_norm": 0.02978765405714512, "learning_rate": 0.0009727268349735237, "loss": 0.0508, "num_input_tokens_seen": 51724304, "step": 23915 }, { "epoch": 3.902120717781403, "grad_norm": 0.03769057244062424, "learning_rate": 0.0009727036428868616, "loss": 0.1514, "num_input_tokens_seen": 51734832, "step": 23920 }, { "epoch": 3.902936378466558, "grad_norm": 0.12522292137145996, "learning_rate": 0.0009726804412202604, "loss": 0.0921, "num_input_tokens_seen": 51745584, "step": 23925 }, { "epoch": 3.903752039151713, "grad_norm": 0.056543026119470596, "learning_rate": 0.0009726572299741904, "loss": 0.0798, "num_input_tokens_seen": 51755856, "step": 23930 }, { "epoch": 3.9045676998368677, "grad_norm": 0.19713228940963745, "learning_rate": 0.0009726340091491221, "loss": 0.2008, "num_input_tokens_seen": 51766704, "step": 23935 }, { "epoch": 3.905383360522023, "grad_norm": 0.05433223396539688, "learning_rate": 0.000972610778745526, "loss": 0.0572, "num_input_tokens_seen": 51778288, "step": 23940 }, { "epoch": 3.9061990212071778, "grad_norm": 0.10950616747140884, "learning_rate": 0.0009725875387638729, "loss": 0.1825, "num_input_tokens_seen": 51788464, "step": 23945 }, { "epoch": 3.907014681892333, "grad_norm": 0.03161918371915817, "learning_rate": 0.0009725642892046339, "loss": 0.0664, "num_input_tokens_seen": 51798480, "step": 23950 }, { "epoch": 3.907830342577488, "grad_norm": 0.05995155870914459, "learning_rate": 0.00097254103006828, "loss": 0.0404, "num_input_tokens_seen": 51808912, "step": 23955 }, { "epoch": 3.9086460032626427, "grad_norm": 0.1624925136566162, "learning_rate": 0.0009725177613552827, "loss": 0.0665, "num_input_tokens_seen": 51819600, "step": 23960 }, { "epoch": 3.9094616639477975, "grad_norm": 0.10385416448116302, "learning_rate": 0.0009724944830661135, "loss": 0.1225, "num_input_tokens_seen": 51831824, "step": 23965 }, { "epoch": 3.9102773246329527, "grad_norm": 0.11179591715335846, "learning_rate": 0.0009724711952012442, "loss": 0.0817, "num_input_tokens_seen": 51842256, "step": 23970 }, { "epoch": 3.9110929853181076, "grad_norm": 0.35106027126312256, "learning_rate": 0.0009724478977611469, "loss": 0.1216, "num_input_tokens_seen": 51853840, "step": 23975 }, { "epoch": 3.911908646003263, "grad_norm": 0.047677185386419296, "learning_rate": 0.0009724245907462934, "loss": 0.3742, "num_input_tokens_seen": 51864464, "step": 23980 }, { "epoch": 3.9127243066884176, "grad_norm": 0.05719529092311859, "learning_rate": 0.0009724012741571563, "loss": 0.2108, "num_input_tokens_seen": 51874224, "step": 23985 }, { "epoch": 3.9135399673735725, "grad_norm": 0.14676021039485931, "learning_rate": 0.000972377947994208, "loss": 0.0799, "num_input_tokens_seen": 51886064, "step": 23990 }, { "epoch": 3.9143556280587277, "grad_norm": 0.026104595512151718, "learning_rate": 0.0009723546122579217, "loss": 0.0623, "num_input_tokens_seen": 51896880, "step": 23995 }, { "epoch": 3.9151712887438825, "grad_norm": 0.07807918637990952, "learning_rate": 0.0009723312669487696, "loss": 0.0688, "num_input_tokens_seen": 51907952, "step": 24000 }, { "epoch": 3.9159869494290374, "grad_norm": 0.11770875006914139, "learning_rate": 0.0009723079120672254, "loss": 0.1049, "num_input_tokens_seen": 51918352, "step": 24005 }, { "epoch": 3.9168026101141926, "grad_norm": 0.09638605266809464, "learning_rate": 0.0009722845476137621, "loss": 0.0385, "num_input_tokens_seen": 51928976, "step": 24010 }, { "epoch": 3.9176182707993474, "grad_norm": 0.02468789368867874, "learning_rate": 0.0009722611735888532, "loss": 0.188, "num_input_tokens_seen": 51940656, "step": 24015 }, { "epoch": 3.9184339314845023, "grad_norm": 0.09898632019758224, "learning_rate": 0.0009722377899929727, "loss": 0.053, "num_input_tokens_seen": 51951600, "step": 24020 }, { "epoch": 3.9192495921696575, "grad_norm": 0.010132327675819397, "learning_rate": 0.0009722143968265942, "loss": 0.0106, "num_input_tokens_seen": 51962608, "step": 24025 }, { "epoch": 3.9200652528548123, "grad_norm": 0.07563593238592148, "learning_rate": 0.0009721909940901918, "loss": 0.0415, "num_input_tokens_seen": 51972816, "step": 24030 }, { "epoch": 3.9208809135399676, "grad_norm": 0.01588386856019497, "learning_rate": 0.0009721675817842402, "loss": 0.1785, "num_input_tokens_seen": 51983824, "step": 24035 }, { "epoch": 3.9216965742251224, "grad_norm": 0.06717728078365326, "learning_rate": 0.0009721441599092133, "loss": 0.177, "num_input_tokens_seen": 51995120, "step": 24040 }, { "epoch": 3.9225122349102772, "grad_norm": 0.1424424946308136, "learning_rate": 0.0009721207284655862, "loss": 0.0805, "num_input_tokens_seen": 52004400, "step": 24045 }, { "epoch": 3.923327895595432, "grad_norm": 0.027268648147583008, "learning_rate": 0.0009720972874538334, "loss": 0.1578, "num_input_tokens_seen": 52014896, "step": 24050 }, { "epoch": 3.9241435562805873, "grad_norm": 0.019477305933833122, "learning_rate": 0.0009720738368744304, "loss": 0.1404, "num_input_tokens_seen": 52026160, "step": 24055 }, { "epoch": 3.924959216965742, "grad_norm": 0.035887766629457474, "learning_rate": 0.0009720503767278522, "loss": 0.0717, "num_input_tokens_seen": 52036784, "step": 24060 }, { "epoch": 3.9257748776508974, "grad_norm": 0.11439003050327301, "learning_rate": 0.0009720269070145742, "loss": 0.0399, "num_input_tokens_seen": 52046512, "step": 24065 }, { "epoch": 3.926590538336052, "grad_norm": 0.29793015122413635, "learning_rate": 0.000972003427735072, "loss": 0.1544, "num_input_tokens_seen": 52055664, "step": 24070 }, { "epoch": 3.927406199021207, "grad_norm": 0.23559176921844482, "learning_rate": 0.0009719799388898219, "loss": 0.1592, "num_input_tokens_seen": 52065424, "step": 24075 }, { "epoch": 3.9282218597063623, "grad_norm": 0.02124435268342495, "learning_rate": 0.0009719564404792993, "loss": 0.1231, "num_input_tokens_seen": 52074288, "step": 24080 }, { "epoch": 3.929037520391517, "grad_norm": 0.10373537242412567, "learning_rate": 0.0009719329325039807, "loss": 0.0768, "num_input_tokens_seen": 52086064, "step": 24085 }, { "epoch": 3.9298531810766724, "grad_norm": 0.18443343043327332, "learning_rate": 0.0009719094149643426, "loss": 0.0996, "num_input_tokens_seen": 52096560, "step": 24090 }, { "epoch": 3.930668841761827, "grad_norm": 0.10123252868652344, "learning_rate": 0.0009718858878608617, "loss": 0.0788, "num_input_tokens_seen": 52107280, "step": 24095 }, { "epoch": 3.931484502446982, "grad_norm": 0.20112597942352295, "learning_rate": 0.0009718623511940145, "loss": 0.1425, "num_input_tokens_seen": 52118064, "step": 24100 }, { "epoch": 3.932300163132137, "grad_norm": 0.01606622524559498, "learning_rate": 0.0009718388049642781, "loss": 0.0863, "num_input_tokens_seen": 52129104, "step": 24105 }, { "epoch": 3.933115823817292, "grad_norm": 0.08770886808633804, "learning_rate": 0.00097181524917213, "loss": 0.041, "num_input_tokens_seen": 52139600, "step": 24110 }, { "epoch": 3.933931484502447, "grad_norm": 0.005683081690222025, "learning_rate": 0.0009717916838180471, "loss": 0.0594, "num_input_tokens_seen": 52149744, "step": 24115 }, { "epoch": 3.934747145187602, "grad_norm": 0.06411273777484894, "learning_rate": 0.0009717681089025073, "loss": 0.0694, "num_input_tokens_seen": 52160592, "step": 24120 }, { "epoch": 3.935562805872757, "grad_norm": 0.08481893688440323, "learning_rate": 0.0009717445244259882, "loss": 0.052, "num_input_tokens_seen": 52170576, "step": 24125 }, { "epoch": 3.936378466557912, "grad_norm": 0.1713237464427948, "learning_rate": 0.0009717209303889679, "loss": 0.0488, "num_input_tokens_seen": 52181264, "step": 24130 }, { "epoch": 3.9371941272430666, "grad_norm": 0.10465515404939651, "learning_rate": 0.0009716973267919246, "loss": 0.0216, "num_input_tokens_seen": 52192432, "step": 24135 }, { "epoch": 3.938009787928222, "grad_norm": 0.25784167647361755, "learning_rate": 0.0009716737136353365, "loss": 0.2222, "num_input_tokens_seen": 52202896, "step": 24140 }, { "epoch": 3.9388254486133767, "grad_norm": 0.03767762333154678, "learning_rate": 0.0009716500909196824, "loss": 0.1513, "num_input_tokens_seen": 52214608, "step": 24145 }, { "epoch": 3.939641109298532, "grad_norm": 0.0148626072332263, "learning_rate": 0.0009716264586454406, "loss": 0.0515, "num_input_tokens_seen": 52226224, "step": 24150 }, { "epoch": 3.9404567699836868, "grad_norm": 0.1550069898366928, "learning_rate": 0.0009716028168130906, "loss": 0.224, "num_input_tokens_seen": 52237072, "step": 24155 }, { "epoch": 3.9412724306688416, "grad_norm": 0.013852902688086033, "learning_rate": 0.000971579165423111, "loss": 0.0268, "num_input_tokens_seen": 52247696, "step": 24160 }, { "epoch": 3.942088091353997, "grad_norm": 0.03343448415398598, "learning_rate": 0.0009715555044759815, "loss": 0.0365, "num_input_tokens_seen": 52259120, "step": 24165 }, { "epoch": 3.9429037520391517, "grad_norm": 0.03657007962465286, "learning_rate": 0.0009715318339721814, "loss": 0.2922, "num_input_tokens_seen": 52269136, "step": 24170 }, { "epoch": 3.943719412724307, "grad_norm": 0.1288510113954544, "learning_rate": 0.0009715081539121908, "loss": 0.0949, "num_input_tokens_seen": 52280560, "step": 24175 }, { "epoch": 3.9445350734094617, "grad_norm": 0.07137199491262436, "learning_rate": 0.0009714844642964891, "loss": 0.0433, "num_input_tokens_seen": 52289072, "step": 24180 }, { "epoch": 3.9453507340946166, "grad_norm": 0.04436810687184334, "learning_rate": 0.0009714607651255565, "loss": 0.1238, "num_input_tokens_seen": 52301264, "step": 24185 }, { "epoch": 3.9461663947797714, "grad_norm": 0.037092193961143494, "learning_rate": 0.0009714370563998736, "loss": 0.0356, "num_input_tokens_seen": 52311760, "step": 24190 }, { "epoch": 3.9469820554649266, "grad_norm": 0.03281337395310402, "learning_rate": 0.0009714133381199205, "loss": 0.0545, "num_input_tokens_seen": 52322160, "step": 24195 }, { "epoch": 3.9477977161500815, "grad_norm": 0.09527835249900818, "learning_rate": 0.0009713896102861782, "loss": 0.0856, "num_input_tokens_seen": 52331760, "step": 24200 }, { "epoch": 3.9486133768352367, "grad_norm": 0.13394109904766083, "learning_rate": 0.0009713658728991274, "loss": 0.1299, "num_input_tokens_seen": 52342512, "step": 24205 }, { "epoch": 3.9494290375203915, "grad_norm": 0.1500953584909439, "learning_rate": 0.0009713421259592493, "loss": 0.0533, "num_input_tokens_seen": 52352784, "step": 24210 }, { "epoch": 3.9502446982055464, "grad_norm": 0.1007457748055458, "learning_rate": 0.0009713183694670249, "loss": 0.0409, "num_input_tokens_seen": 52363920, "step": 24215 }, { "epoch": 3.9510603588907016, "grad_norm": 0.020904328674077988, "learning_rate": 0.000971294603422936, "loss": 0.0296, "num_input_tokens_seen": 52373904, "step": 24220 }, { "epoch": 3.9518760195758564, "grad_norm": 0.17763468623161316, "learning_rate": 0.000971270827827464, "loss": 0.1151, "num_input_tokens_seen": 52383152, "step": 24225 }, { "epoch": 3.9526916802610113, "grad_norm": 0.003645398421213031, "learning_rate": 0.0009712470426810909, "loss": 0.0381, "num_input_tokens_seen": 52393968, "step": 24230 }, { "epoch": 3.9535073409461665, "grad_norm": 0.004083213862031698, "learning_rate": 0.0009712232479842986, "loss": 0.1002, "num_input_tokens_seen": 52404368, "step": 24235 }, { "epoch": 3.9543230016313213, "grad_norm": 0.08057636767625809, "learning_rate": 0.0009711994437375693, "loss": 0.0359, "num_input_tokens_seen": 52415824, "step": 24240 }, { "epoch": 3.955138662316476, "grad_norm": 0.06122511997818947, "learning_rate": 0.0009711756299413856, "loss": 0.0688, "num_input_tokens_seen": 52426576, "step": 24245 }, { "epoch": 3.9559543230016314, "grad_norm": 0.014753523282706738, "learning_rate": 0.0009711518065962302, "loss": 0.1354, "num_input_tokens_seen": 52437456, "step": 24250 }, { "epoch": 3.9567699836867862, "grad_norm": 0.2708718180656433, "learning_rate": 0.0009711279737025856, "loss": 0.1888, "num_input_tokens_seen": 52448720, "step": 24255 }, { "epoch": 3.9575856443719415, "grad_norm": 0.18782684206962585, "learning_rate": 0.0009711041312609349, "loss": 0.1408, "num_input_tokens_seen": 52459024, "step": 24260 }, { "epoch": 3.9584013050570963, "grad_norm": 0.017271332442760468, "learning_rate": 0.0009710802792717613, "loss": 0.0117, "num_input_tokens_seen": 52469872, "step": 24265 }, { "epoch": 3.959216965742251, "grad_norm": 0.13320668041706085, "learning_rate": 0.0009710564177355483, "loss": 0.1292, "num_input_tokens_seen": 52481264, "step": 24270 }, { "epoch": 3.960032626427406, "grad_norm": 0.06218891963362694, "learning_rate": 0.0009710325466527794, "loss": 0.054, "num_input_tokens_seen": 52492240, "step": 24275 }, { "epoch": 3.960848287112561, "grad_norm": 0.002708859508857131, "learning_rate": 0.0009710086660239386, "loss": 0.105, "num_input_tokens_seen": 52502320, "step": 24280 }, { "epoch": 3.961663947797716, "grad_norm": 0.004188997205346823, "learning_rate": 0.0009709847758495094, "loss": 0.0715, "num_input_tokens_seen": 52512336, "step": 24285 }, { "epoch": 3.9624796084828713, "grad_norm": 0.009143562987446785, "learning_rate": 0.0009709608761299763, "loss": 0.1352, "num_input_tokens_seen": 52522192, "step": 24290 }, { "epoch": 3.963295269168026, "grad_norm": 0.10898889601230621, "learning_rate": 0.0009709369668658237, "loss": 0.084, "num_input_tokens_seen": 52532144, "step": 24295 }, { "epoch": 3.964110929853181, "grad_norm": 0.061856959015131, "learning_rate": 0.0009709130480575359, "loss": 0.1064, "num_input_tokens_seen": 52543824, "step": 24300 }, { "epoch": 3.964926590538336, "grad_norm": 0.16867460310459137, "learning_rate": 0.0009708891197055978, "loss": 0.1489, "num_input_tokens_seen": 52553552, "step": 24305 }, { "epoch": 3.965742251223491, "grad_norm": 0.03134358301758766, "learning_rate": 0.0009708651818104943, "loss": 0.1136, "num_input_tokens_seen": 52564528, "step": 24310 }, { "epoch": 3.9665579119086463, "grad_norm": 0.0166754350066185, "learning_rate": 0.0009708412343727106, "loss": 0.0494, "num_input_tokens_seen": 52574160, "step": 24315 }, { "epoch": 3.967373572593801, "grad_norm": 0.014139552600681782, "learning_rate": 0.000970817277392732, "loss": 0.2645, "num_input_tokens_seen": 52584368, "step": 24320 }, { "epoch": 3.968189233278956, "grad_norm": 0.20456832647323608, "learning_rate": 0.000970793310871044, "loss": 0.1731, "num_input_tokens_seen": 52595664, "step": 24325 }, { "epoch": 3.9690048939641107, "grad_norm": 0.05795443058013916, "learning_rate": 0.0009707693348081323, "loss": 0.1666, "num_input_tokens_seen": 52607472, "step": 24330 }, { "epoch": 3.969820554649266, "grad_norm": 0.031290389597415924, "learning_rate": 0.0009707453492044829, "loss": 0.0734, "num_input_tokens_seen": 52618864, "step": 24335 }, { "epoch": 3.970636215334421, "grad_norm": 0.0793827474117279, "learning_rate": 0.0009707213540605817, "loss": 0.1576, "num_input_tokens_seen": 52628656, "step": 24340 }, { "epoch": 3.971451876019576, "grad_norm": 0.055470243096351624, "learning_rate": 0.0009706973493769152, "loss": 0.1762, "num_input_tokens_seen": 52638832, "step": 24345 }, { "epoch": 3.972267536704731, "grad_norm": 0.05963335558772087, "learning_rate": 0.0009706733351539696, "loss": 0.0783, "num_input_tokens_seen": 52648400, "step": 24350 }, { "epoch": 3.9730831973898857, "grad_norm": 0.08069650828838348, "learning_rate": 0.0009706493113922318, "loss": 0.0823, "num_input_tokens_seen": 52659216, "step": 24355 }, { "epoch": 3.9738988580750405, "grad_norm": 0.08950478583574295, "learning_rate": 0.000970625278092189, "loss": 0.1453, "num_input_tokens_seen": 52669680, "step": 24360 }, { "epoch": 3.9747145187601958, "grad_norm": 0.1317376345396042, "learning_rate": 0.0009706012352543276, "loss": 0.1271, "num_input_tokens_seen": 52681008, "step": 24365 }, { "epoch": 3.9755301794453506, "grad_norm": 0.014402917586266994, "learning_rate": 0.0009705771828791353, "loss": 0.1912, "num_input_tokens_seen": 52691312, "step": 24370 }, { "epoch": 3.976345840130506, "grad_norm": 0.09558943659067154, "learning_rate": 0.0009705531209670993, "loss": 0.1728, "num_input_tokens_seen": 52701712, "step": 24375 }, { "epoch": 3.9771615008156607, "grad_norm": 0.020290644839406013, "learning_rate": 0.0009705290495187073, "loss": 0.1391, "num_input_tokens_seen": 52713648, "step": 24380 }, { "epoch": 3.9779771615008155, "grad_norm": 0.03894273191690445, "learning_rate": 0.0009705049685344474, "loss": 0.1819, "num_input_tokens_seen": 52724656, "step": 24385 }, { "epoch": 3.9787928221859707, "grad_norm": 0.017061561346054077, "learning_rate": 0.0009704808780148074, "loss": 0.1858, "num_input_tokens_seen": 52735824, "step": 24390 }, { "epoch": 3.9796084828711256, "grad_norm": 0.08352766185998917, "learning_rate": 0.0009704567779602754, "loss": 0.1287, "num_input_tokens_seen": 52747344, "step": 24395 }, { "epoch": 3.980424143556281, "grad_norm": 0.08225654065608978, "learning_rate": 0.0009704326683713402, "loss": 0.0856, "num_input_tokens_seen": 52758320, "step": 24400 }, { "epoch": 3.9812398042414356, "grad_norm": 0.143769770860672, "learning_rate": 0.00097040854924849, "loss": 0.1, "num_input_tokens_seen": 52768752, "step": 24405 }, { "epoch": 3.9820554649265905, "grad_norm": 0.03472739830613136, "learning_rate": 0.0009703844205922139, "loss": 0.1142, "num_input_tokens_seen": 52779376, "step": 24410 }, { "epoch": 3.9828711256117453, "grad_norm": 0.07649330794811249, "learning_rate": 0.0009703602824030007, "loss": 0.1169, "num_input_tokens_seen": 52790064, "step": 24415 }, { "epoch": 3.9836867862969005, "grad_norm": 0.15564045310020447, "learning_rate": 0.0009703361346813398, "loss": 0.0573, "num_input_tokens_seen": 52800080, "step": 24420 }, { "epoch": 3.9845024469820554, "grad_norm": 0.006799460854381323, "learning_rate": 0.0009703119774277205, "loss": 0.1825, "num_input_tokens_seen": 52811632, "step": 24425 }, { "epoch": 3.9853181076672106, "grad_norm": 0.08229997754096985, "learning_rate": 0.0009702878106426321, "loss": 0.0794, "num_input_tokens_seen": 52822192, "step": 24430 }, { "epoch": 3.9861337683523654, "grad_norm": 0.14275634288787842, "learning_rate": 0.0009702636343265649, "loss": 0.085, "num_input_tokens_seen": 52833168, "step": 24435 }, { "epoch": 3.9869494290375203, "grad_norm": 0.08394118398427963, "learning_rate": 0.0009702394484800084, "loss": 0.1069, "num_input_tokens_seen": 52843024, "step": 24440 }, { "epoch": 3.9877650897226755, "grad_norm": 0.09326915442943573, "learning_rate": 0.000970215253103453, "loss": 0.1826, "num_input_tokens_seen": 52852528, "step": 24445 }, { "epoch": 3.9885807504078303, "grad_norm": 0.13007716834545135, "learning_rate": 0.0009701910481973889, "loss": 0.1718, "num_input_tokens_seen": 52862288, "step": 24450 }, { "epoch": 3.9893964110929856, "grad_norm": 0.10196779668331146, "learning_rate": 0.0009701668337623069, "loss": 0.1418, "num_input_tokens_seen": 52873584, "step": 24455 }, { "epoch": 3.9902120717781404, "grad_norm": 0.15357103943824768, "learning_rate": 0.0009701426097986974, "loss": 0.1067, "num_input_tokens_seen": 52883856, "step": 24460 }, { "epoch": 3.9910277324632952, "grad_norm": 0.11803940683603287, "learning_rate": 0.0009701183763070516, "loss": 0.1075, "num_input_tokens_seen": 52895120, "step": 24465 }, { "epoch": 3.99184339314845, "grad_norm": 0.1026521846652031, "learning_rate": 0.0009700941332878605, "loss": 0.134, "num_input_tokens_seen": 52905648, "step": 24470 }, { "epoch": 3.9926590538336053, "grad_norm": 0.04510059580206871, "learning_rate": 0.0009700698807416153, "loss": 0.082, "num_input_tokens_seen": 52917168, "step": 24475 }, { "epoch": 3.99347471451876, "grad_norm": 0.029394259676337242, "learning_rate": 0.0009700456186688078, "loss": 0.1809, "num_input_tokens_seen": 52928208, "step": 24480 }, { "epoch": 3.9942903752039154, "grad_norm": 0.13904544711112976, "learning_rate": 0.0009700213470699295, "loss": 0.1228, "num_input_tokens_seen": 52939088, "step": 24485 }, { "epoch": 3.99510603588907, "grad_norm": 0.06425957381725311, "learning_rate": 0.0009699970659454723, "loss": 0.0245, "num_input_tokens_seen": 52949008, "step": 24490 }, { "epoch": 3.995921696574225, "grad_norm": 0.009191847406327724, "learning_rate": 0.0009699727752959284, "loss": 0.1855, "num_input_tokens_seen": 52959600, "step": 24495 }, { "epoch": 3.99673735725938, "grad_norm": 0.20780709385871887, "learning_rate": 0.00096994847512179, "loss": 0.0849, "num_input_tokens_seen": 52969488, "step": 24500 }, { "epoch": 3.997553017944535, "grad_norm": 0.05502910912036896, "learning_rate": 0.0009699241654235495, "loss": 0.1294, "num_input_tokens_seen": 52980848, "step": 24505 }, { "epoch": 3.99836867862969, "grad_norm": 0.06313939392566681, "learning_rate": 0.0009698998462016997, "loss": 0.0386, "num_input_tokens_seen": 52991600, "step": 24510 }, { "epoch": 3.999184339314845, "grad_norm": 0.10375002026557922, "learning_rate": 0.0009698755174567333, "loss": 0.0745, "num_input_tokens_seen": 53001680, "step": 24515 }, { "epoch": 4.0, "grad_norm": 0.1650952696800232, "learning_rate": 0.0009698511791891435, "loss": 0.2173, "num_input_tokens_seen": 53010912, "step": 24520 }, { "epoch": 4.0, "eval_loss": 0.12605686485767365, "eval_runtime": 103.1538, "eval_samples_per_second": 26.417, "eval_steps_per_second": 6.611, "num_input_tokens_seen": 53010912, "step": 24520 }, { "epoch": 4.000815660685155, "grad_norm": 0.02118796855211258, "learning_rate": 0.0009698268313994236, "loss": 0.044, "num_input_tokens_seen": 53022112, "step": 24525 }, { "epoch": 4.00163132137031, "grad_norm": 0.07317659258842468, "learning_rate": 0.0009698024740880668, "loss": 0.0935, "num_input_tokens_seen": 53033056, "step": 24530 }, { "epoch": 4.002446982055465, "grad_norm": 0.15030977129936218, "learning_rate": 0.0009697781072555672, "loss": 0.1966, "num_input_tokens_seen": 53043904, "step": 24535 }, { "epoch": 4.00326264274062, "grad_norm": 0.11619038134813309, "learning_rate": 0.0009697537309024181, "loss": 0.0746, "num_input_tokens_seen": 53054080, "step": 24540 }, { "epoch": 4.004078303425775, "grad_norm": 0.026329301297664642, "learning_rate": 0.0009697293450291136, "loss": 0.19, "num_input_tokens_seen": 53064256, "step": 24545 }, { "epoch": 4.00489396411093, "grad_norm": 0.01354842260479927, "learning_rate": 0.0009697049496361481, "loss": 0.125, "num_input_tokens_seen": 53075200, "step": 24550 }, { "epoch": 4.005709624796085, "grad_norm": 0.008432844653725624, "learning_rate": 0.000969680544724016, "loss": 0.0372, "num_input_tokens_seen": 53086496, "step": 24555 }, { "epoch": 4.006525285481239, "grad_norm": 0.13896256685256958, "learning_rate": 0.0009696561302932117, "loss": 0.1721, "num_input_tokens_seen": 53097216, "step": 24560 }, { "epoch": 4.007340946166395, "grad_norm": 0.012351407669484615, "learning_rate": 0.0009696317063442303, "loss": 0.1693, "num_input_tokens_seen": 53108160, "step": 24565 }, { "epoch": 4.00815660685155, "grad_norm": 0.04247596859931946, "learning_rate": 0.0009696072728775664, "loss": 0.0845, "num_input_tokens_seen": 53119520, "step": 24570 }, { "epoch": 4.008972267536705, "grad_norm": 0.03004402108490467, "learning_rate": 0.0009695828298937155, "loss": 0.1213, "num_input_tokens_seen": 53130976, "step": 24575 }, { "epoch": 4.00978792822186, "grad_norm": 0.023356273770332336, "learning_rate": 0.0009695583773931728, "loss": 0.0444, "num_input_tokens_seen": 53141504, "step": 24580 }, { "epoch": 4.010603588907014, "grad_norm": 0.06728272885084152, "learning_rate": 0.000969533915376434, "loss": 0.0355, "num_input_tokens_seen": 53151360, "step": 24585 }, { "epoch": 4.011419249592169, "grad_norm": 0.0781501904129982, "learning_rate": 0.0009695094438439947, "loss": 0.0826, "num_input_tokens_seen": 53163008, "step": 24590 }, { "epoch": 4.012234910277325, "grad_norm": 0.00982770137488842, "learning_rate": 0.000969484962796351, "loss": 0.1427, "num_input_tokens_seen": 53175200, "step": 24595 }, { "epoch": 4.01305057096248, "grad_norm": 0.009649750776588917, "learning_rate": 0.0009694604722339987, "loss": 0.1421, "num_input_tokens_seen": 53185952, "step": 24600 }, { "epoch": 4.013866231647635, "grad_norm": 0.038069020956754684, "learning_rate": 0.0009694359721574345, "loss": 0.2128, "num_input_tokens_seen": 53196704, "step": 24605 }, { "epoch": 4.014681892332789, "grad_norm": 0.0761687308549881, "learning_rate": 0.0009694114625671548, "loss": 0.1306, "num_input_tokens_seen": 53206272, "step": 24610 }, { "epoch": 4.015497553017944, "grad_norm": 0.02528243139386177, "learning_rate": 0.0009693869434636564, "loss": 0.1686, "num_input_tokens_seen": 53217248, "step": 24615 }, { "epoch": 4.0163132137031, "grad_norm": 0.1863255798816681, "learning_rate": 0.000969362414847436, "loss": 0.0935, "num_input_tokens_seen": 53226176, "step": 24620 }, { "epoch": 4.017128874388255, "grad_norm": 0.19547618925571442, "learning_rate": 0.0009693378767189909, "loss": 0.2296, "num_input_tokens_seen": 53236096, "step": 24625 }, { "epoch": 4.0179445350734095, "grad_norm": 0.05746988207101822, "learning_rate": 0.0009693133290788184, "loss": 0.1009, "num_input_tokens_seen": 53246496, "step": 24630 }, { "epoch": 4.018760195758564, "grad_norm": 0.04920806735754013, "learning_rate": 0.0009692887719274159, "loss": 0.1455, "num_input_tokens_seen": 53257792, "step": 24635 }, { "epoch": 4.019575856443719, "grad_norm": 0.04692168906331062, "learning_rate": 0.0009692642052652811, "loss": 0.1514, "num_input_tokens_seen": 53269312, "step": 24640 }, { "epoch": 4.020391517128874, "grad_norm": 0.03407781943678856, "learning_rate": 0.0009692396290929118, "loss": 0.0826, "num_input_tokens_seen": 53278944, "step": 24645 }, { "epoch": 4.02120717781403, "grad_norm": 0.019528646022081375, "learning_rate": 0.0009692150434108061, "loss": 0.1115, "num_input_tokens_seen": 53290656, "step": 24650 }, { "epoch": 4.0220228384991845, "grad_norm": 0.04901457577943802, "learning_rate": 0.0009691904482194625, "loss": 0.0774, "num_input_tokens_seen": 53300736, "step": 24655 }, { "epoch": 4.022838499184339, "grad_norm": 0.06730242073535919, "learning_rate": 0.000969165843519379, "loss": 0.1215, "num_input_tokens_seen": 53312032, "step": 24660 }, { "epoch": 4.023654159869494, "grad_norm": 0.26630842685699463, "learning_rate": 0.0009691412293110546, "loss": 0.2011, "num_input_tokens_seen": 53323616, "step": 24665 }, { "epoch": 4.024469820554649, "grad_norm": 0.0840611532330513, "learning_rate": 0.0009691166055949881, "loss": 0.0298, "num_input_tokens_seen": 53334848, "step": 24670 }, { "epoch": 4.025285481239805, "grad_norm": 0.020203417167067528, "learning_rate": 0.0009690919723716785, "loss": 0.0755, "num_input_tokens_seen": 53346208, "step": 24675 }, { "epoch": 4.0261011419249595, "grad_norm": 0.08350825309753418, "learning_rate": 0.000969067329641625, "loss": 0.1501, "num_input_tokens_seen": 53355520, "step": 24680 }, { "epoch": 4.026916802610114, "grad_norm": 0.05830421671271324, "learning_rate": 0.000969042677405327, "loss": 0.1252, "num_input_tokens_seen": 53366144, "step": 24685 }, { "epoch": 4.027732463295269, "grad_norm": 0.03297814726829529, "learning_rate": 0.0009690180156632839, "loss": 0.1003, "num_input_tokens_seen": 53377568, "step": 24690 }, { "epoch": 4.028548123980424, "grad_norm": 0.015535816550254822, "learning_rate": 0.000968993344415996, "loss": 0.0405, "num_input_tokens_seen": 53388320, "step": 24695 }, { "epoch": 4.029363784665579, "grad_norm": 0.05832860991358757, "learning_rate": 0.0009689686636639629, "loss": 0.1678, "num_input_tokens_seen": 53398880, "step": 24700 }, { "epoch": 4.0301794453507345, "grad_norm": 0.1182079017162323, "learning_rate": 0.000968943973407685, "loss": 0.0647, "num_input_tokens_seen": 53409376, "step": 24705 }, { "epoch": 4.030995106035889, "grad_norm": 0.018100082874298096, "learning_rate": 0.0009689192736476624, "loss": 0.1543, "num_input_tokens_seen": 53420736, "step": 24710 }, { "epoch": 4.031810766721044, "grad_norm": 0.1703413426876068, "learning_rate": 0.000968894564384396, "loss": 0.1288, "num_input_tokens_seen": 53431264, "step": 24715 }, { "epoch": 4.032626427406199, "grad_norm": 0.03496500477194786, "learning_rate": 0.0009688698456183863, "loss": 0.1077, "num_input_tokens_seen": 53441504, "step": 24720 }, { "epoch": 4.033442088091354, "grad_norm": 0.018825236707925797, "learning_rate": 0.0009688451173501345, "loss": 0.0318, "num_input_tokens_seen": 53451616, "step": 24725 }, { "epoch": 4.034257748776509, "grad_norm": 0.07746203988790512, "learning_rate": 0.0009688203795801415, "loss": 0.2134, "num_input_tokens_seen": 53463456, "step": 24730 }, { "epoch": 4.035073409461664, "grad_norm": 0.01029142364859581, "learning_rate": 0.0009687956323089088, "loss": 0.0487, "num_input_tokens_seen": 53474624, "step": 24735 }, { "epoch": 4.035889070146819, "grad_norm": 0.06122157722711563, "learning_rate": 0.000968770875536938, "loss": 0.4163, "num_input_tokens_seen": 53486304, "step": 24740 }, { "epoch": 4.036704730831974, "grad_norm": 0.2235114872455597, "learning_rate": 0.0009687461092647308, "loss": 0.2131, "num_input_tokens_seen": 53497152, "step": 24745 }, { "epoch": 4.037520391517129, "grad_norm": 0.09703657776117325, "learning_rate": 0.0009687213334927888, "loss": 0.2333, "num_input_tokens_seen": 53508704, "step": 24750 }, { "epoch": 4.0383360522022835, "grad_norm": 0.15249381959438324, "learning_rate": 0.0009686965482216145, "loss": 0.0816, "num_input_tokens_seen": 53519776, "step": 24755 }, { "epoch": 4.039151712887439, "grad_norm": 0.13171689212322235, "learning_rate": 0.00096867175345171, "loss": 0.1609, "num_input_tokens_seen": 53529952, "step": 24760 }, { "epoch": 4.039967373572594, "grad_norm": 0.0511307455599308, "learning_rate": 0.0009686469491835779, "loss": 0.157, "num_input_tokens_seen": 53541440, "step": 24765 }, { "epoch": 4.040783034257749, "grad_norm": 0.11280439794063568, "learning_rate": 0.0009686221354177209, "loss": 0.1204, "num_input_tokens_seen": 53551008, "step": 24770 }, { "epoch": 4.041598694942904, "grad_norm": 0.08131375908851624, "learning_rate": 0.0009685973121546417, "loss": 0.0589, "num_input_tokens_seen": 53560832, "step": 24775 }, { "epoch": 4.0424143556280585, "grad_norm": 0.07098285108804703, "learning_rate": 0.0009685724793948436, "loss": 0.1964, "num_input_tokens_seen": 53571264, "step": 24780 }, { "epoch": 4.043230016313213, "grad_norm": 0.04006614908576012, "learning_rate": 0.0009685476371388298, "loss": 0.0495, "num_input_tokens_seen": 53582272, "step": 24785 }, { "epoch": 4.044045676998369, "grad_norm": 0.07132290303707123, "learning_rate": 0.0009685227853871037, "loss": 0.0748, "num_input_tokens_seen": 53592992, "step": 24790 }, { "epoch": 4.044861337683524, "grad_norm": 0.02085905708372593, "learning_rate": 0.000968497924140169, "loss": 0.0274, "num_input_tokens_seen": 53603712, "step": 24795 }, { "epoch": 4.045676998368679, "grad_norm": 0.04866085574030876, "learning_rate": 0.0009684730533985296, "loss": 0.062, "num_input_tokens_seen": 53613984, "step": 24800 }, { "epoch": 4.0464926590538335, "grad_norm": 0.314547061920166, "learning_rate": 0.0009684481731626895, "loss": 0.2072, "num_input_tokens_seen": 53625024, "step": 24805 }, { "epoch": 4.047308319738988, "grad_norm": 0.016428275033831596, "learning_rate": 0.0009684232834331528, "loss": 0.1322, "num_input_tokens_seen": 53636192, "step": 24810 }, { "epoch": 4.048123980424143, "grad_norm": 0.23308736085891724, "learning_rate": 0.000968398384210424, "loss": 0.0738, "num_input_tokens_seen": 53645984, "step": 24815 }, { "epoch": 4.048939641109299, "grad_norm": 0.03797006979584694, "learning_rate": 0.0009683734754950078, "loss": 0.0557, "num_input_tokens_seen": 53655968, "step": 24820 }, { "epoch": 4.049755301794454, "grad_norm": 0.05711853504180908, "learning_rate": 0.0009683485572874089, "loss": 0.107, "num_input_tokens_seen": 53667232, "step": 24825 }, { "epoch": 4.0505709624796085, "grad_norm": 0.07612357288599014, "learning_rate": 0.0009683236295881324, "loss": 0.1029, "num_input_tokens_seen": 53678336, "step": 24830 }, { "epoch": 4.051386623164763, "grad_norm": 0.09866306930780411, "learning_rate": 0.0009682986923976834, "loss": 0.1011, "num_input_tokens_seen": 53688896, "step": 24835 }, { "epoch": 4.052202283849918, "grad_norm": 0.20437058806419373, "learning_rate": 0.0009682737457165673, "loss": 0.2586, "num_input_tokens_seen": 53699008, "step": 24840 }, { "epoch": 4.053017944535074, "grad_norm": 0.03260158374905586, "learning_rate": 0.0009682487895452898, "loss": 0.0862, "num_input_tokens_seen": 53709888, "step": 24845 }, { "epoch": 4.053833605220229, "grad_norm": 0.016189221292734146, "learning_rate": 0.0009682238238843565, "loss": 0.0317, "num_input_tokens_seen": 53720896, "step": 24850 }, { "epoch": 4.054649265905383, "grad_norm": 0.09101825207471848, "learning_rate": 0.0009681988487342735, "loss": 0.0554, "num_input_tokens_seen": 53731360, "step": 24855 }, { "epoch": 4.055464926590538, "grad_norm": 0.015849553048610687, "learning_rate": 0.0009681738640955466, "loss": 0.1559, "num_input_tokens_seen": 53742848, "step": 24860 }, { "epoch": 4.056280587275693, "grad_norm": 0.21268007159233093, "learning_rate": 0.0009681488699686827, "loss": 0.2324, "num_input_tokens_seen": 53753856, "step": 24865 }, { "epoch": 4.057096247960848, "grad_norm": 0.02260478027164936, "learning_rate": 0.000968123866354188, "loss": 0.051, "num_input_tokens_seen": 53762304, "step": 24870 }, { "epoch": 4.057911908646004, "grad_norm": 0.056082677096128464, "learning_rate": 0.0009680988532525693, "loss": 0.0717, "num_input_tokens_seen": 53773664, "step": 24875 }, { "epoch": 4.058727569331158, "grad_norm": 0.06552215665578842, "learning_rate": 0.0009680738306643335, "loss": 0.0703, "num_input_tokens_seen": 53784576, "step": 24880 }, { "epoch": 4.059543230016313, "grad_norm": 0.04463575780391693, "learning_rate": 0.0009680487985899878, "loss": 0.153, "num_input_tokens_seen": 53796096, "step": 24885 }, { "epoch": 4.060358890701468, "grad_norm": 0.012880692258477211, "learning_rate": 0.0009680237570300392, "loss": 0.0884, "num_input_tokens_seen": 53807552, "step": 24890 }, { "epoch": 4.061174551386623, "grad_norm": 0.0764424279332161, "learning_rate": 0.0009679987059849956, "loss": 0.0623, "num_input_tokens_seen": 53818272, "step": 24895 }, { "epoch": 4.061990212071779, "grad_norm": 0.23426131904125214, "learning_rate": 0.0009679736454553645, "loss": 0.095, "num_input_tokens_seen": 53829696, "step": 24900 }, { "epoch": 4.062805872756933, "grad_norm": 0.16571703553199768, "learning_rate": 0.0009679485754416538, "loss": 0.19, "num_input_tokens_seen": 53841024, "step": 24905 }, { "epoch": 4.063621533442088, "grad_norm": 0.04391804337501526, "learning_rate": 0.0009679234959443717, "loss": 0.0576, "num_input_tokens_seen": 53850816, "step": 24910 }, { "epoch": 4.064437194127243, "grad_norm": 0.09299265593290329, "learning_rate": 0.0009678984069640262, "loss": 0.1523, "num_input_tokens_seen": 53861344, "step": 24915 }, { "epoch": 4.065252854812398, "grad_norm": 0.13993260264396667, "learning_rate": 0.000967873308501126, "loss": 0.1648, "num_input_tokens_seen": 53872384, "step": 24920 }, { "epoch": 4.066068515497553, "grad_norm": 0.01173485815525055, "learning_rate": 0.0009678482005561795, "loss": 0.0924, "num_input_tokens_seen": 53884160, "step": 24925 }, { "epoch": 4.066884176182708, "grad_norm": 0.015828793868422508, "learning_rate": 0.0009678230831296959, "loss": 0.0988, "num_input_tokens_seen": 53892320, "step": 24930 }, { "epoch": 4.067699836867863, "grad_norm": 0.1745096892118454, "learning_rate": 0.000967797956222184, "loss": 0.0591, "num_input_tokens_seen": 53901920, "step": 24935 }, { "epoch": 4.068515497553018, "grad_norm": 0.06484830379486084, "learning_rate": 0.000967772819834153, "loss": 0.0235, "num_input_tokens_seen": 53911968, "step": 24940 }, { "epoch": 4.069331158238173, "grad_norm": 0.22511164844036102, "learning_rate": 0.0009677476739661124, "loss": 0.1797, "num_input_tokens_seen": 53923520, "step": 24945 }, { "epoch": 4.070146818923328, "grad_norm": 0.013632331043481827, "learning_rate": 0.0009677225186185719, "loss": 0.0258, "num_input_tokens_seen": 53934400, "step": 24950 }, { "epoch": 4.0709624796084825, "grad_norm": 0.23946814239025116, "learning_rate": 0.0009676973537920411, "loss": 0.2613, "num_input_tokens_seen": 53945696, "step": 24955 }, { "epoch": 4.071778140293638, "grad_norm": 0.20356135070323944, "learning_rate": 0.0009676721794870302, "loss": 0.1221, "num_input_tokens_seen": 53955488, "step": 24960 }, { "epoch": 4.072593800978793, "grad_norm": 0.2573193907737732, "learning_rate": 0.0009676469957040492, "loss": 0.0804, "num_input_tokens_seen": 53965792, "step": 24965 }, { "epoch": 4.073409461663948, "grad_norm": 0.04237401485443115, "learning_rate": 0.0009676218024436087, "loss": 0.1027, "num_input_tokens_seen": 53976352, "step": 24970 }, { "epoch": 4.074225122349103, "grad_norm": 0.03187128156423569, "learning_rate": 0.0009675965997062192, "loss": 0.0646, "num_input_tokens_seen": 53987744, "step": 24975 }, { "epoch": 4.075040783034257, "grad_norm": 0.14574794471263885, "learning_rate": 0.0009675713874923912, "loss": 0.1111, "num_input_tokens_seen": 53999136, "step": 24980 }, { "epoch": 4.075856443719413, "grad_norm": 0.005596504081040621, "learning_rate": 0.0009675461658026361, "loss": 0.0442, "num_input_tokens_seen": 54010112, "step": 24985 }, { "epoch": 4.076672104404568, "grad_norm": 0.04081597551703453, "learning_rate": 0.0009675209346374647, "loss": 0.0305, "num_input_tokens_seen": 54020992, "step": 24990 }, { "epoch": 4.077487765089723, "grad_norm": 0.16552519798278809, "learning_rate": 0.0009674956939973885, "loss": 0.078, "num_input_tokens_seen": 54031488, "step": 24995 }, { "epoch": 4.078303425774878, "grad_norm": 0.17626450955867767, "learning_rate": 0.0009674704438829189, "loss": 0.0988, "num_input_tokens_seen": 54043104, "step": 25000 }, { "epoch": 4.079119086460032, "grad_norm": 0.22518527507781982, "learning_rate": 0.0009674451842945679, "loss": 0.1335, "num_input_tokens_seen": 54052096, "step": 25005 }, { "epoch": 4.079934747145187, "grad_norm": 0.06224973499774933, "learning_rate": 0.0009674199152328472, "loss": 0.0489, "num_input_tokens_seen": 54062400, "step": 25010 }, { "epoch": 4.080750407830343, "grad_norm": 0.032965682446956635, "learning_rate": 0.0009673946366982689, "loss": 0.0575, "num_input_tokens_seen": 54073120, "step": 25015 }, { "epoch": 4.081566068515498, "grad_norm": 0.05587991327047348, "learning_rate": 0.0009673693486913453, "loss": 0.036, "num_input_tokens_seen": 54083296, "step": 25020 }, { "epoch": 4.082381729200653, "grad_norm": 0.052622903138399124, "learning_rate": 0.000967344051212589, "loss": 0.1853, "num_input_tokens_seen": 54093728, "step": 25025 }, { "epoch": 4.083197389885807, "grad_norm": 0.058794185519218445, "learning_rate": 0.0009673187442625126, "loss": 0.0643, "num_input_tokens_seen": 54104512, "step": 25030 }, { "epoch": 4.084013050570962, "grad_norm": 0.4008449912071228, "learning_rate": 0.0009672934278416292, "loss": 0.0552, "num_input_tokens_seen": 54116256, "step": 25035 }, { "epoch": 4.084828711256117, "grad_norm": 0.04696516692638397, "learning_rate": 0.0009672681019504514, "loss": 0.0188, "num_input_tokens_seen": 54126496, "step": 25040 }, { "epoch": 4.085644371941273, "grad_norm": 0.006844913586974144, "learning_rate": 0.0009672427665894929, "loss": 0.0958, "num_input_tokens_seen": 54138656, "step": 25045 }, { "epoch": 4.0864600326264275, "grad_norm": 0.01937456987798214, "learning_rate": 0.0009672174217592671, "loss": 0.0123, "num_input_tokens_seen": 54150336, "step": 25050 }, { "epoch": 4.087275693311582, "grad_norm": 0.06921794265508652, "learning_rate": 0.0009671920674602874, "loss": 0.0251, "num_input_tokens_seen": 54161376, "step": 25055 }, { "epoch": 4.088091353996737, "grad_norm": 0.3637893795967102, "learning_rate": 0.0009671667036930678, "loss": 0.049, "num_input_tokens_seen": 54171104, "step": 25060 }, { "epoch": 4.088907014681892, "grad_norm": 0.07828755676746368, "learning_rate": 0.0009671413304581224, "loss": 0.0705, "num_input_tokens_seen": 54182592, "step": 25065 }, { "epoch": 4.089722675367048, "grad_norm": 0.11320872604846954, "learning_rate": 0.0009671159477559652, "loss": 0.1043, "num_input_tokens_seen": 54194048, "step": 25070 }, { "epoch": 4.0905383360522025, "grad_norm": 0.06699980795383453, "learning_rate": 0.0009670905555871108, "loss": 0.1065, "num_input_tokens_seen": 54204896, "step": 25075 }, { "epoch": 4.091353996737357, "grad_norm": 0.10126443952322006, "learning_rate": 0.0009670651539520737, "loss": 0.0786, "num_input_tokens_seen": 54216288, "step": 25080 }, { "epoch": 4.092169657422512, "grad_norm": 0.09108009189367294, "learning_rate": 0.0009670397428513688, "loss": 0.0177, "num_input_tokens_seen": 54226976, "step": 25085 }, { "epoch": 4.092985318107667, "grad_norm": 0.008087173104286194, "learning_rate": 0.000967014322285511, "loss": 0.1152, "num_input_tokens_seen": 54236928, "step": 25090 }, { "epoch": 4.093800978792822, "grad_norm": 0.24143271148204803, "learning_rate": 0.0009669888922550154, "loss": 0.185, "num_input_tokens_seen": 54247712, "step": 25095 }, { "epoch": 4.0946166394779775, "grad_norm": 0.01384643279016018, "learning_rate": 0.0009669634527603977, "loss": 0.0329, "num_input_tokens_seen": 54258144, "step": 25100 }, { "epoch": 4.095432300163132, "grad_norm": 0.062448494136333466, "learning_rate": 0.000966938003802173, "loss": 0.1463, "num_input_tokens_seen": 54268448, "step": 25105 }, { "epoch": 4.096247960848287, "grad_norm": 0.2051960825920105, "learning_rate": 0.0009669125453808573, "loss": 0.1625, "num_input_tokens_seen": 54280736, "step": 25110 }, { "epoch": 4.097063621533442, "grad_norm": 0.2943427562713623, "learning_rate": 0.0009668870774969668, "loss": 0.1793, "num_input_tokens_seen": 54291968, "step": 25115 }, { "epoch": 4.097879282218597, "grad_norm": 0.036557041108608246, "learning_rate": 0.0009668616001510173, "loss": 0.1396, "num_input_tokens_seen": 54303712, "step": 25120 }, { "epoch": 4.0986949429037525, "grad_norm": 0.20468497276306152, "learning_rate": 0.0009668361133435252, "loss": 0.0988, "num_input_tokens_seen": 54314880, "step": 25125 }, { "epoch": 4.099510603588907, "grad_norm": 0.1012115404009819, "learning_rate": 0.0009668106170750071, "loss": 0.1233, "num_input_tokens_seen": 54325376, "step": 25130 }, { "epoch": 4.100326264274062, "grad_norm": 0.11558778584003448, "learning_rate": 0.0009667851113459795, "loss": 0.0793, "num_input_tokens_seen": 54335968, "step": 25135 }, { "epoch": 4.101141924959217, "grad_norm": 0.23821662366390228, "learning_rate": 0.0009667595961569595, "loss": 0.0979, "num_input_tokens_seen": 54346400, "step": 25140 }, { "epoch": 4.101957585644372, "grad_norm": 0.013337861746549606, "learning_rate": 0.0009667340715084641, "loss": 0.0484, "num_input_tokens_seen": 54357280, "step": 25145 }, { "epoch": 4.102773246329527, "grad_norm": 0.027568155899643898, "learning_rate": 0.0009667085374010107, "loss": 0.203, "num_input_tokens_seen": 54368416, "step": 25150 }, { "epoch": 4.103588907014682, "grad_norm": 0.13918833434581757, "learning_rate": 0.0009666829938351169, "loss": 0.1497, "num_input_tokens_seen": 54378720, "step": 25155 }, { "epoch": 4.104404567699837, "grad_norm": 0.005044138990342617, "learning_rate": 0.0009666574408113, "loss": 0.0733, "num_input_tokens_seen": 54389280, "step": 25160 }, { "epoch": 4.105220228384992, "grad_norm": 0.007398922927677631, "learning_rate": 0.0009666318783300782, "loss": 0.1414, "num_input_tokens_seen": 54399520, "step": 25165 }, { "epoch": 4.106035889070147, "grad_norm": 0.027200058102607727, "learning_rate": 0.0009666063063919693, "loss": 0.1109, "num_input_tokens_seen": 54410016, "step": 25170 }, { "epoch": 4.1068515497553015, "grad_norm": 0.0715019479393959, "learning_rate": 0.0009665807249974917, "loss": 0.0765, "num_input_tokens_seen": 54420480, "step": 25175 }, { "epoch": 4.107667210440456, "grad_norm": 0.2523505985736847, "learning_rate": 0.0009665551341471639, "loss": 0.078, "num_input_tokens_seen": 54429856, "step": 25180 }, { "epoch": 4.108482871125612, "grad_norm": 0.07486454397439957, "learning_rate": 0.0009665295338415044, "loss": 0.0845, "num_input_tokens_seen": 54439392, "step": 25185 }, { "epoch": 4.109298531810767, "grad_norm": 0.02025793120265007, "learning_rate": 0.0009665039240810319, "loss": 0.1363, "num_input_tokens_seen": 54451168, "step": 25190 }, { "epoch": 4.110114192495922, "grad_norm": 0.2634422183036804, "learning_rate": 0.0009664783048662658, "loss": 0.0735, "num_input_tokens_seen": 54462304, "step": 25195 }, { "epoch": 4.1109298531810765, "grad_norm": 0.17543160915374756, "learning_rate": 0.0009664526761977249, "loss": 0.0702, "num_input_tokens_seen": 54473216, "step": 25200 }, { "epoch": 4.111745513866231, "grad_norm": 0.04653778672218323, "learning_rate": 0.0009664270380759289, "loss": 0.0496, "num_input_tokens_seen": 54484224, "step": 25205 }, { "epoch": 4.112561174551387, "grad_norm": 0.09117867797613144, "learning_rate": 0.0009664013905013971, "loss": 0.0245, "num_input_tokens_seen": 54495904, "step": 25210 }, { "epoch": 4.113376835236542, "grad_norm": 0.24651305377483368, "learning_rate": 0.0009663757334746497, "loss": 0.2096, "num_input_tokens_seen": 54506240, "step": 25215 }, { "epoch": 4.114192495921697, "grad_norm": 0.017522353678941727, "learning_rate": 0.0009663500669962063, "loss": 0.016, "num_input_tokens_seen": 54517632, "step": 25220 }, { "epoch": 4.1150081566068515, "grad_norm": 0.021647047251462936, "learning_rate": 0.0009663243910665872, "loss": 0.056, "num_input_tokens_seen": 54527040, "step": 25225 }, { "epoch": 4.115823817292006, "grad_norm": 0.008239752613008022, "learning_rate": 0.0009662987056863128, "loss": 0.1381, "num_input_tokens_seen": 54537728, "step": 25230 }, { "epoch": 4.116639477977161, "grad_norm": 0.27086710929870605, "learning_rate": 0.0009662730108559034, "loss": 0.0667, "num_input_tokens_seen": 54547968, "step": 25235 }, { "epoch": 4.117455138662317, "grad_norm": 0.1596733182668686, "learning_rate": 0.0009662473065758801, "loss": 0.1615, "num_input_tokens_seen": 54558176, "step": 25240 }, { "epoch": 4.118270799347472, "grad_norm": 0.21179279685020447, "learning_rate": 0.0009662215928467636, "loss": 0.0515, "num_input_tokens_seen": 54568928, "step": 25245 }, { "epoch": 4.1190864600326265, "grad_norm": 0.018240327015519142, "learning_rate": 0.000966195869669075, "loss": 0.0339, "num_input_tokens_seen": 54579232, "step": 25250 }, { "epoch": 4.119902120717781, "grad_norm": 0.2262805551290512, "learning_rate": 0.0009661701370433358, "loss": 0.0826, "num_input_tokens_seen": 54589600, "step": 25255 }, { "epoch": 4.120717781402936, "grad_norm": 0.008236533030867577, "learning_rate": 0.0009661443949700674, "loss": 0.0907, "num_input_tokens_seen": 54600832, "step": 25260 }, { "epoch": 4.121533442088092, "grad_norm": 0.2041776180267334, "learning_rate": 0.0009661186434497915, "loss": 0.0405, "num_input_tokens_seen": 54612416, "step": 25265 }, { "epoch": 4.122349102773247, "grad_norm": 0.11392532289028168, "learning_rate": 0.0009660928824830299, "loss": 0.1556, "num_input_tokens_seen": 54623328, "step": 25270 }, { "epoch": 4.123164763458401, "grad_norm": 0.023981567472219467, "learning_rate": 0.0009660671120703048, "loss": 0.0831, "num_input_tokens_seen": 54634752, "step": 25275 }, { "epoch": 4.123980424143556, "grad_norm": 0.030910201370716095, "learning_rate": 0.0009660413322121384, "loss": 0.0507, "num_input_tokens_seen": 54644288, "step": 25280 }, { "epoch": 4.124796084828711, "grad_norm": 0.0698685273528099, "learning_rate": 0.0009660155429090531, "loss": 0.0225, "num_input_tokens_seen": 54655232, "step": 25285 }, { "epoch": 4.125611745513866, "grad_norm": 0.24555733799934387, "learning_rate": 0.0009659897441615717, "loss": 0.1784, "num_input_tokens_seen": 54666336, "step": 25290 }, { "epoch": 4.126427406199022, "grad_norm": 0.003053902881219983, "learning_rate": 0.000965963935970217, "loss": 0.0313, "num_input_tokens_seen": 54676448, "step": 25295 }, { "epoch": 4.127243066884176, "grad_norm": 0.4340955317020416, "learning_rate": 0.0009659381183355121, "loss": 0.2067, "num_input_tokens_seen": 54688032, "step": 25300 }, { "epoch": 4.128058727569331, "grad_norm": 0.008606837131083012, "learning_rate": 0.0009659122912579801, "loss": 0.1159, "num_input_tokens_seen": 54698528, "step": 25305 }, { "epoch": 4.128874388254486, "grad_norm": 0.18576228618621826, "learning_rate": 0.0009658864547381445, "loss": 0.0435, "num_input_tokens_seen": 54709440, "step": 25310 }, { "epoch": 4.129690048939641, "grad_norm": 0.03524043411016464, "learning_rate": 0.0009658606087765288, "loss": 0.0268, "num_input_tokens_seen": 54719424, "step": 25315 }, { "epoch": 4.130505709624796, "grad_norm": 0.03319951519370079, "learning_rate": 0.0009658347533736569, "loss": 0.0383, "num_input_tokens_seen": 54729728, "step": 25320 }, { "epoch": 4.131321370309951, "grad_norm": 0.2569403052330017, "learning_rate": 0.0009658088885300528, "loss": 0.1364, "num_input_tokens_seen": 54740608, "step": 25325 }, { "epoch": 4.132137030995106, "grad_norm": 0.2813706696033478, "learning_rate": 0.0009657830142462406, "loss": 0.4054, "num_input_tokens_seen": 54749856, "step": 25330 }, { "epoch": 4.132952691680261, "grad_norm": 0.016006600111722946, "learning_rate": 0.0009657571305227449, "loss": 0.0408, "num_input_tokens_seen": 54760864, "step": 25335 }, { "epoch": 4.133768352365416, "grad_norm": 0.28111621737480164, "learning_rate": 0.0009657312373600899, "loss": 0.1102, "num_input_tokens_seen": 54772832, "step": 25340 }, { "epoch": 4.134584013050571, "grad_norm": 0.05557915195822716, "learning_rate": 0.0009657053347588005, "loss": 0.114, "num_input_tokens_seen": 54783616, "step": 25345 }, { "epoch": 4.135399673735726, "grad_norm": 0.09768980741500854, "learning_rate": 0.0009656794227194019, "loss": 0.0851, "num_input_tokens_seen": 54793888, "step": 25350 }, { "epoch": 4.136215334420881, "grad_norm": 0.06314717233181, "learning_rate": 0.0009656535012424189, "loss": 0.1181, "num_input_tokens_seen": 54805600, "step": 25355 }, { "epoch": 4.137030995106036, "grad_norm": 0.1482965648174286, "learning_rate": 0.000965627570328377, "loss": 0.1069, "num_input_tokens_seen": 54817120, "step": 25360 }, { "epoch": 4.137846655791191, "grad_norm": 0.03432433307170868, "learning_rate": 0.0009656016299778017, "loss": 0.1857, "num_input_tokens_seen": 54829344, "step": 25365 }, { "epoch": 4.138662316476346, "grad_norm": 0.08786375820636749, "learning_rate": 0.0009655756801912188, "loss": 0.1095, "num_input_tokens_seen": 54838976, "step": 25370 }, { "epoch": 4.1394779771615005, "grad_norm": 0.20477358996868134, "learning_rate": 0.000965549720969154, "loss": 0.0533, "num_input_tokens_seen": 54849408, "step": 25375 }, { "epoch": 4.140293637846656, "grad_norm": 0.24416247010231018, "learning_rate": 0.0009655237523121336, "loss": 0.1572, "num_input_tokens_seen": 54860032, "step": 25380 }, { "epoch": 4.141109298531811, "grad_norm": 0.08682450652122498, "learning_rate": 0.0009654977742206837, "loss": 0.089, "num_input_tokens_seen": 54871168, "step": 25385 }, { "epoch": 4.141924959216966, "grad_norm": 0.15808038413524628, "learning_rate": 0.000965471786695331, "loss": 0.0574, "num_input_tokens_seen": 54880800, "step": 25390 }, { "epoch": 4.142740619902121, "grad_norm": 0.3128909766674042, "learning_rate": 0.0009654457897366021, "loss": 0.2258, "num_input_tokens_seen": 54890464, "step": 25395 }, { "epoch": 4.143556280587275, "grad_norm": 0.021523917093873024, "learning_rate": 0.0009654197833450235, "loss": 0.1372, "num_input_tokens_seen": 54901344, "step": 25400 }, { "epoch": 4.14437194127243, "grad_norm": 0.17342549562454224, "learning_rate": 0.0009653937675211229, "loss": 0.1227, "num_input_tokens_seen": 54912800, "step": 25405 }, { "epoch": 4.145187601957586, "grad_norm": 0.037385955452919006, "learning_rate": 0.000965367742265427, "loss": 0.0842, "num_input_tokens_seen": 54923872, "step": 25410 }, { "epoch": 4.146003262642741, "grad_norm": 0.017450451850891113, "learning_rate": 0.0009653417075784635, "loss": 0.0476, "num_input_tokens_seen": 54934112, "step": 25415 }, { "epoch": 4.146818923327896, "grad_norm": 0.010421866551041603, "learning_rate": 0.0009653156634607601, "loss": 0.0577, "num_input_tokens_seen": 54944064, "step": 25420 }, { "epoch": 4.14763458401305, "grad_norm": 0.022267503663897514, "learning_rate": 0.0009652896099128443, "loss": 0.0636, "num_input_tokens_seen": 54953920, "step": 25425 }, { "epoch": 4.148450244698205, "grad_norm": 0.024997280910611153, "learning_rate": 0.0009652635469352443, "loss": 0.1155, "num_input_tokens_seen": 54963968, "step": 25430 }, { "epoch": 4.149265905383361, "grad_norm": 0.050787921994924545, "learning_rate": 0.0009652374745284884, "loss": 0.0298, "num_input_tokens_seen": 54975008, "step": 25435 }, { "epoch": 4.150081566068516, "grad_norm": 0.00542342197149992, "learning_rate": 0.0009652113926931048, "loss": 0.0607, "num_input_tokens_seen": 54986336, "step": 25440 }, { "epoch": 4.150897226753671, "grad_norm": 0.006053715944290161, "learning_rate": 0.0009651853014296223, "loss": 0.0287, "num_input_tokens_seen": 54996384, "step": 25445 }, { "epoch": 4.151712887438825, "grad_norm": 0.09369000047445297, "learning_rate": 0.0009651592007385694, "loss": 0.1321, "num_input_tokens_seen": 55006688, "step": 25450 }, { "epoch": 4.15252854812398, "grad_norm": 0.0828031525015831, "learning_rate": 0.0009651330906204752, "loss": 0.1098, "num_input_tokens_seen": 55018496, "step": 25455 }, { "epoch": 4.153344208809135, "grad_norm": 0.015311665832996368, "learning_rate": 0.0009651069710758689, "loss": 0.0383, "num_input_tokens_seen": 55028416, "step": 25460 }, { "epoch": 4.154159869494291, "grad_norm": 0.022664785385131836, "learning_rate": 0.0009650808421052798, "loss": 0.1705, "num_input_tokens_seen": 55039232, "step": 25465 }, { "epoch": 4.1549755301794455, "grad_norm": 0.20808342099189758, "learning_rate": 0.0009650547037092374, "loss": 0.1587, "num_input_tokens_seen": 55051168, "step": 25470 }, { "epoch": 4.1557911908646, "grad_norm": 0.016271889209747314, "learning_rate": 0.0009650285558882715, "loss": 0.2009, "num_input_tokens_seen": 55061216, "step": 25475 }, { "epoch": 4.156606851549755, "grad_norm": 0.2235141098499298, "learning_rate": 0.0009650023986429119, "loss": 0.073, "num_input_tokens_seen": 55071968, "step": 25480 }, { "epoch": 4.15742251223491, "grad_norm": 0.11861073225736618, "learning_rate": 0.000964976231973689, "loss": 0.0904, "num_input_tokens_seen": 55081824, "step": 25485 }, { "epoch": 4.158238172920065, "grad_norm": 0.025952542200684547, "learning_rate": 0.0009649500558811328, "loss": 0.0308, "num_input_tokens_seen": 55094208, "step": 25490 }, { "epoch": 4.1590538336052205, "grad_norm": 0.03323453292250633, "learning_rate": 0.0009649238703657739, "loss": 0.2475, "num_input_tokens_seen": 55104608, "step": 25495 }, { "epoch": 4.159869494290375, "grad_norm": 0.18540321290493011, "learning_rate": 0.0009648976754281429, "loss": 0.0877, "num_input_tokens_seen": 55115168, "step": 25500 }, { "epoch": 4.16068515497553, "grad_norm": 0.025260310620069504, "learning_rate": 0.0009648714710687708, "loss": 0.328, "num_input_tokens_seen": 55125920, "step": 25505 }, { "epoch": 4.161500815660685, "grad_norm": 0.13722355663776398, "learning_rate": 0.0009648452572881885, "loss": 0.2166, "num_input_tokens_seen": 55135936, "step": 25510 }, { "epoch": 4.16231647634584, "grad_norm": 0.138593390583992, "learning_rate": 0.0009648190340869274, "loss": 0.0896, "num_input_tokens_seen": 55147136, "step": 25515 }, { "epoch": 4.1631321370309955, "grad_norm": 0.05859887972474098, "learning_rate": 0.000964792801465519, "loss": 0.1762, "num_input_tokens_seen": 55158432, "step": 25520 }, { "epoch": 4.16394779771615, "grad_norm": 0.12412280589342117, "learning_rate": 0.0009647665594244947, "loss": 0.1473, "num_input_tokens_seen": 55169728, "step": 25525 }, { "epoch": 4.164763458401305, "grad_norm": 0.1191917136311531, "learning_rate": 0.0009647403079643866, "loss": 0.0689, "num_input_tokens_seen": 55180640, "step": 25530 }, { "epoch": 4.16557911908646, "grad_norm": 0.06216058135032654, "learning_rate": 0.0009647140470857267, "loss": 0.0957, "num_input_tokens_seen": 55191616, "step": 25535 }, { "epoch": 4.166394779771615, "grad_norm": 0.14134946465492249, "learning_rate": 0.0009646877767890469, "loss": 0.1614, "num_input_tokens_seen": 55203392, "step": 25540 }, { "epoch": 4.16721044045677, "grad_norm": 0.06047775596380234, "learning_rate": 0.00096466149707488, "loss": 0.0884, "num_input_tokens_seen": 55214912, "step": 25545 }, { "epoch": 4.168026101141925, "grad_norm": 0.06157161295413971, "learning_rate": 0.0009646352079437582, "loss": 0.0517, "num_input_tokens_seen": 55225248, "step": 25550 }, { "epoch": 4.16884176182708, "grad_norm": 0.018876446411013603, "learning_rate": 0.0009646089093962145, "loss": 0.0912, "num_input_tokens_seen": 55234688, "step": 25555 }, { "epoch": 4.169657422512235, "grad_norm": 0.07278304547071457, "learning_rate": 0.0009645826014327819, "loss": 0.1574, "num_input_tokens_seen": 55245760, "step": 25560 }, { "epoch": 4.17047308319739, "grad_norm": 0.21307507157325745, "learning_rate": 0.0009645562840539935, "loss": 0.0803, "num_input_tokens_seen": 55256416, "step": 25565 }, { "epoch": 4.171288743882545, "grad_norm": 0.03809243068099022, "learning_rate": 0.0009645299572603827, "loss": 0.0958, "num_input_tokens_seen": 55267392, "step": 25570 }, { "epoch": 4.1721044045677, "grad_norm": 0.0994822233915329, "learning_rate": 0.000964503621052483, "loss": 0.2607, "num_input_tokens_seen": 55278304, "step": 25575 }, { "epoch": 4.172920065252855, "grad_norm": 0.3972223103046417, "learning_rate": 0.0009644772754308281, "loss": 0.2883, "num_input_tokens_seen": 55288992, "step": 25580 }, { "epoch": 4.17373572593801, "grad_norm": 0.08899694681167603, "learning_rate": 0.0009644509203959522, "loss": 0.0906, "num_input_tokens_seen": 55299232, "step": 25585 }, { "epoch": 4.174551386623165, "grad_norm": 0.11821216344833374, "learning_rate": 0.0009644245559483891, "loss": 0.0933, "num_input_tokens_seen": 55309600, "step": 25590 }, { "epoch": 4.1753670473083195, "grad_norm": 0.014808449894189835, "learning_rate": 0.0009643981820886731, "loss": 0.1414, "num_input_tokens_seen": 55320032, "step": 25595 }, { "epoch": 4.176182707993474, "grad_norm": 0.032600466161966324, "learning_rate": 0.0009643717988173389, "loss": 0.1385, "num_input_tokens_seen": 55327904, "step": 25600 }, { "epoch": 4.17699836867863, "grad_norm": 0.1723560392856598, "learning_rate": 0.0009643454061349211, "loss": 0.2089, "num_input_tokens_seen": 55337824, "step": 25605 }, { "epoch": 4.177814029363785, "grad_norm": 0.012177304364740849, "learning_rate": 0.0009643190040419545, "loss": 0.0925, "num_input_tokens_seen": 55349536, "step": 25610 }, { "epoch": 4.17862969004894, "grad_norm": 0.05429260432720184, "learning_rate": 0.0009642925925389743, "loss": 0.2556, "num_input_tokens_seen": 55360672, "step": 25615 }, { "epoch": 4.1794453507340945, "grad_norm": 0.05302607640624046, "learning_rate": 0.0009642661716265156, "loss": 0.0524, "num_input_tokens_seen": 55370656, "step": 25620 }, { "epoch": 4.180261011419249, "grad_norm": 0.07791854441165924, "learning_rate": 0.0009642397413051142, "loss": 0.1616, "num_input_tokens_seen": 55380704, "step": 25625 }, { "epoch": 4.181076672104404, "grad_norm": 0.03771020844578743, "learning_rate": 0.0009642133015753054, "loss": 0.121, "num_input_tokens_seen": 55393024, "step": 25630 }, { "epoch": 4.18189233278956, "grad_norm": 0.06244561821222305, "learning_rate": 0.0009641868524376252, "loss": 0.078, "num_input_tokens_seen": 55404736, "step": 25635 }, { "epoch": 4.182707993474715, "grad_norm": 0.15027554333209991, "learning_rate": 0.0009641603938926093, "loss": 0.067, "num_input_tokens_seen": 55415072, "step": 25640 }, { "epoch": 4.1835236541598695, "grad_norm": 0.03861184045672417, "learning_rate": 0.0009641339259407946, "loss": 0.0421, "num_input_tokens_seen": 55425888, "step": 25645 }, { "epoch": 4.184339314845024, "grad_norm": 0.015195484273135662, "learning_rate": 0.0009641074485827168, "loss": 0.0822, "num_input_tokens_seen": 55436224, "step": 25650 }, { "epoch": 4.185154975530179, "grad_norm": 0.08453787118196487, "learning_rate": 0.0009640809618189129, "loss": 0.16, "num_input_tokens_seen": 55447392, "step": 25655 }, { "epoch": 4.185970636215335, "grad_norm": 0.16250330209732056, "learning_rate": 0.0009640544656499197, "loss": 0.1534, "num_input_tokens_seen": 55457472, "step": 25660 }, { "epoch": 4.18678629690049, "grad_norm": 0.0841694101691246, "learning_rate": 0.0009640279600762738, "loss": 0.1424, "num_input_tokens_seen": 55468448, "step": 25665 }, { "epoch": 4.1876019575856445, "grad_norm": 0.020344849675893784, "learning_rate": 0.0009640014450985129, "loss": 0.0319, "num_input_tokens_seen": 55478016, "step": 25670 }, { "epoch": 4.188417618270799, "grad_norm": 0.062538743019104, "learning_rate": 0.0009639749207171739, "loss": 0.0769, "num_input_tokens_seen": 55488672, "step": 25675 }, { "epoch": 4.189233278955954, "grad_norm": 0.04574638605117798, "learning_rate": 0.0009639483869327946, "loss": 0.0249, "num_input_tokens_seen": 55499232, "step": 25680 }, { "epoch": 4.190048939641109, "grad_norm": 0.03264615312218666, "learning_rate": 0.0009639218437459125, "loss": 0.0221, "num_input_tokens_seen": 55509120, "step": 25685 }, { "epoch": 4.190864600326265, "grad_norm": 0.1373731940984726, "learning_rate": 0.000963895291157066, "loss": 0.2198, "num_input_tokens_seen": 55520544, "step": 25690 }, { "epoch": 4.191680261011419, "grad_norm": 0.008453291840851307, "learning_rate": 0.0009638687291667927, "loss": 0.0718, "num_input_tokens_seen": 55531136, "step": 25695 }, { "epoch": 4.192495921696574, "grad_norm": 0.17602114379405975, "learning_rate": 0.0009638421577756313, "loss": 0.0732, "num_input_tokens_seen": 55541440, "step": 25700 }, { "epoch": 4.193311582381729, "grad_norm": 0.25807738304138184, "learning_rate": 0.0009638155769841201, "loss": 0.1683, "num_input_tokens_seen": 55552992, "step": 25705 }, { "epoch": 4.194127243066884, "grad_norm": 0.10710060596466064, "learning_rate": 0.0009637889867927978, "loss": 0.1119, "num_input_tokens_seen": 55564000, "step": 25710 }, { "epoch": 4.19494290375204, "grad_norm": 0.013491766527295113, "learning_rate": 0.0009637623872022034, "loss": 0.0564, "num_input_tokens_seen": 55574880, "step": 25715 }, { "epoch": 4.195758564437194, "grad_norm": 0.1762889325618744, "learning_rate": 0.0009637357782128758, "loss": 0.1068, "num_input_tokens_seen": 55585920, "step": 25720 }, { "epoch": 4.196574225122349, "grad_norm": 0.014259211719036102, "learning_rate": 0.0009637091598253544, "loss": 0.1228, "num_input_tokens_seen": 55595456, "step": 25725 }, { "epoch": 4.197389885807504, "grad_norm": 0.030640989542007446, "learning_rate": 0.0009636825320401787, "loss": 0.1722, "num_input_tokens_seen": 55606592, "step": 25730 }, { "epoch": 4.198205546492659, "grad_norm": 0.03938770666718483, "learning_rate": 0.0009636558948578882, "loss": 0.0333, "num_input_tokens_seen": 55617184, "step": 25735 }, { "epoch": 4.199021207177814, "grad_norm": 0.09662085026502609, "learning_rate": 0.0009636292482790229, "loss": 0.0808, "num_input_tokens_seen": 55627584, "step": 25740 }, { "epoch": 4.199836867862969, "grad_norm": 0.09255876392126083, "learning_rate": 0.0009636025923041227, "loss": 0.1373, "num_input_tokens_seen": 55638336, "step": 25745 }, { "epoch": 4.200652528548124, "grad_norm": 0.21851196885108948, "learning_rate": 0.0009635759269337276, "loss": 0.1107, "num_input_tokens_seen": 55648480, "step": 25750 }, { "epoch": 4.201468189233279, "grad_norm": 0.22205887734889984, "learning_rate": 0.0009635492521683785, "loss": 0.0723, "num_input_tokens_seen": 55658368, "step": 25755 }, { "epoch": 4.202283849918434, "grad_norm": 0.00717106182128191, "learning_rate": 0.0009635225680086157, "loss": 0.1347, "num_input_tokens_seen": 55669344, "step": 25760 }, { "epoch": 4.203099510603589, "grad_norm": 0.06024621054530144, "learning_rate": 0.00096349587445498, "loss": 0.1066, "num_input_tokens_seen": 55680832, "step": 25765 }, { "epoch": 4.2039151712887435, "grad_norm": 0.00822582095861435, "learning_rate": 0.0009634691715080124, "loss": 0.0453, "num_input_tokens_seen": 55692416, "step": 25770 }, { "epoch": 4.204730831973899, "grad_norm": 0.033295344561338425, "learning_rate": 0.0009634424591682542, "loss": 0.2381, "num_input_tokens_seen": 55702752, "step": 25775 }, { "epoch": 4.205546492659054, "grad_norm": 0.00885600782930851, "learning_rate": 0.0009634157374362466, "loss": 0.1732, "num_input_tokens_seen": 55715680, "step": 25780 }, { "epoch": 4.206362153344209, "grad_norm": 0.037626128643751144, "learning_rate": 0.0009633890063125313, "loss": 0.1091, "num_input_tokens_seen": 55727008, "step": 25785 }, { "epoch": 4.207177814029364, "grad_norm": 0.01951495371758938, "learning_rate": 0.0009633622657976498, "loss": 0.0224, "num_input_tokens_seen": 55738272, "step": 25790 }, { "epoch": 4.2079934747145185, "grad_norm": 0.01373792253434658, "learning_rate": 0.0009633355158921441, "loss": 0.0835, "num_input_tokens_seen": 55748416, "step": 25795 }, { "epoch": 4.208809135399674, "grad_norm": 0.09148267656564713, "learning_rate": 0.0009633087565965564, "loss": 0.0556, "num_input_tokens_seen": 55758336, "step": 25800 }, { "epoch": 4.209624796084829, "grad_norm": 0.10780200362205505, "learning_rate": 0.0009632819879114291, "loss": 0.0759, "num_input_tokens_seen": 55768032, "step": 25805 }, { "epoch": 4.210440456769984, "grad_norm": 0.19272269308567047, "learning_rate": 0.0009632552098373045, "loss": 0.0823, "num_input_tokens_seen": 55777728, "step": 25810 }, { "epoch": 4.211256117455139, "grad_norm": 0.0974721908569336, "learning_rate": 0.0009632284223747255, "loss": 0.1946, "num_input_tokens_seen": 55787808, "step": 25815 }, { "epoch": 4.212071778140293, "grad_norm": 0.01590495929121971, "learning_rate": 0.0009632016255242348, "loss": 0.1211, "num_input_tokens_seen": 55798528, "step": 25820 }, { "epoch": 4.212887438825448, "grad_norm": 0.0683855265378952, "learning_rate": 0.0009631748192863756, "loss": 0.0979, "num_input_tokens_seen": 55807552, "step": 25825 }, { "epoch": 4.213703099510604, "grad_norm": 0.030019963160157204, "learning_rate": 0.0009631480036616911, "loss": 0.1347, "num_input_tokens_seen": 55819296, "step": 25830 }, { "epoch": 4.214518760195759, "grad_norm": 0.16835179924964905, "learning_rate": 0.0009631211786507248, "loss": 0.3311, "num_input_tokens_seen": 55830048, "step": 25835 }, { "epoch": 4.215334420880914, "grad_norm": 0.2539372146129608, "learning_rate": 0.0009630943442540202, "loss": 0.1926, "num_input_tokens_seen": 55841408, "step": 25840 }, { "epoch": 4.216150081566068, "grad_norm": 0.023150641471147537, "learning_rate": 0.0009630675004721212, "loss": 0.0445, "num_input_tokens_seen": 55852160, "step": 25845 }, { "epoch": 4.216965742251223, "grad_norm": 0.18378613889217377, "learning_rate": 0.000963040647305572, "loss": 0.0621, "num_input_tokens_seen": 55861984, "step": 25850 }, { "epoch": 4.217781402936378, "grad_norm": 0.0326051265001297, "learning_rate": 0.0009630137847549166, "loss": 0.0805, "num_input_tokens_seen": 55872800, "step": 25855 }, { "epoch": 4.218597063621534, "grad_norm": 0.030857285484671593, "learning_rate": 0.0009629869128206997, "loss": 0.0333, "num_input_tokens_seen": 55884512, "step": 25860 }, { "epoch": 4.219412724306689, "grad_norm": 0.07170385867357254, "learning_rate": 0.0009629600315034652, "loss": 0.0389, "num_input_tokens_seen": 55897024, "step": 25865 }, { "epoch": 4.220228384991843, "grad_norm": 0.020618334412574768, "learning_rate": 0.0009629331408037588, "loss": 0.1663, "num_input_tokens_seen": 55907520, "step": 25870 }, { "epoch": 4.221044045676998, "grad_norm": 0.10644153505563736, "learning_rate": 0.0009629062407221248, "loss": 0.0783, "num_input_tokens_seen": 55917280, "step": 25875 }, { "epoch": 4.221859706362153, "grad_norm": 0.04173273965716362, "learning_rate": 0.0009628793312591086, "loss": 0.0846, "num_input_tokens_seen": 55928832, "step": 25880 }, { "epoch": 4.222675367047309, "grad_norm": 0.06491757184267044, "learning_rate": 0.0009628524124152555, "loss": 0.1031, "num_input_tokens_seen": 55939904, "step": 25885 }, { "epoch": 4.2234910277324635, "grad_norm": 0.009601933881640434, "learning_rate": 0.0009628254841911113, "loss": 0.0698, "num_input_tokens_seen": 55951968, "step": 25890 }, { "epoch": 4.224306688417618, "grad_norm": 0.02336297743022442, "learning_rate": 0.0009627985465872214, "loss": 0.0163, "num_input_tokens_seen": 55962240, "step": 25895 }, { "epoch": 4.225122349102773, "grad_norm": 0.0032236254774034023, "learning_rate": 0.0009627715996041319, "loss": 0.06, "num_input_tokens_seen": 55973216, "step": 25900 }, { "epoch": 4.225938009787928, "grad_norm": 0.006209230981767178, "learning_rate": 0.0009627446432423888, "loss": 0.0677, "num_input_tokens_seen": 55984544, "step": 25905 }, { "epoch": 4.226753670473083, "grad_norm": 0.010295428335666656, "learning_rate": 0.0009627176775025385, "loss": 0.1451, "num_input_tokens_seen": 55995616, "step": 25910 }, { "epoch": 4.2275693311582385, "grad_norm": 0.007961004041135311, "learning_rate": 0.0009626907023851275, "loss": 0.0774, "num_input_tokens_seen": 56006752, "step": 25915 }, { "epoch": 4.228384991843393, "grad_norm": 0.16366617381572723, "learning_rate": 0.0009626637178907024, "loss": 0.1335, "num_input_tokens_seen": 56017280, "step": 25920 }, { "epoch": 4.229200652528548, "grad_norm": 0.024116847664117813, "learning_rate": 0.0009626367240198101, "loss": 0.1689, "num_input_tokens_seen": 56027904, "step": 25925 }, { "epoch": 4.230016313213703, "grad_norm": 0.11132140457630157, "learning_rate": 0.0009626097207729978, "loss": 0.1548, "num_input_tokens_seen": 56038752, "step": 25930 }, { "epoch": 4.230831973898858, "grad_norm": 0.016673635691404343, "learning_rate": 0.0009625827081508125, "loss": 0.0446, "num_input_tokens_seen": 56048832, "step": 25935 }, { "epoch": 4.231647634584013, "grad_norm": 0.06682579219341278, "learning_rate": 0.000962555686153802, "loss": 0.0731, "num_input_tokens_seen": 56060800, "step": 25940 }, { "epoch": 4.232463295269168, "grad_norm": 0.017317702993750572, "learning_rate": 0.0009625286547825136, "loss": 0.0203, "num_input_tokens_seen": 56071808, "step": 25945 }, { "epoch": 4.233278955954323, "grad_norm": 0.012221962213516235, "learning_rate": 0.0009625016140374952, "loss": 0.0339, "num_input_tokens_seen": 56082336, "step": 25950 }, { "epoch": 4.234094616639478, "grad_norm": 0.07057840377092361, "learning_rate": 0.0009624745639192949, "loss": 0.0544, "num_input_tokens_seen": 56093120, "step": 25955 }, { "epoch": 4.234910277324633, "grad_norm": 0.011993489228188992, "learning_rate": 0.0009624475044284609, "loss": 0.1421, "num_input_tokens_seen": 56104320, "step": 25960 }, { "epoch": 4.235725938009788, "grad_norm": 0.24318675696849823, "learning_rate": 0.0009624204355655416, "loss": 0.0602, "num_input_tokens_seen": 56115136, "step": 25965 }, { "epoch": 4.236541598694943, "grad_norm": 0.21818304061889648, "learning_rate": 0.0009623933573310855, "loss": 0.2945, "num_input_tokens_seen": 56125696, "step": 25970 }, { "epoch": 4.237357259380098, "grad_norm": 0.1993371695280075, "learning_rate": 0.0009623662697256414, "loss": 0.3022, "num_input_tokens_seen": 56136832, "step": 25975 }, { "epoch": 4.238172920065253, "grad_norm": 0.29056215286254883, "learning_rate": 0.0009623391727497584, "loss": 0.1117, "num_input_tokens_seen": 56147296, "step": 25980 }, { "epoch": 4.238988580750408, "grad_norm": 0.04809493198990822, "learning_rate": 0.0009623120664039855, "loss": 0.0876, "num_input_tokens_seen": 56156896, "step": 25985 }, { "epoch": 4.239804241435563, "grad_norm": 0.01413186639547348, "learning_rate": 0.000962284950688872, "loss": 0.1441, "num_input_tokens_seen": 56166720, "step": 25990 }, { "epoch": 4.240619902120717, "grad_norm": 0.1408049762248993, "learning_rate": 0.0009622578256049675, "loss": 0.1734, "num_input_tokens_seen": 56177888, "step": 25995 }, { "epoch": 4.241435562805873, "grad_norm": 0.09817873686552048, "learning_rate": 0.0009622306911528219, "loss": 0.0329, "num_input_tokens_seen": 56188160, "step": 26000 }, { "epoch": 4.242251223491028, "grad_norm": 0.02216365560889244, "learning_rate": 0.0009622035473329848, "loss": 0.1569, "num_input_tokens_seen": 56198688, "step": 26005 }, { "epoch": 4.243066884176183, "grad_norm": 0.08225827664136887, "learning_rate": 0.0009621763941460067, "loss": 0.0653, "num_input_tokens_seen": 56210144, "step": 26010 }, { "epoch": 4.2438825448613375, "grad_norm": 0.035414330661296844, "learning_rate": 0.0009621492315924375, "loss": 0.1642, "num_input_tokens_seen": 56220928, "step": 26015 }, { "epoch": 4.244698205546492, "grad_norm": 0.22662504017353058, "learning_rate": 0.0009621220596728278, "loss": 0.1058, "num_input_tokens_seen": 56232032, "step": 26020 }, { "epoch": 4.245513866231648, "grad_norm": 0.016972634941339493, "learning_rate": 0.0009620948783877285, "loss": 0.118, "num_input_tokens_seen": 56242624, "step": 26025 }, { "epoch": 4.246329526916803, "grad_norm": 0.07601569592952728, "learning_rate": 0.0009620676877376902, "loss": 0.0433, "num_input_tokens_seen": 56253600, "step": 26030 }, { "epoch": 4.247145187601958, "grad_norm": 0.05941057205200195, "learning_rate": 0.000962040487723264, "loss": 0.1097, "num_input_tokens_seen": 56264992, "step": 26035 }, { "epoch": 4.2479608482871125, "grad_norm": 0.09792166948318481, "learning_rate": 0.0009620132783450011, "loss": 0.0593, "num_input_tokens_seen": 56275232, "step": 26040 }, { "epoch": 4.248776508972267, "grad_norm": 0.05090722069144249, "learning_rate": 0.0009619860596034531, "loss": 0.1732, "num_input_tokens_seen": 56287776, "step": 26045 }, { "epoch": 4.249592169657422, "grad_norm": 0.021216444671154022, "learning_rate": 0.0009619588314991716, "loss": 0.0687, "num_input_tokens_seen": 56298784, "step": 26050 }, { "epoch": 4.250407830342578, "grad_norm": 0.15228860080242157, "learning_rate": 0.0009619315940327082, "loss": 0.1041, "num_input_tokens_seen": 56309088, "step": 26055 }, { "epoch": 4.251223491027733, "grad_norm": 0.02385716140270233, "learning_rate": 0.0009619043472046151, "loss": 0.1266, "num_input_tokens_seen": 56320832, "step": 26060 }, { "epoch": 4.2520391517128875, "grad_norm": 0.01335175335407257, "learning_rate": 0.0009618770910154444, "loss": 0.1649, "num_input_tokens_seen": 56331360, "step": 26065 }, { "epoch": 4.252854812398042, "grad_norm": 0.07662574201822281, "learning_rate": 0.0009618498254657486, "loss": 0.0968, "num_input_tokens_seen": 56342944, "step": 26070 }, { "epoch": 4.253670473083197, "grad_norm": 0.10685458034276962, "learning_rate": 0.00096182255055608, "loss": 0.0863, "num_input_tokens_seen": 56354432, "step": 26075 }, { "epoch": 4.254486133768353, "grad_norm": 0.2515588104724884, "learning_rate": 0.0009617952662869918, "loss": 0.0752, "num_input_tokens_seen": 56365696, "step": 26080 }, { "epoch": 4.255301794453508, "grad_norm": 0.016076816245913506, "learning_rate": 0.0009617679726590366, "loss": 0.1153, "num_input_tokens_seen": 56377376, "step": 26085 }, { "epoch": 4.2561174551386625, "grad_norm": 0.048764199018478394, "learning_rate": 0.0009617406696727676, "loss": 0.1156, "num_input_tokens_seen": 56388064, "step": 26090 }, { "epoch": 4.256933115823817, "grad_norm": 0.16464506089687347, "learning_rate": 0.0009617133573287382, "loss": 0.1028, "num_input_tokens_seen": 56399488, "step": 26095 }, { "epoch": 4.257748776508972, "grad_norm": 0.08378614485263824, "learning_rate": 0.0009616860356275019, "loss": 0.1784, "num_input_tokens_seen": 56411104, "step": 26100 }, { "epoch": 4.258564437194127, "grad_norm": 0.09539467096328735, "learning_rate": 0.0009616587045696124, "loss": 0.0475, "num_input_tokens_seen": 56422496, "step": 26105 }, { "epoch": 4.259380097879283, "grad_norm": 0.02436433918774128, "learning_rate": 0.0009616313641556235, "loss": 0.0608, "num_input_tokens_seen": 56432512, "step": 26110 }, { "epoch": 4.260195758564437, "grad_norm": 0.0581025667488575, "learning_rate": 0.0009616040143860896, "loss": 0.0504, "num_input_tokens_seen": 56442816, "step": 26115 }, { "epoch": 4.261011419249592, "grad_norm": 0.02602909319102764, "learning_rate": 0.0009615766552615645, "loss": 0.0794, "num_input_tokens_seen": 56454080, "step": 26120 }, { "epoch": 4.261827079934747, "grad_norm": 0.1946924775838852, "learning_rate": 0.0009615492867826032, "loss": 0.0812, "num_input_tokens_seen": 56464352, "step": 26125 }, { "epoch": 4.262642740619902, "grad_norm": 0.1757528930902481, "learning_rate": 0.00096152190894976, "loss": 0.1193, "num_input_tokens_seen": 56476416, "step": 26130 }, { "epoch": 4.263458401305057, "grad_norm": 0.21124711632728577, "learning_rate": 0.0009614945217635897, "loss": 0.0683, "num_input_tokens_seen": 56487360, "step": 26135 }, { "epoch": 4.264274061990212, "grad_norm": 0.17990908026695251, "learning_rate": 0.0009614671252246476, "loss": 0.1003, "num_input_tokens_seen": 56499008, "step": 26140 }, { "epoch": 4.265089722675367, "grad_norm": 0.1713842749595642, "learning_rate": 0.0009614397193334887, "loss": 0.0782, "num_input_tokens_seen": 56511040, "step": 26145 }, { "epoch": 4.265905383360522, "grad_norm": 0.27467039227485657, "learning_rate": 0.0009614123040906686, "loss": 0.1319, "num_input_tokens_seen": 56522240, "step": 26150 }, { "epoch": 4.266721044045677, "grad_norm": 0.00788316410034895, "learning_rate": 0.0009613848794967428, "loss": 0.1168, "num_input_tokens_seen": 56532960, "step": 26155 }, { "epoch": 4.267536704730832, "grad_norm": 0.18166938424110413, "learning_rate": 0.0009613574455522671, "loss": 0.2217, "num_input_tokens_seen": 56543488, "step": 26160 }, { "epoch": 4.268352365415987, "grad_norm": 0.16838808357715607, "learning_rate": 0.0009613300022577974, "loss": 0.0748, "num_input_tokens_seen": 56553536, "step": 26165 }, { "epoch": 4.269168026101142, "grad_norm": 0.028545338660478592, "learning_rate": 0.00096130254961389, "loss": 0.0584, "num_input_tokens_seen": 56563648, "step": 26170 }, { "epoch": 4.269983686786297, "grad_norm": 0.01613200642168522, "learning_rate": 0.0009612750876211014, "loss": 0.1161, "num_input_tokens_seen": 56575136, "step": 26175 }, { "epoch": 4.270799347471452, "grad_norm": 0.10914766043424606, "learning_rate": 0.0009612476162799878, "loss": 0.0396, "num_input_tokens_seen": 56586720, "step": 26180 }, { "epoch": 4.271615008156607, "grad_norm": 0.24682168662548065, "learning_rate": 0.0009612201355911061, "loss": 0.1341, "num_input_tokens_seen": 56597984, "step": 26185 }, { "epoch": 4.2724306688417615, "grad_norm": 0.004787517245858908, "learning_rate": 0.0009611926455550135, "loss": 0.0284, "num_input_tokens_seen": 56608736, "step": 26190 }, { "epoch": 4.273246329526917, "grad_norm": 0.024321412667632103, "learning_rate": 0.0009611651461722666, "loss": 0.1023, "num_input_tokens_seen": 56620128, "step": 26195 }, { "epoch": 4.274061990212072, "grad_norm": 0.014418189413845539, "learning_rate": 0.0009611376374434231, "loss": 0.1351, "num_input_tokens_seen": 56631008, "step": 26200 }, { "epoch": 4.274877650897227, "grad_norm": 0.021421290934085846, "learning_rate": 0.0009611101193690403, "loss": 0.1299, "num_input_tokens_seen": 56642112, "step": 26205 }, { "epoch": 4.275693311582382, "grad_norm": 0.2636556625366211, "learning_rate": 0.0009610825919496761, "loss": 0.0863, "num_input_tokens_seen": 56652192, "step": 26210 }, { "epoch": 4.2765089722675365, "grad_norm": 0.09782522916793823, "learning_rate": 0.0009610550551858881, "loss": 0.058, "num_input_tokens_seen": 56663488, "step": 26215 }, { "epoch": 4.277324632952691, "grad_norm": 0.019692903384566307, "learning_rate": 0.0009610275090782347, "loss": 0.0179, "num_input_tokens_seen": 56673408, "step": 26220 }, { "epoch": 4.278140293637847, "grad_norm": 0.19901689887046814, "learning_rate": 0.0009609999536272738, "loss": 0.1461, "num_input_tokens_seen": 56683968, "step": 26225 }, { "epoch": 4.278955954323002, "grad_norm": 0.18883618712425232, "learning_rate": 0.0009609723888335641, "loss": 0.2123, "num_input_tokens_seen": 56694016, "step": 26230 }, { "epoch": 4.279771615008157, "grad_norm": 0.06648790836334229, "learning_rate": 0.0009609448146976642, "loss": 0.1283, "num_input_tokens_seen": 56704096, "step": 26235 }, { "epoch": 4.280587275693311, "grad_norm": 0.07960224896669388, "learning_rate": 0.0009609172312201328, "loss": 0.2726, "num_input_tokens_seen": 56712768, "step": 26240 }, { "epoch": 4.281402936378466, "grad_norm": 0.04163384810090065, "learning_rate": 0.000960889638401529, "loss": 0.0597, "num_input_tokens_seen": 56723776, "step": 26245 }, { "epoch": 4.282218597063622, "grad_norm": 0.10396461188793182, "learning_rate": 0.0009608620362424121, "loss": 0.0566, "num_input_tokens_seen": 56733696, "step": 26250 }, { "epoch": 4.283034257748777, "grad_norm": 0.024622034281492233, "learning_rate": 0.0009608344247433412, "loss": 0.1117, "num_input_tokens_seen": 56745216, "step": 26255 }, { "epoch": 4.283849918433932, "grad_norm": 0.16428062319755554, "learning_rate": 0.0009608068039048763, "loss": 0.1339, "num_input_tokens_seen": 56754656, "step": 26260 }, { "epoch": 4.284665579119086, "grad_norm": 0.05322808027267456, "learning_rate": 0.0009607791737275769, "loss": 0.036, "num_input_tokens_seen": 56765408, "step": 26265 }, { "epoch": 4.285481239804241, "grad_norm": 0.10171199589967728, "learning_rate": 0.0009607515342120028, "loss": 0.0805, "num_input_tokens_seen": 56776384, "step": 26270 }, { "epoch": 4.286296900489396, "grad_norm": 0.053133487701416016, "learning_rate": 0.0009607238853587144, "loss": 0.0336, "num_input_tokens_seen": 56788320, "step": 26275 }, { "epoch": 4.287112561174552, "grad_norm": 0.09317631274461746, "learning_rate": 0.0009606962271682722, "loss": 0.0892, "num_input_tokens_seen": 56799552, "step": 26280 }, { "epoch": 4.287928221859707, "grad_norm": 0.09496995061635971, "learning_rate": 0.0009606685596412364, "loss": 0.1756, "num_input_tokens_seen": 56810048, "step": 26285 }, { "epoch": 4.288743882544861, "grad_norm": 0.17305903136730194, "learning_rate": 0.0009606408827781679, "loss": 0.0797, "num_input_tokens_seen": 56820736, "step": 26290 }, { "epoch": 4.289559543230016, "grad_norm": 0.12145748734474182, "learning_rate": 0.0009606131965796274, "loss": 0.0712, "num_input_tokens_seen": 56831808, "step": 26295 }, { "epoch": 4.290375203915171, "grad_norm": 0.015286357142031193, "learning_rate": 0.0009605855010461761, "loss": 0.1748, "num_input_tokens_seen": 56842816, "step": 26300 }, { "epoch": 4.291190864600326, "grad_norm": 0.1779191792011261, "learning_rate": 0.0009605577961783756, "loss": 0.0942, "num_input_tokens_seen": 56853440, "step": 26305 }, { "epoch": 4.2920065252854815, "grad_norm": 0.16392691433429718, "learning_rate": 0.0009605300819767869, "loss": 0.1803, "num_input_tokens_seen": 56864192, "step": 26310 }, { "epoch": 4.292822185970636, "grad_norm": 0.027073819190263748, "learning_rate": 0.000960502358441972, "loss": 0.0244, "num_input_tokens_seen": 56872800, "step": 26315 }, { "epoch": 4.293637846655791, "grad_norm": 0.09286334365606308, "learning_rate": 0.0009604746255744925, "loss": 0.0476, "num_input_tokens_seen": 56882688, "step": 26320 }, { "epoch": 4.294453507340946, "grad_norm": 0.1313088983297348, "learning_rate": 0.0009604468833749105, "loss": 0.1409, "num_input_tokens_seen": 56893152, "step": 26325 }, { "epoch": 4.295269168026101, "grad_norm": 0.20351779460906982, "learning_rate": 0.0009604191318437885, "loss": 0.1661, "num_input_tokens_seen": 56904128, "step": 26330 }, { "epoch": 4.2960848287112565, "grad_norm": 0.056847669184207916, "learning_rate": 0.0009603913709816886, "loss": 0.1283, "num_input_tokens_seen": 56915136, "step": 26335 }, { "epoch": 4.296900489396411, "grad_norm": 0.04496561363339424, "learning_rate": 0.0009603636007891735, "loss": 0.0262, "num_input_tokens_seen": 56926208, "step": 26340 }, { "epoch": 4.297716150081566, "grad_norm": 0.13187801837921143, "learning_rate": 0.0009603358212668061, "loss": 0.1805, "num_input_tokens_seen": 56935520, "step": 26345 }, { "epoch": 4.298531810766721, "grad_norm": 0.01551650557667017, "learning_rate": 0.0009603080324151492, "loss": 0.0794, "num_input_tokens_seen": 56946560, "step": 26350 }, { "epoch": 4.299347471451876, "grad_norm": 0.22867026925086975, "learning_rate": 0.0009602802342347661, "loss": 0.1025, "num_input_tokens_seen": 56957120, "step": 26355 }, { "epoch": 4.300163132137031, "grad_norm": 0.17811556160449982, "learning_rate": 0.0009602524267262203, "loss": 0.1104, "num_input_tokens_seen": 56970368, "step": 26360 }, { "epoch": 4.300978792822186, "grad_norm": 0.09705094248056412, "learning_rate": 0.0009602246098900749, "loss": 0.1768, "num_input_tokens_seen": 56980672, "step": 26365 }, { "epoch": 4.301794453507341, "grad_norm": 0.04232777655124664, "learning_rate": 0.0009601967837268941, "loss": 0.0868, "num_input_tokens_seen": 56991232, "step": 26370 }, { "epoch": 4.302610114192496, "grad_norm": 0.33170196413993835, "learning_rate": 0.0009601689482372417, "loss": 0.1073, "num_input_tokens_seen": 57002112, "step": 26375 }, { "epoch": 4.303425774877651, "grad_norm": 0.03127500042319298, "learning_rate": 0.0009601411034216818, "loss": 0.056, "num_input_tokens_seen": 57011936, "step": 26380 }, { "epoch": 4.304241435562806, "grad_norm": 0.19058465957641602, "learning_rate": 0.0009601132492807787, "loss": 0.0724, "num_input_tokens_seen": 57022432, "step": 26385 }, { "epoch": 4.30505709624796, "grad_norm": 0.016696345061063766, "learning_rate": 0.000960085385815097, "loss": 0.0464, "num_input_tokens_seen": 57032896, "step": 26390 }, { "epoch": 4.305872756933116, "grad_norm": 0.07727955281734467, "learning_rate": 0.0009600575130252012, "loss": 0.0479, "num_input_tokens_seen": 57042880, "step": 26395 }, { "epoch": 4.306688417618271, "grad_norm": 0.17325547337532043, "learning_rate": 0.0009600296309116563, "loss": 0.16, "num_input_tokens_seen": 57053280, "step": 26400 }, { "epoch": 4.307504078303426, "grad_norm": 0.2629067301750183, "learning_rate": 0.0009600017394750274, "loss": 0.1369, "num_input_tokens_seen": 57063456, "step": 26405 }, { "epoch": 4.308319738988581, "grad_norm": 0.1544959545135498, "learning_rate": 0.0009599738387158794, "loss": 0.09, "num_input_tokens_seen": 57074496, "step": 26410 }, { "epoch": 4.309135399673735, "grad_norm": 0.007932339794933796, "learning_rate": 0.0009599459286347783, "loss": 0.0117, "num_input_tokens_seen": 57085696, "step": 26415 }, { "epoch": 4.309951060358891, "grad_norm": 0.008668801747262478, "learning_rate": 0.0009599180092322894, "loss": 0.0259, "num_input_tokens_seen": 57095648, "step": 26420 }, { "epoch": 4.310766721044046, "grad_norm": 0.030321603640913963, "learning_rate": 0.0009598900805089786, "loss": 0.0511, "num_input_tokens_seen": 57107168, "step": 26425 }, { "epoch": 4.311582381729201, "grad_norm": 0.0036541877780109644, "learning_rate": 0.0009598621424654119, "loss": 0.0722, "num_input_tokens_seen": 57118016, "step": 26430 }, { "epoch": 4.3123980424143555, "grad_norm": 0.011160695925354958, "learning_rate": 0.0009598341951021557, "loss": 0.0955, "num_input_tokens_seen": 57129632, "step": 26435 }, { "epoch": 4.31321370309951, "grad_norm": 0.08019030094146729, "learning_rate": 0.0009598062384197759, "loss": 0.1186, "num_input_tokens_seen": 57140576, "step": 26440 }, { "epoch": 4.314029363784665, "grad_norm": 0.025762809440493584, "learning_rate": 0.0009597782724188395, "loss": 0.0847, "num_input_tokens_seen": 57151712, "step": 26445 }, { "epoch": 4.314845024469821, "grad_norm": 0.058879684656858444, "learning_rate": 0.0009597502970999132, "loss": 0.0694, "num_input_tokens_seen": 57162272, "step": 26450 }, { "epoch": 4.315660685154976, "grad_norm": 0.04242071136832237, "learning_rate": 0.0009597223124635639, "loss": 0.014, "num_input_tokens_seen": 57173952, "step": 26455 }, { "epoch": 4.3164763458401305, "grad_norm": 0.07131559401750565, "learning_rate": 0.0009596943185103586, "loss": 0.175, "num_input_tokens_seen": 57184000, "step": 26460 }, { "epoch": 4.317292006525285, "grad_norm": 0.15778687596321106, "learning_rate": 0.0009596663152408648, "loss": 0.1615, "num_input_tokens_seen": 57195264, "step": 26465 }, { "epoch": 4.31810766721044, "grad_norm": 0.02540893293917179, "learning_rate": 0.0009596383026556501, "loss": 0.063, "num_input_tokens_seen": 57205056, "step": 26470 }, { "epoch": 4.318923327895595, "grad_norm": 0.10403016209602356, "learning_rate": 0.000959610280755282, "loss": 0.2099, "num_input_tokens_seen": 57215296, "step": 26475 }, { "epoch": 4.319738988580751, "grad_norm": 0.18612545728683472, "learning_rate": 0.0009595822495403286, "loss": 0.183, "num_input_tokens_seen": 57225632, "step": 26480 }, { "epoch": 4.3205546492659055, "grad_norm": 0.022068077698349953, "learning_rate": 0.0009595542090113579, "loss": 0.0378, "num_input_tokens_seen": 57237504, "step": 26485 }, { "epoch": 4.32137030995106, "grad_norm": 0.12466895580291748, "learning_rate": 0.0009595261591689381, "loss": 0.0562, "num_input_tokens_seen": 57249696, "step": 26490 }, { "epoch": 4.322185970636215, "grad_norm": 0.006654700729995966, "learning_rate": 0.0009594981000136377, "loss": 0.0703, "num_input_tokens_seen": 57259232, "step": 26495 }, { "epoch": 4.32300163132137, "grad_norm": 0.028172895312309265, "learning_rate": 0.0009594700315460254, "loss": 0.0881, "num_input_tokens_seen": 57269408, "step": 26500 }, { "epoch": 4.323817292006526, "grad_norm": 0.013215692713856697, "learning_rate": 0.0009594419537666701, "loss": 0.0691, "num_input_tokens_seen": 57280352, "step": 26505 }, { "epoch": 4.3246329526916805, "grad_norm": 0.03416256979107857, "learning_rate": 0.0009594138666761407, "loss": 0.1872, "num_input_tokens_seen": 57289248, "step": 26510 }, { "epoch": 4.325448613376835, "grad_norm": 0.1410515457391739, "learning_rate": 0.0009593857702750065, "loss": 0.0417, "num_input_tokens_seen": 57298720, "step": 26515 }, { "epoch": 4.32626427406199, "grad_norm": 0.038631804287433624, "learning_rate": 0.0009593576645638369, "loss": 0.0654, "num_input_tokens_seen": 57310016, "step": 26520 }, { "epoch": 4.327079934747145, "grad_norm": 0.07176709920167923, "learning_rate": 0.0009593295495432015, "loss": 0.1958, "num_input_tokens_seen": 57321216, "step": 26525 }, { "epoch": 4.327895595432301, "grad_norm": 0.017286749556660652, "learning_rate": 0.00095930142521367, "loss": 0.0294, "num_input_tokens_seen": 57330240, "step": 26530 }, { "epoch": 4.328711256117455, "grad_norm": 0.005593335721641779, "learning_rate": 0.0009592732915758127, "loss": 0.1743, "num_input_tokens_seen": 57341344, "step": 26535 }, { "epoch": 4.32952691680261, "grad_norm": 0.2700396180152893, "learning_rate": 0.0009592451486301991, "loss": 0.0932, "num_input_tokens_seen": 57352736, "step": 26540 }, { "epoch": 4.330342577487765, "grad_norm": 0.0752822533249855, "learning_rate": 0.0009592169963774004, "loss": 0.055, "num_input_tokens_seen": 57363200, "step": 26545 }, { "epoch": 4.33115823817292, "grad_norm": 0.011770925484597683, "learning_rate": 0.0009591888348179865, "loss": 0.0351, "num_input_tokens_seen": 57373952, "step": 26550 }, { "epoch": 4.331973898858075, "grad_norm": 0.07157646864652634, "learning_rate": 0.0009591606639525283, "loss": 0.0491, "num_input_tokens_seen": 57384000, "step": 26555 }, { "epoch": 4.33278955954323, "grad_norm": 0.03235434740781784, "learning_rate": 0.0009591324837815969, "loss": 0.1104, "num_input_tokens_seen": 57394432, "step": 26560 }, { "epoch": 4.333605220228385, "grad_norm": 0.18330805003643036, "learning_rate": 0.0009591042943057631, "loss": 0.1976, "num_input_tokens_seen": 57405088, "step": 26565 }, { "epoch": 4.33442088091354, "grad_norm": 0.013964567333459854, "learning_rate": 0.0009590760955255985, "loss": 0.035, "num_input_tokens_seen": 57416608, "step": 26570 }, { "epoch": 4.335236541598695, "grad_norm": 0.0749783143401146, "learning_rate": 0.0009590478874416744, "loss": 0.1481, "num_input_tokens_seen": 57427744, "step": 26575 }, { "epoch": 4.33605220228385, "grad_norm": 0.19421903789043427, "learning_rate": 0.0009590196700545626, "loss": 0.2615, "num_input_tokens_seen": 57438112, "step": 26580 }, { "epoch": 4.3368678629690045, "grad_norm": 0.08299185335636139, "learning_rate": 0.0009589914433648347, "loss": 0.0568, "num_input_tokens_seen": 57448896, "step": 26585 }, { "epoch": 4.33768352365416, "grad_norm": 0.11819956451654434, "learning_rate": 0.000958963207373063, "loss": 0.055, "num_input_tokens_seen": 57460160, "step": 26590 }, { "epoch": 4.338499184339315, "grad_norm": 0.05758635699748993, "learning_rate": 0.0009589349620798197, "loss": 0.0679, "num_input_tokens_seen": 57470656, "step": 26595 }, { "epoch": 4.33931484502447, "grad_norm": 0.010048212483525276, "learning_rate": 0.0009589067074856772, "loss": 0.0374, "num_input_tokens_seen": 57481504, "step": 26600 }, { "epoch": 4.340130505709625, "grad_norm": 0.009367452003061771, "learning_rate": 0.0009588784435912082, "loss": 0.0595, "num_input_tokens_seen": 57491520, "step": 26605 }, { "epoch": 4.3409461663947795, "grad_norm": 0.21798332035541534, "learning_rate": 0.0009588501703969852, "loss": 0.1723, "num_input_tokens_seen": 57501344, "step": 26610 }, { "epoch": 4.341761827079935, "grad_norm": 0.013095865026116371, "learning_rate": 0.0009588218879035815, "loss": 0.0281, "num_input_tokens_seen": 57513120, "step": 26615 }, { "epoch": 4.34257748776509, "grad_norm": 0.04659920558333397, "learning_rate": 0.0009587935961115701, "loss": 0.0643, "num_input_tokens_seen": 57523584, "step": 26620 }, { "epoch": 4.343393148450245, "grad_norm": 0.4134010374546051, "learning_rate": 0.0009587652950215247, "loss": 0.2004, "num_input_tokens_seen": 57534080, "step": 26625 }, { "epoch": 4.3442088091354, "grad_norm": 0.022348370403051376, "learning_rate": 0.0009587369846340184, "loss": 0.1715, "num_input_tokens_seen": 57545632, "step": 26630 }, { "epoch": 4.3450244698205545, "grad_norm": 0.10105215013027191, "learning_rate": 0.000958708664949625, "loss": 0.0796, "num_input_tokens_seen": 57556832, "step": 26635 }, { "epoch": 4.345840130505709, "grad_norm": 0.023678358644247055, "learning_rate": 0.0009586803359689189, "loss": 0.128, "num_input_tokens_seen": 57567552, "step": 26640 }, { "epoch": 4.346655791190865, "grad_norm": 0.019319554790854454, "learning_rate": 0.0009586519976924739, "loss": 0.0737, "num_input_tokens_seen": 57579424, "step": 26645 }, { "epoch": 4.34747145187602, "grad_norm": 0.24461629986763, "learning_rate": 0.0009586236501208642, "loss": 0.0585, "num_input_tokens_seen": 57590400, "step": 26650 }, { "epoch": 4.348287112561175, "grad_norm": 0.0134728467091918, "learning_rate": 0.0009585952932546644, "loss": 0.0408, "num_input_tokens_seen": 57599648, "step": 26655 }, { "epoch": 4.349102773246329, "grad_norm": 0.08431003242731094, "learning_rate": 0.0009585669270944493, "loss": 0.0275, "num_input_tokens_seen": 57611168, "step": 26660 }, { "epoch": 4.349918433931484, "grad_norm": 0.19880840182304382, "learning_rate": 0.0009585385516407936, "loss": 0.1469, "num_input_tokens_seen": 57622048, "step": 26665 }, { "epoch": 4.350734094616639, "grad_norm": 0.01987231895327568, "learning_rate": 0.0009585101668942726, "loss": 0.0594, "num_input_tokens_seen": 57632832, "step": 26670 }, { "epoch": 4.351549755301795, "grad_norm": 0.025066286325454712, "learning_rate": 0.0009584817728554613, "loss": 0.1467, "num_input_tokens_seen": 57644480, "step": 26675 }, { "epoch": 4.35236541598695, "grad_norm": 0.04371904954314232, "learning_rate": 0.0009584533695249353, "loss": 0.1442, "num_input_tokens_seen": 57654464, "step": 26680 }, { "epoch": 4.353181076672104, "grad_norm": 0.008882477879524231, "learning_rate": 0.0009584249569032701, "loss": 0.1217, "num_input_tokens_seen": 57664672, "step": 26685 }, { "epoch": 4.353996737357259, "grad_norm": 0.02217107079923153, "learning_rate": 0.0009583965349910417, "loss": 0.1414, "num_input_tokens_seen": 57675360, "step": 26690 }, { "epoch": 4.354812398042414, "grad_norm": 0.05907650664448738, "learning_rate": 0.0009583681037888259, "loss": 0.1975, "num_input_tokens_seen": 57686624, "step": 26695 }, { "epoch": 4.35562805872757, "grad_norm": 0.07159577310085297, "learning_rate": 0.0009583396632971991, "loss": 0.0371, "num_input_tokens_seen": 57698048, "step": 26700 }, { "epoch": 4.356443719412725, "grad_norm": 0.12222054600715637, "learning_rate": 0.0009583112135167376, "loss": 0.3591, "num_input_tokens_seen": 57709216, "step": 26705 }, { "epoch": 4.357259380097879, "grad_norm": 0.016294648870825768, "learning_rate": 0.0009582827544480177, "loss": 0.1798, "num_input_tokens_seen": 57719488, "step": 26710 }, { "epoch": 4.358075040783034, "grad_norm": 0.02656223438680172, "learning_rate": 0.0009582542860916166, "loss": 0.0431, "num_input_tokens_seen": 57731040, "step": 26715 }, { "epoch": 4.358890701468189, "grad_norm": 0.21412135660648346, "learning_rate": 0.000958225808448111, "loss": 0.0866, "num_input_tokens_seen": 57741728, "step": 26720 }, { "epoch": 4.359706362153344, "grad_norm": 0.06581278145313263, "learning_rate": 0.0009581973215180782, "loss": 0.1054, "num_input_tokens_seen": 57752128, "step": 26725 }, { "epoch": 4.3605220228384995, "grad_norm": 0.06512191146612167, "learning_rate": 0.0009581688253020952, "loss": 0.142, "num_input_tokens_seen": 57762944, "step": 26730 }, { "epoch": 4.361337683523654, "grad_norm": 0.059873901307582855, "learning_rate": 0.00095814031980074, "loss": 0.0894, "num_input_tokens_seen": 57775296, "step": 26735 }, { "epoch": 4.362153344208809, "grad_norm": 0.2084333449602127, "learning_rate": 0.0009581118050145898, "loss": 0.3417, "num_input_tokens_seen": 57786368, "step": 26740 }, { "epoch": 4.362969004893964, "grad_norm": 0.19248606264591217, "learning_rate": 0.0009580832809442228, "loss": 0.1961, "num_input_tokens_seen": 57797312, "step": 26745 }, { "epoch": 4.363784665579119, "grad_norm": 0.03354114666581154, "learning_rate": 0.000958054747590217, "loss": 0.1853, "num_input_tokens_seen": 57808896, "step": 26750 }, { "epoch": 4.364600326264274, "grad_norm": 0.04537074267864227, "learning_rate": 0.0009580262049531508, "loss": 0.0527, "num_input_tokens_seen": 57819744, "step": 26755 }, { "epoch": 4.365415986949429, "grad_norm": 0.10104655474424362, "learning_rate": 0.0009579976530336023, "loss": 0.0981, "num_input_tokens_seen": 57830944, "step": 26760 }, { "epoch": 4.366231647634584, "grad_norm": 0.07620599120855331, "learning_rate": 0.0009579690918321504, "loss": 0.1167, "num_input_tokens_seen": 57841920, "step": 26765 }, { "epoch": 4.367047308319739, "grad_norm": 0.03413588926196098, "learning_rate": 0.0009579405213493739, "loss": 0.0761, "num_input_tokens_seen": 57853056, "step": 26770 }, { "epoch": 4.367862969004894, "grad_norm": 0.14624758064746857, "learning_rate": 0.0009579119415858518, "loss": 0.1401, "num_input_tokens_seen": 57863552, "step": 26775 }, { "epoch": 4.368678629690049, "grad_norm": 0.042857468128204346, "learning_rate": 0.0009578833525421633, "loss": 0.1191, "num_input_tokens_seen": 57872992, "step": 26780 }, { "epoch": 4.369494290375204, "grad_norm": 0.058174289762973785, "learning_rate": 0.0009578547542188878, "loss": 0.0965, "num_input_tokens_seen": 57884224, "step": 26785 }, { "epoch": 4.370309951060359, "grad_norm": 0.11993542313575745, "learning_rate": 0.0009578261466166049, "loss": 0.104, "num_input_tokens_seen": 57895712, "step": 26790 }, { "epoch": 4.371125611745514, "grad_norm": 0.03129759058356285, "learning_rate": 0.0009577975297358943, "loss": 0.0799, "num_input_tokens_seen": 57907328, "step": 26795 }, { "epoch": 4.371941272430669, "grad_norm": 0.08179029077291489, "learning_rate": 0.0009577689035773359, "loss": 0.1652, "num_input_tokens_seen": 57918464, "step": 26800 }, { "epoch": 4.372756933115824, "grad_norm": 0.08137882500886917, "learning_rate": 0.0009577402681415102, "loss": 0.0689, "num_input_tokens_seen": 57929088, "step": 26805 }, { "epoch": 4.373572593800978, "grad_norm": 0.04663487523794174, "learning_rate": 0.0009577116234289971, "loss": 0.2543, "num_input_tokens_seen": 57941248, "step": 26810 }, { "epoch": 4.374388254486134, "grad_norm": 0.04174930229783058, "learning_rate": 0.0009576829694403772, "loss": 0.0331, "num_input_tokens_seen": 57951968, "step": 26815 }, { "epoch": 4.375203915171289, "grad_norm": 0.21580137312412262, "learning_rate": 0.0009576543061762315, "loss": 0.1422, "num_input_tokens_seen": 57961568, "step": 26820 }, { "epoch": 4.376019575856444, "grad_norm": 0.23943717777729034, "learning_rate": 0.0009576256336371407, "loss": 0.137, "num_input_tokens_seen": 57972832, "step": 26825 }, { "epoch": 4.376835236541599, "grad_norm": 0.061017557978630066, "learning_rate": 0.0009575969518236857, "loss": 0.1647, "num_input_tokens_seen": 57982816, "step": 26830 }, { "epoch": 4.377650897226753, "grad_norm": 0.08061721920967102, "learning_rate": 0.0009575682607364482, "loss": 0.0811, "num_input_tokens_seen": 57994784, "step": 26835 }, { "epoch": 4.378466557911908, "grad_norm": 0.04571057856082916, "learning_rate": 0.0009575395603760095, "loss": 0.0377, "num_input_tokens_seen": 58006208, "step": 26840 }, { "epoch": 4.379282218597064, "grad_norm": 0.02869790978729725, "learning_rate": 0.000957510850742951, "loss": 0.0307, "num_input_tokens_seen": 58016256, "step": 26845 }, { "epoch": 4.380097879282219, "grad_norm": 0.10976076126098633, "learning_rate": 0.0009574821318378547, "loss": 0.134, "num_input_tokens_seen": 58027680, "step": 26850 }, { "epoch": 4.3809135399673735, "grad_norm": 0.06502792984247208, "learning_rate": 0.0009574534036613028, "loss": 0.1922, "num_input_tokens_seen": 58038496, "step": 26855 }, { "epoch": 4.381729200652528, "grad_norm": 0.2433139979839325, "learning_rate": 0.0009574246662138772, "loss": 0.1004, "num_input_tokens_seen": 58047776, "step": 26860 }, { "epoch": 4.382544861337683, "grad_norm": 0.124003104865551, "learning_rate": 0.0009573959194961604, "loss": 0.1118, "num_input_tokens_seen": 58058112, "step": 26865 }, { "epoch": 4.383360522022839, "grad_norm": 0.048630520701408386, "learning_rate": 0.0009573671635087352, "loss": 0.0627, "num_input_tokens_seen": 58068608, "step": 26870 }, { "epoch": 4.384176182707994, "grad_norm": 0.03981039673089981, "learning_rate": 0.0009573383982521841, "loss": 0.0459, "num_input_tokens_seen": 58079680, "step": 26875 }, { "epoch": 4.3849918433931485, "grad_norm": 0.04121852666139603, "learning_rate": 0.0009573096237270903, "loss": 0.0925, "num_input_tokens_seen": 58090176, "step": 26880 }, { "epoch": 4.385807504078303, "grad_norm": 0.2329862415790558, "learning_rate": 0.0009572808399340368, "loss": 0.0928, "num_input_tokens_seen": 58101472, "step": 26885 }, { "epoch": 4.386623164763458, "grad_norm": 0.08368705213069916, "learning_rate": 0.000957252046873607, "loss": 0.0682, "num_input_tokens_seen": 58112576, "step": 26890 }, { "epoch": 4.387438825448613, "grad_norm": 0.0444665402173996, "learning_rate": 0.0009572232445463843, "loss": 0.0524, "num_input_tokens_seen": 58122976, "step": 26895 }, { "epoch": 4.388254486133769, "grad_norm": 0.02356015332043171, "learning_rate": 0.0009571944329529526, "loss": 0.058, "num_input_tokens_seen": 58134368, "step": 26900 }, { "epoch": 4.3890701468189235, "grad_norm": 0.002911847084760666, "learning_rate": 0.0009571656120938956, "loss": 0.0363, "num_input_tokens_seen": 58145312, "step": 26905 }, { "epoch": 4.389885807504078, "grad_norm": 0.24054285883903503, "learning_rate": 0.0009571367819697978, "loss": 0.2026, "num_input_tokens_seen": 58155680, "step": 26910 }, { "epoch": 4.390701468189233, "grad_norm": 0.027164777740836143, "learning_rate": 0.000957107942581243, "loss": 0.0445, "num_input_tokens_seen": 58164960, "step": 26915 }, { "epoch": 4.391517128874388, "grad_norm": 0.08999812602996826, "learning_rate": 0.0009570790939288159, "loss": 0.0515, "num_input_tokens_seen": 58174624, "step": 26920 }, { "epoch": 4.392332789559543, "grad_norm": 0.4166439473628998, "learning_rate": 0.0009570502360131011, "loss": 0.1268, "num_input_tokens_seen": 58185344, "step": 26925 }, { "epoch": 4.3931484502446985, "grad_norm": 0.03616182506084442, "learning_rate": 0.0009570213688346833, "loss": 0.0233, "num_input_tokens_seen": 58196416, "step": 26930 }, { "epoch": 4.393964110929853, "grad_norm": 0.6690056920051575, "learning_rate": 0.000956992492394148, "loss": 0.0621, "num_input_tokens_seen": 58206592, "step": 26935 }, { "epoch": 4.394779771615008, "grad_norm": 0.14354459941387177, "learning_rate": 0.00095696360669208, "loss": 0.1493, "num_input_tokens_seen": 58217568, "step": 26940 }, { "epoch": 4.395595432300163, "grad_norm": 0.32492372393608093, "learning_rate": 0.0009569347117290647, "loss": 0.0811, "num_input_tokens_seen": 58227552, "step": 26945 }, { "epoch": 4.396411092985318, "grad_norm": 0.01738920249044895, "learning_rate": 0.0009569058075056878, "loss": 0.054, "num_input_tokens_seen": 58239040, "step": 26950 }, { "epoch": 4.397226753670473, "grad_norm": 0.011106864549219608, "learning_rate": 0.0009568768940225352, "loss": 0.1509, "num_input_tokens_seen": 58250304, "step": 26955 }, { "epoch": 4.398042414355628, "grad_norm": 0.3137091398239136, "learning_rate": 0.0009568479712801926, "loss": 0.1059, "num_input_tokens_seen": 58262144, "step": 26960 }, { "epoch": 4.398858075040783, "grad_norm": 0.022524043917655945, "learning_rate": 0.0009568190392792464, "loss": 0.0957, "num_input_tokens_seen": 58272736, "step": 26965 }, { "epoch": 4.399673735725938, "grad_norm": 0.03002866730093956, "learning_rate": 0.000956790098020283, "loss": 0.0216, "num_input_tokens_seen": 58283040, "step": 26970 }, { "epoch": 4.400489396411093, "grad_norm": 0.17999090254306793, "learning_rate": 0.0009567611475038886, "loss": 0.1068, "num_input_tokens_seen": 58294432, "step": 26975 }, { "epoch": 4.401305057096248, "grad_norm": 0.0031778549309819937, "learning_rate": 0.0009567321877306501, "loss": 0.0222, "num_input_tokens_seen": 58303328, "step": 26980 }, { "epoch": 4.402120717781403, "grad_norm": 0.0474659726023674, "learning_rate": 0.0009567032187011546, "loss": 0.0455, "num_input_tokens_seen": 58314752, "step": 26985 }, { "epoch": 4.402936378466558, "grad_norm": 0.06852079182863235, "learning_rate": 0.0009566742404159887, "loss": 0.1312, "num_input_tokens_seen": 58325024, "step": 26990 }, { "epoch": 4.403752039151713, "grad_norm": 0.0092054707929492, "learning_rate": 0.0009566452528757402, "loss": 0.1629, "num_input_tokens_seen": 58334464, "step": 26995 }, { "epoch": 4.404567699836868, "grad_norm": 0.003914583474397659, "learning_rate": 0.0009566162560809963, "loss": 0.0191, "num_input_tokens_seen": 58345312, "step": 27000 }, { "epoch": 4.4053833605220225, "grad_norm": 0.16951356828212738, "learning_rate": 0.0009565872500323447, "loss": 0.0657, "num_input_tokens_seen": 58355424, "step": 27005 }, { "epoch": 4.406199021207178, "grad_norm": 0.0581197664141655, "learning_rate": 0.0009565582347303733, "loss": 0.0428, "num_input_tokens_seen": 58366304, "step": 27010 }, { "epoch": 4.407014681892333, "grad_norm": 0.02101009152829647, "learning_rate": 0.00095652921017567, "loss": 0.061, "num_input_tokens_seen": 58377376, "step": 27015 }, { "epoch": 4.407830342577488, "grad_norm": 0.08173485100269318, "learning_rate": 0.0009565001763688233, "loss": 0.1788, "num_input_tokens_seen": 58387424, "step": 27020 }, { "epoch": 4.408646003262643, "grad_norm": 0.1883484423160553, "learning_rate": 0.0009564711333104213, "loss": 0.1001, "num_input_tokens_seen": 58398880, "step": 27025 }, { "epoch": 4.4094616639477975, "grad_norm": 0.10336413234472275, "learning_rate": 0.0009564420810010526, "loss": 0.1442, "num_input_tokens_seen": 58410464, "step": 27030 }, { "epoch": 4.410277324632952, "grad_norm": 0.03497812896966934, "learning_rate": 0.0009564130194413061, "loss": 0.0291, "num_input_tokens_seen": 58422560, "step": 27035 }, { "epoch": 4.411092985318108, "grad_norm": 0.008337237872183323, "learning_rate": 0.0009563839486317709, "loss": 0.1733, "num_input_tokens_seen": 58432576, "step": 27040 }, { "epoch": 4.411908646003263, "grad_norm": 0.12369904667139053, "learning_rate": 0.000956354868573036, "loss": 0.1701, "num_input_tokens_seen": 58443488, "step": 27045 }, { "epoch": 4.412724306688418, "grad_norm": 0.1232222467660904, "learning_rate": 0.0009563257792656908, "loss": 0.1001, "num_input_tokens_seen": 58454560, "step": 27050 }, { "epoch": 4.4135399673735725, "grad_norm": 0.23827281594276428, "learning_rate": 0.0009562966807103246, "loss": 0.1455, "num_input_tokens_seen": 58465568, "step": 27055 }, { "epoch": 4.414355628058727, "grad_norm": 0.06323964148759842, "learning_rate": 0.0009562675729075274, "loss": 0.037, "num_input_tokens_seen": 58477056, "step": 27060 }, { "epoch": 4.415171288743883, "grad_norm": 0.17134052515029907, "learning_rate": 0.0009562384558578891, "loss": 0.1215, "num_input_tokens_seen": 58487904, "step": 27065 }, { "epoch": 4.415986949429038, "grad_norm": 0.06725971400737762, "learning_rate": 0.0009562093295619996, "loss": 0.2767, "num_input_tokens_seen": 58500384, "step": 27070 }, { "epoch": 4.416802610114193, "grad_norm": 0.014682374894618988, "learning_rate": 0.0009561801940204493, "loss": 0.0501, "num_input_tokens_seen": 58511104, "step": 27075 }, { "epoch": 4.417618270799347, "grad_norm": 0.005113726481795311, "learning_rate": 0.0009561510492338287, "loss": 0.1159, "num_input_tokens_seen": 58522528, "step": 27080 }, { "epoch": 4.418433931484502, "grad_norm": 0.051373984664678574, "learning_rate": 0.0009561218952027286, "loss": 0.1983, "num_input_tokens_seen": 58533184, "step": 27085 }, { "epoch": 4.419249592169657, "grad_norm": 0.09865567088127136, "learning_rate": 0.0009560927319277395, "loss": 0.0901, "num_input_tokens_seen": 58543328, "step": 27090 }, { "epoch": 4.420065252854813, "grad_norm": 0.12814849615097046, "learning_rate": 0.0009560635594094524, "loss": 0.1032, "num_input_tokens_seen": 58554784, "step": 27095 }, { "epoch": 4.420880913539968, "grad_norm": 0.26408764719963074, "learning_rate": 0.000956034377648459, "loss": 0.1338, "num_input_tokens_seen": 58565280, "step": 27100 }, { "epoch": 4.421696574225122, "grad_norm": 0.08768882602453232, "learning_rate": 0.0009560051866453503, "loss": 0.1002, "num_input_tokens_seen": 58575456, "step": 27105 }, { "epoch": 4.422512234910277, "grad_norm": 0.020314859226346016, "learning_rate": 0.000955975986400718, "loss": 0.0671, "num_input_tokens_seen": 58586176, "step": 27110 }, { "epoch": 4.423327895595432, "grad_norm": 0.12767770886421204, "learning_rate": 0.000955946776915154, "loss": 0.1762, "num_input_tokens_seen": 58596864, "step": 27115 }, { "epoch": 4.424143556280587, "grad_norm": 0.009916570968925953, "learning_rate": 0.00095591755818925, "loss": 0.1992, "num_input_tokens_seen": 58607680, "step": 27120 }, { "epoch": 4.424959216965743, "grad_norm": 0.05374537780880928, "learning_rate": 0.0009558883302235984, "loss": 0.1024, "num_input_tokens_seen": 58618400, "step": 27125 }, { "epoch": 4.425774877650897, "grad_norm": 0.03968321159482002, "learning_rate": 0.0009558590930187913, "loss": 0.0513, "num_input_tokens_seen": 58629152, "step": 27130 }, { "epoch": 4.426590538336052, "grad_norm": 0.05687025561928749, "learning_rate": 0.0009558298465754216, "loss": 0.0983, "num_input_tokens_seen": 58639744, "step": 27135 }, { "epoch": 4.427406199021207, "grad_norm": 0.022792354226112366, "learning_rate": 0.0009558005908940816, "loss": 0.0336, "num_input_tokens_seen": 58649536, "step": 27140 }, { "epoch": 4.428221859706362, "grad_norm": 0.1321583092212677, "learning_rate": 0.0009557713259753647, "loss": 0.0354, "num_input_tokens_seen": 58660384, "step": 27145 }, { "epoch": 4.4290375203915175, "grad_norm": 0.022462155669927597, "learning_rate": 0.0009557420518198634, "loss": 0.1426, "num_input_tokens_seen": 58671136, "step": 27150 }, { "epoch": 4.429853181076672, "grad_norm": 0.2902876138687134, "learning_rate": 0.0009557127684281714, "loss": 0.1398, "num_input_tokens_seen": 58683424, "step": 27155 }, { "epoch": 4.430668841761827, "grad_norm": 0.02551012486219406, "learning_rate": 0.000955683475800882, "loss": 0.1193, "num_input_tokens_seen": 58693600, "step": 27160 }, { "epoch": 4.431484502446982, "grad_norm": 0.10253246873617172, "learning_rate": 0.0009556541739385889, "loss": 0.1524, "num_input_tokens_seen": 58704064, "step": 27165 }, { "epoch": 4.432300163132137, "grad_norm": 0.218844935297966, "learning_rate": 0.000955624862841886, "loss": 0.1446, "num_input_tokens_seen": 58713632, "step": 27170 }, { "epoch": 4.433115823817292, "grad_norm": 0.2918366491794586, "learning_rate": 0.0009555955425113672, "loss": 0.1021, "num_input_tokens_seen": 58724352, "step": 27175 }, { "epoch": 4.433931484502447, "grad_norm": 0.06699206680059433, "learning_rate": 0.0009555662129476266, "loss": 0.0543, "num_input_tokens_seen": 58735392, "step": 27180 }, { "epoch": 4.434747145187602, "grad_norm": 0.046586476266384125, "learning_rate": 0.0009555368741512589, "loss": 0.0569, "num_input_tokens_seen": 58744928, "step": 27185 }, { "epoch": 4.435562805872757, "grad_norm": 0.041165731847286224, "learning_rate": 0.0009555075261228586, "loss": 0.0791, "num_input_tokens_seen": 58755520, "step": 27190 }, { "epoch": 4.436378466557912, "grad_norm": 0.013963720761239529, "learning_rate": 0.0009554781688630204, "loss": 0.0346, "num_input_tokens_seen": 58765984, "step": 27195 }, { "epoch": 4.437194127243067, "grad_norm": 0.20940876007080078, "learning_rate": 0.0009554488023723394, "loss": 0.0938, "num_input_tokens_seen": 58777760, "step": 27200 }, { "epoch": 4.438009787928221, "grad_norm": 0.08399229496717453, "learning_rate": 0.0009554194266514105, "loss": 0.0483, "num_input_tokens_seen": 58789152, "step": 27205 }, { "epoch": 4.438825448613377, "grad_norm": 0.013586705550551414, "learning_rate": 0.0009553900417008292, "loss": 0.0297, "num_input_tokens_seen": 58800096, "step": 27210 }, { "epoch": 4.439641109298532, "grad_norm": 0.2480028122663498, "learning_rate": 0.000955360647521191, "loss": 0.0942, "num_input_tokens_seen": 58809472, "step": 27215 }, { "epoch": 4.440456769983687, "grad_norm": 0.019358092918992043, "learning_rate": 0.0009553312441130916, "loss": 0.1742, "num_input_tokens_seen": 58820672, "step": 27220 }, { "epoch": 4.441272430668842, "grad_norm": 0.01376861147582531, "learning_rate": 0.0009553018314771269, "loss": 0.0367, "num_input_tokens_seen": 58831360, "step": 27225 }, { "epoch": 4.442088091353996, "grad_norm": 0.2584722340106964, "learning_rate": 0.0009552724096138931, "loss": 0.0807, "num_input_tokens_seen": 58841664, "step": 27230 }, { "epoch": 4.442903752039152, "grad_norm": 0.0029593873769044876, "learning_rate": 0.0009552429785239863, "loss": 0.1452, "num_input_tokens_seen": 58852736, "step": 27235 }, { "epoch": 4.443719412724307, "grad_norm": 0.2969488501548767, "learning_rate": 0.0009552135382080029, "loss": 0.0455, "num_input_tokens_seen": 58863520, "step": 27240 }, { "epoch": 4.444535073409462, "grad_norm": 0.21504615247249603, "learning_rate": 0.0009551840886665398, "loss": 0.1008, "num_input_tokens_seen": 58874080, "step": 27245 }, { "epoch": 4.445350734094617, "grad_norm": 0.019169187173247337, "learning_rate": 0.0009551546299001938, "loss": 0.0939, "num_input_tokens_seen": 58884864, "step": 27250 }, { "epoch": 4.446166394779771, "grad_norm": 0.05577266216278076, "learning_rate": 0.0009551251619095616, "loss": 0.2479, "num_input_tokens_seen": 58895904, "step": 27255 }, { "epoch": 4.446982055464926, "grad_norm": 0.06436615437269211, "learning_rate": 0.0009550956846952408, "loss": 0.2503, "num_input_tokens_seen": 58905984, "step": 27260 }, { "epoch": 4.447797716150082, "grad_norm": 0.014609013684093952, "learning_rate": 0.0009550661982578286, "loss": 0.1163, "num_input_tokens_seen": 58917536, "step": 27265 }, { "epoch": 4.448613376835237, "grad_norm": 0.09037666767835617, "learning_rate": 0.0009550367025979225, "loss": 0.1394, "num_input_tokens_seen": 58928224, "step": 27270 }, { "epoch": 4.4494290375203915, "grad_norm": 0.020922953262925148, "learning_rate": 0.0009550071977161203, "loss": 0.0755, "num_input_tokens_seen": 58939488, "step": 27275 }, { "epoch": 4.450244698205546, "grad_norm": 0.053212665021419525, "learning_rate": 0.0009549776836130202, "loss": 0.1124, "num_input_tokens_seen": 58951520, "step": 27280 }, { "epoch": 4.451060358890701, "grad_norm": 0.04497351124882698, "learning_rate": 0.0009549481602892201, "loss": 0.0697, "num_input_tokens_seen": 58962048, "step": 27285 }, { "epoch": 4.451876019575856, "grad_norm": 0.2501903176307678, "learning_rate": 0.0009549186277453184, "loss": 0.1667, "num_input_tokens_seen": 58973696, "step": 27290 }, { "epoch": 4.452691680261012, "grad_norm": 0.7072194218635559, "learning_rate": 0.0009548890859819138, "loss": 0.0931, "num_input_tokens_seen": 58983840, "step": 27295 }, { "epoch": 4.4535073409461665, "grad_norm": 0.06406120955944061, "learning_rate": 0.0009548595349996045, "loss": 0.0534, "num_input_tokens_seen": 58993952, "step": 27300 }, { "epoch": 4.454323001631321, "grad_norm": 0.038360778242349625, "learning_rate": 0.0009548299747989897, "loss": 0.0509, "num_input_tokens_seen": 59004576, "step": 27305 }, { "epoch": 4.455138662316476, "grad_norm": 0.019249223172664642, "learning_rate": 0.0009548004053806686, "loss": 0.0643, "num_input_tokens_seen": 59014464, "step": 27310 }, { "epoch": 4.455954323001631, "grad_norm": 0.16556453704833984, "learning_rate": 0.0009547708267452403, "loss": 0.0886, "num_input_tokens_seen": 59024352, "step": 27315 }, { "epoch": 4.456769983686787, "grad_norm": 0.11377817392349243, "learning_rate": 0.0009547412388933042, "loss": 0.0644, "num_input_tokens_seen": 59035488, "step": 27320 }, { "epoch": 4.4575856443719415, "grad_norm": 0.019814448431134224, "learning_rate": 0.0009547116418254601, "loss": 0.0831, "num_input_tokens_seen": 59047072, "step": 27325 }, { "epoch": 4.458401305057096, "grad_norm": 0.11277088522911072, "learning_rate": 0.0009546820355423077, "loss": 0.1107, "num_input_tokens_seen": 59058144, "step": 27330 }, { "epoch": 4.459216965742251, "grad_norm": 0.005291223991662264, "learning_rate": 0.0009546524200444471, "loss": 0.1291, "num_input_tokens_seen": 59068576, "step": 27335 }, { "epoch": 4.460032626427406, "grad_norm": 0.023128168657422066, "learning_rate": 0.0009546227953324784, "loss": 0.1454, "num_input_tokens_seen": 59079808, "step": 27340 }, { "epoch": 4.460848287112561, "grad_norm": 0.3094649016857147, "learning_rate": 0.000954593161407002, "loss": 0.3053, "num_input_tokens_seen": 59090432, "step": 27345 }, { "epoch": 4.4616639477977165, "grad_norm": 0.07266154885292053, "learning_rate": 0.0009545635182686185, "loss": 0.1429, "num_input_tokens_seen": 59100736, "step": 27350 }, { "epoch": 4.462479608482871, "grad_norm": 0.20185202360153198, "learning_rate": 0.0009545338659179286, "loss": 0.0734, "num_input_tokens_seen": 59111776, "step": 27355 }, { "epoch": 4.463295269168026, "grad_norm": 0.05168718472123146, "learning_rate": 0.0009545042043555334, "loss": 0.0698, "num_input_tokens_seen": 59123936, "step": 27360 }, { "epoch": 4.464110929853181, "grad_norm": 0.017242785543203354, "learning_rate": 0.000954474533582034, "loss": 0.0958, "num_input_tokens_seen": 59135200, "step": 27365 }, { "epoch": 4.464926590538336, "grad_norm": 0.03838271647691727, "learning_rate": 0.0009544448535980315, "loss": 0.1033, "num_input_tokens_seen": 59146592, "step": 27370 }, { "epoch": 4.465742251223491, "grad_norm": 0.0059380610473454, "learning_rate": 0.0009544151644041275, "loss": 0.0748, "num_input_tokens_seen": 59157472, "step": 27375 }, { "epoch": 4.466557911908646, "grad_norm": 0.05592265725135803, "learning_rate": 0.0009543854660009237, "loss": 0.0528, "num_input_tokens_seen": 59169216, "step": 27380 }, { "epoch": 4.467373572593801, "grad_norm": 0.027988268062472343, "learning_rate": 0.0009543557583890221, "loss": 0.0278, "num_input_tokens_seen": 59180800, "step": 27385 }, { "epoch": 4.468189233278956, "grad_norm": 0.5290764570236206, "learning_rate": 0.0009543260415690247, "loss": 0.2604, "num_input_tokens_seen": 59192736, "step": 27390 }, { "epoch": 4.469004893964111, "grad_norm": 0.3840823173522949, "learning_rate": 0.0009542963155415336, "loss": 0.3098, "num_input_tokens_seen": 59202208, "step": 27395 }, { "epoch": 4.4698205546492655, "grad_norm": 0.09972764551639557, "learning_rate": 0.0009542665803071515, "loss": 0.117, "num_input_tokens_seen": 59213312, "step": 27400 }, { "epoch": 4.470636215334421, "grad_norm": 0.028418170288205147, "learning_rate": 0.0009542368358664806, "loss": 0.0422, "num_input_tokens_seen": 59223936, "step": 27405 }, { "epoch": 4.471451876019576, "grad_norm": 0.005448495969176292, "learning_rate": 0.0009542070822201244, "loss": 0.0976, "num_input_tokens_seen": 59234176, "step": 27410 }, { "epoch": 4.472267536704731, "grad_norm": 0.02386247180402279, "learning_rate": 0.0009541773193686851, "loss": 0.1321, "num_input_tokens_seen": 59245952, "step": 27415 }, { "epoch": 4.473083197389886, "grad_norm": 0.016697218641638756, "learning_rate": 0.0009541475473127664, "loss": 0.2068, "num_input_tokens_seen": 59257216, "step": 27420 }, { "epoch": 4.4738988580750405, "grad_norm": 0.02813301980495453, "learning_rate": 0.0009541177660529715, "loss": 0.1181, "num_input_tokens_seen": 59267936, "step": 27425 }, { "epoch": 4.474714518760196, "grad_norm": 0.1297505646944046, "learning_rate": 0.0009540879755899041, "loss": 0.1286, "num_input_tokens_seen": 59279232, "step": 27430 }, { "epoch": 4.475530179445351, "grad_norm": 0.12573681771755219, "learning_rate": 0.0009540581759241676, "loss": 0.1628, "num_input_tokens_seen": 59288800, "step": 27435 }, { "epoch": 4.476345840130506, "grad_norm": 0.2438529133796692, "learning_rate": 0.0009540283670563663, "loss": 0.2941, "num_input_tokens_seen": 59299904, "step": 27440 }, { "epoch": 4.477161500815661, "grad_norm": 0.08795158565044403, "learning_rate": 0.0009539985489871041, "loss": 0.0748, "num_input_tokens_seen": 59310592, "step": 27445 }, { "epoch": 4.4779771615008155, "grad_norm": 0.08853715658187866, "learning_rate": 0.0009539687217169855, "loss": 0.0883, "num_input_tokens_seen": 59320768, "step": 27450 }, { "epoch": 4.47879282218597, "grad_norm": 0.1433527171611786, "learning_rate": 0.0009539388852466146, "loss": 0.0713, "num_input_tokens_seen": 59332992, "step": 27455 }, { "epoch": 4.479608482871126, "grad_norm": 0.04177452251315117, "learning_rate": 0.0009539090395765966, "loss": 0.0501, "num_input_tokens_seen": 59342944, "step": 27460 }, { "epoch": 4.480424143556281, "grad_norm": 0.0390019528567791, "learning_rate": 0.000953879184707536, "loss": 0.1618, "num_input_tokens_seen": 59354016, "step": 27465 }, { "epoch": 4.481239804241436, "grad_norm": 0.06062839925289154, "learning_rate": 0.0009538493206400378, "loss": 0.0576, "num_input_tokens_seen": 59364064, "step": 27470 }, { "epoch": 4.4820554649265905, "grad_norm": 0.10372152924537659, "learning_rate": 0.0009538194473747077, "loss": 0.0856, "num_input_tokens_seen": 59374336, "step": 27475 }, { "epoch": 4.482871125611745, "grad_norm": 0.1782025396823883, "learning_rate": 0.0009537895649121504, "loss": 0.069, "num_input_tokens_seen": 59385056, "step": 27480 }, { "epoch": 4.4836867862969, "grad_norm": 0.044130004942417145, "learning_rate": 0.0009537596732529721, "loss": 0.0647, "num_input_tokens_seen": 59393952, "step": 27485 }, { "epoch": 4.484502446982056, "grad_norm": 0.20328426361083984, "learning_rate": 0.0009537297723977784, "loss": 0.1263, "num_input_tokens_seen": 59405824, "step": 27490 }, { "epoch": 4.485318107667211, "grad_norm": 0.17632223665714264, "learning_rate": 0.0009536998623471752, "loss": 0.1787, "num_input_tokens_seen": 59415968, "step": 27495 }, { "epoch": 4.486133768352365, "grad_norm": 0.2297552525997162, "learning_rate": 0.0009536699431017688, "loss": 0.1442, "num_input_tokens_seen": 59426336, "step": 27500 }, { "epoch": 4.48694942903752, "grad_norm": 0.0059665110893547535, "learning_rate": 0.0009536400146621653, "loss": 0.0369, "num_input_tokens_seen": 59436864, "step": 27505 }, { "epoch": 4.487765089722675, "grad_norm": 0.20988836884498596, "learning_rate": 0.0009536100770289717, "loss": 0.1199, "num_input_tokens_seen": 59448128, "step": 27510 }, { "epoch": 4.488580750407831, "grad_norm": 0.008611970581114292, "learning_rate": 0.0009535801302027942, "loss": 0.0509, "num_input_tokens_seen": 59458432, "step": 27515 }, { "epoch": 4.489396411092986, "grad_norm": 0.21014344692230225, "learning_rate": 0.0009535501741842401, "loss": 0.2112, "num_input_tokens_seen": 59468160, "step": 27520 }, { "epoch": 4.49021207177814, "grad_norm": 0.02096942998468876, "learning_rate": 0.0009535202089739162, "loss": 0.0287, "num_input_tokens_seen": 59478144, "step": 27525 }, { "epoch": 4.491027732463295, "grad_norm": 0.004998327232897282, "learning_rate": 0.0009534902345724301, "loss": 0.0732, "num_input_tokens_seen": 59489792, "step": 27530 }, { "epoch": 4.49184339314845, "grad_norm": 0.016319639980793, "learning_rate": 0.000953460250980389, "loss": 0.0483, "num_input_tokens_seen": 59501376, "step": 27535 }, { "epoch": 4.492659053833605, "grad_norm": 0.47428780794143677, "learning_rate": 0.0009534302581984007, "loss": 0.1153, "num_input_tokens_seen": 59511936, "step": 27540 }, { "epoch": 4.493474714518761, "grad_norm": 0.07189547270536423, "learning_rate": 0.000953400256227073, "loss": 0.0709, "num_input_tokens_seen": 59522912, "step": 27545 }, { "epoch": 4.494290375203915, "grad_norm": 0.008183644153177738, "learning_rate": 0.0009533702450670138, "loss": 0.0362, "num_input_tokens_seen": 59533632, "step": 27550 }, { "epoch": 4.49510603588907, "grad_norm": 0.013985889032483101, "learning_rate": 0.0009533402247188317, "loss": 0.1478, "num_input_tokens_seen": 59545024, "step": 27555 }, { "epoch": 4.495921696574225, "grad_norm": 0.011940013617277145, "learning_rate": 0.0009533101951831347, "loss": 0.0556, "num_input_tokens_seen": 59555616, "step": 27560 }, { "epoch": 4.49673735725938, "grad_norm": 0.006115328054875135, "learning_rate": 0.0009532801564605315, "loss": 0.0559, "num_input_tokens_seen": 59567136, "step": 27565 }, { "epoch": 4.497553017944535, "grad_norm": 0.16617755591869354, "learning_rate": 0.000953250108551631, "loss": 0.2774, "num_input_tokens_seen": 59580000, "step": 27570 }, { "epoch": 4.49836867862969, "grad_norm": 0.05506671965122223, "learning_rate": 0.0009532200514570419, "loss": 0.0339, "num_input_tokens_seen": 59591008, "step": 27575 }, { "epoch": 4.499184339314845, "grad_norm": 0.008953276090323925, "learning_rate": 0.0009531899851773737, "loss": 0.0657, "num_input_tokens_seen": 59601920, "step": 27580 }, { "epoch": 4.5, "grad_norm": 0.11161115020513535, "learning_rate": 0.0009531599097132354, "loss": 0.0558, "num_input_tokens_seen": 59611968, "step": 27585 }, { "epoch": 4.500815660685155, "grad_norm": 0.015438306145370007, "learning_rate": 0.0009531298250652367, "loss": 0.046, "num_input_tokens_seen": 59623328, "step": 27590 }, { "epoch": 4.50163132137031, "grad_norm": 0.018507203087210655, "learning_rate": 0.0009530997312339873, "loss": 0.1303, "num_input_tokens_seen": 59634208, "step": 27595 }, { "epoch": 4.502446982055465, "grad_norm": 0.24117828905582428, "learning_rate": 0.000953069628220097, "loss": 0.1026, "num_input_tokens_seen": 59645472, "step": 27600 }, { "epoch": 4.50326264274062, "grad_norm": 0.06984904408454895, "learning_rate": 0.0009530395160241759, "loss": 0.0374, "num_input_tokens_seen": 59656960, "step": 27605 }, { "epoch": 4.504078303425775, "grad_norm": 0.011626221239566803, "learning_rate": 0.0009530093946468343, "loss": 0.0235, "num_input_tokens_seen": 59668224, "step": 27610 }, { "epoch": 4.50489396411093, "grad_norm": 0.06135258078575134, "learning_rate": 0.0009529792640886827, "loss": 0.1165, "num_input_tokens_seen": 59679168, "step": 27615 }, { "epoch": 4.505709624796085, "grad_norm": 0.041614778339862823, "learning_rate": 0.0009529491243503316, "loss": 0.0838, "num_input_tokens_seen": 59689376, "step": 27620 }, { "epoch": 4.506525285481239, "grad_norm": 0.18257805705070496, "learning_rate": 0.000952918975432392, "loss": 0.2499, "num_input_tokens_seen": 59700256, "step": 27625 }, { "epoch": 4.507340946166395, "grad_norm": 0.059920214116573334, "learning_rate": 0.0009528888173354746, "loss": 0.093, "num_input_tokens_seen": 59711648, "step": 27630 }, { "epoch": 4.50815660685155, "grad_norm": 0.16995491087436676, "learning_rate": 0.000952858650060191, "loss": 0.2813, "num_input_tokens_seen": 59721152, "step": 27635 }, { "epoch": 4.508972267536705, "grad_norm": 0.0451311320066452, "learning_rate": 0.0009528284736071522, "loss": 0.096, "num_input_tokens_seen": 59731264, "step": 27640 }, { "epoch": 4.50978792822186, "grad_norm": 0.21006529033184052, "learning_rate": 0.00095279828797697, "loss": 0.2281, "num_input_tokens_seen": 59741504, "step": 27645 }, { "epoch": 4.510603588907014, "grad_norm": 0.016192087903618813, "learning_rate": 0.000952768093170256, "loss": 0.0549, "num_input_tokens_seen": 59751904, "step": 27650 }, { "epoch": 4.511419249592169, "grad_norm": 0.17167527973651886, "learning_rate": 0.0009527378891876223, "loss": 0.2543, "num_input_tokens_seen": 59762976, "step": 27655 }, { "epoch": 4.512234910277325, "grad_norm": 0.04856706038117409, "learning_rate": 0.0009527076760296809, "loss": 0.131, "num_input_tokens_seen": 59773664, "step": 27660 }, { "epoch": 4.51305057096248, "grad_norm": 0.03441392630338669, "learning_rate": 0.0009526774536970442, "loss": 0.1098, "num_input_tokens_seen": 59783168, "step": 27665 }, { "epoch": 4.513866231647635, "grad_norm": 0.04515177384018898, "learning_rate": 0.0009526472221903247, "loss": 0.088, "num_input_tokens_seen": 59794688, "step": 27670 }, { "epoch": 4.514681892332789, "grad_norm": 0.014667819254100323, "learning_rate": 0.0009526169815101349, "loss": 0.0503, "num_input_tokens_seen": 59804832, "step": 27675 }, { "epoch": 4.515497553017944, "grad_norm": 0.09431620687246323, "learning_rate": 0.0009525867316570877, "loss": 0.145, "num_input_tokens_seen": 59814944, "step": 27680 }, { "epoch": 4.5163132137031, "grad_norm": 0.023899326100945473, "learning_rate": 0.0009525564726317963, "loss": 0.0793, "num_input_tokens_seen": 59826176, "step": 27685 }, { "epoch": 4.517128874388255, "grad_norm": 0.04530876874923706, "learning_rate": 0.000952526204434874, "loss": 0.038, "num_input_tokens_seen": 59838240, "step": 27690 }, { "epoch": 4.5179445350734095, "grad_norm": 0.026498528197407722, "learning_rate": 0.000952495927066934, "loss": 0.0548, "num_input_tokens_seen": 59848032, "step": 27695 }, { "epoch": 4.518760195758564, "grad_norm": 0.027610991150140762, "learning_rate": 0.00095246564052859, "loss": 0.0211, "num_input_tokens_seen": 59859328, "step": 27700 }, { "epoch": 4.519575856443719, "grad_norm": 0.0709771141409874, "learning_rate": 0.0009524353448204558, "loss": 0.0723, "num_input_tokens_seen": 59870272, "step": 27705 }, { "epoch": 4.520391517128875, "grad_norm": 0.008169763721525669, "learning_rate": 0.0009524050399431454, "loss": 0.0665, "num_input_tokens_seen": 59880416, "step": 27710 }, { "epoch": 4.52120717781403, "grad_norm": 0.09817571938037872, "learning_rate": 0.0009523747258972729, "loss": 0.11, "num_input_tokens_seen": 59891168, "step": 27715 }, { "epoch": 4.5220228384991845, "grad_norm": 0.11686165630817413, "learning_rate": 0.0009523444026834528, "loss": 0.2663, "num_input_tokens_seen": 59901600, "step": 27720 }, { "epoch": 4.522838499184339, "grad_norm": 0.03325748071074486, "learning_rate": 0.0009523140703022995, "loss": 0.0841, "num_input_tokens_seen": 59912832, "step": 27725 }, { "epoch": 4.523654159869494, "grad_norm": 0.26222917437553406, "learning_rate": 0.0009522837287544277, "loss": 0.1265, "num_input_tokens_seen": 59924288, "step": 27730 }, { "epoch": 4.524469820554649, "grad_norm": 0.04080116003751755, "learning_rate": 0.0009522533780404526, "loss": 0.0865, "num_input_tokens_seen": 59935072, "step": 27735 }, { "epoch": 4.525285481239804, "grad_norm": 0.02146266959607601, "learning_rate": 0.0009522230181609888, "loss": 0.1188, "num_input_tokens_seen": 59945856, "step": 27740 }, { "epoch": 4.5261011419249595, "grad_norm": 0.008499844931066036, "learning_rate": 0.000952192649116652, "loss": 0.0873, "num_input_tokens_seen": 59957152, "step": 27745 }, { "epoch": 4.526916802610114, "grad_norm": 0.06477084755897522, "learning_rate": 0.0009521622709080574, "loss": 0.1054, "num_input_tokens_seen": 59968000, "step": 27750 }, { "epoch": 4.527732463295269, "grad_norm": 0.03540358319878578, "learning_rate": 0.0009521318835358208, "loss": 0.0768, "num_input_tokens_seen": 59978496, "step": 27755 }, { "epoch": 4.528548123980424, "grad_norm": 0.01290238369256258, "learning_rate": 0.000952101487000558, "loss": 0.1064, "num_input_tokens_seen": 59989376, "step": 27760 }, { "epoch": 4.529363784665579, "grad_norm": 0.007645154371857643, "learning_rate": 0.0009520710813028852, "loss": 0.0936, "num_input_tokens_seen": 59999296, "step": 27765 }, { "epoch": 4.5301794453507345, "grad_norm": 0.18740314245224, "learning_rate": 0.0009520406664434183, "loss": 0.2108, "num_input_tokens_seen": 60008928, "step": 27770 }, { "epoch": 4.530995106035889, "grad_norm": 0.1460702270269394, "learning_rate": 0.0009520102424227739, "loss": 0.1085, "num_input_tokens_seen": 60020640, "step": 27775 }, { "epoch": 4.531810766721044, "grad_norm": 0.26850706338882446, "learning_rate": 0.0009519798092415683, "loss": 0.128, "num_input_tokens_seen": 60031904, "step": 27780 }, { "epoch": 4.532626427406199, "grad_norm": 0.23345030844211578, "learning_rate": 0.0009519493669004189, "loss": 0.0953, "num_input_tokens_seen": 60041440, "step": 27785 }, { "epoch": 4.533442088091354, "grad_norm": 0.12963812053203583, "learning_rate": 0.0009519189153999419, "loss": 0.0624, "num_input_tokens_seen": 60052256, "step": 27790 }, { "epoch": 4.5342577487765094, "grad_norm": 0.10406608879566193, "learning_rate": 0.0009518884547407549, "loss": 0.0787, "num_input_tokens_seen": 60062624, "step": 27795 }, { "epoch": 4.535073409461664, "grad_norm": 0.16851423680782318, "learning_rate": 0.0009518579849234752, "loss": 0.2199, "num_input_tokens_seen": 60073280, "step": 27800 }, { "epoch": 4.535889070146819, "grad_norm": 0.01687563955783844, "learning_rate": 0.00095182750594872, "loss": 0.0738, "num_input_tokens_seen": 60084288, "step": 27805 }, { "epoch": 4.536704730831974, "grad_norm": 0.06263386458158493, "learning_rate": 0.0009517970178171074, "loss": 0.0758, "num_input_tokens_seen": 60095200, "step": 27810 }, { "epoch": 4.537520391517129, "grad_norm": 0.019296538084745407, "learning_rate": 0.000951766520529255, "loss": 0.1504, "num_input_tokens_seen": 60104960, "step": 27815 }, { "epoch": 4.5383360522022835, "grad_norm": 0.009546547196805477, "learning_rate": 0.0009517360140857809, "loss": 0.023, "num_input_tokens_seen": 60116704, "step": 27820 }, { "epoch": 4.539151712887438, "grad_norm": 0.09043900668621063, "learning_rate": 0.0009517054984873035, "loss": 0.1035, "num_input_tokens_seen": 60128800, "step": 27825 }, { "epoch": 4.539967373572594, "grad_norm": 0.0927947387099266, "learning_rate": 0.0009516749737344412, "loss": 0.2508, "num_input_tokens_seen": 60140960, "step": 27830 }, { "epoch": 4.540783034257749, "grad_norm": 0.035468216985464096, "learning_rate": 0.0009516444398278125, "loss": 0.1376, "num_input_tokens_seen": 60151616, "step": 27835 }, { "epoch": 4.541598694942904, "grad_norm": 0.01672246865928173, "learning_rate": 0.0009516138967680363, "loss": 0.1912, "num_input_tokens_seen": 60163392, "step": 27840 }, { "epoch": 4.5424143556280585, "grad_norm": 0.018259450793266296, "learning_rate": 0.0009515833445557314, "loss": 0.1607, "num_input_tokens_seen": 60174848, "step": 27845 }, { "epoch": 4.543230016313213, "grad_norm": 0.038537368178367615, "learning_rate": 0.0009515527831915174, "loss": 0.0199, "num_input_tokens_seen": 60184384, "step": 27850 }, { "epoch": 4.544045676998369, "grad_norm": 0.14104370772838593, "learning_rate": 0.0009515222126760132, "loss": 0.1638, "num_input_tokens_seen": 60196064, "step": 27855 }, { "epoch": 4.544861337683524, "grad_norm": 0.03136323764920235, "learning_rate": 0.0009514916330098386, "loss": 0.1443, "num_input_tokens_seen": 60205760, "step": 27860 }, { "epoch": 4.545676998368679, "grad_norm": 0.05053570494055748, "learning_rate": 0.0009514610441936133, "loss": 0.0778, "num_input_tokens_seen": 60218208, "step": 27865 }, { "epoch": 4.5464926590538335, "grad_norm": 0.04487035796046257, "learning_rate": 0.0009514304462279574, "loss": 0.0618, "num_input_tokens_seen": 60230016, "step": 27870 }, { "epoch": 4.547308319738988, "grad_norm": 0.00935713853687048, "learning_rate": 0.0009513998391134906, "loss": 0.1061, "num_input_tokens_seen": 60238976, "step": 27875 }, { "epoch": 4.548123980424144, "grad_norm": 0.09090052545070648, "learning_rate": 0.0009513692228508336, "loss": 0.1155, "num_input_tokens_seen": 60248864, "step": 27880 }, { "epoch": 4.548939641109299, "grad_norm": 0.0810205340385437, "learning_rate": 0.0009513385974406066, "loss": 0.1643, "num_input_tokens_seen": 60259328, "step": 27885 }, { "epoch": 4.549755301794454, "grad_norm": 0.2484816461801529, "learning_rate": 0.0009513079628834305, "loss": 0.172, "num_input_tokens_seen": 60269152, "step": 27890 }, { "epoch": 4.5505709624796085, "grad_norm": 0.007728502620011568, "learning_rate": 0.0009512773191799258, "loss": 0.0496, "num_input_tokens_seen": 60279616, "step": 27895 }, { "epoch": 4.551386623164763, "grad_norm": 0.010481576435267925, "learning_rate": 0.0009512466663307138, "loss": 0.1528, "num_input_tokens_seen": 60291264, "step": 27900 }, { "epoch": 4.552202283849918, "grad_norm": 0.10634084790945053, "learning_rate": 0.0009512160043364157, "loss": 0.1042, "num_input_tokens_seen": 60302592, "step": 27905 }, { "epoch": 4.553017944535073, "grad_norm": 0.03876103460788727, "learning_rate": 0.0009511853331976527, "loss": 0.0544, "num_input_tokens_seen": 60312992, "step": 27910 }, { "epoch": 4.553833605220229, "grad_norm": 0.026083072647452354, "learning_rate": 0.0009511546529150467, "loss": 0.1053, "num_input_tokens_seen": 60324032, "step": 27915 }, { "epoch": 4.554649265905383, "grad_norm": 0.05046556144952774, "learning_rate": 0.0009511239634892195, "loss": 0.0663, "num_input_tokens_seen": 60334432, "step": 27920 }, { "epoch": 4.555464926590538, "grad_norm": 0.006147427950054407, "learning_rate": 0.0009510932649207926, "loss": 0.0284, "num_input_tokens_seen": 60344576, "step": 27925 }, { "epoch": 4.556280587275693, "grad_norm": 0.0055056107230484486, "learning_rate": 0.0009510625572103886, "loss": 0.1075, "num_input_tokens_seen": 60354752, "step": 27930 }, { "epoch": 4.557096247960848, "grad_norm": 0.019017960876226425, "learning_rate": 0.0009510318403586297, "loss": 0.0389, "num_input_tokens_seen": 60365728, "step": 27935 }, { "epoch": 4.557911908646004, "grad_norm": 0.033328790217638016, "learning_rate": 0.0009510011143661382, "loss": 0.0932, "num_input_tokens_seen": 60375936, "step": 27940 }, { "epoch": 4.558727569331158, "grad_norm": 0.10524723678827286, "learning_rate": 0.0009509703792335371, "loss": 0.0775, "num_input_tokens_seen": 60386400, "step": 27945 }, { "epoch": 4.559543230016313, "grad_norm": 0.043684348464012146, "learning_rate": 0.0009509396349614492, "loss": 0.1881, "num_input_tokens_seen": 60397952, "step": 27950 }, { "epoch": 4.560358890701468, "grad_norm": 0.006683858577162027, "learning_rate": 0.0009509088815504975, "loss": 0.1703, "num_input_tokens_seen": 60409600, "step": 27955 }, { "epoch": 4.561174551386623, "grad_norm": 0.022528601810336113, "learning_rate": 0.0009508781190013053, "loss": 0.0743, "num_input_tokens_seen": 60419136, "step": 27960 }, { "epoch": 4.561990212071779, "grad_norm": 0.19460022449493408, "learning_rate": 0.0009508473473144961, "loss": 0.1594, "num_input_tokens_seen": 60429504, "step": 27965 }, { "epoch": 4.562805872756933, "grad_norm": 0.011187287978827953, "learning_rate": 0.0009508165664906933, "loss": 0.037, "num_input_tokens_seen": 60439936, "step": 27970 }, { "epoch": 4.563621533442088, "grad_norm": 0.042578887194395065, "learning_rate": 0.000950785776530521, "loss": 0.1405, "num_input_tokens_seen": 60450336, "step": 27975 }, { "epoch": 4.564437194127243, "grad_norm": 0.19036757946014404, "learning_rate": 0.0009507549774346029, "loss": 0.0967, "num_input_tokens_seen": 60459296, "step": 27980 }, { "epoch": 4.565252854812398, "grad_norm": 0.02649342082440853, "learning_rate": 0.0009507241692035635, "loss": 0.0385, "num_input_tokens_seen": 60469632, "step": 27985 }, { "epoch": 4.566068515497553, "grad_norm": 0.15176327526569366, "learning_rate": 0.0009506933518380272, "loss": 0.1407, "num_input_tokens_seen": 60479488, "step": 27990 }, { "epoch": 4.566884176182708, "grad_norm": 0.017907990142703056, "learning_rate": 0.0009506625253386181, "loss": 0.2209, "num_input_tokens_seen": 60489824, "step": 27995 }, { "epoch": 4.567699836867863, "grad_norm": 0.2888742983341217, "learning_rate": 0.0009506316897059614, "loss": 0.1817, "num_input_tokens_seen": 60499776, "step": 28000 }, { "epoch": 4.568515497553018, "grad_norm": 0.01035986002534628, "learning_rate": 0.0009506008449406818, "loss": 0.0258, "num_input_tokens_seen": 60509952, "step": 28005 }, { "epoch": 4.569331158238173, "grad_norm": 0.12834765017032623, "learning_rate": 0.0009505699910434043, "loss": 0.1524, "num_input_tokens_seen": 60520704, "step": 28010 }, { "epoch": 4.570146818923328, "grad_norm": 0.1346168965101242, "learning_rate": 0.0009505391280147545, "loss": 0.0391, "num_input_tokens_seen": 60532640, "step": 28015 }, { "epoch": 4.5709624796084825, "grad_norm": 0.15639248490333557, "learning_rate": 0.0009505082558553577, "loss": 0.169, "num_input_tokens_seen": 60543360, "step": 28020 }, { "epoch": 4.571778140293638, "grad_norm": 0.01339662540704012, "learning_rate": 0.0009504773745658395, "loss": 0.0576, "num_input_tokens_seen": 60554080, "step": 28025 }, { "epoch": 4.572593800978793, "grad_norm": 0.0473957359790802, "learning_rate": 0.0009504464841468259, "loss": 0.0252, "num_input_tokens_seen": 60564576, "step": 28030 }, { "epoch": 4.573409461663948, "grad_norm": 0.035671476274728775, "learning_rate": 0.000950415584598943, "loss": 0.1969, "num_input_tokens_seen": 60574880, "step": 28035 }, { "epoch": 4.574225122349103, "grad_norm": 0.166676864027977, "learning_rate": 0.0009503846759228167, "loss": 0.0505, "num_input_tokens_seen": 60585216, "step": 28040 }, { "epoch": 4.575040783034257, "grad_norm": 0.19653046131134033, "learning_rate": 0.0009503537581190736, "loss": 0.1203, "num_input_tokens_seen": 60595840, "step": 28045 }, { "epoch": 4.575856443719413, "grad_norm": 0.13469751179218292, "learning_rate": 0.0009503228311883402, "loss": 0.0871, "num_input_tokens_seen": 60606720, "step": 28050 }, { "epoch": 4.576672104404568, "grad_norm": 0.0519726388156414, "learning_rate": 0.0009502918951312436, "loss": 0.1482, "num_input_tokens_seen": 60618368, "step": 28055 }, { "epoch": 4.577487765089723, "grad_norm": 0.017064429819583893, "learning_rate": 0.0009502609499484104, "loss": 0.0481, "num_input_tokens_seen": 60628352, "step": 28060 }, { "epoch": 4.578303425774878, "grad_norm": 0.13285928964614868, "learning_rate": 0.0009502299956404679, "loss": 0.1352, "num_input_tokens_seen": 60639168, "step": 28065 }, { "epoch": 4.579119086460032, "grad_norm": 0.01909302920103073, "learning_rate": 0.0009501990322080433, "loss": 0.2349, "num_input_tokens_seen": 60649760, "step": 28070 }, { "epoch": 4.579934747145187, "grad_norm": 0.09038439393043518, "learning_rate": 0.0009501680596517641, "loss": 0.2203, "num_input_tokens_seen": 60660000, "step": 28075 }, { "epoch": 4.580750407830343, "grad_norm": 0.2496732622385025, "learning_rate": 0.0009501370779722582, "loss": 0.1181, "num_input_tokens_seen": 60670752, "step": 28080 }, { "epoch": 4.581566068515498, "grad_norm": 0.050215430557727814, "learning_rate": 0.0009501060871701534, "loss": 0.0693, "num_input_tokens_seen": 60681408, "step": 28085 }, { "epoch": 4.582381729200653, "grad_norm": 0.0064918166026473045, "learning_rate": 0.0009500750872460778, "loss": 0.1827, "num_input_tokens_seen": 60691936, "step": 28090 }, { "epoch": 4.583197389885807, "grad_norm": 0.01403888314962387, "learning_rate": 0.0009500440782006594, "loss": 0.1614, "num_input_tokens_seen": 60702848, "step": 28095 }, { "epoch": 4.584013050570962, "grad_norm": 0.21205657720565796, "learning_rate": 0.000950013060034527, "loss": 0.1736, "num_input_tokens_seen": 60713920, "step": 28100 }, { "epoch": 4.584828711256117, "grad_norm": 0.07595892995595932, "learning_rate": 0.0009499820327483091, "loss": 0.0665, "num_input_tokens_seen": 60724992, "step": 28105 }, { "epoch": 4.585644371941273, "grad_norm": 0.10927172750234604, "learning_rate": 0.0009499509963426342, "loss": 0.0705, "num_input_tokens_seen": 60735520, "step": 28110 }, { "epoch": 4.5864600326264275, "grad_norm": 0.20014338195323944, "learning_rate": 0.0009499199508181318, "loss": 0.1299, "num_input_tokens_seen": 60747072, "step": 28115 }, { "epoch": 4.587275693311582, "grad_norm": 0.12331785261631012, "learning_rate": 0.0009498888961754308, "loss": 0.0715, "num_input_tokens_seen": 60756768, "step": 28120 }, { "epoch": 4.588091353996737, "grad_norm": 0.29597270488739014, "learning_rate": 0.0009498578324151606, "loss": 0.1716, "num_input_tokens_seen": 60767392, "step": 28125 }, { "epoch": 4.588907014681892, "grad_norm": 0.01776668056845665, "learning_rate": 0.0009498267595379506, "loss": 0.2, "num_input_tokens_seen": 60776416, "step": 28130 }, { "epoch": 4.589722675367048, "grad_norm": 0.02030642330646515, "learning_rate": 0.0009497956775444307, "loss": 0.0556, "num_input_tokens_seen": 60786688, "step": 28135 }, { "epoch": 4.5905383360522025, "grad_norm": 0.14282387495040894, "learning_rate": 0.0009497645864352309, "loss": 0.0482, "num_input_tokens_seen": 60797600, "step": 28140 }, { "epoch": 4.591353996737357, "grad_norm": 0.002983206883072853, "learning_rate": 0.0009497334862109812, "loss": 0.0327, "num_input_tokens_seen": 60808192, "step": 28145 }, { "epoch": 4.592169657422512, "grad_norm": 0.2608118951320648, "learning_rate": 0.0009497023768723119, "loss": 0.1167, "num_input_tokens_seen": 60818912, "step": 28150 }, { "epoch": 4.592985318107667, "grad_norm": 0.12593863904476166, "learning_rate": 0.0009496712584198532, "loss": 0.0521, "num_input_tokens_seen": 60829568, "step": 28155 }, { "epoch": 4.593800978792823, "grad_norm": 0.050070811063051224, "learning_rate": 0.0009496401308542363, "loss": 0.0943, "num_input_tokens_seen": 60841440, "step": 28160 }, { "epoch": 4.5946166394779775, "grad_norm": 0.01761269010603428, "learning_rate": 0.0009496089941760915, "loss": 0.0159, "num_input_tokens_seen": 60852672, "step": 28165 }, { "epoch": 4.595432300163132, "grad_norm": 0.38470232486724854, "learning_rate": 0.0009495778483860502, "loss": 0.2002, "num_input_tokens_seen": 60864384, "step": 28170 }, { "epoch": 4.596247960848287, "grad_norm": 0.32193905115127563, "learning_rate": 0.0009495466934847434, "loss": 0.1645, "num_input_tokens_seen": 60874656, "step": 28175 }, { "epoch": 4.597063621533442, "grad_norm": 0.04758863151073456, "learning_rate": 0.0009495155294728026, "loss": 0.0498, "num_input_tokens_seen": 60885920, "step": 28180 }, { "epoch": 4.597879282218597, "grad_norm": 0.0736926794052124, "learning_rate": 0.0009494843563508594, "loss": 0.1112, "num_input_tokens_seen": 60896992, "step": 28185 }, { "epoch": 4.598694942903752, "grad_norm": 0.25502830743789673, "learning_rate": 0.0009494531741195454, "loss": 0.0813, "num_input_tokens_seen": 60908768, "step": 28190 }, { "epoch": 4.599510603588907, "grad_norm": 0.057509299367666245, "learning_rate": 0.0009494219827794928, "loss": 0.0414, "num_input_tokens_seen": 60919680, "step": 28195 }, { "epoch": 4.600326264274062, "grad_norm": 0.05833054706454277, "learning_rate": 0.0009493907823313334, "loss": 0.122, "num_input_tokens_seen": 60930368, "step": 28200 }, { "epoch": 4.601141924959217, "grad_norm": 0.010716291144490242, "learning_rate": 0.0009493595727756998, "loss": 0.0178, "num_input_tokens_seen": 60940896, "step": 28205 }, { "epoch": 4.601957585644372, "grad_norm": 0.12371497601270676, "learning_rate": 0.0009493283541132245, "loss": 0.1883, "num_input_tokens_seen": 60951424, "step": 28210 }, { "epoch": 4.602773246329527, "grad_norm": 0.16348916292190552, "learning_rate": 0.0009492971263445401, "loss": 0.1108, "num_input_tokens_seen": 60961088, "step": 28215 }, { "epoch": 4.603588907014682, "grad_norm": 0.06231715530157089, "learning_rate": 0.0009492658894702792, "loss": 0.1384, "num_input_tokens_seen": 60971424, "step": 28220 }, { "epoch": 4.604404567699837, "grad_norm": 0.016711369156837463, "learning_rate": 0.0009492346434910753, "loss": 0.1207, "num_input_tokens_seen": 60981056, "step": 28225 }, { "epoch": 4.605220228384992, "grad_norm": 0.053427740931510925, "learning_rate": 0.0009492033884075615, "loss": 0.0644, "num_input_tokens_seen": 60992288, "step": 28230 }, { "epoch": 4.606035889070147, "grad_norm": 0.057806648313999176, "learning_rate": 0.000949172124220371, "loss": 0.0199, "num_input_tokens_seen": 61003008, "step": 28235 }, { "epoch": 4.6068515497553015, "grad_norm": 0.03352320194244385, "learning_rate": 0.0009491408509301378, "loss": 0.195, "num_input_tokens_seen": 61011904, "step": 28240 }, { "epoch": 4.607667210440457, "grad_norm": 0.017068684101104736, "learning_rate": 0.0009491095685374954, "loss": 0.0537, "num_input_tokens_seen": 61022976, "step": 28245 }, { "epoch": 4.608482871125612, "grad_norm": 0.11282926797866821, "learning_rate": 0.0009490782770430777, "loss": 0.0291, "num_input_tokens_seen": 61034976, "step": 28250 }, { "epoch": 4.609298531810767, "grad_norm": 0.13886576890945435, "learning_rate": 0.0009490469764475191, "loss": 0.0539, "num_input_tokens_seen": 61045696, "step": 28255 }, { "epoch": 4.610114192495922, "grad_norm": 0.2587529420852661, "learning_rate": 0.0009490156667514541, "loss": 0.0959, "num_input_tokens_seen": 61056192, "step": 28260 }, { "epoch": 4.6109298531810765, "grad_norm": 0.013154418207705021, "learning_rate": 0.0009489843479555167, "loss": 0.0607, "num_input_tokens_seen": 61065856, "step": 28265 }, { "epoch": 4.611745513866231, "grad_norm": 0.004272149410098791, "learning_rate": 0.000948953020060342, "loss": 0.0547, "num_input_tokens_seen": 61075424, "step": 28270 }, { "epoch": 4.612561174551386, "grad_norm": 0.300809383392334, "learning_rate": 0.0009489216830665649, "loss": 0.0647, "num_input_tokens_seen": 61087168, "step": 28275 }, { "epoch": 4.613376835236542, "grad_norm": 0.005980401299893856, "learning_rate": 0.0009488903369748203, "loss": 0.0921, "num_input_tokens_seen": 61098816, "step": 28280 }, { "epoch": 4.614192495921697, "grad_norm": 0.12887540459632874, "learning_rate": 0.0009488589817857435, "loss": 0.1491, "num_input_tokens_seen": 61108992, "step": 28285 }, { "epoch": 4.6150081566068515, "grad_norm": 0.1607290506362915, "learning_rate": 0.0009488276174999702, "loss": 0.0865, "num_input_tokens_seen": 61120032, "step": 28290 }, { "epoch": 4.615823817292006, "grad_norm": 0.16185465455055237, "learning_rate": 0.0009487962441181357, "loss": 0.0644, "num_input_tokens_seen": 61130496, "step": 28295 }, { "epoch": 4.616639477977161, "grad_norm": 0.13092803955078125, "learning_rate": 0.0009487648616408762, "loss": 0.0513, "num_input_tokens_seen": 61141344, "step": 28300 }, { "epoch": 4.617455138662317, "grad_norm": 0.07150975614786148, "learning_rate": 0.0009487334700688273, "loss": 0.0256, "num_input_tokens_seen": 61152512, "step": 28305 }, { "epoch": 4.618270799347472, "grad_norm": 0.02035592496395111, "learning_rate": 0.0009487020694026254, "loss": 0.0387, "num_input_tokens_seen": 61164416, "step": 28310 }, { "epoch": 4.6190864600326265, "grad_norm": 0.0039632623083889484, "learning_rate": 0.0009486706596429068, "loss": 0.2028, "num_input_tokens_seen": 61176160, "step": 28315 }, { "epoch": 4.619902120717781, "grad_norm": 0.023158259689807892, "learning_rate": 0.0009486392407903082, "loss": 0.2388, "num_input_tokens_seen": 61186560, "step": 28320 }, { "epoch": 4.620717781402936, "grad_norm": 0.10569548606872559, "learning_rate": 0.000948607812845466, "loss": 0.0666, "num_input_tokens_seen": 61197504, "step": 28325 }, { "epoch": 4.621533442088092, "grad_norm": 0.020724743604660034, "learning_rate": 0.0009485763758090176, "loss": 0.0308, "num_input_tokens_seen": 61209088, "step": 28330 }, { "epoch": 4.622349102773247, "grad_norm": 0.21098117530345917, "learning_rate": 0.0009485449296815999, "loss": 0.1628, "num_input_tokens_seen": 61220224, "step": 28335 }, { "epoch": 4.623164763458401, "grad_norm": 0.03836962580680847, "learning_rate": 0.00094851347446385, "loss": 0.0523, "num_input_tokens_seen": 61232480, "step": 28340 }, { "epoch": 4.623980424143556, "grad_norm": 0.2680718004703522, "learning_rate": 0.0009484820101564058, "loss": 0.1557, "num_input_tokens_seen": 61244032, "step": 28345 }, { "epoch": 4.624796084828711, "grad_norm": 0.19660809636116028, "learning_rate": 0.0009484505367599045, "loss": 0.1351, "num_input_tokens_seen": 61256096, "step": 28350 }, { "epoch": 4.625611745513866, "grad_norm": 0.04200530797243118, "learning_rate": 0.0009484190542749844, "loss": 0.1997, "num_input_tokens_seen": 61268576, "step": 28355 }, { "epoch": 4.626427406199021, "grad_norm": 0.05554460734128952, "learning_rate": 0.0009483875627022831, "loss": 0.0761, "num_input_tokens_seen": 61279584, "step": 28360 }, { "epoch": 4.627243066884176, "grad_norm": 0.020894384011626244, "learning_rate": 0.0009483560620424391, "loss": 0.1132, "num_input_tokens_seen": 61289824, "step": 28365 }, { "epoch": 4.628058727569331, "grad_norm": 0.071620874106884, "learning_rate": 0.0009483245522960909, "loss": 0.1386, "num_input_tokens_seen": 61299296, "step": 28370 }, { "epoch": 4.628874388254486, "grad_norm": 0.03438745439052582, "learning_rate": 0.0009482930334638766, "loss": 0.0269, "num_input_tokens_seen": 61309600, "step": 28375 }, { "epoch": 4.629690048939641, "grad_norm": 0.05470510572195053, "learning_rate": 0.0009482615055464354, "loss": 0.1482, "num_input_tokens_seen": 61320288, "step": 28380 }, { "epoch": 4.630505709624796, "grad_norm": 0.09955763071775436, "learning_rate": 0.0009482299685444062, "loss": 0.117, "num_input_tokens_seen": 61331328, "step": 28385 }, { "epoch": 4.631321370309951, "grad_norm": 0.006198828108608723, "learning_rate": 0.0009481984224584279, "loss": 0.1589, "num_input_tokens_seen": 61341248, "step": 28390 }, { "epoch": 4.632137030995106, "grad_norm": 0.2537461221218109, "learning_rate": 0.0009481668672891401, "loss": 0.1139, "num_input_tokens_seen": 61350880, "step": 28395 }, { "epoch": 4.632952691680261, "grad_norm": 0.09156087040901184, "learning_rate": 0.0009481353030371822, "loss": 0.083, "num_input_tokens_seen": 61361760, "step": 28400 }, { "epoch": 4.633768352365416, "grad_norm": 0.011091694235801697, "learning_rate": 0.0009481037297031939, "loss": 0.1354, "num_input_tokens_seen": 61372896, "step": 28405 }, { "epoch": 4.634584013050571, "grad_norm": 0.23919807374477386, "learning_rate": 0.0009480721472878151, "loss": 0.2114, "num_input_tokens_seen": 61382752, "step": 28410 }, { "epoch": 4.635399673735726, "grad_norm": 0.012990964576601982, "learning_rate": 0.0009480405557916858, "loss": 0.0337, "num_input_tokens_seen": 61393792, "step": 28415 }, { "epoch": 4.636215334420881, "grad_norm": 0.057514190673828125, "learning_rate": 0.0009480089552154461, "loss": 0.1262, "num_input_tokens_seen": 61403968, "step": 28420 }, { "epoch": 4.637030995106036, "grad_norm": 0.25216689705848694, "learning_rate": 0.0009479773455597367, "loss": 0.1167, "num_input_tokens_seen": 61415648, "step": 28425 }, { "epoch": 4.637846655791191, "grad_norm": 0.12715749442577362, "learning_rate": 0.0009479457268251981, "loss": 0.1525, "num_input_tokens_seen": 61425696, "step": 28430 }, { "epoch": 4.638662316476346, "grad_norm": 0.006238611415028572, "learning_rate": 0.0009479140990124711, "loss": 0.065, "num_input_tokens_seen": 61436928, "step": 28435 }, { "epoch": 4.6394779771615005, "grad_norm": 0.14752565324306488, "learning_rate": 0.0009478824621221967, "loss": 0.2457, "num_input_tokens_seen": 61447648, "step": 28440 }, { "epoch": 4.640293637846656, "grad_norm": 0.03811931237578392, "learning_rate": 0.0009478508161550159, "loss": 0.1437, "num_input_tokens_seen": 61459776, "step": 28445 }, { "epoch": 4.641109298531811, "grad_norm": 0.03835804760456085, "learning_rate": 0.0009478191611115702, "loss": 0.081, "num_input_tokens_seen": 61471040, "step": 28450 }, { "epoch": 4.641924959216966, "grad_norm": 0.0996306762099266, "learning_rate": 0.0009477874969925011, "loss": 0.0583, "num_input_tokens_seen": 61481888, "step": 28455 }, { "epoch": 4.642740619902121, "grad_norm": 0.2590426206588745, "learning_rate": 0.0009477558237984503, "loss": 0.1078, "num_input_tokens_seen": 61492928, "step": 28460 }, { "epoch": 4.643556280587275, "grad_norm": 0.21474169194698334, "learning_rate": 0.0009477241415300599, "loss": 0.1292, "num_input_tokens_seen": 61504320, "step": 28465 }, { "epoch": 4.64437194127243, "grad_norm": 0.021407373249530792, "learning_rate": 0.0009476924501879715, "loss": 0.0183, "num_input_tokens_seen": 61515424, "step": 28470 }, { "epoch": 4.645187601957586, "grad_norm": 0.03953413665294647, "learning_rate": 0.0009476607497728279, "loss": 0.0356, "num_input_tokens_seen": 61526176, "step": 28475 }, { "epoch": 4.646003262642741, "grad_norm": 0.09875276684761047, "learning_rate": 0.0009476290402852712, "loss": 0.1282, "num_input_tokens_seen": 61536288, "step": 28480 }, { "epoch": 4.646818923327896, "grad_norm": 0.0387643501162529, "learning_rate": 0.0009475973217259442, "loss": 0.0396, "num_input_tokens_seen": 61548544, "step": 28485 }, { "epoch": 4.64763458401305, "grad_norm": 0.010005326010286808, "learning_rate": 0.0009475655940954896, "loss": 0.0581, "num_input_tokens_seen": 61559776, "step": 28490 }, { "epoch": 4.648450244698205, "grad_norm": 0.30142971873283386, "learning_rate": 0.0009475338573945504, "loss": 0.1899, "num_input_tokens_seen": 61570784, "step": 28495 }, { "epoch": 4.649265905383361, "grad_norm": 0.060554053634405136, "learning_rate": 0.0009475021116237699, "loss": 0.1408, "num_input_tokens_seen": 61581856, "step": 28500 }, { "epoch": 4.650081566068516, "grad_norm": 0.010634462349116802, "learning_rate": 0.0009474703567837915, "loss": 0.0715, "num_input_tokens_seen": 61593376, "step": 28505 }, { "epoch": 4.650897226753671, "grad_norm": 0.20411312580108643, "learning_rate": 0.0009474385928752585, "loss": 0.2613, "num_input_tokens_seen": 61604256, "step": 28510 }, { "epoch": 4.651712887438825, "grad_norm": 0.014688130468130112, "learning_rate": 0.0009474068198988151, "loss": 0.1626, "num_input_tokens_seen": 61615008, "step": 28515 }, { "epoch": 4.65252854812398, "grad_norm": 0.021405626088380814, "learning_rate": 0.0009473750378551046, "loss": 0.0247, "num_input_tokens_seen": 61626848, "step": 28520 }, { "epoch": 4.653344208809135, "grad_norm": 0.02764919400215149, "learning_rate": 0.0009473432467447715, "loss": 0.0585, "num_input_tokens_seen": 61637952, "step": 28525 }, { "epoch": 4.654159869494291, "grad_norm": 0.4175470769405365, "learning_rate": 0.00094731144656846, "loss": 0.2282, "num_input_tokens_seen": 61648288, "step": 28530 }, { "epoch": 4.6549755301794455, "grad_norm": 0.059521906077861786, "learning_rate": 0.0009472796373268147, "loss": 0.0559, "num_input_tokens_seen": 61659936, "step": 28535 }, { "epoch": 4.6557911908646, "grad_norm": 0.1181897521018982, "learning_rate": 0.00094724781902048, "loss": 0.0752, "num_input_tokens_seen": 61670464, "step": 28540 }, { "epoch": 4.656606851549755, "grad_norm": 0.06549588590860367, "learning_rate": 0.0009472159916501011, "loss": 0.0248, "num_input_tokens_seen": 61681568, "step": 28545 }, { "epoch": 4.65742251223491, "grad_norm": 0.008692199364304543, "learning_rate": 0.0009471841552163225, "loss": 0.0928, "num_input_tokens_seen": 61691808, "step": 28550 }, { "epoch": 4.658238172920065, "grad_norm": 0.1296404004096985, "learning_rate": 0.0009471523097197898, "loss": 0.1294, "num_input_tokens_seen": 61702400, "step": 28555 }, { "epoch": 4.6590538336052205, "grad_norm": 0.011472326703369617, "learning_rate": 0.0009471204551611483, "loss": 0.1363, "num_input_tokens_seen": 61713824, "step": 28560 }, { "epoch": 4.659869494290375, "grad_norm": 0.017907777801156044, "learning_rate": 0.0009470885915410437, "loss": 0.1489, "num_input_tokens_seen": 61724096, "step": 28565 }, { "epoch": 4.66068515497553, "grad_norm": 0.03549930080771446, "learning_rate": 0.0009470567188601214, "loss": 0.0473, "num_input_tokens_seen": 61735136, "step": 28570 }, { "epoch": 4.661500815660685, "grad_norm": 0.05104723572731018, "learning_rate": 0.0009470248371190277, "loss": 0.0625, "num_input_tokens_seen": 61746368, "step": 28575 }, { "epoch": 4.66231647634584, "grad_norm": 0.2593303620815277, "learning_rate": 0.0009469929463184086, "loss": 0.3085, "num_input_tokens_seen": 61756128, "step": 28580 }, { "epoch": 4.6631321370309955, "grad_norm": 0.03945760801434517, "learning_rate": 0.0009469610464589104, "loss": 0.0243, "num_input_tokens_seen": 61766464, "step": 28585 }, { "epoch": 4.66394779771615, "grad_norm": 0.017860667780041695, "learning_rate": 0.0009469291375411795, "loss": 0.1135, "num_input_tokens_seen": 61777632, "step": 28590 }, { "epoch": 4.664763458401305, "grad_norm": 0.020938122645020485, "learning_rate": 0.0009468972195658626, "loss": 0.105, "num_input_tokens_seen": 61788992, "step": 28595 }, { "epoch": 4.66557911908646, "grad_norm": 0.03804948925971985, "learning_rate": 0.0009468652925336068, "loss": 0.0745, "num_input_tokens_seen": 61799840, "step": 28600 }, { "epoch": 4.666394779771615, "grad_norm": 0.17097699642181396, "learning_rate": 0.0009468333564450587, "loss": 0.1053, "num_input_tokens_seen": 61810240, "step": 28605 }, { "epoch": 4.6672104404567705, "grad_norm": 0.059029195457696915, "learning_rate": 0.000946801411300866, "loss": 0.0978, "num_input_tokens_seen": 61820512, "step": 28610 }, { "epoch": 4.668026101141925, "grad_norm": 0.06193551421165466, "learning_rate": 0.0009467694571016758, "loss": 0.1145, "num_input_tokens_seen": 61831328, "step": 28615 }, { "epoch": 4.66884176182708, "grad_norm": 0.031025558710098267, "learning_rate": 0.0009467374938481359, "loss": 0.1367, "num_input_tokens_seen": 61842976, "step": 28620 }, { "epoch": 4.669657422512235, "grad_norm": 0.09817475825548172, "learning_rate": 0.0009467055215408939, "loss": 0.0633, "num_input_tokens_seen": 61854912, "step": 28625 }, { "epoch": 4.67047308319739, "grad_norm": 0.051113102585077286, "learning_rate": 0.0009466735401805977, "loss": 0.0644, "num_input_tokens_seen": 61866208, "step": 28630 }, { "epoch": 4.671288743882545, "grad_norm": 0.17623735964298248, "learning_rate": 0.0009466415497678957, "loss": 0.1229, "num_input_tokens_seen": 61877088, "step": 28635 }, { "epoch": 4.672104404567699, "grad_norm": 0.1480836421251297, "learning_rate": 0.000946609550303436, "loss": 0.2016, "num_input_tokens_seen": 61887328, "step": 28640 }, { "epoch": 4.672920065252855, "grad_norm": 0.12157644331455231, "learning_rate": 0.0009465775417878673, "loss": 0.1657, "num_input_tokens_seen": 61898336, "step": 28645 }, { "epoch": 4.67373572593801, "grad_norm": 0.0039813462644815445, "learning_rate": 0.0009465455242218382, "loss": 0.0227, "num_input_tokens_seen": 61909728, "step": 28650 }, { "epoch": 4.674551386623165, "grad_norm": 0.15658128261566162, "learning_rate": 0.0009465134976059975, "loss": 0.0684, "num_input_tokens_seen": 61921312, "step": 28655 }, { "epoch": 4.6753670473083195, "grad_norm": 0.19435112178325653, "learning_rate": 0.0009464814619409942, "loss": 0.0361, "num_input_tokens_seen": 61931744, "step": 28660 }, { "epoch": 4.676182707993474, "grad_norm": 0.1114906296133995, "learning_rate": 0.0009464494172274778, "loss": 0.1178, "num_input_tokens_seen": 61943264, "step": 28665 }, { "epoch": 4.67699836867863, "grad_norm": 0.3585332930088043, "learning_rate": 0.0009464173634660978, "loss": 0.1103, "num_input_tokens_seen": 61954112, "step": 28670 }, { "epoch": 4.677814029363785, "grad_norm": 0.030214810743927956, "learning_rate": 0.0009463853006575032, "loss": 0.0855, "num_input_tokens_seen": 61964384, "step": 28675 }, { "epoch": 4.67862969004894, "grad_norm": 0.13624903559684753, "learning_rate": 0.0009463532288023444, "loss": 0.0304, "num_input_tokens_seen": 61976608, "step": 28680 }, { "epoch": 4.6794453507340945, "grad_norm": 0.006839347537606955, "learning_rate": 0.0009463211479012712, "loss": 0.0412, "num_input_tokens_seen": 61987616, "step": 28685 }, { "epoch": 4.680261011419249, "grad_norm": 0.2887722849845886, "learning_rate": 0.0009462890579549338, "loss": 0.0814, "num_input_tokens_seen": 61997984, "step": 28690 }, { "epoch": 4.681076672104405, "grad_norm": 0.1711539328098297, "learning_rate": 0.0009462569589639825, "loss": 0.1158, "num_input_tokens_seen": 62008384, "step": 28695 }, { "epoch": 4.68189233278956, "grad_norm": 0.014359569177031517, "learning_rate": 0.0009462248509290676, "loss": 0.0336, "num_input_tokens_seen": 62019040, "step": 28700 }, { "epoch": 4.682707993474715, "grad_norm": 0.12640659511089325, "learning_rate": 0.0009461927338508402, "loss": 0.1402, "num_input_tokens_seen": 62030240, "step": 28705 }, { "epoch": 4.6835236541598695, "grad_norm": 0.029281822964549065, "learning_rate": 0.0009461606077299509, "loss": 0.1383, "num_input_tokens_seen": 62040768, "step": 28710 }, { "epoch": 4.684339314845024, "grad_norm": 0.019829019904136658, "learning_rate": 0.000946128472567051, "loss": 0.0498, "num_input_tokens_seen": 62051168, "step": 28715 }, { "epoch": 4.685154975530179, "grad_norm": 0.3932690918445587, "learning_rate": 0.0009460963283627917, "loss": 0.0642, "num_input_tokens_seen": 62062336, "step": 28720 }, { "epoch": 4.685970636215334, "grad_norm": 0.13580350577831268, "learning_rate": 0.0009460641751178243, "loss": 0.126, "num_input_tokens_seen": 62071232, "step": 28725 }, { "epoch": 4.68678629690049, "grad_norm": 0.0017230919329449534, "learning_rate": 0.0009460320128328003, "loss": 0.0224, "num_input_tokens_seen": 62082560, "step": 28730 }, { "epoch": 4.6876019575856445, "grad_norm": 0.2777078449726105, "learning_rate": 0.0009459998415083721, "loss": 0.1692, "num_input_tokens_seen": 62094304, "step": 28735 }, { "epoch": 4.688417618270799, "grad_norm": 0.08183332532644272, "learning_rate": 0.000945967661145191, "loss": 0.1268, "num_input_tokens_seen": 62104160, "step": 28740 }, { "epoch": 4.689233278955954, "grad_norm": 0.1658097356557846, "learning_rate": 0.0009459354717439097, "loss": 0.0926, "num_input_tokens_seen": 62114208, "step": 28745 }, { "epoch": 4.690048939641109, "grad_norm": 0.07683463394641876, "learning_rate": 0.0009459032733051805, "loss": 0.0867, "num_input_tokens_seen": 62124320, "step": 28750 }, { "epoch": 4.690864600326265, "grad_norm": 0.1957731395959854, "learning_rate": 0.0009458710658296555, "loss": 0.0979, "num_input_tokens_seen": 62135904, "step": 28755 }, { "epoch": 4.691680261011419, "grad_norm": 0.02776450477540493, "learning_rate": 0.000945838849317988, "loss": 0.0774, "num_input_tokens_seen": 62145632, "step": 28760 }, { "epoch": 4.692495921696574, "grad_norm": 0.10273412615060806, "learning_rate": 0.0009458066237708302, "loss": 0.1534, "num_input_tokens_seen": 62157376, "step": 28765 }, { "epoch": 4.693311582381729, "grad_norm": 0.01814623735845089, "learning_rate": 0.0009457743891888359, "loss": 0.0768, "num_input_tokens_seen": 62167648, "step": 28770 }, { "epoch": 4.694127243066884, "grad_norm": 0.2564343214035034, "learning_rate": 0.0009457421455726582, "loss": 0.1296, "num_input_tokens_seen": 62178784, "step": 28775 }, { "epoch": 4.69494290375204, "grad_norm": 0.07235551625490189, "learning_rate": 0.0009457098929229503, "loss": 0.1122, "num_input_tokens_seen": 62189600, "step": 28780 }, { "epoch": 4.695758564437194, "grad_norm": 0.18390199542045593, "learning_rate": 0.0009456776312403661, "loss": 0.1042, "num_input_tokens_seen": 62200768, "step": 28785 }, { "epoch": 4.696574225122349, "grad_norm": 0.039058052003383636, "learning_rate": 0.0009456453605255592, "loss": 0.0469, "num_input_tokens_seen": 62211520, "step": 28790 }, { "epoch": 4.697389885807504, "grad_norm": 0.2684352397918701, "learning_rate": 0.0009456130807791839, "loss": 0.3105, "num_input_tokens_seen": 62222624, "step": 28795 }, { "epoch": 4.698205546492659, "grad_norm": 0.025328971445560455, "learning_rate": 0.000945580792001894, "loss": 0.0929, "num_input_tokens_seen": 62232000, "step": 28800 }, { "epoch": 4.699021207177814, "grad_norm": 0.1835407316684723, "learning_rate": 0.0009455484941943442, "loss": 0.0953, "num_input_tokens_seen": 62241952, "step": 28805 }, { "epoch": 4.699836867862969, "grad_norm": 0.015583495609462261, "learning_rate": 0.0009455161873571889, "loss": 0.0627, "num_input_tokens_seen": 62252864, "step": 28810 }, { "epoch": 4.700652528548124, "grad_norm": 0.06854277104139328, "learning_rate": 0.000945483871491083, "loss": 0.0716, "num_input_tokens_seen": 62264096, "step": 28815 }, { "epoch": 4.701468189233279, "grad_norm": 0.07336314022541046, "learning_rate": 0.0009454515465966812, "loss": 0.1379, "num_input_tokens_seen": 62274176, "step": 28820 }, { "epoch": 4.702283849918434, "grad_norm": 0.019611230120062828, "learning_rate": 0.0009454192126746388, "loss": 0.039, "num_input_tokens_seen": 62285472, "step": 28825 }, { "epoch": 4.703099510603589, "grad_norm": 0.0149730509147048, "learning_rate": 0.000945386869725611, "loss": 0.0883, "num_input_tokens_seen": 62296256, "step": 28830 }, { "epoch": 4.7039151712887435, "grad_norm": 0.1251792311668396, "learning_rate": 0.0009453545177502532, "loss": 0.1685, "num_input_tokens_seen": 62307456, "step": 28835 }, { "epoch": 4.704730831973899, "grad_norm": 0.025677544996142387, "learning_rate": 0.0009453221567492211, "loss": 0.0219, "num_input_tokens_seen": 62317696, "step": 28840 }, { "epoch": 4.705546492659054, "grad_norm": 0.17929667234420776, "learning_rate": 0.0009452897867231705, "loss": 0.1908, "num_input_tokens_seen": 62328672, "step": 28845 }, { "epoch": 4.706362153344209, "grad_norm": 0.10408436506986618, "learning_rate": 0.0009452574076727576, "loss": 0.1479, "num_input_tokens_seen": 62339648, "step": 28850 }, { "epoch": 4.707177814029364, "grad_norm": 0.047870147973299026, "learning_rate": 0.0009452250195986385, "loss": 0.1515, "num_input_tokens_seen": 62350592, "step": 28855 }, { "epoch": 4.7079934747145185, "grad_norm": 0.07525742053985596, "learning_rate": 0.0009451926225014695, "loss": 0.0627, "num_input_tokens_seen": 62362528, "step": 28860 }, { "epoch": 4.708809135399674, "grad_norm": 0.015628913417458534, "learning_rate": 0.0009451602163819073, "loss": 0.1018, "num_input_tokens_seen": 62373600, "step": 28865 }, { "epoch": 4.709624796084829, "grad_norm": 0.05882667005062103, "learning_rate": 0.0009451278012406086, "loss": 0.0225, "num_input_tokens_seen": 62385568, "step": 28870 }, { "epoch": 4.710440456769984, "grad_norm": 0.008061857894062996, "learning_rate": 0.0009450953770782304, "loss": 0.0281, "num_input_tokens_seen": 62395872, "step": 28875 }, { "epoch": 4.711256117455139, "grad_norm": 0.07395133376121521, "learning_rate": 0.0009450629438954296, "loss": 0.0657, "num_input_tokens_seen": 62407296, "step": 28880 }, { "epoch": 4.712071778140293, "grad_norm": 0.0533161461353302, "learning_rate": 0.0009450305016928636, "loss": 0.0343, "num_input_tokens_seen": 62418656, "step": 28885 }, { "epoch": 4.712887438825448, "grad_norm": 0.01694626919925213, "learning_rate": 0.00094499805047119, "loss": 0.03, "num_input_tokens_seen": 62429792, "step": 28890 }, { "epoch": 4.713703099510604, "grad_norm": 0.0854320377111435, "learning_rate": 0.0009449655902310665, "loss": 0.1937, "num_input_tokens_seen": 62439936, "step": 28895 }, { "epoch": 4.714518760195759, "grad_norm": 0.08638182282447815, "learning_rate": 0.0009449331209731507, "loss": 0.1911, "num_input_tokens_seen": 62450720, "step": 28900 }, { "epoch": 4.715334420880914, "grad_norm": 0.18696919083595276, "learning_rate": 0.0009449006426981007, "loss": 0.1344, "num_input_tokens_seen": 62461856, "step": 28905 }, { "epoch": 4.716150081566068, "grad_norm": 0.02467340975999832, "learning_rate": 0.0009448681554065749, "loss": 0.1609, "num_input_tokens_seen": 62472064, "step": 28910 }, { "epoch": 4.716965742251223, "grad_norm": 0.13728247582912445, "learning_rate": 0.0009448356590992316, "loss": 0.0877, "num_input_tokens_seen": 62482976, "step": 28915 }, { "epoch": 4.717781402936378, "grad_norm": 0.03479057550430298, "learning_rate": 0.0009448031537767292, "loss": 0.1147, "num_input_tokens_seen": 62492960, "step": 28920 }, { "epoch": 4.718597063621534, "grad_norm": 0.008539623580873013, "learning_rate": 0.0009447706394397266, "loss": 0.0833, "num_input_tokens_seen": 62503648, "step": 28925 }, { "epoch": 4.719412724306689, "grad_norm": 0.2139635682106018, "learning_rate": 0.0009447381160888831, "loss": 0.1307, "num_input_tokens_seen": 62513920, "step": 28930 }, { "epoch": 4.720228384991843, "grad_norm": 0.21587203443050385, "learning_rate": 0.0009447055837248572, "loss": 0.1285, "num_input_tokens_seen": 62524960, "step": 28935 }, { "epoch": 4.721044045676998, "grad_norm": 0.1398542821407318, "learning_rate": 0.0009446730423483085, "loss": 0.0451, "num_input_tokens_seen": 62535776, "step": 28940 }, { "epoch": 4.721859706362153, "grad_norm": 0.010080617852509022, "learning_rate": 0.0009446404919598965, "loss": 0.0644, "num_input_tokens_seen": 62548096, "step": 28945 }, { "epoch": 4.722675367047309, "grad_norm": 0.1806686967611313, "learning_rate": 0.000944607932560281, "loss": 0.2239, "num_input_tokens_seen": 62559168, "step": 28950 }, { "epoch": 4.7234910277324635, "grad_norm": 0.10033036023378372, "learning_rate": 0.0009445753641501215, "loss": 0.0873, "num_input_tokens_seen": 62570080, "step": 28955 }, { "epoch": 4.724306688417618, "grad_norm": 0.015866931527853012, "learning_rate": 0.0009445427867300785, "loss": 0.1447, "num_input_tokens_seen": 62580384, "step": 28960 }, { "epoch": 4.725122349102773, "grad_norm": 0.01197098009288311, "learning_rate": 0.0009445102003008119, "loss": 0.1351, "num_input_tokens_seen": 62591488, "step": 28965 }, { "epoch": 4.725938009787928, "grad_norm": 0.15940040349960327, "learning_rate": 0.0009444776048629822, "loss": 0.1103, "num_input_tokens_seen": 62603264, "step": 28970 }, { "epoch": 4.726753670473083, "grad_norm": 0.038952793926000595, "learning_rate": 0.0009444450004172498, "loss": 0.0648, "num_input_tokens_seen": 62614624, "step": 28975 }, { "epoch": 4.7275693311582385, "grad_norm": 0.25991666316986084, "learning_rate": 0.0009444123869642758, "loss": 0.1644, "num_input_tokens_seen": 62625856, "step": 28980 }, { "epoch": 4.728384991843393, "grad_norm": 0.016241293400526047, "learning_rate": 0.000944379764504721, "loss": 0.0401, "num_input_tokens_seen": 62635616, "step": 28985 }, { "epoch": 4.729200652528548, "grad_norm": 0.038051947951316833, "learning_rate": 0.0009443471330392466, "loss": 0.1934, "num_input_tokens_seen": 62645920, "step": 28990 }, { "epoch": 4.730016313213703, "grad_norm": 0.05174148455262184, "learning_rate": 0.0009443144925685137, "loss": 0.0372, "num_input_tokens_seen": 62656864, "step": 28995 }, { "epoch": 4.730831973898858, "grad_norm": 0.10264194756746292, "learning_rate": 0.0009442818430931841, "loss": 0.1653, "num_input_tokens_seen": 62666496, "step": 29000 }, { "epoch": 4.731647634584013, "grad_norm": 0.20121224224567413, "learning_rate": 0.0009442491846139192, "loss": 0.1943, "num_input_tokens_seen": 62677152, "step": 29005 }, { "epoch": 4.732463295269168, "grad_norm": 0.04519479721784592, "learning_rate": 0.0009442165171313811, "loss": 0.0546, "num_input_tokens_seen": 62687104, "step": 29010 }, { "epoch": 4.733278955954323, "grad_norm": 0.02056044153869152, "learning_rate": 0.0009441838406462318, "loss": 0.0996, "num_input_tokens_seen": 62697472, "step": 29015 }, { "epoch": 4.734094616639478, "grad_norm": 0.0412328764796257, "learning_rate": 0.0009441511551591333, "loss": 0.1148, "num_input_tokens_seen": 62708736, "step": 29020 }, { "epoch": 4.734910277324633, "grad_norm": 0.007981553673744202, "learning_rate": 0.0009441184606707484, "loss": 0.0868, "num_input_tokens_seen": 62720672, "step": 29025 }, { "epoch": 4.735725938009788, "grad_norm": 0.017568625509738922, "learning_rate": 0.0009440857571817394, "loss": 0.1278, "num_input_tokens_seen": 62732576, "step": 29030 }, { "epoch": 4.736541598694943, "grad_norm": 0.16702494025230408, "learning_rate": 0.000944053044692769, "loss": 0.0729, "num_input_tokens_seen": 62742880, "step": 29035 }, { "epoch": 4.737357259380098, "grad_norm": 0.21429851651191711, "learning_rate": 0.0009440203232045005, "loss": 0.0599, "num_input_tokens_seen": 62753184, "step": 29040 }, { "epoch": 4.738172920065253, "grad_norm": 0.04229350760579109, "learning_rate": 0.000943987592717597, "loss": 0.1866, "num_input_tokens_seen": 62763936, "step": 29045 }, { "epoch": 4.738988580750408, "grad_norm": 0.21390216052532196, "learning_rate": 0.0009439548532327216, "loss": 0.1543, "num_input_tokens_seen": 62775328, "step": 29050 }, { "epoch": 4.739804241435563, "grad_norm": 0.033763255923986435, "learning_rate": 0.0009439221047505377, "loss": 0.0381, "num_input_tokens_seen": 62786656, "step": 29055 }, { "epoch": 4.740619902120718, "grad_norm": 0.4876824915409088, "learning_rate": 0.0009438893472717094, "loss": 0.1986, "num_input_tokens_seen": 62797664, "step": 29060 }, { "epoch": 4.741435562805873, "grad_norm": 0.05257996916770935, "learning_rate": 0.0009438565807969005, "loss": 0.1252, "num_input_tokens_seen": 62808416, "step": 29065 }, { "epoch": 4.742251223491028, "grad_norm": 0.13803677260875702, "learning_rate": 0.0009438238053267746, "loss": 0.1153, "num_input_tokens_seen": 62818624, "step": 29070 }, { "epoch": 4.743066884176183, "grad_norm": 0.03434896469116211, "learning_rate": 0.0009437910208619964, "loss": 0.1261, "num_input_tokens_seen": 62828928, "step": 29075 }, { "epoch": 4.7438825448613375, "grad_norm": 0.051103875041007996, "learning_rate": 0.0009437582274032301, "loss": 0.1211, "num_input_tokens_seen": 62838784, "step": 29080 }, { "epoch": 4.744698205546492, "grad_norm": 0.06288142502307892, "learning_rate": 0.0009437254249511404, "loss": 0.1152, "num_input_tokens_seen": 62849888, "step": 29085 }, { "epoch": 4.745513866231647, "grad_norm": 0.006992727518081665, "learning_rate": 0.0009436926135063922, "loss": 0.0351, "num_input_tokens_seen": 62862848, "step": 29090 }, { "epoch": 4.746329526916803, "grad_norm": 0.08556337654590607, "learning_rate": 0.0009436597930696502, "loss": 0.0419, "num_input_tokens_seen": 62873440, "step": 29095 }, { "epoch": 4.747145187601958, "grad_norm": 0.14834004640579224, "learning_rate": 0.0009436269636415798, "loss": 0.0841, "num_input_tokens_seen": 62882528, "step": 29100 }, { "epoch": 4.7479608482871125, "grad_norm": 0.033871617168188095, "learning_rate": 0.000943594125222846, "loss": 0.0712, "num_input_tokens_seen": 62893824, "step": 29105 }, { "epoch": 4.748776508972267, "grad_norm": 0.02020910568535328, "learning_rate": 0.0009435612778141146, "loss": 0.1716, "num_input_tokens_seen": 62903840, "step": 29110 }, { "epoch": 4.749592169657422, "grad_norm": 0.21955829858779907, "learning_rate": 0.0009435284214160513, "loss": 0.2021, "num_input_tokens_seen": 62915040, "step": 29115 }, { "epoch": 4.750407830342578, "grad_norm": 0.013592018745839596, "learning_rate": 0.0009434955560293217, "loss": 0.0469, "num_input_tokens_seen": 62927136, "step": 29120 }, { "epoch": 4.751223491027733, "grad_norm": 0.04946539178490639, "learning_rate": 0.0009434626816545922, "loss": 0.1829, "num_input_tokens_seen": 62938144, "step": 29125 }, { "epoch": 4.7520391517128875, "grad_norm": 0.4289376437664032, "learning_rate": 0.0009434297982925288, "loss": 0.3736, "num_input_tokens_seen": 62949888, "step": 29130 }, { "epoch": 4.752854812398042, "grad_norm": 0.014377152547240257, "learning_rate": 0.000943396905943798, "loss": 0.047, "num_input_tokens_seen": 62960864, "step": 29135 }, { "epoch": 4.753670473083197, "grad_norm": 0.07028697431087494, "learning_rate": 0.0009433640046090664, "loss": 0.1671, "num_input_tokens_seen": 62970720, "step": 29140 }, { "epoch": 4.754486133768353, "grad_norm": 0.14718550443649292, "learning_rate": 0.0009433310942890009, "loss": 0.1276, "num_input_tokens_seen": 62980704, "step": 29145 }, { "epoch": 4.755301794453508, "grad_norm": 0.05152687057852745, "learning_rate": 0.0009432981749842683, "loss": 0.0898, "num_input_tokens_seen": 62991008, "step": 29150 }, { "epoch": 4.7561174551386625, "grad_norm": 0.04468585178256035, "learning_rate": 0.0009432652466955358, "loss": 0.0548, "num_input_tokens_seen": 63000864, "step": 29155 }, { "epoch": 4.756933115823817, "grad_norm": 0.018594171851873398, "learning_rate": 0.0009432323094234708, "loss": 0.0329, "num_input_tokens_seen": 63011776, "step": 29160 }, { "epoch": 4.757748776508972, "grad_norm": 0.21704287827014923, "learning_rate": 0.0009431993631687408, "loss": 0.1588, "num_input_tokens_seen": 63023296, "step": 29165 }, { "epoch": 4.758564437194127, "grad_norm": 0.05709686875343323, "learning_rate": 0.0009431664079320134, "loss": 0.1016, "num_input_tokens_seen": 63035168, "step": 29170 }, { "epoch": 4.759380097879282, "grad_norm": 0.011319992132484913, "learning_rate": 0.0009431334437139565, "loss": 0.1046, "num_input_tokens_seen": 63046272, "step": 29175 }, { "epoch": 4.760195758564437, "grad_norm": 0.06291552633047104, "learning_rate": 0.0009431004705152384, "loss": 0.1456, "num_input_tokens_seen": 63056096, "step": 29180 }, { "epoch": 4.761011419249592, "grad_norm": 0.09090526401996613, "learning_rate": 0.0009430674883365269, "loss": 0.0367, "num_input_tokens_seen": 63066432, "step": 29185 }, { "epoch": 4.761827079934747, "grad_norm": 0.19455280900001526, "learning_rate": 0.0009430344971784909, "loss": 0.0772, "num_input_tokens_seen": 63077856, "step": 29190 }, { "epoch": 4.762642740619902, "grad_norm": 0.023099692538380623, "learning_rate": 0.0009430014970417986, "loss": 0.1152, "num_input_tokens_seen": 63088160, "step": 29195 }, { "epoch": 4.763458401305057, "grad_norm": 0.2389380782842636, "learning_rate": 0.0009429684879271191, "loss": 0.1268, "num_input_tokens_seen": 63098240, "step": 29200 }, { "epoch": 4.764274061990212, "grad_norm": 0.2107832282781601, "learning_rate": 0.0009429354698351212, "loss": 0.088, "num_input_tokens_seen": 63109472, "step": 29205 }, { "epoch": 4.765089722675367, "grad_norm": 0.004489370156079531, "learning_rate": 0.0009429024427664741, "loss": 0.0149, "num_input_tokens_seen": 63121120, "step": 29210 }, { "epoch": 4.765905383360522, "grad_norm": 0.0221620574593544, "learning_rate": 0.0009428694067218473, "loss": 0.0978, "num_input_tokens_seen": 63131264, "step": 29215 }, { "epoch": 4.766721044045677, "grad_norm": 0.07446268945932388, "learning_rate": 0.0009428363617019099, "loss": 0.0791, "num_input_tokens_seen": 63143488, "step": 29220 }, { "epoch": 4.767536704730832, "grad_norm": 0.1201087012887001, "learning_rate": 0.0009428033077073319, "loss": 0.2029, "num_input_tokens_seen": 63154080, "step": 29225 }, { "epoch": 4.768352365415987, "grad_norm": 0.05503406375646591, "learning_rate": 0.0009427702447387833, "loss": 0.1802, "num_input_tokens_seen": 63165728, "step": 29230 }, { "epoch": 4.769168026101142, "grad_norm": 0.0661977156996727, "learning_rate": 0.0009427371727969338, "loss": 0.168, "num_input_tokens_seen": 63176224, "step": 29235 }, { "epoch": 4.769983686786297, "grad_norm": 0.01881541684269905, "learning_rate": 0.000942704091882454, "loss": 0.0746, "num_input_tokens_seen": 63185792, "step": 29240 }, { "epoch": 4.770799347471452, "grad_norm": 0.05708598718047142, "learning_rate": 0.0009426710019960141, "loss": 0.0806, "num_input_tokens_seen": 63196800, "step": 29245 }, { "epoch": 4.771615008156607, "grad_norm": 0.0396232083439827, "learning_rate": 0.0009426379031382848, "loss": 0.0472, "num_input_tokens_seen": 63207744, "step": 29250 }, { "epoch": 4.7724306688417615, "grad_norm": 0.037397295236587524, "learning_rate": 0.0009426047953099368, "loss": 0.0347, "num_input_tokens_seen": 63219712, "step": 29255 }, { "epoch": 4.773246329526917, "grad_norm": 0.246902734041214, "learning_rate": 0.0009425716785116412, "loss": 0.3137, "num_input_tokens_seen": 63229408, "step": 29260 }, { "epoch": 4.774061990212072, "grad_norm": 0.0640609934926033, "learning_rate": 0.0009425385527440691, "loss": 0.0455, "num_input_tokens_seen": 63239424, "step": 29265 }, { "epoch": 4.774877650897227, "grad_norm": 0.012092667631804943, "learning_rate": 0.0009425054180078917, "loss": 0.1158, "num_input_tokens_seen": 63251232, "step": 29270 }, { "epoch": 4.775693311582382, "grad_norm": 0.03524189069867134, "learning_rate": 0.0009424722743037808, "loss": 0.0792, "num_input_tokens_seen": 63263584, "step": 29275 }, { "epoch": 4.7765089722675365, "grad_norm": 0.04715615138411522, "learning_rate": 0.0009424391216324078, "loss": 0.0403, "num_input_tokens_seen": 63273376, "step": 29280 }, { "epoch": 4.777324632952691, "grad_norm": 0.029380058869719505, "learning_rate": 0.0009424059599944449, "loss": 0.085, "num_input_tokens_seen": 63282176, "step": 29285 }, { "epoch": 4.778140293637847, "grad_norm": 0.21220123767852783, "learning_rate": 0.0009423727893905638, "loss": 0.0706, "num_input_tokens_seen": 63293440, "step": 29290 }, { "epoch": 4.778955954323002, "grad_norm": 0.2774854600429535, "learning_rate": 0.0009423396098214372, "loss": 0.0728, "num_input_tokens_seen": 63303136, "step": 29295 }, { "epoch": 4.779771615008157, "grad_norm": 0.3588896691799164, "learning_rate": 0.0009423064212877371, "loss": 0.311, "num_input_tokens_seen": 63314944, "step": 29300 }, { "epoch": 4.780587275693311, "grad_norm": 0.18132130801677704, "learning_rate": 0.0009422732237901361, "loss": 0.0476, "num_input_tokens_seen": 63324736, "step": 29305 }, { "epoch": 4.781402936378466, "grad_norm": 0.1575162261724472, "learning_rate": 0.0009422400173293073, "loss": 0.1672, "num_input_tokens_seen": 63334880, "step": 29310 }, { "epoch": 4.782218597063622, "grad_norm": 0.29085320234298706, "learning_rate": 0.0009422068019059235, "loss": 0.2645, "num_input_tokens_seen": 63346336, "step": 29315 }, { "epoch": 4.783034257748777, "grad_norm": 0.2051205039024353, "learning_rate": 0.0009421735775206582, "loss": 0.0909, "num_input_tokens_seen": 63357088, "step": 29320 }, { "epoch": 4.783849918433932, "grad_norm": 0.18533475697040558, "learning_rate": 0.000942140344174184, "loss": 0.0807, "num_input_tokens_seen": 63368512, "step": 29325 }, { "epoch": 4.784665579119086, "grad_norm": 0.05170245096087456, "learning_rate": 0.0009421071018671749, "loss": 0.0812, "num_input_tokens_seen": 63380288, "step": 29330 }, { "epoch": 4.785481239804241, "grad_norm": 0.032106757164001465, "learning_rate": 0.0009420738506003047, "loss": 0.0393, "num_input_tokens_seen": 63391808, "step": 29335 }, { "epoch": 4.786296900489396, "grad_norm": 0.009413869120180607, "learning_rate": 0.0009420405903742471, "loss": 0.0281, "num_input_tokens_seen": 63402528, "step": 29340 }, { "epoch": 4.787112561174552, "grad_norm": 0.14599008858203888, "learning_rate": 0.000942007321189676, "loss": 0.1658, "num_input_tokens_seen": 63414368, "step": 29345 }, { "epoch": 4.787928221859707, "grad_norm": 0.29077810049057007, "learning_rate": 0.0009419740430472659, "loss": 0.0954, "num_input_tokens_seen": 63424896, "step": 29350 }, { "epoch": 4.788743882544861, "grad_norm": 0.04795224219560623, "learning_rate": 0.0009419407559476911, "loss": 0.1213, "num_input_tokens_seen": 63435200, "step": 29355 }, { "epoch": 4.789559543230016, "grad_norm": 0.23997856676578522, "learning_rate": 0.0009419074598916262, "loss": 0.1989, "num_input_tokens_seen": 63444960, "step": 29360 }, { "epoch": 4.790375203915171, "grad_norm": 0.2853996753692627, "learning_rate": 0.0009418741548797462, "loss": 0.1052, "num_input_tokens_seen": 63455520, "step": 29365 }, { "epoch": 4.791190864600326, "grad_norm": 0.08539510518312454, "learning_rate": 0.0009418408409127257, "loss": 0.0527, "num_input_tokens_seen": 63466784, "step": 29370 }, { "epoch": 4.7920065252854815, "grad_norm": 0.1659606695175171, "learning_rate": 0.0009418075179912402, "loss": 0.0795, "num_input_tokens_seen": 63477664, "step": 29375 }, { "epoch": 4.792822185970636, "grad_norm": 0.10241387784481049, "learning_rate": 0.0009417741861159648, "loss": 0.09, "num_input_tokens_seen": 63488864, "step": 29380 }, { "epoch": 4.793637846655791, "grad_norm": 0.25465595722198486, "learning_rate": 0.0009417408452875751, "loss": 0.1875, "num_input_tokens_seen": 63500000, "step": 29385 }, { "epoch": 4.794453507340946, "grad_norm": 0.013809128664433956, "learning_rate": 0.0009417074955067467, "loss": 0.1046, "num_input_tokens_seen": 63511424, "step": 29390 }, { "epoch": 4.795269168026101, "grad_norm": 0.04915191978216171, "learning_rate": 0.0009416741367741557, "loss": 0.1079, "num_input_tokens_seen": 63521216, "step": 29395 }, { "epoch": 4.7960848287112565, "grad_norm": 0.26613113284111023, "learning_rate": 0.0009416407690904778, "loss": 0.1033, "num_input_tokens_seen": 63531488, "step": 29400 }, { "epoch": 4.796900489396411, "grad_norm": 0.012351097539067268, "learning_rate": 0.0009416073924563897, "loss": 0.079, "num_input_tokens_seen": 63542784, "step": 29405 }, { "epoch": 4.797716150081566, "grad_norm": 0.005100678652524948, "learning_rate": 0.0009415740068725674, "loss": 0.06, "num_input_tokens_seen": 63554240, "step": 29410 }, { "epoch": 4.798531810766721, "grad_norm": 0.2662050127983093, "learning_rate": 0.0009415406123396878, "loss": 0.1314, "num_input_tokens_seen": 63566208, "step": 29415 }, { "epoch": 4.799347471451876, "grad_norm": 0.2572949230670929, "learning_rate": 0.0009415072088584275, "loss": 0.1517, "num_input_tokens_seen": 63576640, "step": 29420 }, { "epoch": 4.800163132137031, "grad_norm": 0.5557500720024109, "learning_rate": 0.0009414737964294635, "loss": 0.1396, "num_input_tokens_seen": 63587264, "step": 29425 }, { "epoch": 4.800978792822186, "grad_norm": 0.05724980682134628, "learning_rate": 0.0009414403750534731, "loss": 0.0484, "num_input_tokens_seen": 63597824, "step": 29430 }, { "epoch": 4.801794453507341, "grad_norm": 0.007257545366883278, "learning_rate": 0.0009414069447311333, "loss": 0.0988, "num_input_tokens_seen": 63608896, "step": 29435 }, { "epoch": 4.802610114192496, "grad_norm": 0.12548641860485077, "learning_rate": 0.0009413735054631218, "loss": 0.1451, "num_input_tokens_seen": 63619840, "step": 29440 }, { "epoch": 4.803425774877651, "grad_norm": 0.022364402189850807, "learning_rate": 0.0009413400572501164, "loss": 0.0349, "num_input_tokens_seen": 63630112, "step": 29445 }, { "epoch": 4.804241435562806, "grad_norm": 0.13613943755626678, "learning_rate": 0.0009413066000927948, "loss": 0.037, "num_input_tokens_seen": 63640384, "step": 29450 }, { "epoch": 4.80505709624796, "grad_norm": 0.06001855060458183, "learning_rate": 0.0009412731339918353, "loss": 0.0666, "num_input_tokens_seen": 63650752, "step": 29455 }, { "epoch": 4.805872756933116, "grad_norm": 0.08017107099294662, "learning_rate": 0.0009412396589479157, "loss": 0.1239, "num_input_tokens_seen": 63660544, "step": 29460 }, { "epoch": 4.806688417618271, "grad_norm": 0.007623352110385895, "learning_rate": 0.0009412061749617147, "loss": 0.0372, "num_input_tokens_seen": 63671328, "step": 29465 }, { "epoch": 4.807504078303426, "grad_norm": 0.08126839250326157, "learning_rate": 0.0009411726820339109, "loss": 0.0415, "num_input_tokens_seen": 63682304, "step": 29470 }, { "epoch": 4.808319738988581, "grad_norm": 0.02001064084470272, "learning_rate": 0.000941139180165183, "loss": 0.0137, "num_input_tokens_seen": 63692608, "step": 29475 }, { "epoch": 4.809135399673735, "grad_norm": 0.010059705004096031, "learning_rate": 0.0009411056693562101, "loss": 0.0354, "num_input_tokens_seen": 63704448, "step": 29480 }, { "epoch": 4.809951060358891, "grad_norm": 0.10895252227783203, "learning_rate": 0.000941072149607671, "loss": 0.2122, "num_input_tokens_seen": 63714848, "step": 29485 }, { "epoch": 4.810766721044046, "grad_norm": 0.02574615553021431, "learning_rate": 0.0009410386209202455, "loss": 0.0484, "num_input_tokens_seen": 63726336, "step": 29490 }, { "epoch": 4.811582381729201, "grad_norm": 0.00544143607839942, "learning_rate": 0.0009410050832946127, "loss": 0.0905, "num_input_tokens_seen": 63737792, "step": 29495 }, { "epoch": 4.8123980424143555, "grad_norm": 0.03158778324723244, "learning_rate": 0.0009409715367314527, "loss": 0.0503, "num_input_tokens_seen": 63748256, "step": 29500 }, { "epoch": 4.81321370309951, "grad_norm": 0.1094653531908989, "learning_rate": 0.0009409379812314447, "loss": 0.0539, "num_input_tokens_seen": 63758432, "step": 29505 }, { "epoch": 4.814029363784666, "grad_norm": 0.0789109617471695, "learning_rate": 0.0009409044167952694, "loss": 0.1382, "num_input_tokens_seen": 63767744, "step": 29510 }, { "epoch": 4.814845024469821, "grad_norm": 0.3024810254573822, "learning_rate": 0.0009408708434236066, "loss": 0.1406, "num_input_tokens_seen": 63779648, "step": 29515 }, { "epoch": 4.815660685154976, "grad_norm": 0.028889697045087814, "learning_rate": 0.000940837261117137, "loss": 0.1202, "num_input_tokens_seen": 63790368, "step": 29520 }, { "epoch": 4.8164763458401305, "grad_norm": 0.009988417848944664, "learning_rate": 0.000940803669876541, "loss": 0.0549, "num_input_tokens_seen": 63801536, "step": 29525 }, { "epoch": 4.817292006525285, "grad_norm": 0.03842491656541824, "learning_rate": 0.0009407700697024995, "loss": 0.1412, "num_input_tokens_seen": 63812224, "step": 29530 }, { "epoch": 4.81810766721044, "grad_norm": 0.12616626918315887, "learning_rate": 0.0009407364605956933, "loss": 0.0376, "num_input_tokens_seen": 63823520, "step": 29535 }, { "epoch": 4.818923327895595, "grad_norm": 0.24169041216373444, "learning_rate": 0.0009407028425568036, "loss": 0.0622, "num_input_tokens_seen": 63835808, "step": 29540 }, { "epoch": 4.819738988580751, "grad_norm": 0.3266375958919525, "learning_rate": 0.0009406692155865117, "loss": 0.1315, "num_input_tokens_seen": 63846528, "step": 29545 }, { "epoch": 4.8205546492659055, "grad_norm": 0.0059782578609883785, "learning_rate": 0.0009406355796854993, "loss": 0.0899, "num_input_tokens_seen": 63857760, "step": 29550 }, { "epoch": 4.82137030995106, "grad_norm": 0.13781596720218658, "learning_rate": 0.0009406019348544478, "loss": 0.0945, "num_input_tokens_seen": 63867840, "step": 29555 }, { "epoch": 4.822185970636215, "grad_norm": 0.005153140053153038, "learning_rate": 0.000940568281094039, "loss": 0.1169, "num_input_tokens_seen": 63878848, "step": 29560 }, { "epoch": 4.82300163132137, "grad_norm": 0.012185310013592243, "learning_rate": 0.0009405346184049552, "loss": 0.0916, "num_input_tokens_seen": 63890816, "step": 29565 }, { "epoch": 4.823817292006526, "grad_norm": 0.2665148377418518, "learning_rate": 0.0009405009467878787, "loss": 0.1471, "num_input_tokens_seen": 63901632, "step": 29570 }, { "epoch": 4.8246329526916805, "grad_norm": 0.3611166775226593, "learning_rate": 0.0009404672662434914, "loss": 0.2292, "num_input_tokens_seen": 63911520, "step": 29575 }, { "epoch": 4.825448613376835, "grad_norm": 0.11029206961393356, "learning_rate": 0.0009404335767724763, "loss": 0.2225, "num_input_tokens_seen": 63922176, "step": 29580 }, { "epoch": 4.82626427406199, "grad_norm": 0.09035732597112656, "learning_rate": 0.000940399878375516, "loss": 0.2299, "num_input_tokens_seen": 63932544, "step": 29585 }, { "epoch": 4.827079934747145, "grad_norm": 0.055607203394174576, "learning_rate": 0.0009403661710532936, "loss": 0.103, "num_input_tokens_seen": 63944032, "step": 29590 }, { "epoch": 4.827895595432301, "grad_norm": 0.04743117094039917, "learning_rate": 0.0009403324548064919, "loss": 0.0749, "num_input_tokens_seen": 63955968, "step": 29595 }, { "epoch": 4.828711256117455, "grad_norm": 0.0531173013150692, "learning_rate": 0.0009402987296357946, "loss": 0.0851, "num_input_tokens_seen": 63967136, "step": 29600 }, { "epoch": 4.82952691680261, "grad_norm": 0.0809497982263565, "learning_rate": 0.0009402649955418848, "loss": 0.1069, "num_input_tokens_seen": 63977728, "step": 29605 }, { "epoch": 4.830342577487765, "grad_norm": 0.07989180833101273, "learning_rate": 0.0009402312525254464, "loss": 0.1295, "num_input_tokens_seen": 63988256, "step": 29610 }, { "epoch": 4.83115823817292, "grad_norm": 0.07448780536651611, "learning_rate": 0.0009401975005871632, "loss": 0.2039, "num_input_tokens_seen": 63998400, "step": 29615 }, { "epoch": 4.831973898858075, "grad_norm": 0.26234593987464905, "learning_rate": 0.0009401637397277193, "loss": 0.0886, "num_input_tokens_seen": 64009312, "step": 29620 }, { "epoch": 4.8327895595432295, "grad_norm": 0.1408432275056839, "learning_rate": 0.0009401299699477988, "loss": 0.1501, "num_input_tokens_seen": 64020192, "step": 29625 }, { "epoch": 4.833605220228385, "grad_norm": 0.20116648077964783, "learning_rate": 0.0009400961912480861, "loss": 0.1166, "num_input_tokens_seen": 64032352, "step": 29630 }, { "epoch": 4.83442088091354, "grad_norm": 0.044378455728292465, "learning_rate": 0.0009400624036292657, "loss": 0.072, "num_input_tokens_seen": 64043360, "step": 29635 }, { "epoch": 4.835236541598695, "grad_norm": 0.06201139837503433, "learning_rate": 0.0009400286070920226, "loss": 0.1189, "num_input_tokens_seen": 64053952, "step": 29640 }, { "epoch": 4.83605220228385, "grad_norm": 0.08605514466762543, "learning_rate": 0.0009399948016370415, "loss": 0.0357, "num_input_tokens_seen": 64063424, "step": 29645 }, { "epoch": 4.8368678629690045, "grad_norm": 0.07138156145811081, "learning_rate": 0.0009399609872650075, "loss": 0.0497, "num_input_tokens_seen": 64073568, "step": 29650 }, { "epoch": 4.83768352365416, "grad_norm": 0.07217609882354736, "learning_rate": 0.000939927163976606, "loss": 0.1953, "num_input_tokens_seen": 64084704, "step": 29655 }, { "epoch": 4.838499184339315, "grad_norm": 0.0773155689239502, "learning_rate": 0.0009398933317725225, "loss": 0.231, "num_input_tokens_seen": 64095200, "step": 29660 }, { "epoch": 4.83931484502447, "grad_norm": 0.10701734572649002, "learning_rate": 0.0009398594906534424, "loss": 0.1688, "num_input_tokens_seen": 64105728, "step": 29665 }, { "epoch": 4.840130505709625, "grad_norm": 0.24366922676563263, "learning_rate": 0.0009398256406200518, "loss": 0.1445, "num_input_tokens_seen": 64116544, "step": 29670 }, { "epoch": 4.8409461663947795, "grad_norm": 0.06484881788492203, "learning_rate": 0.0009397917816730368, "loss": 0.106, "num_input_tokens_seen": 64128064, "step": 29675 }, { "epoch": 4.841761827079935, "grad_norm": 0.14218920469284058, "learning_rate": 0.0009397579138130832, "loss": 0.1686, "num_input_tokens_seen": 64139328, "step": 29680 }, { "epoch": 4.84257748776509, "grad_norm": 0.1667938232421875, "learning_rate": 0.0009397240370408777, "loss": 0.1401, "num_input_tokens_seen": 64148992, "step": 29685 }, { "epoch": 4.843393148450245, "grad_norm": 0.06840559095144272, "learning_rate": 0.0009396901513571068, "loss": 0.2409, "num_input_tokens_seen": 64159232, "step": 29690 }, { "epoch": 4.8442088091354, "grad_norm": 0.04041779041290283, "learning_rate": 0.0009396562567624572, "loss": 0.0819, "num_input_tokens_seen": 64169728, "step": 29695 }, { "epoch": 4.8450244698205545, "grad_norm": 0.1621711403131485, "learning_rate": 0.0009396223532576159, "loss": 0.0646, "num_input_tokens_seen": 64181760, "step": 29700 }, { "epoch": 4.845840130505709, "grad_norm": 0.03825107589364052, "learning_rate": 0.0009395884408432696, "loss": 0.1504, "num_input_tokens_seen": 64192480, "step": 29705 }, { "epoch": 4.846655791190865, "grad_norm": 0.0052245259284973145, "learning_rate": 0.0009395545195201062, "loss": 0.1366, "num_input_tokens_seen": 64203008, "step": 29710 }, { "epoch": 4.84747145187602, "grad_norm": 0.031087854877114296, "learning_rate": 0.0009395205892888126, "loss": 0.0807, "num_input_tokens_seen": 64213696, "step": 29715 }, { "epoch": 4.848287112561175, "grad_norm": 0.02025986835360527, "learning_rate": 0.0009394866501500769, "loss": 0.0323, "num_input_tokens_seen": 64222784, "step": 29720 }, { "epoch": 4.849102773246329, "grad_norm": 0.017888156697154045, "learning_rate": 0.0009394527021045866, "loss": 0.0199, "num_input_tokens_seen": 64233312, "step": 29725 }, { "epoch": 4.849918433931484, "grad_norm": 0.030029356479644775, "learning_rate": 0.0009394187451530298, "loss": 0.0646, "num_input_tokens_seen": 64245056, "step": 29730 }, { "epoch": 4.850734094616639, "grad_norm": 0.16684281826019287, "learning_rate": 0.0009393847792960948, "loss": 0.1564, "num_input_tokens_seen": 64256032, "step": 29735 }, { "epoch": 4.851549755301795, "grad_norm": 0.028144538402557373, "learning_rate": 0.0009393508045344697, "loss": 0.097, "num_input_tokens_seen": 64266848, "step": 29740 }, { "epoch": 4.85236541598695, "grad_norm": 0.17019401490688324, "learning_rate": 0.0009393168208688432, "loss": 0.116, "num_input_tokens_seen": 64276448, "step": 29745 }, { "epoch": 4.853181076672104, "grad_norm": 0.01753625087440014, "learning_rate": 0.0009392828282999042, "loss": 0.1403, "num_input_tokens_seen": 64288000, "step": 29750 }, { "epoch": 4.853996737357259, "grad_norm": 0.05499972775578499, "learning_rate": 0.0009392488268283412, "loss": 0.0819, "num_input_tokens_seen": 64299104, "step": 29755 }, { "epoch": 4.854812398042414, "grad_norm": 0.02356194145977497, "learning_rate": 0.0009392148164548436, "loss": 0.0414, "num_input_tokens_seen": 64310880, "step": 29760 }, { "epoch": 4.85562805872757, "grad_norm": 0.23860061168670654, "learning_rate": 0.0009391807971801005, "loss": 0.0683, "num_input_tokens_seen": 64321696, "step": 29765 }, { "epoch": 4.856443719412725, "grad_norm": 0.02195931412279606, "learning_rate": 0.0009391467690048014, "loss": 0.067, "num_input_tokens_seen": 64332544, "step": 29770 }, { "epoch": 4.857259380097879, "grad_norm": 0.08725380897521973, "learning_rate": 0.000939112731929636, "loss": 0.1052, "num_input_tokens_seen": 64344640, "step": 29775 }, { "epoch": 4.858075040783034, "grad_norm": 0.17282482981681824, "learning_rate": 0.000939078685955294, "loss": 0.1707, "num_input_tokens_seen": 64356608, "step": 29780 }, { "epoch": 4.858890701468189, "grad_norm": 0.030611051246523857, "learning_rate": 0.0009390446310824654, "loss": 0.0402, "num_input_tokens_seen": 64366592, "step": 29785 }, { "epoch": 4.859706362153344, "grad_norm": 0.006273448932915926, "learning_rate": 0.0009390105673118405, "loss": 0.1194, "num_input_tokens_seen": 64378560, "step": 29790 }, { "epoch": 4.8605220228384995, "grad_norm": 0.2683796286582947, "learning_rate": 0.0009389764946441094, "loss": 0.1358, "num_input_tokens_seen": 64389664, "step": 29795 }, { "epoch": 4.861337683523654, "grad_norm": 0.07152576744556427, "learning_rate": 0.0009389424130799628, "loss": 0.0708, "num_input_tokens_seen": 64400544, "step": 29800 }, { "epoch": 4.862153344208809, "grad_norm": 0.08048900961875916, "learning_rate": 0.0009389083226200914, "loss": 0.0461, "num_input_tokens_seen": 64411744, "step": 29805 }, { "epoch": 4.862969004893964, "grad_norm": 0.07328837364912033, "learning_rate": 0.0009388742232651859, "loss": 0.0862, "num_input_tokens_seen": 64421440, "step": 29810 }, { "epoch": 4.863784665579119, "grad_norm": 0.038748499006032944, "learning_rate": 0.0009388401150159377, "loss": 0.0792, "num_input_tokens_seen": 64432256, "step": 29815 }, { "epoch": 4.864600326264274, "grad_norm": 0.02115444466471672, "learning_rate": 0.0009388059978730377, "loss": 0.0326, "num_input_tokens_seen": 64442688, "step": 29820 }, { "epoch": 4.865415986949429, "grad_norm": 0.022876601666212082, "learning_rate": 0.0009387718718371776, "loss": 0.0351, "num_input_tokens_seen": 64453792, "step": 29825 }, { "epoch": 4.866231647634584, "grad_norm": 0.016570046544075012, "learning_rate": 0.0009387377369090489, "loss": 0.0518, "num_input_tokens_seen": 64465248, "step": 29830 }, { "epoch": 4.867047308319739, "grad_norm": 0.024501780048012733, "learning_rate": 0.0009387035930893433, "loss": 0.024, "num_input_tokens_seen": 64477792, "step": 29835 }, { "epoch": 4.867862969004894, "grad_norm": 0.015719836577773094, "learning_rate": 0.0009386694403787529, "loss": 0.1272, "num_input_tokens_seen": 64489824, "step": 29840 }, { "epoch": 4.868678629690049, "grad_norm": 0.4363064467906952, "learning_rate": 0.0009386352787779697, "loss": 0.1239, "num_input_tokens_seen": 64501248, "step": 29845 }, { "epoch": 4.869494290375204, "grad_norm": 0.02297714538872242, "learning_rate": 0.0009386011082876863, "loss": 0.0636, "num_input_tokens_seen": 64513088, "step": 29850 }, { "epoch": 4.870309951060359, "grad_norm": 0.04365037381649017, "learning_rate": 0.000938566928908595, "loss": 0.127, "num_input_tokens_seen": 64524096, "step": 29855 }, { "epoch": 4.871125611745514, "grad_norm": 0.01078298594802618, "learning_rate": 0.0009385327406413883, "loss": 0.0282, "num_input_tokens_seen": 64534944, "step": 29860 }, { "epoch": 4.871941272430669, "grad_norm": 0.07390675693750381, "learning_rate": 0.0009384985434867597, "loss": 0.0329, "num_input_tokens_seen": 64546464, "step": 29865 }, { "epoch": 4.872756933115824, "grad_norm": 0.016646305099129677, "learning_rate": 0.0009384643374454014, "loss": 0.023, "num_input_tokens_seen": 64558464, "step": 29870 }, { "epoch": 4.873572593800979, "grad_norm": 0.03762689605355263, "learning_rate": 0.0009384301225180074, "loss": 0.0652, "num_input_tokens_seen": 64568864, "step": 29875 }, { "epoch": 4.874388254486134, "grad_norm": 0.0683576911687851, "learning_rate": 0.0009383958987052706, "loss": 0.1253, "num_input_tokens_seen": 64580192, "step": 29880 }, { "epoch": 4.875203915171289, "grad_norm": 0.0032370425760746002, "learning_rate": 0.0009383616660078849, "loss": 0.2725, "num_input_tokens_seen": 64589632, "step": 29885 }, { "epoch": 4.876019575856444, "grad_norm": 0.01514100655913353, "learning_rate": 0.0009383274244265438, "loss": 0.1108, "num_input_tokens_seen": 64600192, "step": 29890 }, { "epoch": 4.876835236541599, "grad_norm": 0.06531043350696564, "learning_rate": 0.0009382931739619416, "loss": 0.0504, "num_input_tokens_seen": 64610656, "step": 29895 }, { "epoch": 4.877650897226753, "grad_norm": 0.058074701577425, "learning_rate": 0.000938258914614772, "loss": 0.0522, "num_input_tokens_seen": 64621728, "step": 29900 }, { "epoch": 4.878466557911908, "grad_norm": 0.015192513354122639, "learning_rate": 0.0009382246463857295, "loss": 0.0948, "num_input_tokens_seen": 64633024, "step": 29905 }, { "epoch": 4.879282218597064, "grad_norm": 0.07241601496934891, "learning_rate": 0.0009381903692755087, "loss": 0.1767, "num_input_tokens_seen": 64644480, "step": 29910 }, { "epoch": 4.880097879282219, "grad_norm": 0.0064300913363695145, "learning_rate": 0.0009381560832848043, "loss": 0.0913, "num_input_tokens_seen": 64655520, "step": 29915 }, { "epoch": 4.8809135399673735, "grad_norm": 0.2770669162273407, "learning_rate": 0.0009381217884143109, "loss": 0.2039, "num_input_tokens_seen": 64667392, "step": 29920 }, { "epoch": 4.881729200652528, "grad_norm": 0.09652873128652573, "learning_rate": 0.0009380874846647236, "loss": 0.0567, "num_input_tokens_seen": 64678528, "step": 29925 }, { "epoch": 4.882544861337683, "grad_norm": 0.06691589951515198, "learning_rate": 0.0009380531720367378, "loss": 0.0366, "num_input_tokens_seen": 64689248, "step": 29930 }, { "epoch": 4.883360522022839, "grad_norm": 0.010861546732485294, "learning_rate": 0.0009380188505310488, "loss": 0.0329, "num_input_tokens_seen": 64698688, "step": 29935 }, { "epoch": 4.884176182707994, "grad_norm": 0.021841704845428467, "learning_rate": 0.0009379845201483519, "loss": 0.0614, "num_input_tokens_seen": 64710368, "step": 29940 }, { "epoch": 4.8849918433931485, "grad_norm": 0.3616441488265991, "learning_rate": 0.0009379501808893433, "loss": 0.1891, "num_input_tokens_seen": 64720416, "step": 29945 }, { "epoch": 4.885807504078303, "grad_norm": 0.1140311136841774, "learning_rate": 0.0009379158327547186, "loss": 0.2245, "num_input_tokens_seen": 64731360, "step": 29950 }, { "epoch": 4.886623164763458, "grad_norm": 0.19789689779281616, "learning_rate": 0.000937881475745174, "loss": 0.1402, "num_input_tokens_seen": 64740928, "step": 29955 }, { "epoch": 4.887438825448614, "grad_norm": 0.03600520268082619, "learning_rate": 0.0009378471098614059, "loss": 0.062, "num_input_tokens_seen": 64750784, "step": 29960 }, { "epoch": 4.888254486133769, "grad_norm": 0.23173433542251587, "learning_rate": 0.0009378127351041106, "loss": 0.173, "num_input_tokens_seen": 64762272, "step": 29965 }, { "epoch": 4.8890701468189235, "grad_norm": 0.1283300220966339, "learning_rate": 0.0009377783514739848, "loss": 0.115, "num_input_tokens_seen": 64773984, "step": 29970 }, { "epoch": 4.889885807504078, "grad_norm": 0.030921900644898415, "learning_rate": 0.0009377439589717254, "loss": 0.0907, "num_input_tokens_seen": 64784704, "step": 29975 }, { "epoch": 4.890701468189233, "grad_norm": 0.022784793749451637, "learning_rate": 0.0009377095575980293, "loss": 0.1639, "num_input_tokens_seen": 64795072, "step": 29980 }, { "epoch": 4.891517128874388, "grad_norm": 0.0242206659168005, "learning_rate": 0.0009376751473535939, "loss": 0.1102, "num_input_tokens_seen": 64805888, "step": 29985 }, { "epoch": 4.892332789559543, "grad_norm": 0.26550939679145813, "learning_rate": 0.0009376407282391161, "loss": 0.077, "num_input_tokens_seen": 64817120, "step": 29990 }, { "epoch": 4.8931484502446985, "grad_norm": 0.025271253660321236, "learning_rate": 0.0009376063002552939, "loss": 0.068, "num_input_tokens_seen": 64829504, "step": 29995 }, { "epoch": 4.893964110929853, "grad_norm": 0.02558698132634163, "learning_rate": 0.0009375718634028249, "loss": 0.0362, "num_input_tokens_seen": 64839488, "step": 30000 }, { "epoch": 4.894779771615008, "grad_norm": 0.4274783730506897, "learning_rate": 0.0009375374176824071, "loss": 0.222, "num_input_tokens_seen": 64851168, "step": 30005 }, { "epoch": 4.895595432300163, "grad_norm": 0.028903350234031677, "learning_rate": 0.0009375029630947384, "loss": 0.1904, "num_input_tokens_seen": 64860736, "step": 30010 }, { "epoch": 4.896411092985318, "grad_norm": 0.012105568312108517, "learning_rate": 0.000937468499640517, "loss": 0.0793, "num_input_tokens_seen": 64871968, "step": 30015 }, { "epoch": 4.897226753670473, "grad_norm": 0.2322903871536255, "learning_rate": 0.0009374340273204416, "loss": 0.2846, "num_input_tokens_seen": 64882912, "step": 30020 }, { "epoch": 4.898042414355628, "grad_norm": 0.06537723541259766, "learning_rate": 0.0009373995461352107, "loss": 0.0465, "num_input_tokens_seen": 64894272, "step": 30025 }, { "epoch": 4.898858075040783, "grad_norm": 0.18969091773033142, "learning_rate": 0.0009373650560855232, "loss": 0.1186, "num_input_tokens_seen": 64904672, "step": 30030 }, { "epoch": 4.899673735725938, "grad_norm": 0.2091660499572754, "learning_rate": 0.0009373305571720779, "loss": 0.1496, "num_input_tokens_seen": 64915296, "step": 30035 }, { "epoch": 4.900489396411093, "grad_norm": 0.030176764354109764, "learning_rate": 0.0009372960493955741, "loss": 0.1566, "num_input_tokens_seen": 64927840, "step": 30040 }, { "epoch": 4.901305057096248, "grad_norm": 0.041412852704524994, "learning_rate": 0.0009372615327567111, "loss": 0.0968, "num_input_tokens_seen": 64938976, "step": 30045 }, { "epoch": 4.902120717781403, "grad_norm": 0.026320433244109154, "learning_rate": 0.0009372270072561885, "loss": 0.0668, "num_input_tokens_seen": 64949056, "step": 30050 }, { "epoch": 4.902936378466558, "grad_norm": 0.023878064006567, "learning_rate": 0.0009371924728947059, "loss": 0.1959, "num_input_tokens_seen": 64959680, "step": 30055 }, { "epoch": 4.903752039151713, "grad_norm": 0.11372017860412598, "learning_rate": 0.0009371579296729631, "loss": 0.095, "num_input_tokens_seen": 64971232, "step": 30060 }, { "epoch": 4.904567699836868, "grad_norm": 0.04866662621498108, "learning_rate": 0.0009371233775916604, "loss": 0.1943, "num_input_tokens_seen": 64983040, "step": 30065 }, { "epoch": 4.9053833605220225, "grad_norm": 0.15420961380004883, "learning_rate": 0.0009370888166514979, "loss": 0.1675, "num_input_tokens_seen": 64993792, "step": 30070 }, { "epoch": 4.906199021207177, "grad_norm": 0.0190042182803154, "learning_rate": 0.0009370542468531761, "loss": 0.1009, "num_input_tokens_seen": 65004608, "step": 30075 }, { "epoch": 4.907014681892333, "grad_norm": 0.03797129914164543, "learning_rate": 0.0009370196681973955, "loss": 0.0749, "num_input_tokens_seen": 65015360, "step": 30080 }, { "epoch": 4.907830342577488, "grad_norm": 0.10052059590816498, "learning_rate": 0.0009369850806848569, "loss": 0.2176, "num_input_tokens_seen": 65025216, "step": 30085 }, { "epoch": 4.908646003262643, "grad_norm": 0.07716162502765656, "learning_rate": 0.0009369504843162613, "loss": 0.0644, "num_input_tokens_seen": 65036160, "step": 30090 }, { "epoch": 4.9094616639477975, "grad_norm": 0.19943881034851074, "learning_rate": 0.0009369158790923098, "loss": 0.0612, "num_input_tokens_seen": 65046112, "step": 30095 }, { "epoch": 4.910277324632952, "grad_norm": 0.03232225030660629, "learning_rate": 0.0009368812650137038, "loss": 0.0405, "num_input_tokens_seen": 65056960, "step": 30100 }, { "epoch": 4.911092985318108, "grad_norm": 0.014520195312798023, "learning_rate": 0.0009368466420811446, "loss": 0.1118, "num_input_tokens_seen": 65067488, "step": 30105 }, { "epoch": 4.911908646003263, "grad_norm": 0.12783744931221008, "learning_rate": 0.0009368120102953341, "loss": 0.138, "num_input_tokens_seen": 65076768, "step": 30110 }, { "epoch": 4.912724306688418, "grad_norm": 0.046797335147857666, "learning_rate": 0.0009367773696569742, "loss": 0.0359, "num_input_tokens_seen": 65088416, "step": 30115 }, { "epoch": 4.9135399673735725, "grad_norm": 0.008519193157553673, "learning_rate": 0.0009367427201667667, "loss": 0.0835, "num_input_tokens_seen": 65100192, "step": 30120 }, { "epoch": 4.914355628058727, "grad_norm": 0.14391852915287018, "learning_rate": 0.000936708061825414, "loss": 0.0602, "num_input_tokens_seen": 65110336, "step": 30125 }, { "epoch": 4.915171288743883, "grad_norm": 0.19014444947242737, "learning_rate": 0.0009366733946336184, "loss": 0.2144, "num_input_tokens_seen": 65121408, "step": 30130 }, { "epoch": 4.915986949429038, "grad_norm": 0.040380168706178665, "learning_rate": 0.0009366387185920824, "loss": 0.0334, "num_input_tokens_seen": 65131584, "step": 30135 }, { "epoch": 4.916802610114193, "grad_norm": 0.049314629286527634, "learning_rate": 0.0009366040337015089, "loss": 0.0762, "num_input_tokens_seen": 65143072, "step": 30140 }, { "epoch": 4.917618270799347, "grad_norm": 0.022450562566518784, "learning_rate": 0.0009365693399626009, "loss": 0.1344, "num_input_tokens_seen": 65155008, "step": 30145 }, { "epoch": 4.918433931484502, "grad_norm": 0.006674426142126322, "learning_rate": 0.0009365346373760613, "loss": 0.1624, "num_input_tokens_seen": 65165760, "step": 30150 }, { "epoch": 4.919249592169657, "grad_norm": 0.007688583806157112, "learning_rate": 0.0009364999259425935, "loss": 0.0116, "num_input_tokens_seen": 65175808, "step": 30155 }, { "epoch": 4.920065252854813, "grad_norm": 0.033700115978717804, "learning_rate": 0.0009364652056629008, "loss": 0.0651, "num_input_tokens_seen": 65186176, "step": 30160 }, { "epoch": 4.920880913539968, "grad_norm": 0.1259598284959793, "learning_rate": 0.0009364304765376872, "loss": 0.1328, "num_input_tokens_seen": 65196096, "step": 30165 }, { "epoch": 4.921696574225122, "grad_norm": 0.052267853170633316, "learning_rate": 0.0009363957385676563, "loss": 0.2088, "num_input_tokens_seen": 65206432, "step": 30170 }, { "epoch": 4.922512234910277, "grad_norm": 0.06683950871229172, "learning_rate": 0.0009363609917535122, "loss": 0.0611, "num_input_tokens_seen": 65216800, "step": 30175 }, { "epoch": 4.923327895595432, "grad_norm": 0.08807369321584702, "learning_rate": 0.000936326236095959, "loss": 0.0807, "num_input_tokens_seen": 65225280, "step": 30180 }, { "epoch": 4.924143556280587, "grad_norm": 0.1126125380396843, "learning_rate": 0.0009362914715957011, "loss": 0.166, "num_input_tokens_seen": 65235776, "step": 30185 }, { "epoch": 4.924959216965743, "grad_norm": 0.17967797815799713, "learning_rate": 0.000936256698253443, "loss": 0.1025, "num_input_tokens_seen": 65246496, "step": 30190 }, { "epoch": 4.925774877650897, "grad_norm": 0.12007225304841995, "learning_rate": 0.0009362219160698895, "loss": 0.0478, "num_input_tokens_seen": 65256864, "step": 30195 }, { "epoch": 4.926590538336052, "grad_norm": 0.011540076695382595, "learning_rate": 0.0009361871250457457, "loss": 0.151, "num_input_tokens_seen": 65268320, "step": 30200 }, { "epoch": 4.927406199021207, "grad_norm": 0.006911895237863064, "learning_rate": 0.0009361523251817161, "loss": 0.1814, "num_input_tokens_seen": 65279232, "step": 30205 }, { "epoch": 4.928221859706362, "grad_norm": 0.030219666659832, "learning_rate": 0.0009361175164785065, "loss": 0.1072, "num_input_tokens_seen": 65289664, "step": 30210 }, { "epoch": 4.9290375203915175, "grad_norm": 0.09913137555122375, "learning_rate": 0.0009360826989368223, "loss": 0.0381, "num_input_tokens_seen": 65301088, "step": 30215 }, { "epoch": 4.929853181076672, "grad_norm": 0.01770567148923874, "learning_rate": 0.0009360478725573689, "loss": 0.1241, "num_input_tokens_seen": 65311328, "step": 30220 }, { "epoch": 4.930668841761827, "grad_norm": 0.025420457124710083, "learning_rate": 0.0009360130373408522, "loss": 0.068, "num_input_tokens_seen": 65321088, "step": 30225 }, { "epoch": 4.931484502446982, "grad_norm": 0.284669429063797, "learning_rate": 0.000935978193287978, "loss": 0.1822, "num_input_tokens_seen": 65332576, "step": 30230 }, { "epoch": 4.932300163132137, "grad_norm": 0.1337418556213379, "learning_rate": 0.0009359433403994529, "loss": 0.2099, "num_input_tokens_seen": 65343200, "step": 30235 }, { "epoch": 4.933115823817292, "grad_norm": 0.015668069943785667, "learning_rate": 0.0009359084786759828, "loss": 0.0709, "num_input_tokens_seen": 65355040, "step": 30240 }, { "epoch": 4.933931484502447, "grad_norm": 0.037535425275564194, "learning_rate": 0.0009358736081182746, "loss": 0.1577, "num_input_tokens_seen": 65366592, "step": 30245 }, { "epoch": 4.934747145187602, "grad_norm": 0.19737955927848816, "learning_rate": 0.0009358387287270346, "loss": 0.154, "num_input_tokens_seen": 65377568, "step": 30250 }, { "epoch": 4.935562805872757, "grad_norm": 0.03458595648407936, "learning_rate": 0.0009358038405029699, "loss": 0.192, "num_input_tokens_seen": 65388064, "step": 30255 }, { "epoch": 4.936378466557912, "grad_norm": 0.1987319439649582, "learning_rate": 0.0009357689434467875, "loss": 0.141, "num_input_tokens_seen": 65398816, "step": 30260 }, { "epoch": 4.937194127243067, "grad_norm": 0.18942292034626007, "learning_rate": 0.0009357340375591947, "loss": 0.0928, "num_input_tokens_seen": 65409184, "step": 30265 }, { "epoch": 4.938009787928221, "grad_norm": 0.0764419436454773, "learning_rate": 0.0009356991228408988, "loss": 0.0767, "num_input_tokens_seen": 65422208, "step": 30270 }, { "epoch": 4.938825448613377, "grad_norm": 0.01329710427671671, "learning_rate": 0.0009356641992926075, "loss": 0.0533, "num_input_tokens_seen": 65433984, "step": 30275 }, { "epoch": 4.939641109298532, "grad_norm": 0.0733145996928215, "learning_rate": 0.0009356292669150286, "loss": 0.0747, "num_input_tokens_seen": 65444864, "step": 30280 }, { "epoch": 4.940456769983687, "grad_norm": 0.047081612050533295, "learning_rate": 0.0009355943257088698, "loss": 0.0963, "num_input_tokens_seen": 65456960, "step": 30285 }, { "epoch": 4.941272430668842, "grad_norm": 0.10892040282487869, "learning_rate": 0.0009355593756748395, "loss": 0.0535, "num_input_tokens_seen": 65466816, "step": 30290 }, { "epoch": 4.942088091353996, "grad_norm": 0.017515188083052635, "learning_rate": 0.0009355244168136459, "loss": 0.1046, "num_input_tokens_seen": 65478240, "step": 30295 }, { "epoch": 4.942903752039152, "grad_norm": 0.014847962185740471, "learning_rate": 0.0009354894491259975, "loss": 0.0855, "num_input_tokens_seen": 65489408, "step": 30300 }, { "epoch": 4.943719412724307, "grad_norm": 0.02151155099272728, "learning_rate": 0.0009354544726126029, "loss": 0.0291, "num_input_tokens_seen": 65499904, "step": 30305 }, { "epoch": 4.944535073409462, "grad_norm": 0.43711721897125244, "learning_rate": 0.000935419487274171, "loss": 0.0833, "num_input_tokens_seen": 65510144, "step": 30310 }, { "epoch": 4.945350734094617, "grad_norm": 0.2166256606578827, "learning_rate": 0.0009353844931114108, "loss": 0.1275, "num_input_tokens_seen": 65520704, "step": 30315 }, { "epoch": 4.946166394779771, "grad_norm": 0.10503847897052765, "learning_rate": 0.0009353494901250316, "loss": 0.1356, "num_input_tokens_seen": 65531424, "step": 30320 }, { "epoch": 4.946982055464927, "grad_norm": 0.3435342311859131, "learning_rate": 0.0009353144783157428, "loss": 0.1432, "num_input_tokens_seen": 65542272, "step": 30325 }, { "epoch": 4.947797716150082, "grad_norm": 0.0990108996629715, "learning_rate": 0.0009352794576842536, "loss": 0.0943, "num_input_tokens_seen": 65553664, "step": 30330 }, { "epoch": 4.948613376835237, "grad_norm": 0.680467963218689, "learning_rate": 0.0009352444282312742, "loss": 0.1843, "num_input_tokens_seen": 65564992, "step": 30335 }, { "epoch": 4.9494290375203915, "grad_norm": 0.2405286282300949, "learning_rate": 0.0009352093899575143, "loss": 0.1095, "num_input_tokens_seen": 65576736, "step": 30340 }, { "epoch": 4.950244698205546, "grad_norm": 0.049668990075588226, "learning_rate": 0.0009351743428636838, "loss": 0.0338, "num_input_tokens_seen": 65587072, "step": 30345 }, { "epoch": 4.951060358890701, "grad_norm": 0.026410933583974838, "learning_rate": 0.0009351392869504934, "loss": 0.0325, "num_input_tokens_seen": 65598400, "step": 30350 }, { "epoch": 4.951876019575856, "grad_norm": 0.01361384242773056, "learning_rate": 0.0009351042222186533, "loss": 0.1572, "num_input_tokens_seen": 65609024, "step": 30355 }, { "epoch": 4.952691680261012, "grad_norm": 0.15705722570419312, "learning_rate": 0.0009350691486688743, "loss": 0.2538, "num_input_tokens_seen": 65618368, "step": 30360 }, { "epoch": 4.9535073409461665, "grad_norm": 0.1279543787240982, "learning_rate": 0.0009350340663018668, "loss": 0.0771, "num_input_tokens_seen": 65629216, "step": 30365 }, { "epoch": 4.954323001631321, "grad_norm": 0.01910022459924221, "learning_rate": 0.0009349989751183422, "loss": 0.0606, "num_input_tokens_seen": 65639904, "step": 30370 }, { "epoch": 4.955138662316476, "grad_norm": 0.014316494576632977, "learning_rate": 0.0009349638751190115, "loss": 0.0762, "num_input_tokens_seen": 65651936, "step": 30375 }, { "epoch": 4.955954323001631, "grad_norm": 0.05893901363015175, "learning_rate": 0.0009349287663045862, "loss": 0.1634, "num_input_tokens_seen": 65662848, "step": 30380 }, { "epoch": 4.956769983686787, "grad_norm": 0.2651807367801666, "learning_rate": 0.0009348936486757775, "loss": 0.122, "num_input_tokens_seen": 65674272, "step": 30385 }, { "epoch": 4.9575856443719415, "grad_norm": 0.3959857225418091, "learning_rate": 0.0009348585222332975, "loss": 0.2737, "num_input_tokens_seen": 65684288, "step": 30390 }, { "epoch": 4.958401305057096, "grad_norm": 0.03873610496520996, "learning_rate": 0.0009348233869778577, "loss": 0.0281, "num_input_tokens_seen": 65694784, "step": 30395 }, { "epoch": 4.959216965742251, "grad_norm": 0.03320920094847679, "learning_rate": 0.0009347882429101706, "loss": 0.0251, "num_input_tokens_seen": 65704768, "step": 30400 }, { "epoch": 4.960032626427406, "grad_norm": 0.055013399571180344, "learning_rate": 0.000934753090030948, "loss": 0.3051, "num_input_tokens_seen": 65715264, "step": 30405 }, { "epoch": 4.960848287112562, "grad_norm": 0.01581265963613987, "learning_rate": 0.0009347179283409027, "loss": 0.084, "num_input_tokens_seen": 65726624, "step": 30410 }, { "epoch": 4.9616639477977165, "grad_norm": 0.15133565664291382, "learning_rate": 0.0009346827578407468, "loss": 0.1525, "num_input_tokens_seen": 65737760, "step": 30415 }, { "epoch": 4.962479608482871, "grad_norm": 0.05148269236087799, "learning_rate": 0.0009346475785311936, "loss": 0.1763, "num_input_tokens_seen": 65749280, "step": 30420 }, { "epoch": 4.963295269168026, "grad_norm": 0.09984282404184341, "learning_rate": 0.0009346123904129558, "loss": 0.0952, "num_input_tokens_seen": 65759616, "step": 30425 }, { "epoch": 4.964110929853181, "grad_norm": 0.03934243321418762, "learning_rate": 0.0009345771934867464, "loss": 0.0789, "num_input_tokens_seen": 65771104, "step": 30430 }, { "epoch": 4.964926590538336, "grad_norm": 0.035313550382852554, "learning_rate": 0.000934541987753279, "loss": 0.059, "num_input_tokens_seen": 65782688, "step": 30435 }, { "epoch": 4.9657422512234906, "grad_norm": 0.037588831037282944, "learning_rate": 0.0009345067732132671, "loss": 0.0505, "num_input_tokens_seen": 65793408, "step": 30440 }, { "epoch": 4.966557911908646, "grad_norm": 0.28265854716300964, "learning_rate": 0.0009344715498674241, "loss": 0.1367, "num_input_tokens_seen": 65805024, "step": 30445 }, { "epoch": 4.967373572593801, "grad_norm": 0.020411711186170578, "learning_rate": 0.0009344363177164639, "loss": 0.0968, "num_input_tokens_seen": 65816736, "step": 30450 }, { "epoch": 4.968189233278956, "grad_norm": 0.12036476284265518, "learning_rate": 0.0009344010767611007, "loss": 0.2162, "num_input_tokens_seen": 65828512, "step": 30455 }, { "epoch": 4.969004893964111, "grad_norm": 0.24372944235801697, "learning_rate": 0.0009343658270020485, "loss": 0.1547, "num_input_tokens_seen": 65839200, "step": 30460 }, { "epoch": 4.9698205546492655, "grad_norm": 0.04841368645429611, "learning_rate": 0.000934330568440022, "loss": 0.0459, "num_input_tokens_seen": 65851104, "step": 30465 }, { "epoch": 4.970636215334421, "grad_norm": 0.04966012388467789, "learning_rate": 0.0009342953010757353, "loss": 0.11, "num_input_tokens_seen": 65861696, "step": 30470 }, { "epoch": 4.971451876019576, "grad_norm": 0.19069804251194, "learning_rate": 0.0009342600249099036, "loss": 0.1176, "num_input_tokens_seen": 65871264, "step": 30475 }, { "epoch": 4.972267536704731, "grad_norm": 0.048126090317964554, "learning_rate": 0.0009342247399432414, "loss": 0.1139, "num_input_tokens_seen": 65882816, "step": 30480 }, { "epoch": 4.973083197389886, "grad_norm": 0.020289182662963867, "learning_rate": 0.0009341894461764641, "loss": 0.0724, "num_input_tokens_seen": 65892576, "step": 30485 }, { "epoch": 4.9738988580750405, "grad_norm": 0.03914694860577583, "learning_rate": 0.0009341541436102868, "loss": 0.1141, "num_input_tokens_seen": 65903744, "step": 30490 }, { "epoch": 4.974714518760196, "grad_norm": 0.03482364863157272, "learning_rate": 0.0009341188322454251, "loss": 0.1668, "num_input_tokens_seen": 65913504, "step": 30495 }, { "epoch": 4.975530179445351, "grad_norm": 0.01898271031677723, "learning_rate": 0.0009340835120825946, "loss": 0.0568, "num_input_tokens_seen": 65925152, "step": 30500 }, { "epoch": 4.976345840130506, "grad_norm": 0.10762995481491089, "learning_rate": 0.0009340481831225109, "loss": 0.106, "num_input_tokens_seen": 65936288, "step": 30505 }, { "epoch": 4.977161500815661, "grad_norm": 0.04676266387104988, "learning_rate": 0.0009340128453658902, "loss": 0.044, "num_input_tokens_seen": 65947104, "step": 30510 }, { "epoch": 4.9779771615008155, "grad_norm": 0.013631324283778667, "learning_rate": 0.0009339774988134487, "loss": 0.1765, "num_input_tokens_seen": 65958464, "step": 30515 }, { "epoch": 4.97879282218597, "grad_norm": 0.23239445686340332, "learning_rate": 0.0009339421434659025, "loss": 0.123, "num_input_tokens_seen": 65970784, "step": 30520 }, { "epoch": 4.979608482871125, "grad_norm": 0.11724288761615753, "learning_rate": 0.0009339067793239682, "loss": 0.1044, "num_input_tokens_seen": 65981088, "step": 30525 }, { "epoch": 4.980424143556281, "grad_norm": 0.08179371058940887, "learning_rate": 0.0009338714063883627, "loss": 0.0622, "num_input_tokens_seen": 65992608, "step": 30530 }, { "epoch": 4.981239804241436, "grad_norm": 0.06659191846847534, "learning_rate": 0.0009338360246598028, "loss": 0.0275, "num_input_tokens_seen": 66004416, "step": 30535 }, { "epoch": 4.9820554649265905, "grad_norm": 0.18371257185935974, "learning_rate": 0.0009338006341390053, "loss": 0.3027, "num_input_tokens_seen": 66014464, "step": 30540 }, { "epoch": 4.982871125611745, "grad_norm": 0.20001915097236633, "learning_rate": 0.0009337652348266879, "loss": 0.0386, "num_input_tokens_seen": 66024416, "step": 30545 }, { "epoch": 4.9836867862969, "grad_norm": 0.21984423696994781, "learning_rate": 0.0009337298267235675, "loss": 0.1813, "num_input_tokens_seen": 66035776, "step": 30550 }, { "epoch": 4.984502446982056, "grad_norm": 0.015670161694288254, "learning_rate": 0.0009336944098303621, "loss": 0.0501, "num_input_tokens_seen": 66046464, "step": 30555 }, { "epoch": 4.985318107667211, "grad_norm": 0.0296319667249918, "learning_rate": 0.0009336589841477893, "loss": 0.0785, "num_input_tokens_seen": 66055680, "step": 30560 }, { "epoch": 4.986133768352365, "grad_norm": 0.09039829671382904, "learning_rate": 0.0009336235496765669, "loss": 0.1786, "num_input_tokens_seen": 66066944, "step": 30565 }, { "epoch": 4.98694942903752, "grad_norm": 0.05969356372952461, "learning_rate": 0.0009335881064174134, "loss": 0.0984, "num_input_tokens_seen": 66077152, "step": 30570 }, { "epoch": 4.987765089722675, "grad_norm": 0.01346441637724638, "learning_rate": 0.0009335526543710466, "loss": 0.367, "num_input_tokens_seen": 66087712, "step": 30575 }, { "epoch": 4.988580750407831, "grad_norm": 0.08466268330812454, "learning_rate": 0.0009335171935381854, "loss": 0.11, "num_input_tokens_seen": 66098336, "step": 30580 }, { "epoch": 4.989396411092986, "grad_norm": 0.07348710298538208, "learning_rate": 0.0009334817239195483, "loss": 0.0621, "num_input_tokens_seen": 66109184, "step": 30585 }, { "epoch": 4.99021207177814, "grad_norm": 0.02474355883896351, "learning_rate": 0.0009334462455158543, "loss": 0.0252, "num_input_tokens_seen": 66120160, "step": 30590 }, { "epoch": 4.991027732463295, "grad_norm": 0.02579125203192234, "learning_rate": 0.0009334107583278222, "loss": 0.0987, "num_input_tokens_seen": 66131552, "step": 30595 }, { "epoch": 4.99184339314845, "grad_norm": 0.09439677000045776, "learning_rate": 0.0009333752623561711, "loss": 0.0677, "num_input_tokens_seen": 66142624, "step": 30600 }, { "epoch": 4.992659053833605, "grad_norm": 0.0035137098748236895, "learning_rate": 0.0009333397576016207, "loss": 0.0482, "num_input_tokens_seen": 66153568, "step": 30605 }, { "epoch": 4.993474714518761, "grad_norm": 0.07614894211292267, "learning_rate": 0.0009333042440648903, "loss": 0.0661, "num_input_tokens_seen": 66163840, "step": 30610 }, { "epoch": 4.994290375203915, "grad_norm": 0.1604684740304947, "learning_rate": 0.0009332687217466997, "loss": 0.216, "num_input_tokens_seen": 66173568, "step": 30615 }, { "epoch": 4.99510603588907, "grad_norm": 0.005750894080847502, "learning_rate": 0.000933233190647769, "loss": 0.1232, "num_input_tokens_seen": 66183136, "step": 30620 }, { "epoch": 4.995921696574225, "grad_norm": 0.01765310950577259, "learning_rate": 0.0009331976507688178, "loss": 0.0913, "num_input_tokens_seen": 66194560, "step": 30625 }, { "epoch": 4.99673735725938, "grad_norm": 0.17110048234462738, "learning_rate": 0.0009331621021105668, "loss": 0.1888, "num_input_tokens_seen": 66205440, "step": 30630 }, { "epoch": 4.997553017944535, "grad_norm": 0.07099224627017975, "learning_rate": 0.0009331265446737364, "loss": 0.195, "num_input_tokens_seen": 66215616, "step": 30635 }, { "epoch": 4.99836867862969, "grad_norm": 0.052238188683986664, "learning_rate": 0.0009330909784590469, "loss": 0.0538, "num_input_tokens_seen": 66226464, "step": 30640 }, { "epoch": 4.999184339314845, "grad_norm": 0.05132593587040901, "learning_rate": 0.0009330554034672194, "loss": 0.0958, "num_input_tokens_seen": 66238144, "step": 30645 }, { "epoch": 5.0, "grad_norm": 0.16655795276165009, "learning_rate": 0.0009330198196989749, "loss": 0.1424, "num_input_tokens_seen": 66248576, "step": 30650 }, { "epoch": 5.0, "eval_loss": 0.12099920213222504, "eval_runtime": 103.3952, "eval_samples_per_second": 26.355, "eval_steps_per_second": 6.596, "num_input_tokens_seen": 66248576, "step": 30650 }, { "epoch": 5.000815660685155, "grad_norm": 0.19292932748794556, "learning_rate": 0.0009329842271550342, "loss": 0.2331, "num_input_tokens_seen": 66259904, "step": 30655 }, { "epoch": 5.00163132137031, "grad_norm": 0.039342980831861496, "learning_rate": 0.0009329486258361191, "loss": 0.0778, "num_input_tokens_seen": 66271744, "step": 30660 }, { "epoch": 5.002446982055465, "grad_norm": 0.11099471151828766, "learning_rate": 0.0009329130157429507, "loss": 0.0978, "num_input_tokens_seen": 66283072, "step": 30665 }, { "epoch": 5.00326264274062, "grad_norm": 0.01309216022491455, "learning_rate": 0.000932877396876251, "loss": 0.0518, "num_input_tokens_seen": 66294784, "step": 30670 }, { "epoch": 5.004078303425775, "grad_norm": 0.024124326184391975, "learning_rate": 0.0009328417692367415, "loss": 0.1393, "num_input_tokens_seen": 66305728, "step": 30675 }, { "epoch": 5.00489396411093, "grad_norm": 0.07616019248962402, "learning_rate": 0.0009328061328251445, "loss": 0.0466, "num_input_tokens_seen": 66316000, "step": 30680 }, { "epoch": 5.005709624796085, "grad_norm": 0.07263064384460449, "learning_rate": 0.0009327704876421824, "loss": 0.1198, "num_input_tokens_seen": 66327488, "step": 30685 }, { "epoch": 5.006525285481239, "grad_norm": 0.010890113189816475, "learning_rate": 0.000932734833688577, "loss": 0.0965, "num_input_tokens_seen": 66339200, "step": 30690 }, { "epoch": 5.007340946166395, "grad_norm": 0.034294452518224716, "learning_rate": 0.0009326991709650514, "loss": 0.0183, "num_input_tokens_seen": 66350944, "step": 30695 }, { "epoch": 5.00815660685155, "grad_norm": 0.1619931310415268, "learning_rate": 0.0009326634994723282, "loss": 0.0652, "num_input_tokens_seen": 66362272, "step": 30700 }, { "epoch": 5.008972267536705, "grad_norm": 0.04089265316724777, "learning_rate": 0.0009326278192111304, "loss": 0.0523, "num_input_tokens_seen": 66371904, "step": 30705 }, { "epoch": 5.00978792822186, "grad_norm": 0.037255510687828064, "learning_rate": 0.0009325921301821809, "loss": 0.0339, "num_input_tokens_seen": 66383360, "step": 30710 }, { "epoch": 5.010603588907014, "grad_norm": 0.023775247856974602, "learning_rate": 0.000932556432386203, "loss": 0.0252, "num_input_tokens_seen": 66394112, "step": 30715 }, { "epoch": 5.011419249592169, "grad_norm": 0.004952778108417988, "learning_rate": 0.0009325207258239204, "loss": 0.0867, "num_input_tokens_seen": 66404992, "step": 30720 }, { "epoch": 5.012234910277325, "grad_norm": 0.20834074914455414, "learning_rate": 0.0009324850104960566, "loss": 0.0936, "num_input_tokens_seen": 66415680, "step": 30725 }, { "epoch": 5.01305057096248, "grad_norm": 0.20381319522857666, "learning_rate": 0.0009324492864033354, "loss": 0.1504, "num_input_tokens_seen": 66427072, "step": 30730 }, { "epoch": 5.013866231647635, "grad_norm": 0.02317776158452034, "learning_rate": 0.0009324135535464808, "loss": 0.0571, "num_input_tokens_seen": 66438720, "step": 30735 }, { "epoch": 5.014681892332789, "grad_norm": 0.162574902176857, "learning_rate": 0.000932377811926217, "loss": 0.0935, "num_input_tokens_seen": 66449216, "step": 30740 }, { "epoch": 5.015497553017944, "grad_norm": 0.025656161829829216, "learning_rate": 0.0009323420615432683, "loss": 0.119, "num_input_tokens_seen": 66460064, "step": 30745 }, { "epoch": 5.0163132137031, "grad_norm": 0.34151774644851685, "learning_rate": 0.0009323063023983593, "loss": 0.1343, "num_input_tokens_seen": 66470656, "step": 30750 }, { "epoch": 5.017128874388255, "grad_norm": 0.004854666069149971, "learning_rate": 0.0009322705344922146, "loss": 0.0562, "num_input_tokens_seen": 66480896, "step": 30755 }, { "epoch": 5.0179445350734095, "grad_norm": 0.03299418091773987, "learning_rate": 0.0009322347578255592, "loss": 0.151, "num_input_tokens_seen": 66491040, "step": 30760 }, { "epoch": 5.018760195758564, "grad_norm": 0.07489554584026337, "learning_rate": 0.0009321989723991181, "loss": 0.1743, "num_input_tokens_seen": 66500640, "step": 30765 }, { "epoch": 5.019575856443719, "grad_norm": 0.10389326512813568, "learning_rate": 0.0009321631782136166, "loss": 0.1137, "num_input_tokens_seen": 66509376, "step": 30770 }, { "epoch": 5.020391517128874, "grad_norm": 0.01117030717432499, "learning_rate": 0.0009321273752697798, "loss": 0.1189, "num_input_tokens_seen": 66519552, "step": 30775 }, { "epoch": 5.02120717781403, "grad_norm": 0.007174614816904068, "learning_rate": 0.0009320915635683338, "loss": 0.0188, "num_input_tokens_seen": 66530432, "step": 30780 }, { "epoch": 5.0220228384991845, "grad_norm": 0.1178077757358551, "learning_rate": 0.0009320557431100041, "loss": 0.0483, "num_input_tokens_seen": 66543296, "step": 30785 }, { "epoch": 5.022838499184339, "grad_norm": 0.10831668972969055, "learning_rate": 0.0009320199138955165, "loss": 0.1303, "num_input_tokens_seen": 66554304, "step": 30790 }, { "epoch": 5.023654159869494, "grad_norm": 0.019279837608337402, "learning_rate": 0.0009319840759255976, "loss": 0.0603, "num_input_tokens_seen": 66564800, "step": 30795 }, { "epoch": 5.024469820554649, "grad_norm": 0.03408272936940193, "learning_rate": 0.0009319482292009731, "loss": 0.0475, "num_input_tokens_seen": 66576800, "step": 30800 }, { "epoch": 5.025285481239805, "grad_norm": 0.19642093777656555, "learning_rate": 0.0009319123737223698, "loss": 0.07, "num_input_tokens_seen": 66587264, "step": 30805 }, { "epoch": 5.0261011419249595, "grad_norm": 0.2684004008769989, "learning_rate": 0.0009318765094905144, "loss": 0.0828, "num_input_tokens_seen": 66598848, "step": 30810 }, { "epoch": 5.026916802610114, "grad_norm": 0.2714649736881256, "learning_rate": 0.0009318406365061336, "loss": 0.3286, "num_input_tokens_seen": 66610560, "step": 30815 }, { "epoch": 5.027732463295269, "grad_norm": 0.04261276498436928, "learning_rate": 0.0009318047547699546, "loss": 0.0427, "num_input_tokens_seen": 66621248, "step": 30820 }, { "epoch": 5.028548123980424, "grad_norm": 0.281934916973114, "learning_rate": 0.0009317688642827044, "loss": 0.1371, "num_input_tokens_seen": 66632384, "step": 30825 }, { "epoch": 5.029363784665579, "grad_norm": 0.045836810022592545, "learning_rate": 0.0009317329650451103, "loss": 0.0374, "num_input_tokens_seen": 66643648, "step": 30830 }, { "epoch": 5.0301794453507345, "grad_norm": 0.18936778604984283, "learning_rate": 0.0009316970570579002, "loss": 0.1372, "num_input_tokens_seen": 66654528, "step": 30835 }, { "epoch": 5.030995106035889, "grad_norm": 0.008387645706534386, "learning_rate": 0.0009316611403218013, "loss": 0.1091, "num_input_tokens_seen": 66665536, "step": 30840 }, { "epoch": 5.031810766721044, "grad_norm": 0.1754762977361679, "learning_rate": 0.000931625214837542, "loss": 0.1136, "num_input_tokens_seen": 66676288, "step": 30845 }, { "epoch": 5.032626427406199, "grad_norm": 0.22303487360477448, "learning_rate": 0.0009315892806058501, "loss": 0.1583, "num_input_tokens_seen": 66687648, "step": 30850 }, { "epoch": 5.033442088091354, "grad_norm": 0.11185749620199203, "learning_rate": 0.0009315533376274541, "loss": 0.1291, "num_input_tokens_seen": 66698112, "step": 30855 }, { "epoch": 5.034257748776509, "grad_norm": 0.014345620758831501, "learning_rate": 0.0009315173859030821, "loss": 0.0844, "num_input_tokens_seen": 66709440, "step": 30860 }, { "epoch": 5.035073409461664, "grad_norm": 0.02847551926970482, "learning_rate": 0.0009314814254334627, "loss": 0.1353, "num_input_tokens_seen": 66720768, "step": 30865 }, { "epoch": 5.035889070146819, "grad_norm": 0.09638968110084534, "learning_rate": 0.000931445456219325, "loss": 0.0769, "num_input_tokens_seen": 66732160, "step": 30870 }, { "epoch": 5.036704730831974, "grad_norm": 0.02057075873017311, "learning_rate": 0.0009314094782613977, "loss": 0.0517, "num_input_tokens_seen": 66742368, "step": 30875 }, { "epoch": 5.037520391517129, "grad_norm": 0.05229242146015167, "learning_rate": 0.0009313734915604103, "loss": 0.1478, "num_input_tokens_seen": 66752832, "step": 30880 }, { "epoch": 5.0383360522022835, "grad_norm": 0.015186270698904991, "learning_rate": 0.0009313374961170917, "loss": 0.062, "num_input_tokens_seen": 66764032, "step": 30885 }, { "epoch": 5.039151712887439, "grad_norm": 0.0803709402680397, "learning_rate": 0.0009313014919321715, "loss": 0.1218, "num_input_tokens_seen": 66774944, "step": 30890 }, { "epoch": 5.039967373572594, "grad_norm": 0.0704301968216896, "learning_rate": 0.0009312654790063795, "loss": 0.1541, "num_input_tokens_seen": 66786240, "step": 30895 }, { "epoch": 5.040783034257749, "grad_norm": 0.0323120579123497, "learning_rate": 0.0009312294573404454, "loss": 0.0395, "num_input_tokens_seen": 66797376, "step": 30900 }, { "epoch": 5.041598694942904, "grad_norm": 0.17087383568286896, "learning_rate": 0.0009311934269350993, "loss": 0.0578, "num_input_tokens_seen": 66807680, "step": 30905 }, { "epoch": 5.0424143556280585, "grad_norm": 0.25635138154029846, "learning_rate": 0.0009311573877910716, "loss": 0.2143, "num_input_tokens_seen": 66818336, "step": 30910 }, { "epoch": 5.043230016313213, "grad_norm": 0.03489004820585251, "learning_rate": 0.0009311213399090921, "loss": 0.2087, "num_input_tokens_seen": 66830240, "step": 30915 }, { "epoch": 5.044045676998369, "grad_norm": 0.020329639315605164, "learning_rate": 0.000931085283289892, "loss": 0.091, "num_input_tokens_seen": 66840000, "step": 30920 }, { "epoch": 5.044861337683524, "grad_norm": 0.05295855551958084, "learning_rate": 0.0009310492179342016, "loss": 0.0732, "num_input_tokens_seen": 66849792, "step": 30925 }, { "epoch": 5.045676998368679, "grad_norm": 0.04636767506599426, "learning_rate": 0.0009310131438427521, "loss": 0.0234, "num_input_tokens_seen": 66860384, "step": 30930 }, { "epoch": 5.0464926590538335, "grad_norm": 0.0942985787987709, "learning_rate": 0.0009309770610162744, "loss": 0.1044, "num_input_tokens_seen": 66870880, "step": 30935 }, { "epoch": 5.047308319738988, "grad_norm": 0.009630602784454823, "learning_rate": 0.0009309409694555, "loss": 0.0774, "num_input_tokens_seen": 66882144, "step": 30940 }, { "epoch": 5.048123980424143, "grad_norm": 0.17799124121665955, "learning_rate": 0.0009309048691611599, "loss": 0.1953, "num_input_tokens_seen": 66893280, "step": 30945 }, { "epoch": 5.048939641109299, "grad_norm": 0.01871904544532299, "learning_rate": 0.0009308687601339861, "loss": 0.0307, "num_input_tokens_seen": 66904224, "step": 30950 }, { "epoch": 5.049755301794454, "grad_norm": 0.07792586088180542, "learning_rate": 0.0009308326423747103, "loss": 0.025, "num_input_tokens_seen": 66915936, "step": 30955 }, { "epoch": 5.0505709624796085, "grad_norm": 0.048931483179330826, "learning_rate": 0.0009307965158840644, "loss": 0.0606, "num_input_tokens_seen": 66926432, "step": 30960 }, { "epoch": 5.051386623164763, "grad_norm": 0.011593530885875225, "learning_rate": 0.0009307603806627807, "loss": 0.0806, "num_input_tokens_seen": 66937984, "step": 30965 }, { "epoch": 5.052202283849918, "grad_norm": 0.13193394243717194, "learning_rate": 0.0009307242367115914, "loss": 0.0623, "num_input_tokens_seen": 66949312, "step": 30970 }, { "epoch": 5.053017944535074, "grad_norm": 0.16529838740825653, "learning_rate": 0.000930688084031229, "loss": 0.1508, "num_input_tokens_seen": 66960032, "step": 30975 }, { "epoch": 5.053833605220229, "grad_norm": 0.0980365201830864, "learning_rate": 0.0009306519226224262, "loss": 0.1488, "num_input_tokens_seen": 66970208, "step": 30980 }, { "epoch": 5.054649265905383, "grad_norm": 0.1277938187122345, "learning_rate": 0.0009306157524859158, "loss": 0.1445, "num_input_tokens_seen": 66980000, "step": 30985 }, { "epoch": 5.055464926590538, "grad_norm": 0.21884754300117493, "learning_rate": 0.000930579573622431, "loss": 0.2126, "num_input_tokens_seen": 66990400, "step": 30990 }, { "epoch": 5.056280587275693, "grad_norm": 0.04518071934580803, "learning_rate": 0.0009305433860327049, "loss": 0.0731, "num_input_tokens_seen": 67001184, "step": 30995 }, { "epoch": 5.057096247960848, "grad_norm": 0.01968988962471485, "learning_rate": 0.0009305071897174708, "loss": 0.0856, "num_input_tokens_seen": 67012640, "step": 31000 }, { "epoch": 5.057911908646004, "grad_norm": 0.06685356050729752, "learning_rate": 0.0009304709846774625, "loss": 0.0884, "num_input_tokens_seen": 67023936, "step": 31005 }, { "epoch": 5.058727569331158, "grad_norm": 0.01798919029533863, "learning_rate": 0.0009304347709134136, "loss": 0.0427, "num_input_tokens_seen": 67034912, "step": 31010 }, { "epoch": 5.059543230016313, "grad_norm": 0.009535958990454674, "learning_rate": 0.000930398548426058, "loss": 0.0316, "num_input_tokens_seen": 67045184, "step": 31015 }, { "epoch": 5.060358890701468, "grad_norm": 0.030188433825969696, "learning_rate": 0.0009303623172161298, "loss": 0.1074, "num_input_tokens_seen": 67055936, "step": 31020 }, { "epoch": 5.061174551386623, "grad_norm": 0.06585537642240524, "learning_rate": 0.0009303260772843632, "loss": 0.0587, "num_input_tokens_seen": 67066528, "step": 31025 }, { "epoch": 5.061990212071779, "grad_norm": 0.04481403902173042, "learning_rate": 0.0009302898286314929, "loss": 0.1304, "num_input_tokens_seen": 67077504, "step": 31030 }, { "epoch": 5.062805872756933, "grad_norm": 0.05069199204444885, "learning_rate": 0.0009302535712582532, "loss": 0.0279, "num_input_tokens_seen": 67088480, "step": 31035 }, { "epoch": 5.063621533442088, "grad_norm": 0.1780654489994049, "learning_rate": 0.0009302173051653792, "loss": 0.0536, "num_input_tokens_seen": 67099936, "step": 31040 }, { "epoch": 5.064437194127243, "grad_norm": 0.22669237852096558, "learning_rate": 0.0009301810303536056, "loss": 0.2969, "num_input_tokens_seen": 67109760, "step": 31045 }, { "epoch": 5.065252854812398, "grad_norm": 0.08419650793075562, "learning_rate": 0.0009301447468236678, "loss": 0.0358, "num_input_tokens_seen": 67119840, "step": 31050 }, { "epoch": 5.066068515497553, "grad_norm": 0.45807531476020813, "learning_rate": 0.000930108454576301, "loss": 0.1346, "num_input_tokens_seen": 67130304, "step": 31055 }, { "epoch": 5.066884176182708, "grad_norm": 0.06297741830348969, "learning_rate": 0.0009300721536122408, "loss": 0.0572, "num_input_tokens_seen": 67140960, "step": 31060 }, { "epoch": 5.067699836867863, "grad_norm": 0.2701318860054016, "learning_rate": 0.0009300358439322228, "loss": 0.0844, "num_input_tokens_seen": 67152160, "step": 31065 }, { "epoch": 5.068515497553018, "grad_norm": 0.011515563353896141, "learning_rate": 0.0009299995255369828, "loss": 0.1513, "num_input_tokens_seen": 67163104, "step": 31070 }, { "epoch": 5.069331158238173, "grad_norm": 0.0866737812757492, "learning_rate": 0.000929963198427257, "loss": 0.0299, "num_input_tokens_seen": 67174208, "step": 31075 }, { "epoch": 5.070146818923328, "grad_norm": 0.0019546225666999817, "learning_rate": 0.0009299268626037815, "loss": 0.1659, "num_input_tokens_seen": 67186912, "step": 31080 }, { "epoch": 5.0709624796084825, "grad_norm": 0.34600770473480225, "learning_rate": 0.0009298905180672928, "loss": 0.2018, "num_input_tokens_seen": 67197952, "step": 31085 }, { "epoch": 5.071778140293638, "grad_norm": 0.01651175133883953, "learning_rate": 0.0009298541648185272, "loss": 0.1235, "num_input_tokens_seen": 67208832, "step": 31090 }, { "epoch": 5.072593800978793, "grad_norm": 0.185794860124588, "learning_rate": 0.0009298178028582218, "loss": 0.0688, "num_input_tokens_seen": 67219968, "step": 31095 }, { "epoch": 5.073409461663948, "grad_norm": 0.26523634791374207, "learning_rate": 0.0009297814321871133, "loss": 0.1548, "num_input_tokens_seen": 67231232, "step": 31100 }, { "epoch": 5.074225122349103, "grad_norm": 0.32514598965644836, "learning_rate": 0.0009297450528059389, "loss": 0.1022, "num_input_tokens_seen": 67242784, "step": 31105 }, { "epoch": 5.075040783034257, "grad_norm": 0.12018303573131561, "learning_rate": 0.0009297086647154358, "loss": 0.0762, "num_input_tokens_seen": 67253440, "step": 31110 }, { "epoch": 5.075856443719413, "grad_norm": 0.040082309395074844, "learning_rate": 0.0009296722679163417, "loss": 0.0716, "num_input_tokens_seen": 67263168, "step": 31115 }, { "epoch": 5.076672104404568, "grad_norm": 0.019938020035624504, "learning_rate": 0.0009296358624093937, "loss": 0.0168, "num_input_tokens_seen": 67273248, "step": 31120 }, { "epoch": 5.077487765089723, "grad_norm": 0.19098550081253052, "learning_rate": 0.00092959944819533, "loss": 0.0936, "num_input_tokens_seen": 67283712, "step": 31125 }, { "epoch": 5.078303425774878, "grad_norm": 0.07549386471509933, "learning_rate": 0.0009295630252748885, "loss": 0.014, "num_input_tokens_seen": 67293824, "step": 31130 }, { "epoch": 5.079119086460032, "grad_norm": 0.25435495376586914, "learning_rate": 0.0009295265936488076, "loss": 0.1051, "num_input_tokens_seen": 67303936, "step": 31135 }, { "epoch": 5.079934747145187, "grad_norm": 0.054433248937129974, "learning_rate": 0.0009294901533178251, "loss": 0.0092, "num_input_tokens_seen": 67315200, "step": 31140 }, { "epoch": 5.080750407830343, "grad_norm": 0.14523616433143616, "learning_rate": 0.0009294537042826798, "loss": 0.061, "num_input_tokens_seen": 67325952, "step": 31145 }, { "epoch": 5.081566068515498, "grad_norm": 0.03600054606795311, "learning_rate": 0.0009294172465441104, "loss": 0.0518, "num_input_tokens_seen": 67336640, "step": 31150 }, { "epoch": 5.082381729200653, "grad_norm": 0.1872844696044922, "learning_rate": 0.0009293807801028558, "loss": 0.1058, "num_input_tokens_seen": 67347776, "step": 31155 }, { "epoch": 5.083197389885807, "grad_norm": 0.06703568249940872, "learning_rate": 0.0009293443049596551, "loss": 0.1516, "num_input_tokens_seen": 67359264, "step": 31160 }, { "epoch": 5.084013050570962, "grad_norm": 0.19908444583415985, "learning_rate": 0.0009293078211152473, "loss": 0.1284, "num_input_tokens_seen": 67368960, "step": 31165 }, { "epoch": 5.084828711256117, "grad_norm": 0.03559856116771698, "learning_rate": 0.0009292713285703718, "loss": 0.0476, "num_input_tokens_seen": 67379520, "step": 31170 }, { "epoch": 5.085644371941273, "grad_norm": 0.11854170262813568, "learning_rate": 0.0009292348273257684, "loss": 0.1697, "num_input_tokens_seen": 67390272, "step": 31175 }, { "epoch": 5.0864600326264275, "grad_norm": 0.11884764581918716, "learning_rate": 0.0009291983173821765, "loss": 0.1775, "num_input_tokens_seen": 67401536, "step": 31180 }, { "epoch": 5.087275693311582, "grad_norm": 0.056827448308467865, "learning_rate": 0.0009291617987403364, "loss": 0.0397, "num_input_tokens_seen": 67411264, "step": 31185 }, { "epoch": 5.088091353996737, "grad_norm": 0.0024486789479851723, "learning_rate": 0.000929125271400988, "loss": 0.0479, "num_input_tokens_seen": 67422976, "step": 31190 }, { "epoch": 5.088907014681892, "grad_norm": 0.14866988360881805, "learning_rate": 0.0009290887353648716, "loss": 0.0504, "num_input_tokens_seen": 67434720, "step": 31195 }, { "epoch": 5.089722675367048, "grad_norm": 0.02642189897596836, "learning_rate": 0.0009290521906327276, "loss": 0.0092, "num_input_tokens_seen": 67446368, "step": 31200 }, { "epoch": 5.0905383360522025, "grad_norm": 0.020417513325810432, "learning_rate": 0.0009290156372052967, "loss": 0.0516, "num_input_tokens_seen": 67457664, "step": 31205 }, { "epoch": 5.091353996737357, "grad_norm": 0.036934275180101395, "learning_rate": 0.0009289790750833196, "loss": 0.0764, "num_input_tokens_seen": 67468416, "step": 31210 }, { "epoch": 5.092169657422512, "grad_norm": 0.16267381608486176, "learning_rate": 0.0009289425042675373, "loss": 0.1494, "num_input_tokens_seen": 67479328, "step": 31215 }, { "epoch": 5.092985318107667, "grad_norm": 0.00929997954517603, "learning_rate": 0.0009289059247586911, "loss": 0.0177, "num_input_tokens_seen": 67490144, "step": 31220 }, { "epoch": 5.093800978792822, "grad_norm": 0.03489250689744949, "learning_rate": 0.0009288693365575222, "loss": 0.0243, "num_input_tokens_seen": 67500384, "step": 31225 }, { "epoch": 5.0946166394779775, "grad_norm": 0.1912592649459839, "learning_rate": 0.0009288327396647722, "loss": 0.0725, "num_input_tokens_seen": 67511744, "step": 31230 }, { "epoch": 5.095432300163132, "grad_norm": 0.40533021092414856, "learning_rate": 0.0009287961340811826, "loss": 0.2004, "num_input_tokens_seen": 67522688, "step": 31235 }, { "epoch": 5.096247960848287, "grad_norm": 0.2418777048587799, "learning_rate": 0.0009287595198074955, "loss": 0.2036, "num_input_tokens_seen": 67533984, "step": 31240 }, { "epoch": 5.097063621533442, "grad_norm": 0.17491726577281952, "learning_rate": 0.0009287228968444527, "loss": 0.2711, "num_input_tokens_seen": 67545952, "step": 31245 }, { "epoch": 5.097879282218597, "grad_norm": 0.00852019339799881, "learning_rate": 0.0009286862651927966, "loss": 0.0627, "num_input_tokens_seen": 67556928, "step": 31250 }, { "epoch": 5.0986949429037525, "grad_norm": 0.10353199392557144, "learning_rate": 0.0009286496248532695, "loss": 0.203, "num_input_tokens_seen": 67567840, "step": 31255 }, { "epoch": 5.099510603588907, "grad_norm": 0.006339102052152157, "learning_rate": 0.000928612975826614, "loss": 0.0513, "num_input_tokens_seen": 67578432, "step": 31260 }, { "epoch": 5.100326264274062, "grad_norm": 0.20576722919940948, "learning_rate": 0.0009285763181135727, "loss": 0.0726, "num_input_tokens_seen": 67588992, "step": 31265 }, { "epoch": 5.101141924959217, "grad_norm": 0.10356633365154266, "learning_rate": 0.0009285396517148888, "loss": 0.1517, "num_input_tokens_seen": 67599200, "step": 31270 }, { "epoch": 5.101957585644372, "grad_norm": 0.2491035759449005, "learning_rate": 0.000928502976631305, "loss": 0.1456, "num_input_tokens_seen": 67610016, "step": 31275 }, { "epoch": 5.102773246329527, "grad_norm": 0.0775388851761818, "learning_rate": 0.0009284662928635649, "loss": 0.073, "num_input_tokens_seen": 67621568, "step": 31280 }, { "epoch": 5.103588907014682, "grad_norm": 0.03371018171310425, "learning_rate": 0.0009284296004124118, "loss": 0.039, "num_input_tokens_seen": 67631712, "step": 31285 }, { "epoch": 5.104404567699837, "grad_norm": 0.011440278962254524, "learning_rate": 0.0009283928992785894, "loss": 0.0177, "num_input_tokens_seen": 67643552, "step": 31290 }, { "epoch": 5.105220228384992, "grad_norm": 0.004816057626157999, "learning_rate": 0.0009283561894628414, "loss": 0.055, "num_input_tokens_seen": 67653920, "step": 31295 }, { "epoch": 5.106035889070147, "grad_norm": 0.09972722083330154, "learning_rate": 0.0009283194709659117, "loss": 0.1086, "num_input_tokens_seen": 67665760, "step": 31300 }, { "epoch": 5.1068515497553015, "grad_norm": 0.011177991516888142, "learning_rate": 0.0009282827437885449, "loss": 0.0199, "num_input_tokens_seen": 67676320, "step": 31305 }, { "epoch": 5.107667210440456, "grad_norm": 0.010363086126744747, "learning_rate": 0.0009282460079314848, "loss": 0.0367, "num_input_tokens_seen": 67687008, "step": 31310 }, { "epoch": 5.108482871125612, "grad_norm": 0.07990599423646927, "learning_rate": 0.0009282092633954759, "loss": 0.1999, "num_input_tokens_seen": 67698112, "step": 31315 }, { "epoch": 5.109298531810767, "grad_norm": 0.026262901723384857, "learning_rate": 0.0009281725101812632, "loss": 0.086, "num_input_tokens_seen": 67707552, "step": 31320 }, { "epoch": 5.110114192495922, "grad_norm": 0.1390565186738968, "learning_rate": 0.0009281357482895914, "loss": 0.0503, "num_input_tokens_seen": 67718016, "step": 31325 }, { "epoch": 5.1109298531810765, "grad_norm": 0.18929249048233032, "learning_rate": 0.0009280989777212055, "loss": 0.0975, "num_input_tokens_seen": 67729056, "step": 31330 }, { "epoch": 5.111745513866231, "grad_norm": 0.015308565460145473, "learning_rate": 0.0009280621984768507, "loss": 0.1097, "num_input_tokens_seen": 67741024, "step": 31335 }, { "epoch": 5.112561174551387, "grad_norm": 0.04618688300251961, "learning_rate": 0.0009280254105572725, "loss": 0.1202, "num_input_tokens_seen": 67751296, "step": 31340 }, { "epoch": 5.113376835236542, "grad_norm": 0.05035729706287384, "learning_rate": 0.0009279886139632163, "loss": 0.1071, "num_input_tokens_seen": 67761536, "step": 31345 }, { "epoch": 5.114192495921697, "grad_norm": 0.0054813530296087265, "learning_rate": 0.000927951808695428, "loss": 0.0664, "num_input_tokens_seen": 67771968, "step": 31350 }, { "epoch": 5.1150081566068515, "grad_norm": 0.005418519489467144, "learning_rate": 0.0009279149947546534, "loss": 0.1719, "num_input_tokens_seen": 67781376, "step": 31355 }, { "epoch": 5.115823817292006, "grad_norm": 0.00409423653036356, "learning_rate": 0.0009278781721416385, "loss": 0.0805, "num_input_tokens_seen": 67793472, "step": 31360 }, { "epoch": 5.116639477977161, "grad_norm": 0.03038191795349121, "learning_rate": 0.0009278413408571295, "loss": 0.1029, "num_input_tokens_seen": 67804256, "step": 31365 }, { "epoch": 5.117455138662317, "grad_norm": 0.028354860842227936, "learning_rate": 0.0009278045009018733, "loss": 0.027, "num_input_tokens_seen": 67814688, "step": 31370 }, { "epoch": 5.118270799347472, "grad_norm": 0.007617499213665724, "learning_rate": 0.000927767652276616, "loss": 0.0202, "num_input_tokens_seen": 67824672, "step": 31375 }, { "epoch": 5.1190864600326265, "grad_norm": 0.005120754241943359, "learning_rate": 0.0009277307949821045, "loss": 0.1537, "num_input_tokens_seen": 67834720, "step": 31380 }, { "epoch": 5.119902120717781, "grad_norm": 0.021448107436299324, "learning_rate": 0.000927693929019086, "loss": 0.0241, "num_input_tokens_seen": 67846624, "step": 31385 }, { "epoch": 5.120717781402936, "grad_norm": 0.1250106692314148, "learning_rate": 0.0009276570543883074, "loss": 0.047, "num_input_tokens_seen": 67855296, "step": 31390 }, { "epoch": 5.121533442088092, "grad_norm": 0.12414438277482986, "learning_rate": 0.000927620171090516, "loss": 0.0678, "num_input_tokens_seen": 67866336, "step": 31395 }, { "epoch": 5.122349102773247, "grad_norm": 0.011611179448664188, "learning_rate": 0.0009275832791264593, "loss": 0.0664, "num_input_tokens_seen": 67877664, "step": 31400 }, { "epoch": 5.123164763458401, "grad_norm": 0.07134946435689926, "learning_rate": 0.0009275463784968852, "loss": 0.0711, "num_input_tokens_seen": 67888576, "step": 31405 }, { "epoch": 5.123980424143556, "grad_norm": 0.06697040051221848, "learning_rate": 0.0009275094692025413, "loss": 0.0834, "num_input_tokens_seen": 67898880, "step": 31410 }, { "epoch": 5.124796084828711, "grad_norm": 0.030282270163297653, "learning_rate": 0.0009274725512441757, "loss": 0.1136, "num_input_tokens_seen": 67908480, "step": 31415 }, { "epoch": 5.125611745513866, "grad_norm": 0.010222107172012329, "learning_rate": 0.0009274356246225364, "loss": 0.1331, "num_input_tokens_seen": 67918688, "step": 31420 }, { "epoch": 5.126427406199022, "grad_norm": 0.11597134917974472, "learning_rate": 0.0009273986893383722, "loss": 0.1195, "num_input_tokens_seen": 67929504, "step": 31425 }, { "epoch": 5.127243066884176, "grad_norm": 0.004997505806386471, "learning_rate": 0.000927361745392431, "loss": 0.0747, "num_input_tokens_seen": 67940192, "step": 31430 }, { "epoch": 5.128058727569331, "grad_norm": 0.10774416476488113, "learning_rate": 0.0009273247927854622, "loss": 0.0965, "num_input_tokens_seen": 67951072, "step": 31435 }, { "epoch": 5.128874388254486, "grad_norm": 0.1743687093257904, "learning_rate": 0.0009272878315182141, "loss": 0.0475, "num_input_tokens_seen": 67961280, "step": 31440 }, { "epoch": 5.129690048939641, "grad_norm": 0.23096045851707458, "learning_rate": 0.0009272508615914363, "loss": 0.123, "num_input_tokens_seen": 67972224, "step": 31445 }, { "epoch": 5.130505709624796, "grad_norm": 0.37662026286125183, "learning_rate": 0.0009272138830058776, "loss": 0.2525, "num_input_tokens_seen": 67984256, "step": 31450 }, { "epoch": 5.131321370309951, "grad_norm": 0.05308877304196358, "learning_rate": 0.0009271768957622877, "loss": 0.0669, "num_input_tokens_seen": 67994144, "step": 31455 }, { "epoch": 5.132137030995106, "grad_norm": 0.17500479519367218, "learning_rate": 0.0009271398998614162, "loss": 0.1912, "num_input_tokens_seen": 68004896, "step": 31460 }, { "epoch": 5.132952691680261, "grad_norm": 0.01674867980182171, "learning_rate": 0.0009271028953040126, "loss": 0.1249, "num_input_tokens_seen": 68015456, "step": 31465 }, { "epoch": 5.133768352365416, "grad_norm": 0.3295539319515228, "learning_rate": 0.0009270658820908271, "loss": 0.0809, "num_input_tokens_seen": 68026880, "step": 31470 }, { "epoch": 5.134584013050571, "grad_norm": 0.1620461344718933, "learning_rate": 0.0009270288602226096, "loss": 0.307, "num_input_tokens_seen": 68036672, "step": 31475 }, { "epoch": 5.135399673735726, "grad_norm": 0.0587170347571373, "learning_rate": 0.0009269918297001106, "loss": 0.059, "num_input_tokens_seen": 68047040, "step": 31480 }, { "epoch": 5.136215334420881, "grad_norm": 0.3170589804649353, "learning_rate": 0.0009269547905240805, "loss": 0.1165, "num_input_tokens_seen": 68058112, "step": 31485 }, { "epoch": 5.137030995106036, "grad_norm": 0.034953050315380096, "learning_rate": 0.00092691774269527, "loss": 0.0355, "num_input_tokens_seen": 68068384, "step": 31490 }, { "epoch": 5.137846655791191, "grad_norm": 0.053561653941869736, "learning_rate": 0.0009268806862144298, "loss": 0.0943, "num_input_tokens_seen": 68078848, "step": 31495 }, { "epoch": 5.138662316476346, "grad_norm": 0.011080753058195114, "learning_rate": 0.0009268436210823109, "loss": 0.0651, "num_input_tokens_seen": 68090272, "step": 31500 }, { "epoch": 5.1394779771615005, "grad_norm": 0.19692468643188477, "learning_rate": 0.0009268065472996645, "loss": 0.1488, "num_input_tokens_seen": 68101984, "step": 31505 }, { "epoch": 5.140293637846656, "grad_norm": 0.200147807598114, "learning_rate": 0.0009267694648672423, "loss": 0.0512, "num_input_tokens_seen": 68113696, "step": 31510 }, { "epoch": 5.141109298531811, "grad_norm": 0.12792713940143585, "learning_rate": 0.0009267323737857952, "loss": 0.0741, "num_input_tokens_seen": 68125120, "step": 31515 }, { "epoch": 5.141924959216966, "grad_norm": 0.036151349544525146, "learning_rate": 0.0009266952740560752, "loss": 0.0511, "num_input_tokens_seen": 68136128, "step": 31520 }, { "epoch": 5.142740619902121, "grad_norm": 0.006894730031490326, "learning_rate": 0.0009266581656788342, "loss": 0.071, "num_input_tokens_seen": 68147808, "step": 31525 }, { "epoch": 5.143556280587275, "grad_norm": 0.02515142224729061, "learning_rate": 0.0009266210486548243, "loss": 0.1119, "num_input_tokens_seen": 68158208, "step": 31530 }, { "epoch": 5.14437194127243, "grad_norm": 0.007471158169209957, "learning_rate": 0.0009265839229847975, "loss": 0.1079, "num_input_tokens_seen": 68169088, "step": 31535 }, { "epoch": 5.145187601957586, "grad_norm": 0.0272090844810009, "learning_rate": 0.0009265467886695064, "loss": 0.0137, "num_input_tokens_seen": 68179200, "step": 31540 }, { "epoch": 5.146003262642741, "grad_norm": 0.2392469048500061, "learning_rate": 0.0009265096457097035, "loss": 0.0796, "num_input_tokens_seen": 68189120, "step": 31545 }, { "epoch": 5.146818923327896, "grad_norm": 0.02147931605577469, "learning_rate": 0.0009264724941061418, "loss": 0.0274, "num_input_tokens_seen": 68200672, "step": 31550 }, { "epoch": 5.14763458401305, "grad_norm": 0.21360090374946594, "learning_rate": 0.0009264353338595736, "loss": 0.1693, "num_input_tokens_seen": 68211040, "step": 31555 }, { "epoch": 5.148450244698205, "grad_norm": 0.08548810333013535, "learning_rate": 0.0009263981649707527, "loss": 0.102, "num_input_tokens_seen": 68221216, "step": 31560 }, { "epoch": 5.149265905383361, "grad_norm": 0.01596478745341301, "learning_rate": 0.0009263609874404319, "loss": 0.1411, "num_input_tokens_seen": 68231360, "step": 31565 }, { "epoch": 5.150081566068516, "grad_norm": 0.051141407340765, "learning_rate": 0.0009263238012693649, "loss": 0.0586, "num_input_tokens_seen": 68243456, "step": 31570 }, { "epoch": 5.150897226753671, "grad_norm": 0.1186927780508995, "learning_rate": 0.0009262866064583051, "loss": 0.0407, "num_input_tokens_seen": 68254560, "step": 31575 }, { "epoch": 5.151712887438825, "grad_norm": 0.002311921678483486, "learning_rate": 0.0009262494030080066, "loss": 0.0422, "num_input_tokens_seen": 68265792, "step": 31580 }, { "epoch": 5.15252854812398, "grad_norm": 0.004859385080635548, "learning_rate": 0.0009262121909192232, "loss": 0.168, "num_input_tokens_seen": 68276992, "step": 31585 }, { "epoch": 5.153344208809135, "grad_norm": 0.09993654489517212, "learning_rate": 0.0009261749701927089, "loss": 0.2298, "num_input_tokens_seen": 68286496, "step": 31590 }, { "epoch": 5.154159869494291, "grad_norm": 0.03430628776550293, "learning_rate": 0.0009261377408292183, "loss": 0.1492, "num_input_tokens_seen": 68297696, "step": 31595 }, { "epoch": 5.1549755301794455, "grad_norm": 0.0053238943219184875, "learning_rate": 0.0009261005028295058, "loss": 0.0319, "num_input_tokens_seen": 68309216, "step": 31600 }, { "epoch": 5.1557911908646, "grad_norm": 0.2986612021923065, "learning_rate": 0.000926063256194326, "loss": 0.2046, "num_input_tokens_seen": 68319584, "step": 31605 }, { "epoch": 5.156606851549755, "grad_norm": 0.07867178320884705, "learning_rate": 0.0009260260009244339, "loss": 0.0633, "num_input_tokens_seen": 68331104, "step": 31610 }, { "epoch": 5.15742251223491, "grad_norm": 0.01586318016052246, "learning_rate": 0.0009259887370205844, "loss": 0.0906, "num_input_tokens_seen": 68341568, "step": 31615 }, { "epoch": 5.158238172920065, "grad_norm": 0.010150609537959099, "learning_rate": 0.0009259514644835327, "loss": 0.0247, "num_input_tokens_seen": 68352480, "step": 31620 }, { "epoch": 5.1590538336052205, "grad_norm": 0.13318832218647003, "learning_rate": 0.0009259141833140343, "loss": 0.1502, "num_input_tokens_seen": 68362560, "step": 31625 }, { "epoch": 5.159869494290375, "grad_norm": 0.22387240827083588, "learning_rate": 0.0009258768935128445, "loss": 0.1043, "num_input_tokens_seen": 68372800, "step": 31630 }, { "epoch": 5.16068515497553, "grad_norm": 0.30648529529571533, "learning_rate": 0.0009258395950807194, "loss": 0.1968, "num_input_tokens_seen": 68383264, "step": 31635 }, { "epoch": 5.161500815660685, "grad_norm": 0.004418856929987669, "learning_rate": 0.0009258022880184145, "loss": 0.0746, "num_input_tokens_seen": 68394176, "step": 31640 }, { "epoch": 5.16231647634584, "grad_norm": 0.2309313714504242, "learning_rate": 0.0009257649723266863, "loss": 0.1278, "num_input_tokens_seen": 68406592, "step": 31645 }, { "epoch": 5.1631321370309955, "grad_norm": 0.1833798736333847, "learning_rate": 0.0009257276480062907, "loss": 0.1194, "num_input_tokens_seen": 68415936, "step": 31650 }, { "epoch": 5.16394779771615, "grad_norm": 0.0035895612090826035, "learning_rate": 0.0009256903150579842, "loss": 0.176, "num_input_tokens_seen": 68426880, "step": 31655 }, { "epoch": 5.164763458401305, "grad_norm": 0.19863756000995636, "learning_rate": 0.0009256529734825234, "loss": 0.2212, "num_input_tokens_seen": 68439936, "step": 31660 }, { "epoch": 5.16557911908646, "grad_norm": 0.14670097827911377, "learning_rate": 0.0009256156232806652, "loss": 0.1125, "num_input_tokens_seen": 68450784, "step": 31665 }, { "epoch": 5.166394779771615, "grad_norm": 0.07167736440896988, "learning_rate": 0.0009255782644531664, "loss": 0.0307, "num_input_tokens_seen": 68462592, "step": 31670 }, { "epoch": 5.16721044045677, "grad_norm": 0.11502383649349213, "learning_rate": 0.0009255408970007842, "loss": 0.1548, "num_input_tokens_seen": 68472928, "step": 31675 }, { "epoch": 5.168026101141925, "grad_norm": 0.03686782345175743, "learning_rate": 0.0009255035209242759, "loss": 0.1334, "num_input_tokens_seen": 68483360, "step": 31680 }, { "epoch": 5.16884176182708, "grad_norm": 0.04977540299296379, "learning_rate": 0.0009254661362243991, "loss": 0.1431, "num_input_tokens_seen": 68494336, "step": 31685 }, { "epoch": 5.169657422512235, "grad_norm": 0.10123711824417114, "learning_rate": 0.000925428742901911, "loss": 0.0828, "num_input_tokens_seen": 68506240, "step": 31690 }, { "epoch": 5.17047308319739, "grad_norm": 0.01098128966987133, "learning_rate": 0.0009253913409575698, "loss": 0.06, "num_input_tokens_seen": 68516736, "step": 31695 }, { "epoch": 5.171288743882545, "grad_norm": 0.15740327537059784, "learning_rate": 0.0009253539303921336, "loss": 0.1266, "num_input_tokens_seen": 68527680, "step": 31700 }, { "epoch": 5.1721044045677, "grad_norm": 0.27190694212913513, "learning_rate": 0.0009253165112063604, "loss": 0.1337, "num_input_tokens_seen": 68537056, "step": 31705 }, { "epoch": 5.172920065252855, "grad_norm": 0.05805153027176857, "learning_rate": 0.0009252790834010085, "loss": 0.0541, "num_input_tokens_seen": 68547296, "step": 31710 }, { "epoch": 5.17373572593801, "grad_norm": 0.26238253712654114, "learning_rate": 0.0009252416469768363, "loss": 0.1146, "num_input_tokens_seen": 68557344, "step": 31715 }, { "epoch": 5.174551386623165, "grad_norm": 0.2675519585609436, "learning_rate": 0.0009252042019346029, "loss": 0.0867, "num_input_tokens_seen": 68567968, "step": 31720 }, { "epoch": 5.1753670473083195, "grad_norm": 0.30450505018234253, "learning_rate": 0.0009251667482750669, "loss": 0.1679, "num_input_tokens_seen": 68578656, "step": 31725 }, { "epoch": 5.176182707993474, "grad_norm": 0.0031907472293823957, "learning_rate": 0.0009251292859989873, "loss": 0.0446, "num_input_tokens_seen": 68588608, "step": 31730 }, { "epoch": 5.17699836867863, "grad_norm": 0.016649756580591202, "learning_rate": 0.0009250918151071235, "loss": 0.0295, "num_input_tokens_seen": 68598624, "step": 31735 }, { "epoch": 5.177814029363785, "grad_norm": 0.09811677038669586, "learning_rate": 0.0009250543356002347, "loss": 0.029, "num_input_tokens_seen": 68609792, "step": 31740 }, { "epoch": 5.17862969004894, "grad_norm": 0.033193688839673996, "learning_rate": 0.0009250168474790806, "loss": 0.0852, "num_input_tokens_seen": 68620512, "step": 31745 }, { "epoch": 5.1794453507340945, "grad_norm": 0.24452006816864014, "learning_rate": 0.0009249793507444208, "loss": 0.2061, "num_input_tokens_seen": 68631392, "step": 31750 }, { "epoch": 5.180261011419249, "grad_norm": 0.02791479602456093, "learning_rate": 0.0009249418453970155, "loss": 0.0707, "num_input_tokens_seen": 68642944, "step": 31755 }, { "epoch": 5.181076672104404, "grad_norm": 0.0830642580986023, "learning_rate": 0.0009249043314376247, "loss": 0.0382, "num_input_tokens_seen": 68653984, "step": 31760 }, { "epoch": 5.18189233278956, "grad_norm": 0.0186906885355711, "learning_rate": 0.0009248668088670084, "loss": 0.1107, "num_input_tokens_seen": 68665344, "step": 31765 }, { "epoch": 5.182707993474715, "grad_norm": 0.16254150867462158, "learning_rate": 0.0009248292776859273, "loss": 0.1132, "num_input_tokens_seen": 68676128, "step": 31770 }, { "epoch": 5.1835236541598695, "grad_norm": 0.04959748312830925, "learning_rate": 0.0009247917378951419, "loss": 0.0199, "num_input_tokens_seen": 68686304, "step": 31775 }, { "epoch": 5.184339314845024, "grad_norm": 0.2542068660259247, "learning_rate": 0.0009247541894954132, "loss": 0.0773, "num_input_tokens_seen": 68697248, "step": 31780 }, { "epoch": 5.185154975530179, "grad_norm": 0.07414700835943222, "learning_rate": 0.0009247166324875018, "loss": 0.0817, "num_input_tokens_seen": 68707744, "step": 31785 }, { "epoch": 5.185970636215335, "grad_norm": 0.21046984195709229, "learning_rate": 0.0009246790668721692, "loss": 0.1773, "num_input_tokens_seen": 68718496, "step": 31790 }, { "epoch": 5.18678629690049, "grad_norm": 0.11404412984848022, "learning_rate": 0.0009246414926501766, "loss": 0.2142, "num_input_tokens_seen": 68729920, "step": 31795 }, { "epoch": 5.1876019575856445, "grad_norm": 0.021141186356544495, "learning_rate": 0.0009246039098222854, "loss": 0.1502, "num_input_tokens_seen": 68740320, "step": 31800 }, { "epoch": 5.188417618270799, "grad_norm": 0.10404963791370392, "learning_rate": 0.0009245663183892572, "loss": 0.0983, "num_input_tokens_seen": 68751648, "step": 31805 }, { "epoch": 5.189233278955954, "grad_norm": 0.207743838429451, "learning_rate": 0.0009245287183518541, "loss": 0.0903, "num_input_tokens_seen": 68762592, "step": 31810 }, { "epoch": 5.190048939641109, "grad_norm": 0.010766721330583096, "learning_rate": 0.0009244911097108379, "loss": 0.2304, "num_input_tokens_seen": 68772448, "step": 31815 }, { "epoch": 5.190864600326265, "grad_norm": 0.05641620233654976, "learning_rate": 0.000924453492466971, "loss": 0.0593, "num_input_tokens_seen": 68782944, "step": 31820 }, { "epoch": 5.191680261011419, "grad_norm": 0.04736460745334625, "learning_rate": 0.0009244158666210154, "loss": 0.0461, "num_input_tokens_seen": 68791456, "step": 31825 }, { "epoch": 5.192495921696574, "grad_norm": 0.0764179453253746, "learning_rate": 0.0009243782321737339, "loss": 0.0716, "num_input_tokens_seen": 68802368, "step": 31830 }, { "epoch": 5.193311582381729, "grad_norm": 0.07003484666347504, "learning_rate": 0.0009243405891258894, "loss": 0.0991, "num_input_tokens_seen": 68812768, "step": 31835 }, { "epoch": 5.194127243066884, "grad_norm": 0.003841748461127281, "learning_rate": 0.0009243029374782443, "loss": 0.0276, "num_input_tokens_seen": 68824416, "step": 31840 }, { "epoch": 5.19494290375204, "grad_norm": 0.01023764256387949, "learning_rate": 0.0009242652772315621, "loss": 0.0097, "num_input_tokens_seen": 68835776, "step": 31845 }, { "epoch": 5.195758564437194, "grad_norm": 0.0831431970000267, "learning_rate": 0.0009242276083866056, "loss": 0.0938, "num_input_tokens_seen": 68846912, "step": 31850 }, { "epoch": 5.196574225122349, "grad_norm": 0.10236520320177078, "learning_rate": 0.0009241899309441386, "loss": 0.066, "num_input_tokens_seen": 68857632, "step": 31855 }, { "epoch": 5.197389885807504, "grad_norm": 0.013170513324439526, "learning_rate": 0.0009241522449049245, "loss": 0.1338, "num_input_tokens_seen": 68868768, "step": 31860 }, { "epoch": 5.198205546492659, "grad_norm": 0.08625346422195435, "learning_rate": 0.000924114550269727, "loss": 0.0162, "num_input_tokens_seen": 68879936, "step": 31865 }, { "epoch": 5.199021207177814, "grad_norm": 0.10950763523578644, "learning_rate": 0.0009240768470393101, "loss": 0.0711, "num_input_tokens_seen": 68890528, "step": 31870 }, { "epoch": 5.199836867862969, "grad_norm": 0.009500091895461082, "learning_rate": 0.0009240391352144382, "loss": 0.0679, "num_input_tokens_seen": 68901632, "step": 31875 }, { "epoch": 5.200652528548124, "grad_norm": 0.009571898728609085, "learning_rate": 0.0009240014147958751, "loss": 0.0322, "num_input_tokens_seen": 68912000, "step": 31880 }, { "epoch": 5.201468189233279, "grad_norm": 0.026301007717847824, "learning_rate": 0.0009239636857843854, "loss": 0.0299, "num_input_tokens_seen": 68922144, "step": 31885 }, { "epoch": 5.202283849918434, "grad_norm": 0.05546105280518532, "learning_rate": 0.0009239259481807338, "loss": 0.0472, "num_input_tokens_seen": 68932928, "step": 31890 }, { "epoch": 5.203099510603589, "grad_norm": 0.2150077074766159, "learning_rate": 0.0009238882019856851, "loss": 0.0741, "num_input_tokens_seen": 68942560, "step": 31895 }, { "epoch": 5.2039151712887435, "grad_norm": 0.007343418430536985, "learning_rate": 0.0009238504472000042, "loss": 0.1001, "num_input_tokens_seen": 68953216, "step": 31900 }, { "epoch": 5.204730831973899, "grad_norm": 0.058143239468336105, "learning_rate": 0.0009238126838244562, "loss": 0.0299, "num_input_tokens_seen": 68964352, "step": 31905 }, { "epoch": 5.205546492659054, "grad_norm": 0.14698320627212524, "learning_rate": 0.0009237749118598067, "loss": 0.1336, "num_input_tokens_seen": 68975456, "step": 31910 }, { "epoch": 5.206362153344209, "grad_norm": 0.2738533020019531, "learning_rate": 0.000923737131306821, "loss": 0.2138, "num_input_tokens_seen": 68985856, "step": 31915 }, { "epoch": 5.207177814029364, "grad_norm": 0.19384877383708954, "learning_rate": 0.0009236993421662648, "loss": 0.084, "num_input_tokens_seen": 68997504, "step": 31920 }, { "epoch": 5.2079934747145185, "grad_norm": 0.3091152608394623, "learning_rate": 0.0009236615444389038, "loss": 0.2369, "num_input_tokens_seen": 69007008, "step": 31925 }, { "epoch": 5.208809135399674, "grad_norm": 0.02713857591152191, "learning_rate": 0.0009236237381255041, "loss": 0.0268, "num_input_tokens_seen": 69018304, "step": 31930 }, { "epoch": 5.209624796084829, "grad_norm": 0.0630718544125557, "learning_rate": 0.0009235859232268322, "loss": 0.0813, "num_input_tokens_seen": 69028160, "step": 31935 }, { "epoch": 5.210440456769984, "grad_norm": 0.007515220437198877, "learning_rate": 0.000923548099743654, "loss": 0.0116, "num_input_tokens_seen": 69038624, "step": 31940 }, { "epoch": 5.211256117455139, "grad_norm": 0.22423508763313293, "learning_rate": 0.0009235102676767364, "loss": 0.2559, "num_input_tokens_seen": 69049888, "step": 31945 }, { "epoch": 5.212071778140293, "grad_norm": 0.006067072041332722, "learning_rate": 0.0009234724270268459, "loss": 0.0745, "num_input_tokens_seen": 69061376, "step": 31950 }, { "epoch": 5.212887438825448, "grad_norm": 0.024653153494000435, "learning_rate": 0.0009234345777947493, "loss": 0.0349, "num_input_tokens_seen": 69072576, "step": 31955 }, { "epoch": 5.213703099510604, "grad_norm": 0.03436309099197388, "learning_rate": 0.0009233967199812141, "loss": 0.1051, "num_input_tokens_seen": 69082976, "step": 31960 }, { "epoch": 5.214518760195759, "grad_norm": 0.32174986600875854, "learning_rate": 0.000923358853587007, "loss": 0.1442, "num_input_tokens_seen": 69095072, "step": 31965 }, { "epoch": 5.215334420880914, "grad_norm": 0.058664221316576004, "learning_rate": 0.0009233209786128957, "loss": 0.04, "num_input_tokens_seen": 69107040, "step": 31970 }, { "epoch": 5.216150081566068, "grad_norm": 0.19562427699565887, "learning_rate": 0.0009232830950596479, "loss": 0.3531, "num_input_tokens_seen": 69118336, "step": 31975 }, { "epoch": 5.216965742251223, "grad_norm": 0.03304930403828621, "learning_rate": 0.0009232452029280312, "loss": 0.1295, "num_input_tokens_seen": 69130208, "step": 31980 }, { "epoch": 5.217781402936378, "grad_norm": 0.09775389730930328, "learning_rate": 0.0009232073022188135, "loss": 0.03, "num_input_tokens_seen": 69140512, "step": 31985 }, { "epoch": 5.218597063621534, "grad_norm": 0.14110304415225983, "learning_rate": 0.0009231693929327628, "loss": 0.0802, "num_input_tokens_seen": 69150816, "step": 31990 }, { "epoch": 5.219412724306689, "grad_norm": 0.009008850902318954, "learning_rate": 0.0009231314750706476, "loss": 0.0448, "num_input_tokens_seen": 69161472, "step": 31995 }, { "epoch": 5.220228384991843, "grad_norm": 0.1816573441028595, "learning_rate": 0.0009230935486332363, "loss": 0.1657, "num_input_tokens_seen": 69172064, "step": 32000 }, { "epoch": 5.221044045676998, "grad_norm": 0.3012794554233551, "learning_rate": 0.0009230556136212975, "loss": 0.1398, "num_input_tokens_seen": 69183264, "step": 32005 }, { "epoch": 5.221859706362153, "grad_norm": 0.11017505824565887, "learning_rate": 0.0009230176700356001, "loss": 0.1744, "num_input_tokens_seen": 69194528, "step": 32010 }, { "epoch": 5.222675367047309, "grad_norm": 0.05585956946015358, "learning_rate": 0.0009229797178769128, "loss": 0.174, "num_input_tokens_seen": 69205920, "step": 32015 }, { "epoch": 5.2234910277324635, "grad_norm": 0.22336703538894653, "learning_rate": 0.000922941757146005, "loss": 0.0823, "num_input_tokens_seen": 69217408, "step": 32020 }, { "epoch": 5.224306688417618, "grad_norm": 0.010442069731652737, "learning_rate": 0.000922903787843646, "loss": 0.1295, "num_input_tokens_seen": 69226560, "step": 32025 }, { "epoch": 5.225122349102773, "grad_norm": 0.1273556351661682, "learning_rate": 0.0009228658099706053, "loss": 0.0722, "num_input_tokens_seen": 69238112, "step": 32030 }, { "epoch": 5.225938009787928, "grad_norm": 0.08883675187826157, "learning_rate": 0.0009228278235276524, "loss": 0.149, "num_input_tokens_seen": 69249088, "step": 32035 }, { "epoch": 5.226753670473083, "grad_norm": 0.016032544896006584, "learning_rate": 0.0009227898285155574, "loss": 0.0544, "num_input_tokens_seen": 69258752, "step": 32040 }, { "epoch": 5.2275693311582385, "grad_norm": 0.25991424918174744, "learning_rate": 0.00092275182493509, "loss": 0.1155, "num_input_tokens_seen": 69270496, "step": 32045 }, { "epoch": 5.228384991843393, "grad_norm": 0.16175585985183716, "learning_rate": 0.0009227138127870208, "loss": 0.1564, "num_input_tokens_seen": 69280800, "step": 32050 }, { "epoch": 5.229200652528548, "grad_norm": 0.03370179980993271, "learning_rate": 0.0009226757920721196, "loss": 0.181, "num_input_tokens_seen": 69291776, "step": 32055 }, { "epoch": 5.230016313213703, "grad_norm": 0.09633185714483261, "learning_rate": 0.0009226377627911575, "loss": 0.0787, "num_input_tokens_seen": 69302144, "step": 32060 }, { "epoch": 5.230831973898858, "grad_norm": 0.061300963163375854, "learning_rate": 0.000922599724944905, "loss": 0.0605, "num_input_tokens_seen": 69313984, "step": 32065 }, { "epoch": 5.231647634584013, "grad_norm": 0.11820586770772934, "learning_rate": 0.0009225616785341329, "loss": 0.0464, "num_input_tokens_seen": 69324640, "step": 32070 }, { "epoch": 5.232463295269168, "grad_norm": 0.07643051445484161, "learning_rate": 0.0009225236235596123, "loss": 0.0222, "num_input_tokens_seen": 69335680, "step": 32075 }, { "epoch": 5.233278955954323, "grad_norm": 0.016196228563785553, "learning_rate": 0.0009224855600221145, "loss": 0.0245, "num_input_tokens_seen": 69347616, "step": 32080 }, { "epoch": 5.234094616639478, "grad_norm": 0.1011374369263649, "learning_rate": 0.0009224474879224109, "loss": 0.1312, "num_input_tokens_seen": 69358400, "step": 32085 }, { "epoch": 5.234910277324633, "grad_norm": 0.029623612761497498, "learning_rate": 0.000922409407261273, "loss": 0.0477, "num_input_tokens_seen": 69368224, "step": 32090 }, { "epoch": 5.235725938009788, "grad_norm": 0.04129718616604805, "learning_rate": 0.0009223713180394726, "loss": 0.0925, "num_input_tokens_seen": 69379200, "step": 32095 }, { "epoch": 5.236541598694943, "grad_norm": 0.019016016274690628, "learning_rate": 0.0009223332202577815, "loss": 0.0822, "num_input_tokens_seen": 69389600, "step": 32100 }, { "epoch": 5.237357259380098, "grad_norm": 0.026308605447411537, "learning_rate": 0.0009222951139169722, "loss": 0.1353, "num_input_tokens_seen": 69399424, "step": 32105 }, { "epoch": 5.238172920065253, "grad_norm": 0.15939036011695862, "learning_rate": 0.0009222569990178165, "loss": 0.0608, "num_input_tokens_seen": 69410336, "step": 32110 }, { "epoch": 5.238988580750408, "grad_norm": 0.267518550157547, "learning_rate": 0.0009222188755610871, "loss": 0.1342, "num_input_tokens_seen": 69421152, "step": 32115 }, { "epoch": 5.239804241435563, "grad_norm": 0.009280465543270111, "learning_rate": 0.0009221807435475564, "loss": 0.1205, "num_input_tokens_seen": 69431360, "step": 32120 }, { "epoch": 5.240619902120717, "grad_norm": 0.01776033826172352, "learning_rate": 0.0009221426029779975, "loss": 0.0569, "num_input_tokens_seen": 69442144, "step": 32125 }, { "epoch": 5.241435562805873, "grad_norm": 0.025634892284870148, "learning_rate": 0.0009221044538531833, "loss": 0.0466, "num_input_tokens_seen": 69452576, "step": 32130 }, { "epoch": 5.242251223491028, "grad_norm": 0.04073479771614075, "learning_rate": 0.0009220662961738868, "loss": 0.1665, "num_input_tokens_seen": 69463936, "step": 32135 }, { "epoch": 5.243066884176183, "grad_norm": 0.03299302980303764, "learning_rate": 0.0009220281299408815, "loss": 0.0561, "num_input_tokens_seen": 69475424, "step": 32140 }, { "epoch": 5.2438825448613375, "grad_norm": 0.047456976026296616, "learning_rate": 0.0009219899551549405, "loss": 0.114, "num_input_tokens_seen": 69485632, "step": 32145 }, { "epoch": 5.244698205546492, "grad_norm": 0.029536686837673187, "learning_rate": 0.0009219517718168379, "loss": 0.179, "num_input_tokens_seen": 69495264, "step": 32150 }, { "epoch": 5.245513866231648, "grad_norm": 0.6375518441200256, "learning_rate": 0.0009219135799273474, "loss": 0.0721, "num_input_tokens_seen": 69506112, "step": 32155 }, { "epoch": 5.246329526916803, "grad_norm": 0.016054809093475342, "learning_rate": 0.0009218753794872429, "loss": 0.0486, "num_input_tokens_seen": 69516768, "step": 32160 }, { "epoch": 5.247145187601958, "grad_norm": 0.020715905353426933, "learning_rate": 0.0009218371704972987, "loss": 0.0741, "num_input_tokens_seen": 69528096, "step": 32165 }, { "epoch": 5.2479608482871125, "grad_norm": 0.06473297625780106, "learning_rate": 0.0009217989529582889, "loss": 0.0302, "num_input_tokens_seen": 69540000, "step": 32170 }, { "epoch": 5.248776508972267, "grad_norm": 0.06317108124494553, "learning_rate": 0.0009217607268709884, "loss": 0.0426, "num_input_tokens_seen": 69551200, "step": 32175 }, { "epoch": 5.249592169657422, "grad_norm": 0.012645971961319447, "learning_rate": 0.0009217224922361718, "loss": 0.0322, "num_input_tokens_seen": 69561600, "step": 32180 }, { "epoch": 5.250407830342578, "grad_norm": 0.04027498885989189, "learning_rate": 0.0009216842490546138, "loss": 0.0872, "num_input_tokens_seen": 69572608, "step": 32185 }, { "epoch": 5.251223491027733, "grad_norm": 0.02918020635843277, "learning_rate": 0.0009216459973270895, "loss": 0.1817, "num_input_tokens_seen": 69584096, "step": 32190 }, { "epoch": 5.2520391517128875, "grad_norm": 0.021406283602118492, "learning_rate": 0.0009216077370543743, "loss": 0.0901, "num_input_tokens_seen": 69594240, "step": 32195 }, { "epoch": 5.252854812398042, "grad_norm": 0.07893198728561401, "learning_rate": 0.0009215694682372433, "loss": 0.0284, "num_input_tokens_seen": 69606208, "step": 32200 }, { "epoch": 5.253670473083197, "grad_norm": 0.0020585639867931604, "learning_rate": 0.0009215311908764724, "loss": 0.0273, "num_input_tokens_seen": 69617504, "step": 32205 }, { "epoch": 5.254486133768353, "grad_norm": 0.21157675981521606, "learning_rate": 0.000921492904972837, "loss": 0.0974, "num_input_tokens_seen": 69628000, "step": 32210 }, { "epoch": 5.255301794453508, "grad_norm": 0.16144989430904388, "learning_rate": 0.0009214546105271133, "loss": 0.4135, "num_input_tokens_seen": 69639296, "step": 32215 }, { "epoch": 5.2561174551386625, "grad_norm": 0.16526293754577637, "learning_rate": 0.0009214163075400772, "loss": 0.1462, "num_input_tokens_seen": 69650272, "step": 32220 }, { "epoch": 5.256933115823817, "grad_norm": 0.06682567298412323, "learning_rate": 0.000921377996012505, "loss": 0.1254, "num_input_tokens_seen": 69660544, "step": 32225 }, { "epoch": 5.257748776508972, "grad_norm": 0.25775107741355896, "learning_rate": 0.0009213396759451732, "loss": 0.1977, "num_input_tokens_seen": 69671168, "step": 32230 }, { "epoch": 5.258564437194127, "grad_norm": 0.13597147166728973, "learning_rate": 0.0009213013473388584, "loss": 0.0848, "num_input_tokens_seen": 69681376, "step": 32235 }, { "epoch": 5.259380097879283, "grad_norm": 0.11380225419998169, "learning_rate": 0.0009212630101943373, "loss": 0.0665, "num_input_tokens_seen": 69692192, "step": 32240 }, { "epoch": 5.260195758564437, "grad_norm": 0.007670269813388586, "learning_rate": 0.000921224664512387, "loss": 0.0274, "num_input_tokens_seen": 69702816, "step": 32245 }, { "epoch": 5.261011419249592, "grad_norm": 0.015922527760267258, "learning_rate": 0.0009211863102937843, "loss": 0.0319, "num_input_tokens_seen": 69712800, "step": 32250 }, { "epoch": 5.261827079934747, "grad_norm": 0.0070436312817037106, "learning_rate": 0.0009211479475393068, "loss": 0.2056, "num_input_tokens_seen": 69723808, "step": 32255 }, { "epoch": 5.262642740619902, "grad_norm": 0.013316688127815723, "learning_rate": 0.0009211095762497319, "loss": 0.0193, "num_input_tokens_seen": 69734944, "step": 32260 }, { "epoch": 5.263458401305057, "grad_norm": 0.023608213290572166, "learning_rate": 0.0009210711964258372, "loss": 0.1439, "num_input_tokens_seen": 69744672, "step": 32265 }, { "epoch": 5.264274061990212, "grad_norm": 0.1670239418745041, "learning_rate": 0.0009210328080684005, "loss": 0.0817, "num_input_tokens_seen": 69754944, "step": 32270 }, { "epoch": 5.265089722675367, "grad_norm": 0.006368768867105246, "learning_rate": 0.0009209944111782, "loss": 0.0374, "num_input_tokens_seen": 69765824, "step": 32275 }, { "epoch": 5.265905383360522, "grad_norm": 0.011990712955594063, "learning_rate": 0.0009209560057560134, "loss": 0.1155, "num_input_tokens_seen": 69777152, "step": 32280 }, { "epoch": 5.266721044045677, "grad_norm": 0.07604020088911057, "learning_rate": 0.0009209175918026195, "loss": 0.0241, "num_input_tokens_seen": 69787360, "step": 32285 }, { "epoch": 5.267536704730832, "grad_norm": 0.2205602526664734, "learning_rate": 0.0009208791693187967, "loss": 0.1131, "num_input_tokens_seen": 69798048, "step": 32290 }, { "epoch": 5.268352365415987, "grad_norm": 0.04664922133088112, "learning_rate": 0.0009208407383053235, "loss": 0.0366, "num_input_tokens_seen": 69809088, "step": 32295 }, { "epoch": 5.269168026101142, "grad_norm": 0.007245397195219994, "learning_rate": 0.000920802298762979, "loss": 0.0704, "num_input_tokens_seen": 69819296, "step": 32300 }, { "epoch": 5.269983686786297, "grad_norm": 0.023717431351542473, "learning_rate": 0.0009207638506925419, "loss": 0.0771, "num_input_tokens_seen": 69829472, "step": 32305 }, { "epoch": 5.270799347471452, "grad_norm": 0.2488313913345337, "learning_rate": 0.0009207253940947916, "loss": 0.0537, "num_input_tokens_seen": 69840320, "step": 32310 }, { "epoch": 5.271615008156607, "grad_norm": 0.01920371875166893, "learning_rate": 0.0009206869289705075, "loss": 0.1372, "num_input_tokens_seen": 69852192, "step": 32315 }, { "epoch": 5.2724306688417615, "grad_norm": 0.03608312830328941, "learning_rate": 0.0009206484553204693, "loss": 0.0895, "num_input_tokens_seen": 69864032, "step": 32320 }, { "epoch": 5.273246329526917, "grad_norm": 0.03217107802629471, "learning_rate": 0.0009206099731454562, "loss": 0.0295, "num_input_tokens_seen": 69874816, "step": 32325 }, { "epoch": 5.274061990212072, "grad_norm": 0.03715856000781059, "learning_rate": 0.0009205714824462487, "loss": 0.0362, "num_input_tokens_seen": 69885984, "step": 32330 }, { "epoch": 5.274877650897227, "grad_norm": 0.00929944682866335, "learning_rate": 0.0009205329832236265, "loss": 0.0338, "num_input_tokens_seen": 69896000, "step": 32335 }, { "epoch": 5.275693311582382, "grad_norm": 0.03817165642976761, "learning_rate": 0.0009204944754783698, "loss": 0.1096, "num_input_tokens_seen": 69907040, "step": 32340 }, { "epoch": 5.2765089722675365, "grad_norm": 0.0905759334564209, "learning_rate": 0.0009204559592112592, "loss": 0.0265, "num_input_tokens_seen": 69917280, "step": 32345 }, { "epoch": 5.277324632952691, "grad_norm": 0.03727143257856369, "learning_rate": 0.0009204174344230751, "loss": 0.2925, "num_input_tokens_seen": 69929344, "step": 32350 }, { "epoch": 5.278140293637847, "grad_norm": 0.01050970796495676, "learning_rate": 0.0009203789011145984, "loss": 0.0518, "num_input_tokens_seen": 69940352, "step": 32355 }, { "epoch": 5.278955954323002, "grad_norm": 0.2786335051059723, "learning_rate": 0.00092034035928661, "loss": 0.1492, "num_input_tokens_seen": 69950080, "step": 32360 }, { "epoch": 5.279771615008157, "grad_norm": 0.2287655770778656, "learning_rate": 0.000920301808939891, "loss": 0.0845, "num_input_tokens_seen": 69960064, "step": 32365 }, { "epoch": 5.280587275693311, "grad_norm": 0.04754359647631645, "learning_rate": 0.0009202632500752226, "loss": 0.02, "num_input_tokens_seen": 69971072, "step": 32370 }, { "epoch": 5.281402936378466, "grad_norm": 0.13212303817272186, "learning_rate": 0.0009202246826933864, "loss": 0.0811, "num_input_tokens_seen": 69981408, "step": 32375 }, { "epoch": 5.282218597063622, "grad_norm": 0.2108330875635147, "learning_rate": 0.0009201861067951638, "loss": 0.0271, "num_input_tokens_seen": 69991840, "step": 32380 }, { "epoch": 5.283034257748777, "grad_norm": 0.02782035619020462, "learning_rate": 0.0009201475223813368, "loss": 0.189, "num_input_tokens_seen": 70002528, "step": 32385 }, { "epoch": 5.283849918433932, "grad_norm": 0.16605517268180847, "learning_rate": 0.0009201089294526872, "loss": 0.1117, "num_input_tokens_seen": 70013280, "step": 32390 }, { "epoch": 5.284665579119086, "grad_norm": 0.019904859364032745, "learning_rate": 0.0009200703280099971, "loss": 0.1801, "num_input_tokens_seen": 70024864, "step": 32395 }, { "epoch": 5.285481239804241, "grad_norm": 0.13329952955245972, "learning_rate": 0.0009200317180540491, "loss": 0.1104, "num_input_tokens_seen": 70035648, "step": 32400 }, { "epoch": 5.286296900489396, "grad_norm": 0.12227307260036469, "learning_rate": 0.0009199930995856254, "loss": 0.1115, "num_input_tokens_seen": 70046592, "step": 32405 }, { "epoch": 5.287112561174552, "grad_norm": 0.010475658811628819, "learning_rate": 0.0009199544726055087, "loss": 0.0213, "num_input_tokens_seen": 70058016, "step": 32410 }, { "epoch": 5.287928221859707, "grad_norm": 0.0636746883392334, "learning_rate": 0.000919915837114482, "loss": 0.0393, "num_input_tokens_seen": 70069536, "step": 32415 }, { "epoch": 5.288743882544861, "grad_norm": 0.022627348080277443, "learning_rate": 0.0009198771931133281, "loss": 0.1151, "num_input_tokens_seen": 70079936, "step": 32420 }, { "epoch": 5.289559543230016, "grad_norm": 0.34379759430885315, "learning_rate": 0.0009198385406028302, "loss": 0.0878, "num_input_tokens_seen": 70090912, "step": 32425 }, { "epoch": 5.290375203915171, "grad_norm": 0.019473228603601456, "learning_rate": 0.0009197998795837716, "loss": 0.0394, "num_input_tokens_seen": 70102624, "step": 32430 }, { "epoch": 5.291190864600326, "grad_norm": 0.20492912828922272, "learning_rate": 0.0009197612100569359, "loss": 0.1075, "num_input_tokens_seen": 70112864, "step": 32435 }, { "epoch": 5.2920065252854815, "grad_norm": 0.0357382632791996, "learning_rate": 0.0009197225320231069, "loss": 0.0834, "num_input_tokens_seen": 70122304, "step": 32440 }, { "epoch": 5.292822185970636, "grad_norm": 0.016824502497911453, "learning_rate": 0.0009196838454830682, "loss": 0.016, "num_input_tokens_seen": 70133728, "step": 32445 }, { "epoch": 5.293637846655791, "grad_norm": 0.07850372046232224, "learning_rate": 0.000919645150437604, "loss": 0.0894, "num_input_tokens_seen": 70144384, "step": 32450 }, { "epoch": 5.294453507340946, "grad_norm": 0.2341936081647873, "learning_rate": 0.0009196064468874985, "loss": 0.1501, "num_input_tokens_seen": 70155040, "step": 32455 }, { "epoch": 5.295269168026101, "grad_norm": 0.03325160965323448, "learning_rate": 0.0009195677348335361, "loss": 0.0169, "num_input_tokens_seen": 70165312, "step": 32460 }, { "epoch": 5.2960848287112565, "grad_norm": 0.0046011339873075485, "learning_rate": 0.0009195290142765012, "loss": 0.0465, "num_input_tokens_seen": 70177824, "step": 32465 }, { "epoch": 5.296900489396411, "grad_norm": 0.2414412796497345, "learning_rate": 0.0009194902852171787, "loss": 0.0773, "num_input_tokens_seen": 70187904, "step": 32470 }, { "epoch": 5.297716150081566, "grad_norm": 0.06987257301807404, "learning_rate": 0.0009194515476563533, "loss": 0.0763, "num_input_tokens_seen": 70199296, "step": 32475 }, { "epoch": 5.298531810766721, "grad_norm": 0.1548604816198349, "learning_rate": 0.0009194128015948103, "loss": 0.0339, "num_input_tokens_seen": 70209472, "step": 32480 }, { "epoch": 5.299347471451876, "grad_norm": 0.007507277186959982, "learning_rate": 0.0009193740470333347, "loss": 0.0868, "num_input_tokens_seen": 70220832, "step": 32485 }, { "epoch": 5.300163132137031, "grad_norm": 0.012318221852183342, "learning_rate": 0.0009193352839727121, "loss": 0.2219, "num_input_tokens_seen": 70230912, "step": 32490 }, { "epoch": 5.300978792822186, "grad_norm": 0.04195151478052139, "learning_rate": 0.0009192965124137279, "loss": 0.0795, "num_input_tokens_seen": 70241440, "step": 32495 }, { "epoch": 5.301794453507341, "grad_norm": 0.01736997626721859, "learning_rate": 0.000919257732357168, "loss": 0.0565, "num_input_tokens_seen": 70252320, "step": 32500 }, { "epoch": 5.302610114192496, "grad_norm": 0.032310500741004944, "learning_rate": 0.0009192189438038183, "loss": 0.1439, "num_input_tokens_seen": 70263584, "step": 32505 }, { "epoch": 5.303425774877651, "grad_norm": 0.03387526422739029, "learning_rate": 0.0009191801467544649, "loss": 0.0598, "num_input_tokens_seen": 70275040, "step": 32510 }, { "epoch": 5.304241435562806, "grad_norm": 0.17230959236621857, "learning_rate": 0.0009191413412098942, "loss": 0.1064, "num_input_tokens_seen": 70287168, "step": 32515 }, { "epoch": 5.30505709624796, "grad_norm": 0.04161704704165459, "learning_rate": 0.0009191025271708923, "loss": 0.2394, "num_input_tokens_seen": 70298848, "step": 32520 }, { "epoch": 5.305872756933116, "grad_norm": 0.038167256861925125, "learning_rate": 0.0009190637046382461, "loss": 0.0417, "num_input_tokens_seen": 70309760, "step": 32525 }, { "epoch": 5.306688417618271, "grad_norm": 0.16672658920288086, "learning_rate": 0.0009190248736127422, "loss": 0.0625, "num_input_tokens_seen": 70320288, "step": 32530 }, { "epoch": 5.307504078303426, "grad_norm": 0.042624689638614655, "learning_rate": 0.0009189860340951679, "loss": 0.0649, "num_input_tokens_seen": 70332128, "step": 32535 }, { "epoch": 5.308319738988581, "grad_norm": 0.012349123135209084, "learning_rate": 0.0009189471860863099, "loss": 0.0641, "num_input_tokens_seen": 70342400, "step": 32540 }, { "epoch": 5.309135399673735, "grad_norm": 0.2591664791107178, "learning_rate": 0.0009189083295869558, "loss": 0.1002, "num_input_tokens_seen": 70352416, "step": 32545 }, { "epoch": 5.309951060358891, "grad_norm": 0.022202063351869583, "learning_rate": 0.0009188694645978928, "loss": 0.1235, "num_input_tokens_seen": 70363296, "step": 32550 }, { "epoch": 5.310766721044046, "grad_norm": 0.1519620418548584, "learning_rate": 0.0009188305911199088, "loss": 0.1209, "num_input_tokens_seen": 70375488, "step": 32555 }, { "epoch": 5.311582381729201, "grad_norm": 0.025497501716017723, "learning_rate": 0.0009187917091537918, "loss": 0.0746, "num_input_tokens_seen": 70385184, "step": 32560 }, { "epoch": 5.3123980424143555, "grad_norm": 0.08437871932983398, "learning_rate": 0.0009187528187003293, "loss": 0.1442, "num_input_tokens_seen": 70395296, "step": 32565 }, { "epoch": 5.31321370309951, "grad_norm": 0.05105772614479065, "learning_rate": 0.0009187139197603097, "loss": 0.0387, "num_input_tokens_seen": 70405920, "step": 32570 }, { "epoch": 5.314029363784665, "grad_norm": 0.28494322299957275, "learning_rate": 0.0009186750123345214, "loss": 0.1898, "num_input_tokens_seen": 70416960, "step": 32575 }, { "epoch": 5.314845024469821, "grad_norm": 0.1514277160167694, "learning_rate": 0.0009186360964237528, "loss": 0.13, "num_input_tokens_seen": 70428480, "step": 32580 }, { "epoch": 5.315660685154976, "grad_norm": 0.030695544555783272, "learning_rate": 0.0009185971720287926, "loss": 0.0963, "num_input_tokens_seen": 70439200, "step": 32585 }, { "epoch": 5.3164763458401305, "grad_norm": 0.12666253745555878, "learning_rate": 0.0009185582391504299, "loss": 0.0913, "num_input_tokens_seen": 70450656, "step": 32590 }, { "epoch": 5.317292006525285, "grad_norm": 0.1707489788532257, "learning_rate": 0.0009185192977894533, "loss": 0.1483, "num_input_tokens_seen": 70461504, "step": 32595 }, { "epoch": 5.31810766721044, "grad_norm": 0.07268530875444412, "learning_rate": 0.0009184803479466521, "loss": 0.1427, "num_input_tokens_seen": 70471840, "step": 32600 }, { "epoch": 5.318923327895595, "grad_norm": 0.014792957343161106, "learning_rate": 0.0009184413896228161, "loss": 0.0997, "num_input_tokens_seen": 70481536, "step": 32605 }, { "epoch": 5.319738988580751, "grad_norm": 0.19271481037139893, "learning_rate": 0.0009184024228187343, "loss": 0.0948, "num_input_tokens_seen": 70492064, "step": 32610 }, { "epoch": 5.3205546492659055, "grad_norm": 0.2561456859111786, "learning_rate": 0.0009183634475351967, "loss": 0.107, "num_input_tokens_seen": 70501184, "step": 32615 }, { "epoch": 5.32137030995106, "grad_norm": 0.11083754897117615, "learning_rate": 0.0009183244637729931, "loss": 0.1501, "num_input_tokens_seen": 70511808, "step": 32620 }, { "epoch": 5.322185970636215, "grad_norm": 0.062356043606996536, "learning_rate": 0.0009182854715329134, "loss": 0.0937, "num_input_tokens_seen": 70521792, "step": 32625 }, { "epoch": 5.32300163132137, "grad_norm": 0.013759220018982887, "learning_rate": 0.0009182464708157481, "loss": 0.0377, "num_input_tokens_seen": 70533120, "step": 32630 }, { "epoch": 5.323817292006526, "grad_norm": 0.12132881581783295, "learning_rate": 0.0009182074616222875, "loss": 0.0632, "num_input_tokens_seen": 70543360, "step": 32635 }, { "epoch": 5.3246329526916805, "grad_norm": 0.08230478316545486, "learning_rate": 0.0009181684439533223, "loss": 0.1087, "num_input_tokens_seen": 70554560, "step": 32640 }, { "epoch": 5.325448613376835, "grad_norm": 0.1693173348903656, "learning_rate": 0.0009181294178096427, "loss": 0.2002, "num_input_tokens_seen": 70566080, "step": 32645 }, { "epoch": 5.32626427406199, "grad_norm": 0.21043474972248077, "learning_rate": 0.0009180903831920404, "loss": 0.1808, "num_input_tokens_seen": 70576064, "step": 32650 }, { "epoch": 5.327079934747145, "grad_norm": 0.0415470190346241, "learning_rate": 0.0009180513401013059, "loss": 0.1335, "num_input_tokens_seen": 70586624, "step": 32655 }, { "epoch": 5.327895595432301, "grad_norm": 0.019588204100728035, "learning_rate": 0.0009180122885382307, "loss": 0.1125, "num_input_tokens_seen": 70598176, "step": 32660 }, { "epoch": 5.328711256117455, "grad_norm": 0.24973243474960327, "learning_rate": 0.0009179732285036062, "loss": 0.2379, "num_input_tokens_seen": 70608128, "step": 32665 }, { "epoch": 5.32952691680261, "grad_norm": 0.006756752263754606, "learning_rate": 0.0009179341599982239, "loss": 0.1654, "num_input_tokens_seen": 70620064, "step": 32670 }, { "epoch": 5.330342577487765, "grad_norm": 0.14148695766925812, "learning_rate": 0.0009178950830228759, "loss": 0.1429, "num_input_tokens_seen": 70630048, "step": 32675 }, { "epoch": 5.33115823817292, "grad_norm": 0.02704835683107376, "learning_rate": 0.0009178559975783536, "loss": 0.0799, "num_input_tokens_seen": 70639552, "step": 32680 }, { "epoch": 5.331973898858075, "grad_norm": 0.24264878034591675, "learning_rate": 0.0009178169036654496, "loss": 0.0869, "num_input_tokens_seen": 70651040, "step": 32685 }, { "epoch": 5.33278955954323, "grad_norm": 0.10831085592508316, "learning_rate": 0.0009177778012849561, "loss": 0.0775, "num_input_tokens_seen": 70660736, "step": 32690 }, { "epoch": 5.333605220228385, "grad_norm": 0.014376015402376652, "learning_rate": 0.0009177386904376652, "loss": 0.0338, "num_input_tokens_seen": 70671488, "step": 32695 }, { "epoch": 5.33442088091354, "grad_norm": 0.029407214373350143, "learning_rate": 0.0009176995711243699, "loss": 0.0204, "num_input_tokens_seen": 70680608, "step": 32700 }, { "epoch": 5.335236541598695, "grad_norm": 0.3172883987426758, "learning_rate": 0.0009176604433458631, "loss": 0.1396, "num_input_tokens_seen": 70691200, "step": 32705 }, { "epoch": 5.33605220228385, "grad_norm": 0.028004512190818787, "learning_rate": 0.0009176213071029373, "loss": 0.0474, "num_input_tokens_seen": 70702496, "step": 32710 }, { "epoch": 5.3368678629690045, "grad_norm": 0.014881027862429619, "learning_rate": 0.0009175821623963861, "loss": 0.2455, "num_input_tokens_seen": 70713856, "step": 32715 }, { "epoch": 5.33768352365416, "grad_norm": 0.08989004045724869, "learning_rate": 0.0009175430092270026, "loss": 0.1038, "num_input_tokens_seen": 70724320, "step": 32720 }, { "epoch": 5.338499184339315, "grad_norm": 0.05116923525929451, "learning_rate": 0.0009175038475955804, "loss": 0.0783, "num_input_tokens_seen": 70735232, "step": 32725 }, { "epoch": 5.33931484502447, "grad_norm": 0.06526987254619598, "learning_rate": 0.0009174646775029129, "loss": 0.0791, "num_input_tokens_seen": 70746528, "step": 32730 }, { "epoch": 5.340130505709625, "grad_norm": 0.028074419125914574, "learning_rate": 0.0009174254989497942, "loss": 0.057, "num_input_tokens_seen": 70756320, "step": 32735 }, { "epoch": 5.3409461663947795, "grad_norm": 0.12809574604034424, "learning_rate": 0.0009173863119370183, "loss": 0.0726, "num_input_tokens_seen": 70768064, "step": 32740 }, { "epoch": 5.341761827079935, "grad_norm": 0.12281595915555954, "learning_rate": 0.0009173471164653791, "loss": 0.0536, "num_input_tokens_seen": 70779488, "step": 32745 }, { "epoch": 5.34257748776509, "grad_norm": 0.08573044091463089, "learning_rate": 0.0009173079125356714, "loss": 0.1067, "num_input_tokens_seen": 70789728, "step": 32750 }, { "epoch": 5.343393148450245, "grad_norm": 0.19512666761875153, "learning_rate": 0.0009172687001486892, "loss": 0.1032, "num_input_tokens_seen": 70800992, "step": 32755 }, { "epoch": 5.3442088091354, "grad_norm": 0.07091918587684631, "learning_rate": 0.0009172294793052277, "loss": 0.0405, "num_input_tokens_seen": 70812032, "step": 32760 }, { "epoch": 5.3450244698205545, "grad_norm": 0.312071293592453, "learning_rate": 0.0009171902500060814, "loss": 0.1494, "num_input_tokens_seen": 70824032, "step": 32765 }, { "epoch": 5.345840130505709, "grad_norm": 0.004325787536799908, "learning_rate": 0.0009171510122520455, "loss": 0.0629, "num_input_tokens_seen": 70835648, "step": 32770 }, { "epoch": 5.346655791190865, "grad_norm": 0.01614411175251007, "learning_rate": 0.000917111766043915, "loss": 0.0197, "num_input_tokens_seen": 70846080, "step": 32775 }, { "epoch": 5.34747145187602, "grad_norm": 0.1334189772605896, "learning_rate": 0.0009170725113824855, "loss": 0.2134, "num_input_tokens_seen": 70856096, "step": 32780 }, { "epoch": 5.348287112561175, "grad_norm": 0.07463495433330536, "learning_rate": 0.0009170332482685524, "loss": 0.0312, "num_input_tokens_seen": 70867168, "step": 32785 }, { "epoch": 5.349102773246329, "grad_norm": 0.11940980702638626, "learning_rate": 0.0009169939767029116, "loss": 0.0687, "num_input_tokens_seen": 70878080, "step": 32790 }, { "epoch": 5.349918433931484, "grad_norm": 0.020729253068566322, "learning_rate": 0.0009169546966863588, "loss": 0.1368, "num_input_tokens_seen": 70889472, "step": 32795 }, { "epoch": 5.350734094616639, "grad_norm": 0.14954179525375366, "learning_rate": 0.0009169154082196901, "loss": 0.0857, "num_input_tokens_seen": 70900640, "step": 32800 }, { "epoch": 5.351549755301795, "grad_norm": 0.32872921228408813, "learning_rate": 0.0009168761113037019, "loss": 0.128, "num_input_tokens_seen": 70909856, "step": 32805 }, { "epoch": 5.35236541598695, "grad_norm": 0.0425887331366539, "learning_rate": 0.0009168368059391903, "loss": 0.0225, "num_input_tokens_seen": 70921248, "step": 32810 }, { "epoch": 5.353181076672104, "grad_norm": 0.078069768846035, "learning_rate": 0.0009167974921269519, "loss": 0.1495, "num_input_tokens_seen": 70930720, "step": 32815 }, { "epoch": 5.353996737357259, "grad_norm": 0.14288416504859924, "learning_rate": 0.0009167581698677838, "loss": 0.0707, "num_input_tokens_seen": 70940448, "step": 32820 }, { "epoch": 5.354812398042414, "grad_norm": 0.022837474942207336, "learning_rate": 0.0009167188391624827, "loss": 0.0113, "num_input_tokens_seen": 70952000, "step": 32825 }, { "epoch": 5.35562805872757, "grad_norm": 0.008411848917603493, "learning_rate": 0.0009166795000118456, "loss": 0.0717, "num_input_tokens_seen": 70961856, "step": 32830 }, { "epoch": 5.356443719412725, "grad_norm": 0.0260999146848917, "learning_rate": 0.0009166401524166699, "loss": 0.0281, "num_input_tokens_seen": 70972320, "step": 32835 }, { "epoch": 5.357259380097879, "grad_norm": 0.0431085005402565, "learning_rate": 0.000916600796377753, "loss": 0.0942, "num_input_tokens_seen": 70983648, "step": 32840 }, { "epoch": 5.358075040783034, "grad_norm": 0.01479522418230772, "learning_rate": 0.0009165614318958924, "loss": 0.1821, "num_input_tokens_seen": 70993952, "step": 32845 }, { "epoch": 5.358890701468189, "grad_norm": 0.27183374762535095, "learning_rate": 0.0009165220589718859, "loss": 0.1811, "num_input_tokens_seen": 71004160, "step": 32850 }, { "epoch": 5.359706362153344, "grad_norm": 0.07483382523059845, "learning_rate": 0.0009164826776065316, "loss": 0.0864, "num_input_tokens_seen": 71015264, "step": 32855 }, { "epoch": 5.3605220228384995, "grad_norm": 0.008126812987029552, "learning_rate": 0.0009164432878006274, "loss": 0.1002, "num_input_tokens_seen": 71026560, "step": 32860 }, { "epoch": 5.361337683523654, "grad_norm": 0.2765503227710724, "learning_rate": 0.0009164038895549716, "loss": 0.2013, "num_input_tokens_seen": 71038752, "step": 32865 }, { "epoch": 5.362153344208809, "grad_norm": 0.191825270652771, "learning_rate": 0.0009163644828703628, "loss": 0.1433, "num_input_tokens_seen": 71050112, "step": 32870 }, { "epoch": 5.362969004893964, "grad_norm": 0.04115011915564537, "learning_rate": 0.0009163250677475996, "loss": 0.0803, "num_input_tokens_seen": 71061600, "step": 32875 }, { "epoch": 5.363784665579119, "grad_norm": 0.010353055782616138, "learning_rate": 0.0009162856441874807, "loss": 0.0795, "num_input_tokens_seen": 71072960, "step": 32880 }, { "epoch": 5.364600326264274, "grad_norm": 0.03754093125462532, "learning_rate": 0.0009162462121908052, "loss": 0.1689, "num_input_tokens_seen": 71083872, "step": 32885 }, { "epoch": 5.365415986949429, "grad_norm": 0.20056197047233582, "learning_rate": 0.0009162067717583722, "loss": 0.2398, "num_input_tokens_seen": 71094368, "step": 32890 }, { "epoch": 5.366231647634584, "grad_norm": 0.26877015829086304, "learning_rate": 0.0009161673228909808, "loss": 0.1409, "num_input_tokens_seen": 71105472, "step": 32895 }, { "epoch": 5.367047308319739, "grad_norm": 0.035021211951971054, "learning_rate": 0.0009161278655894307, "loss": 0.0337, "num_input_tokens_seen": 71115840, "step": 32900 }, { "epoch": 5.367862969004894, "grad_norm": 0.17578844726085663, "learning_rate": 0.0009160883998545216, "loss": 0.0935, "num_input_tokens_seen": 71126240, "step": 32905 }, { "epoch": 5.368678629690049, "grad_norm": 0.054084427654743195, "learning_rate": 0.0009160489256870532, "loss": 0.1648, "num_input_tokens_seen": 71136640, "step": 32910 }, { "epoch": 5.369494290375204, "grad_norm": 0.03351510688662529, "learning_rate": 0.0009160094430878255, "loss": 0.1695, "num_input_tokens_seen": 71146592, "step": 32915 }, { "epoch": 5.370309951060359, "grad_norm": 0.013419684953987598, "learning_rate": 0.0009159699520576388, "loss": 0.1165, "num_input_tokens_seen": 71157664, "step": 32920 }, { "epoch": 5.371125611745514, "grad_norm": 0.2606358528137207, "learning_rate": 0.0009159304525972931, "loss": 0.0713, "num_input_tokens_seen": 71168352, "step": 32925 }, { "epoch": 5.371941272430669, "grad_norm": 0.041385188698768616, "learning_rate": 0.0009158909447075894, "loss": 0.0725, "num_input_tokens_seen": 71178432, "step": 32930 }, { "epoch": 5.372756933115824, "grad_norm": 0.2104557901620865, "learning_rate": 0.0009158514283893279, "loss": 0.1242, "num_input_tokens_seen": 71188256, "step": 32935 }, { "epoch": 5.373572593800978, "grad_norm": 0.1753581315279007, "learning_rate": 0.0009158119036433097, "loss": 0.1321, "num_input_tokens_seen": 71198720, "step": 32940 }, { "epoch": 5.374388254486134, "grad_norm": 0.10579296201467514, "learning_rate": 0.0009157723704703358, "loss": 0.1819, "num_input_tokens_seen": 71208928, "step": 32945 }, { "epoch": 5.375203915171289, "grad_norm": 0.11044318228960037, "learning_rate": 0.0009157328288712075, "loss": 0.0461, "num_input_tokens_seen": 71220640, "step": 32950 }, { "epoch": 5.376019575856444, "grad_norm": 0.015562590211629868, "learning_rate": 0.0009156932788467259, "loss": 0.0734, "num_input_tokens_seen": 71230976, "step": 32955 }, { "epoch": 5.376835236541599, "grad_norm": 0.021977802738547325, "learning_rate": 0.0009156537203976927, "loss": 0.0687, "num_input_tokens_seen": 71241696, "step": 32960 }, { "epoch": 5.377650897226753, "grad_norm": 0.1440175622701645, "learning_rate": 0.0009156141535249094, "loss": 0.0302, "num_input_tokens_seen": 71253792, "step": 32965 }, { "epoch": 5.378466557911908, "grad_norm": 0.17520390450954437, "learning_rate": 0.0009155745782291782, "loss": 0.063, "num_input_tokens_seen": 71263584, "step": 32970 }, { "epoch": 5.379282218597064, "grad_norm": 0.011420628987252712, "learning_rate": 0.000915534994511301, "loss": 0.0151, "num_input_tokens_seen": 71273760, "step": 32975 }, { "epoch": 5.380097879282219, "grad_norm": 0.25457486510276794, "learning_rate": 0.0009154954023720799, "loss": 0.045, "num_input_tokens_seen": 71284512, "step": 32980 }, { "epoch": 5.3809135399673735, "grad_norm": 0.023655150085687637, "learning_rate": 0.0009154558018123174, "loss": 0.1596, "num_input_tokens_seen": 71294560, "step": 32985 }, { "epoch": 5.381729200652528, "grad_norm": 0.06550729274749756, "learning_rate": 0.000915416192832816, "loss": 0.1011, "num_input_tokens_seen": 71304576, "step": 32990 }, { "epoch": 5.382544861337683, "grad_norm": 0.009049389511346817, "learning_rate": 0.0009153765754343786, "loss": 0.0417, "num_input_tokens_seen": 71315136, "step": 32995 }, { "epoch": 5.383360522022839, "grad_norm": 0.09317310899496078, "learning_rate": 0.0009153369496178078, "loss": 0.1479, "num_input_tokens_seen": 71324800, "step": 33000 }, { "epoch": 5.384176182707994, "grad_norm": 0.11149311810731888, "learning_rate": 0.0009152973153839068, "loss": 0.1349, "num_input_tokens_seen": 71336320, "step": 33005 }, { "epoch": 5.3849918433931485, "grad_norm": 0.25844934582710266, "learning_rate": 0.000915257672733479, "loss": 0.1111, "num_input_tokens_seen": 71348128, "step": 33010 }, { "epoch": 5.385807504078303, "grad_norm": 0.008003010414540768, "learning_rate": 0.0009152180216673276, "loss": 0.0315, "num_input_tokens_seen": 71358816, "step": 33015 }, { "epoch": 5.386623164763458, "grad_norm": 0.12192642688751221, "learning_rate": 0.0009151783621862564, "loss": 0.0382, "num_input_tokens_seen": 71369728, "step": 33020 }, { "epoch": 5.387438825448613, "grad_norm": 0.010448573157191277, "learning_rate": 0.0009151386942910688, "loss": 0.0945, "num_input_tokens_seen": 71381056, "step": 33025 }, { "epoch": 5.388254486133769, "grad_norm": 0.0021205150987952948, "learning_rate": 0.0009150990179825689, "loss": 0.0383, "num_input_tokens_seen": 71391456, "step": 33030 }, { "epoch": 5.3890701468189235, "grad_norm": 0.019802767783403397, "learning_rate": 0.000915059333261561, "loss": 0.1716, "num_input_tokens_seen": 71402208, "step": 33035 }, { "epoch": 5.389885807504078, "grad_norm": 0.06028865650296211, "learning_rate": 0.0009150196401288491, "loss": 0.0465, "num_input_tokens_seen": 71412864, "step": 33040 }, { "epoch": 5.390701468189233, "grad_norm": 0.017764244228601456, "learning_rate": 0.0009149799385852375, "loss": 0.0465, "num_input_tokens_seen": 71423840, "step": 33045 }, { "epoch": 5.391517128874388, "grad_norm": 0.04317443072795868, "learning_rate": 0.0009149402286315314, "loss": 0.0318, "num_input_tokens_seen": 71432480, "step": 33050 }, { "epoch": 5.392332789559543, "grad_norm": 0.2608911693096161, "learning_rate": 0.0009149005102685348, "loss": 0.1132, "num_input_tokens_seen": 71445152, "step": 33055 }, { "epoch": 5.3931484502446985, "grad_norm": 0.03656046465039253, "learning_rate": 0.0009148607834970532, "loss": 0.2079, "num_input_tokens_seen": 71455808, "step": 33060 }, { "epoch": 5.393964110929853, "grad_norm": 0.16922008991241455, "learning_rate": 0.0009148210483178916, "loss": 0.0763, "num_input_tokens_seen": 71467264, "step": 33065 }, { "epoch": 5.394779771615008, "grad_norm": 0.015841931104660034, "learning_rate": 0.000914781304731855, "loss": 0.0865, "num_input_tokens_seen": 71476832, "step": 33070 }, { "epoch": 5.395595432300163, "grad_norm": 0.24486678838729858, "learning_rate": 0.0009147415527397492, "loss": 0.1865, "num_input_tokens_seen": 71487840, "step": 33075 }, { "epoch": 5.396411092985318, "grad_norm": 0.13854020833969116, "learning_rate": 0.0009147017923423797, "loss": 0.1851, "num_input_tokens_seen": 71499776, "step": 33080 }, { "epoch": 5.397226753670473, "grad_norm": 0.04607773944735527, "learning_rate": 0.0009146620235405523, "loss": 0.0428, "num_input_tokens_seen": 71511232, "step": 33085 }, { "epoch": 5.398042414355628, "grad_norm": 0.04011186957359314, "learning_rate": 0.0009146222463350729, "loss": 0.1327, "num_input_tokens_seen": 71523040, "step": 33090 }, { "epoch": 5.398858075040783, "grad_norm": 0.06927596777677536, "learning_rate": 0.0009145824607267478, "loss": 0.1235, "num_input_tokens_seen": 71533216, "step": 33095 }, { "epoch": 5.399673735725938, "grad_norm": 0.043770719319581985, "learning_rate": 0.0009145426667163832, "loss": 0.0413, "num_input_tokens_seen": 71544256, "step": 33100 }, { "epoch": 5.400489396411093, "grad_norm": 0.29412418603897095, "learning_rate": 0.0009145028643047855, "loss": 0.0871, "num_input_tokens_seen": 71555104, "step": 33105 }, { "epoch": 5.401305057096248, "grad_norm": 0.09121891856193542, "learning_rate": 0.0009144630534927613, "loss": 0.0808, "num_input_tokens_seen": 71566176, "step": 33110 }, { "epoch": 5.402120717781403, "grad_norm": 0.3319765329360962, "learning_rate": 0.0009144232342811179, "loss": 0.1327, "num_input_tokens_seen": 71577440, "step": 33115 }, { "epoch": 5.402936378466558, "grad_norm": 0.017797963693737984, "learning_rate": 0.0009143834066706615, "loss": 0.0248, "num_input_tokens_seen": 71588160, "step": 33120 }, { "epoch": 5.403752039151713, "grad_norm": 0.165283203125, "learning_rate": 0.0009143435706621999, "loss": 0.0478, "num_input_tokens_seen": 71598848, "step": 33125 }, { "epoch": 5.404567699836868, "grad_norm": 0.011903224512934685, "learning_rate": 0.0009143037262565401, "loss": 0.1196, "num_input_tokens_seen": 71609536, "step": 33130 }, { "epoch": 5.4053833605220225, "grad_norm": 0.08311284333467484, "learning_rate": 0.00091426387345449, "loss": 0.1009, "num_input_tokens_seen": 71620576, "step": 33135 }, { "epoch": 5.406199021207178, "grad_norm": 0.032639991492033005, "learning_rate": 0.0009142240122568566, "loss": 0.1154, "num_input_tokens_seen": 71632800, "step": 33140 }, { "epoch": 5.407014681892333, "grad_norm": 0.1708725243806839, "learning_rate": 0.0009141841426644482, "loss": 0.0963, "num_input_tokens_seen": 71643392, "step": 33145 }, { "epoch": 5.407830342577488, "grad_norm": 0.1919700801372528, "learning_rate": 0.0009141442646780728, "loss": 0.0418, "num_input_tokens_seen": 71653824, "step": 33150 }, { "epoch": 5.408646003262643, "grad_norm": 0.05919947475194931, "learning_rate": 0.0009141043782985385, "loss": 0.0129, "num_input_tokens_seen": 71664288, "step": 33155 }, { "epoch": 5.4094616639477975, "grad_norm": 0.0068783871829509735, "learning_rate": 0.0009140644835266537, "loss": 0.1369, "num_input_tokens_seen": 71674848, "step": 33160 }, { "epoch": 5.410277324632952, "grad_norm": 0.07434239238500595, "learning_rate": 0.0009140245803632268, "loss": 0.0418, "num_input_tokens_seen": 71685280, "step": 33165 }, { "epoch": 5.411092985318108, "grad_norm": 0.05117892101407051, "learning_rate": 0.0009139846688090665, "loss": 0.0888, "num_input_tokens_seen": 71695584, "step": 33170 }, { "epoch": 5.411908646003263, "grad_norm": 0.016075173392891884, "learning_rate": 0.0009139447488649818, "loss": 0.0488, "num_input_tokens_seen": 71706016, "step": 33175 }, { "epoch": 5.412724306688418, "grad_norm": 0.18677929043769836, "learning_rate": 0.0009139048205317817, "loss": 0.12, "num_input_tokens_seen": 71718144, "step": 33180 }, { "epoch": 5.4135399673735725, "grad_norm": 0.01021922379732132, "learning_rate": 0.0009138648838102751, "loss": 0.2681, "num_input_tokens_seen": 71729600, "step": 33185 }, { "epoch": 5.414355628058727, "grad_norm": 0.19370241463184357, "learning_rate": 0.0009138249387012718, "loss": 0.2159, "num_input_tokens_seen": 71739680, "step": 33190 }, { "epoch": 5.415171288743883, "grad_norm": 0.07409081608057022, "learning_rate": 0.000913784985205581, "loss": 0.0201, "num_input_tokens_seen": 71750848, "step": 33195 }, { "epoch": 5.415986949429038, "grad_norm": 0.027513748034834862, "learning_rate": 0.0009137450233240127, "loss": 0.075, "num_input_tokens_seen": 71762080, "step": 33200 }, { "epoch": 5.416802610114193, "grad_norm": 0.00557843130081892, "learning_rate": 0.0009137050530573765, "loss": 0.0684, "num_input_tokens_seen": 71772320, "step": 33205 }, { "epoch": 5.417618270799347, "grad_norm": 0.16831335425376892, "learning_rate": 0.0009136650744064827, "loss": 0.1361, "num_input_tokens_seen": 71782912, "step": 33210 }, { "epoch": 5.418433931484502, "grad_norm": 0.037396691739559174, "learning_rate": 0.0009136250873721413, "loss": 0.0382, "num_input_tokens_seen": 71793696, "step": 33215 }, { "epoch": 5.419249592169657, "grad_norm": 0.2280731201171875, "learning_rate": 0.0009135850919551628, "loss": 0.104, "num_input_tokens_seen": 71803584, "step": 33220 }, { "epoch": 5.420065252854813, "grad_norm": 0.035369303077459335, "learning_rate": 0.0009135450881563578, "loss": 0.0536, "num_input_tokens_seen": 71815232, "step": 33225 }, { "epoch": 5.420880913539968, "grad_norm": 0.2152285873889923, "learning_rate": 0.0009135050759765369, "loss": 0.0528, "num_input_tokens_seen": 71824960, "step": 33230 }, { "epoch": 5.421696574225122, "grad_norm": 0.03372219577431679, "learning_rate": 0.0009134650554165111, "loss": 0.0108, "num_input_tokens_seen": 71836576, "step": 33235 }, { "epoch": 5.422512234910277, "grad_norm": 0.03322592377662659, "learning_rate": 0.0009134250264770914, "loss": 0.0154, "num_input_tokens_seen": 71847168, "step": 33240 }, { "epoch": 5.423327895595432, "grad_norm": 0.1197444424033165, "learning_rate": 0.0009133849891590891, "loss": 0.0457, "num_input_tokens_seen": 71857504, "step": 33245 }, { "epoch": 5.424143556280587, "grad_norm": 0.1454666703939438, "learning_rate": 0.0009133449434633157, "loss": 0.1297, "num_input_tokens_seen": 71868608, "step": 33250 }, { "epoch": 5.424959216965743, "grad_norm": 0.15138794481754303, "learning_rate": 0.0009133048893905824, "loss": 0.2438, "num_input_tokens_seen": 71879520, "step": 33255 }, { "epoch": 5.425774877650897, "grad_norm": 0.06414239853620529, "learning_rate": 0.0009132648269417014, "loss": 0.1642, "num_input_tokens_seen": 71890304, "step": 33260 }, { "epoch": 5.426590538336052, "grad_norm": 0.004905904643237591, "learning_rate": 0.0009132247561174843, "loss": 0.1074, "num_input_tokens_seen": 71900480, "step": 33265 }, { "epoch": 5.427406199021207, "grad_norm": 0.029563914984464645, "learning_rate": 0.0009131846769187434, "loss": 0.0306, "num_input_tokens_seen": 71911136, "step": 33270 }, { "epoch": 5.428221859706362, "grad_norm": 0.028971252962946892, "learning_rate": 0.0009131445893462908, "loss": 0.0627, "num_input_tokens_seen": 71923296, "step": 33275 }, { "epoch": 5.4290375203915175, "grad_norm": 0.21459606289863586, "learning_rate": 0.000913104493400939, "loss": 0.072, "num_input_tokens_seen": 71934464, "step": 33280 }, { "epoch": 5.429853181076672, "grad_norm": 0.1923767775297165, "learning_rate": 0.0009130643890835007, "loss": 0.2351, "num_input_tokens_seen": 71944672, "step": 33285 }, { "epoch": 5.430668841761827, "grad_norm": 0.039720792323350906, "learning_rate": 0.0009130242763947884, "loss": 0.0836, "num_input_tokens_seen": 71955872, "step": 33290 }, { "epoch": 5.431484502446982, "grad_norm": 0.22541844844818115, "learning_rate": 0.0009129841553356152, "loss": 0.0747, "num_input_tokens_seen": 71966400, "step": 33295 }, { "epoch": 5.432300163132137, "grad_norm": 0.1534278839826584, "learning_rate": 0.0009129440259067941, "loss": 0.0594, "num_input_tokens_seen": 71978240, "step": 33300 }, { "epoch": 5.433115823817292, "grad_norm": 0.3853054940700531, "learning_rate": 0.0009129038881091386, "loss": 0.3502, "num_input_tokens_seen": 71988640, "step": 33305 }, { "epoch": 5.433931484502447, "grad_norm": 0.04415808245539665, "learning_rate": 0.000912863741943462, "loss": 0.0426, "num_input_tokens_seen": 72000032, "step": 33310 }, { "epoch": 5.434747145187602, "grad_norm": 0.14555767178535461, "learning_rate": 0.000912823587410578, "loss": 0.1448, "num_input_tokens_seen": 72010304, "step": 33315 }, { "epoch": 5.435562805872757, "grad_norm": 0.046213071793317795, "learning_rate": 0.0009127834245113, "loss": 0.0813, "num_input_tokens_seen": 72021376, "step": 33320 }, { "epoch": 5.436378466557912, "grad_norm": 0.21453042328357697, "learning_rate": 0.0009127432532464424, "loss": 0.0588, "num_input_tokens_seen": 72031392, "step": 33325 }, { "epoch": 5.437194127243067, "grad_norm": 0.22624626755714417, "learning_rate": 0.0009127030736168192, "loss": 0.1267, "num_input_tokens_seen": 72041600, "step": 33330 }, { "epoch": 5.438009787928221, "grad_norm": 0.19587194919586182, "learning_rate": 0.0009126628856232446, "loss": 0.094, "num_input_tokens_seen": 72052288, "step": 33335 }, { "epoch": 5.438825448613377, "grad_norm": 0.19771817326545715, "learning_rate": 0.0009126226892665333, "loss": 0.1786, "num_input_tokens_seen": 72062656, "step": 33340 }, { "epoch": 5.439641109298532, "grad_norm": 0.03184128552675247, "learning_rate": 0.0009125824845474996, "loss": 0.0166, "num_input_tokens_seen": 72072416, "step": 33345 }, { "epoch": 5.440456769983687, "grad_norm": 0.13524416089057922, "learning_rate": 0.0009125422714669584, "loss": 0.1857, "num_input_tokens_seen": 72082624, "step": 33350 }, { "epoch": 5.441272430668842, "grad_norm": 0.2313859611749649, "learning_rate": 0.0009125020500257248, "loss": 0.1425, "num_input_tokens_seen": 72094144, "step": 33355 }, { "epoch": 5.442088091353996, "grad_norm": 0.11446195840835571, "learning_rate": 0.000912461820224614, "loss": 0.0561, "num_input_tokens_seen": 72105312, "step": 33360 }, { "epoch": 5.442903752039152, "grad_norm": 0.019046427682042122, "learning_rate": 0.000912421582064441, "loss": 0.2843, "num_input_tokens_seen": 72116096, "step": 33365 }, { "epoch": 5.443719412724307, "grad_norm": 0.01346815936267376, "learning_rate": 0.0009123813355460214, "loss": 0.0992, "num_input_tokens_seen": 72127392, "step": 33370 }, { "epoch": 5.444535073409462, "grad_norm": 0.07554985582828522, "learning_rate": 0.000912341080670171, "loss": 0.0914, "num_input_tokens_seen": 72137440, "step": 33375 }, { "epoch": 5.445350734094617, "grad_norm": 0.14728760719299316, "learning_rate": 0.0009123008174377054, "loss": 0.0779, "num_input_tokens_seen": 72150112, "step": 33380 }, { "epoch": 5.446166394779771, "grad_norm": 0.057900868356227875, "learning_rate": 0.0009122605458494409, "loss": 0.1828, "num_input_tokens_seen": 72159296, "step": 33385 }, { "epoch": 5.446982055464926, "grad_norm": 0.11225569248199463, "learning_rate": 0.0009122202659061934, "loss": 0.0435, "num_input_tokens_seen": 72170240, "step": 33390 }, { "epoch": 5.447797716150082, "grad_norm": 0.036758918315172195, "learning_rate": 0.0009121799776087791, "loss": 0.0167, "num_input_tokens_seen": 72180672, "step": 33395 }, { "epoch": 5.448613376835237, "grad_norm": 0.007878992706537247, "learning_rate": 0.0009121396809580147, "loss": 0.0929, "num_input_tokens_seen": 72191264, "step": 33400 }, { "epoch": 5.4494290375203915, "grad_norm": 0.17817571759223938, "learning_rate": 0.0009120993759547169, "loss": 0.1207, "num_input_tokens_seen": 72201472, "step": 33405 }, { "epoch": 5.450244698205546, "grad_norm": 0.07467425614595413, "learning_rate": 0.0009120590625997026, "loss": 0.063, "num_input_tokens_seen": 72213024, "step": 33410 }, { "epoch": 5.451060358890701, "grad_norm": 0.08875560760498047, "learning_rate": 0.0009120187408937884, "loss": 0.0549, "num_input_tokens_seen": 72224288, "step": 33415 }, { "epoch": 5.451876019575856, "grad_norm": 0.15524475276470184, "learning_rate": 0.0009119784108377918, "loss": 0.0649, "num_input_tokens_seen": 72235104, "step": 33420 }, { "epoch": 5.452691680261012, "grad_norm": 0.010183833539485931, "learning_rate": 0.0009119380724325302, "loss": 0.2185, "num_input_tokens_seen": 72245856, "step": 33425 }, { "epoch": 5.4535073409461665, "grad_norm": 0.053868986666202545, "learning_rate": 0.0009118977256788208, "loss": 0.0478, "num_input_tokens_seen": 72256032, "step": 33430 }, { "epoch": 5.454323001631321, "grad_norm": 0.11249548941850662, "learning_rate": 0.0009118573705774815, "loss": 0.1081, "num_input_tokens_seen": 72267104, "step": 33435 }, { "epoch": 5.455138662316476, "grad_norm": 0.008952013216912746, "learning_rate": 0.0009118170071293302, "loss": 0.0212, "num_input_tokens_seen": 72278912, "step": 33440 }, { "epoch": 5.455954323001631, "grad_norm": 0.03076971136033535, "learning_rate": 0.0009117766353351848, "loss": 0.083, "num_input_tokens_seen": 72289312, "step": 33445 }, { "epoch": 5.456769983686787, "grad_norm": 0.023297972977161407, "learning_rate": 0.0009117362551958635, "loss": 0.0506, "num_input_tokens_seen": 72300896, "step": 33450 }, { "epoch": 5.4575856443719415, "grad_norm": 0.028049878776073456, "learning_rate": 0.0009116958667121847, "loss": 0.021, "num_input_tokens_seen": 72312288, "step": 33455 }, { "epoch": 5.458401305057096, "grad_norm": 0.12048943340778351, "learning_rate": 0.0009116554698849668, "loss": 0.1195, "num_input_tokens_seen": 72322912, "step": 33460 }, { "epoch": 5.459216965742251, "grad_norm": 0.019078262150287628, "learning_rate": 0.0009116150647150286, "loss": 0.0531, "num_input_tokens_seen": 72333600, "step": 33465 }, { "epoch": 5.460032626427406, "grad_norm": 0.05092627555131912, "learning_rate": 0.0009115746512031891, "loss": 0.1175, "num_input_tokens_seen": 72344096, "step": 33470 }, { "epoch": 5.460848287112561, "grad_norm": 0.10556714981794357, "learning_rate": 0.0009115342293502669, "loss": 0.1504, "num_input_tokens_seen": 72354432, "step": 33475 }, { "epoch": 5.4616639477977165, "grad_norm": 0.0820518508553505, "learning_rate": 0.0009114937991570817, "loss": 0.0532, "num_input_tokens_seen": 72366400, "step": 33480 }, { "epoch": 5.462479608482871, "grad_norm": 0.04745522886514664, "learning_rate": 0.0009114533606244526, "loss": 0.0263, "num_input_tokens_seen": 72375808, "step": 33485 }, { "epoch": 5.463295269168026, "grad_norm": 0.18660973012447357, "learning_rate": 0.0009114129137531991, "loss": 0.1344, "num_input_tokens_seen": 72386560, "step": 33490 }, { "epoch": 5.464110929853181, "grad_norm": 0.04909445717930794, "learning_rate": 0.000911372458544141, "loss": 0.2111, "num_input_tokens_seen": 72396960, "step": 33495 }, { "epoch": 5.464926590538336, "grad_norm": 0.03188806772232056, "learning_rate": 0.0009113319949980983, "loss": 0.0183, "num_input_tokens_seen": 72407104, "step": 33500 }, { "epoch": 5.465742251223491, "grad_norm": 0.020453909412026405, "learning_rate": 0.0009112915231158907, "loss": 0.0981, "num_input_tokens_seen": 72418432, "step": 33505 }, { "epoch": 5.466557911908646, "grad_norm": 0.2022310048341751, "learning_rate": 0.0009112510428983387, "loss": 0.0943, "num_input_tokens_seen": 72429824, "step": 33510 }, { "epoch": 5.467373572593801, "grad_norm": 0.07638852298259735, "learning_rate": 0.0009112105543462628, "loss": 0.0174, "num_input_tokens_seen": 72440352, "step": 33515 }, { "epoch": 5.468189233278956, "grad_norm": 0.13178791105747223, "learning_rate": 0.0009111700574604831, "loss": 0.0705, "num_input_tokens_seen": 72452000, "step": 33520 }, { "epoch": 5.469004893964111, "grad_norm": 0.14010080695152283, "learning_rate": 0.0009111295522418207, "loss": 0.0609, "num_input_tokens_seen": 72462240, "step": 33525 }, { "epoch": 5.4698205546492655, "grad_norm": 0.057958196848630905, "learning_rate": 0.0009110890386910964, "loss": 0.0756, "num_input_tokens_seen": 72474656, "step": 33530 }, { "epoch": 5.470636215334421, "grad_norm": 0.0577903650701046, "learning_rate": 0.0009110485168091311, "loss": 0.0432, "num_input_tokens_seen": 72485568, "step": 33535 }, { "epoch": 5.471451876019576, "grad_norm": 0.01845645345747471, "learning_rate": 0.0009110079865967462, "loss": 0.1357, "num_input_tokens_seen": 72496192, "step": 33540 }, { "epoch": 5.472267536704731, "grad_norm": 0.26865318417549133, "learning_rate": 0.0009109674480547632, "loss": 0.2558, "num_input_tokens_seen": 72506176, "step": 33545 }, { "epoch": 5.473083197389886, "grad_norm": 0.027263272553682327, "learning_rate": 0.0009109269011840033, "loss": 0.1013, "num_input_tokens_seen": 72517216, "step": 33550 }, { "epoch": 5.4738988580750405, "grad_norm": 0.13702431321144104, "learning_rate": 0.0009108863459852886, "loss": 0.1482, "num_input_tokens_seen": 72528992, "step": 33555 }, { "epoch": 5.474714518760196, "grad_norm": 0.033558014780282974, "learning_rate": 0.0009108457824594407, "loss": 0.3343, "num_input_tokens_seen": 72539968, "step": 33560 }, { "epoch": 5.475530179445351, "grad_norm": 0.15126581490039825, "learning_rate": 0.0009108052106072819, "loss": 0.0679, "num_input_tokens_seen": 72551488, "step": 33565 }, { "epoch": 5.476345840130506, "grad_norm": 0.03495920076966286, "learning_rate": 0.0009107646304296344, "loss": 0.1196, "num_input_tokens_seen": 72561984, "step": 33570 }, { "epoch": 5.477161500815661, "grad_norm": 0.034811608493328094, "learning_rate": 0.0009107240419273206, "loss": 0.019, "num_input_tokens_seen": 72572704, "step": 33575 }, { "epoch": 5.4779771615008155, "grad_norm": 0.3282313942909241, "learning_rate": 0.000910683445101163, "loss": 0.245, "num_input_tokens_seen": 72584096, "step": 33580 }, { "epoch": 5.47879282218597, "grad_norm": 0.008367033675312996, "learning_rate": 0.0009106428399519844, "loss": 0.1595, "num_input_tokens_seen": 72594080, "step": 33585 }, { "epoch": 5.479608482871126, "grad_norm": 0.14254389703273773, "learning_rate": 0.0009106022264806078, "loss": 0.1269, "num_input_tokens_seen": 72605504, "step": 33590 }, { "epoch": 5.480424143556281, "grad_norm": 0.03509574383497238, "learning_rate": 0.000910561604687856, "loss": 0.0335, "num_input_tokens_seen": 72616736, "step": 33595 }, { "epoch": 5.481239804241436, "grad_norm": 0.04512341693043709, "learning_rate": 0.0009105209745745526, "loss": 0.2051, "num_input_tokens_seen": 72627136, "step": 33600 }, { "epoch": 5.4820554649265905, "grad_norm": 0.06479254364967346, "learning_rate": 0.0009104803361415208, "loss": 0.11, "num_input_tokens_seen": 72637408, "step": 33605 }, { "epoch": 5.482871125611745, "grad_norm": 0.17471860349178314, "learning_rate": 0.0009104396893895843, "loss": 0.1633, "num_input_tokens_seen": 72648000, "step": 33610 }, { "epoch": 5.4836867862969, "grad_norm": 0.049819447100162506, "learning_rate": 0.0009103990343195667, "loss": 0.0398, "num_input_tokens_seen": 72657888, "step": 33615 }, { "epoch": 5.484502446982056, "grad_norm": 0.13450585305690765, "learning_rate": 0.0009103583709322923, "loss": 0.085, "num_input_tokens_seen": 72667904, "step": 33620 }, { "epoch": 5.485318107667211, "grad_norm": 0.058071158826351166, "learning_rate": 0.0009103176992285847, "loss": 0.0583, "num_input_tokens_seen": 72679968, "step": 33625 }, { "epoch": 5.486133768352365, "grad_norm": 0.09091849625110626, "learning_rate": 0.0009102770192092684, "loss": 0.0348, "num_input_tokens_seen": 72691392, "step": 33630 }, { "epoch": 5.48694942903752, "grad_norm": 0.08295218646526337, "learning_rate": 0.000910236330875168, "loss": 0.0648, "num_input_tokens_seen": 72700672, "step": 33635 }, { "epoch": 5.487765089722675, "grad_norm": 0.22870251536369324, "learning_rate": 0.0009101956342271078, "loss": 0.1369, "num_input_tokens_seen": 72712128, "step": 33640 }, { "epoch": 5.488580750407831, "grad_norm": 0.014779879711568356, "learning_rate": 0.0009101549292659128, "loss": 0.1379, "num_input_tokens_seen": 72722016, "step": 33645 }, { "epoch": 5.489396411092986, "grad_norm": 0.1314256340265274, "learning_rate": 0.0009101142159924077, "loss": 0.0734, "num_input_tokens_seen": 72732704, "step": 33650 }, { "epoch": 5.49021207177814, "grad_norm": 0.12707646191120148, "learning_rate": 0.0009100734944074179, "loss": 0.0638, "num_input_tokens_seen": 72744256, "step": 33655 }, { "epoch": 5.491027732463295, "grad_norm": 0.03855331614613533, "learning_rate": 0.0009100327645117684, "loss": 0.0669, "num_input_tokens_seen": 72754336, "step": 33660 }, { "epoch": 5.49184339314845, "grad_norm": 0.02805102989077568, "learning_rate": 0.0009099920263062848, "loss": 0.1077, "num_input_tokens_seen": 72765536, "step": 33665 }, { "epoch": 5.492659053833605, "grad_norm": 0.03538591042160988, "learning_rate": 0.0009099512797917927, "loss": 0.019, "num_input_tokens_seen": 72776160, "step": 33670 }, { "epoch": 5.493474714518761, "grad_norm": 0.22538243234157562, "learning_rate": 0.0009099105249691179, "loss": 0.0887, "num_input_tokens_seen": 72786368, "step": 33675 }, { "epoch": 5.494290375203915, "grad_norm": 0.01177581213414669, "learning_rate": 0.0009098697618390862, "loss": 0.0581, "num_input_tokens_seen": 72797152, "step": 33680 }, { "epoch": 5.49510603588907, "grad_norm": 0.2585497796535492, "learning_rate": 0.0009098289904025239, "loss": 0.1149, "num_input_tokens_seen": 72807552, "step": 33685 }, { "epoch": 5.495921696574225, "grad_norm": 0.07408113032579422, "learning_rate": 0.0009097882106602571, "loss": 0.1972, "num_input_tokens_seen": 72817888, "step": 33690 }, { "epoch": 5.49673735725938, "grad_norm": 0.02731460891664028, "learning_rate": 0.0009097474226131124, "loss": 0.0196, "num_input_tokens_seen": 72829792, "step": 33695 }, { "epoch": 5.497553017944535, "grad_norm": 0.29231026768684387, "learning_rate": 0.0009097066262619165, "loss": 0.084, "num_input_tokens_seen": 72839360, "step": 33700 }, { "epoch": 5.49836867862969, "grad_norm": 0.2278498113155365, "learning_rate": 0.000909665821607496, "loss": 0.2324, "num_input_tokens_seen": 72849376, "step": 33705 }, { "epoch": 5.499184339314845, "grad_norm": 0.18931224942207336, "learning_rate": 0.0009096250086506779, "loss": 0.2018, "num_input_tokens_seen": 72859872, "step": 33710 }, { "epoch": 5.5, "grad_norm": 0.14162462949752808, "learning_rate": 0.0009095841873922894, "loss": 0.0834, "num_input_tokens_seen": 72871392, "step": 33715 }, { "epoch": 5.500815660685155, "grad_norm": 0.036499932408332825, "learning_rate": 0.0009095433578331576, "loss": 0.1281, "num_input_tokens_seen": 72882176, "step": 33720 }, { "epoch": 5.50163132137031, "grad_norm": 0.2484671026468277, "learning_rate": 0.0009095025199741103, "loss": 0.0841, "num_input_tokens_seen": 72893024, "step": 33725 }, { "epoch": 5.502446982055465, "grad_norm": 0.1913287341594696, "learning_rate": 0.0009094616738159748, "loss": 0.0924, "num_input_tokens_seen": 72904512, "step": 33730 }, { "epoch": 5.50326264274062, "grad_norm": 0.049790579825639725, "learning_rate": 0.000909420819359579, "loss": 0.0527, "num_input_tokens_seen": 72914848, "step": 33735 }, { "epoch": 5.504078303425775, "grad_norm": 0.19436365365982056, "learning_rate": 0.000909379956605751, "loss": 0.0706, "num_input_tokens_seen": 72924832, "step": 33740 }, { "epoch": 5.50489396411093, "grad_norm": 0.010649348609149456, "learning_rate": 0.000909339085555319, "loss": 0.2315, "num_input_tokens_seen": 72935936, "step": 33745 }, { "epoch": 5.505709624796085, "grad_norm": 0.18552739918231964, "learning_rate": 0.0009092982062091109, "loss": 0.1633, "num_input_tokens_seen": 72946400, "step": 33750 }, { "epoch": 5.506525285481239, "grad_norm": 0.2872273921966553, "learning_rate": 0.0009092573185679556, "loss": 0.0368, "num_input_tokens_seen": 72957536, "step": 33755 }, { "epoch": 5.507340946166395, "grad_norm": 0.032905105501413345, "learning_rate": 0.0009092164226326814, "loss": 0.0495, "num_input_tokens_seen": 72969216, "step": 33760 }, { "epoch": 5.50815660685155, "grad_norm": 0.07028263807296753, "learning_rate": 0.0009091755184041173, "loss": 0.1416, "num_input_tokens_seen": 72980096, "step": 33765 }, { "epoch": 5.508972267536705, "grad_norm": 0.20369423925876617, "learning_rate": 0.0009091346058830923, "loss": 0.0918, "num_input_tokens_seen": 72991808, "step": 33770 }, { "epoch": 5.50978792822186, "grad_norm": 0.16369512677192688, "learning_rate": 0.0009090936850704354, "loss": 0.0923, "num_input_tokens_seen": 73002304, "step": 33775 }, { "epoch": 5.510603588907014, "grad_norm": 0.26133355498313904, "learning_rate": 0.0009090527559669761, "loss": 0.1211, "num_input_tokens_seen": 73013120, "step": 33780 }, { "epoch": 5.511419249592169, "grad_norm": 0.15874671936035156, "learning_rate": 0.0009090118185735438, "loss": 0.1379, "num_input_tokens_seen": 73023392, "step": 33785 }, { "epoch": 5.512234910277325, "grad_norm": 0.17313063144683838, "learning_rate": 0.000908970872890968, "loss": 0.0637, "num_input_tokens_seen": 73034688, "step": 33790 }, { "epoch": 5.51305057096248, "grad_norm": 0.018427910283207893, "learning_rate": 0.0009089299189200789, "loss": 0.1245, "num_input_tokens_seen": 73045600, "step": 33795 }, { "epoch": 5.513866231647635, "grad_norm": 0.004695965442806482, "learning_rate": 0.000908888956661706, "loss": 0.2346, "num_input_tokens_seen": 73056224, "step": 33800 }, { "epoch": 5.514681892332789, "grad_norm": 0.06857309490442276, "learning_rate": 0.0009088479861166797, "loss": 0.2559, "num_input_tokens_seen": 73067552, "step": 33805 }, { "epoch": 5.515497553017944, "grad_norm": 0.2367842048406601, "learning_rate": 0.0009088070072858303, "loss": 0.1591, "num_input_tokens_seen": 73077408, "step": 33810 }, { "epoch": 5.5163132137031, "grad_norm": 0.009032535366714, "learning_rate": 0.0009087660201699884, "loss": 0.1355, "num_input_tokens_seen": 73087936, "step": 33815 }, { "epoch": 5.517128874388255, "grad_norm": 0.20530152320861816, "learning_rate": 0.0009087250247699846, "loss": 0.0991, "num_input_tokens_seen": 73098912, "step": 33820 }, { "epoch": 5.5179445350734095, "grad_norm": 0.024618202820420265, "learning_rate": 0.0009086840210866493, "loss": 0.0662, "num_input_tokens_seen": 73109920, "step": 33825 }, { "epoch": 5.518760195758564, "grad_norm": 0.018427925184369087, "learning_rate": 0.0009086430091208142, "loss": 0.0556, "num_input_tokens_seen": 73120704, "step": 33830 }, { "epoch": 5.519575856443719, "grad_norm": 0.029816431924700737, "learning_rate": 0.00090860198887331, "loss": 0.0435, "num_input_tokens_seen": 73132160, "step": 33835 }, { "epoch": 5.520391517128875, "grad_norm": 0.18719585239887238, "learning_rate": 0.0009085609603449683, "loss": 0.0573, "num_input_tokens_seen": 73141600, "step": 33840 }, { "epoch": 5.52120717781403, "grad_norm": 0.026867792010307312, "learning_rate": 0.0009085199235366201, "loss": 0.0416, "num_input_tokens_seen": 73152064, "step": 33845 }, { "epoch": 5.5220228384991845, "grad_norm": 0.14632810652256012, "learning_rate": 0.0009084788784490977, "loss": 0.0572, "num_input_tokens_seen": 73163200, "step": 33850 }, { "epoch": 5.522838499184339, "grad_norm": 0.027970803901553154, "learning_rate": 0.0009084378250832325, "loss": 0.0811, "num_input_tokens_seen": 73172704, "step": 33855 }, { "epoch": 5.523654159869494, "grad_norm": 0.08090617507696152, "learning_rate": 0.0009083967634398567, "loss": 0.1068, "num_input_tokens_seen": 73182368, "step": 33860 }, { "epoch": 5.524469820554649, "grad_norm": 0.15196193754673004, "learning_rate": 0.0009083556935198024, "loss": 0.1154, "num_input_tokens_seen": 73193504, "step": 33865 }, { "epoch": 5.525285481239804, "grad_norm": 0.02766106277704239, "learning_rate": 0.0009083146153239019, "loss": 0.046, "num_input_tokens_seen": 73203136, "step": 33870 }, { "epoch": 5.5261011419249595, "grad_norm": 0.013581224717199802, "learning_rate": 0.0009082735288529878, "loss": 0.1066, "num_input_tokens_seen": 73214496, "step": 33875 }, { "epoch": 5.526916802610114, "grad_norm": 0.007133916020393372, "learning_rate": 0.0009082324341078927, "loss": 0.1363, "num_input_tokens_seen": 73225248, "step": 33880 }, { "epoch": 5.527732463295269, "grad_norm": 0.018216347321867943, "learning_rate": 0.0009081913310894494, "loss": 0.0638, "num_input_tokens_seen": 73236864, "step": 33885 }, { "epoch": 5.528548123980424, "grad_norm": 0.05228884145617485, "learning_rate": 0.000908150219798491, "loss": 0.0864, "num_input_tokens_seen": 73246880, "step": 33890 }, { "epoch": 5.529363784665579, "grad_norm": 0.09811168909072876, "learning_rate": 0.0009081091002358506, "loss": 0.0786, "num_input_tokens_seen": 73256192, "step": 33895 }, { "epoch": 5.5301794453507345, "grad_norm": 0.007979627698659897, "learning_rate": 0.0009080679724023615, "loss": 0.0675, "num_input_tokens_seen": 73267072, "step": 33900 }, { "epoch": 5.530995106035889, "grad_norm": 0.04914901778101921, "learning_rate": 0.0009080268362988572, "loss": 0.0795, "num_input_tokens_seen": 73278208, "step": 33905 }, { "epoch": 5.531810766721044, "grad_norm": 0.34442588686943054, "learning_rate": 0.0009079856919261716, "loss": 0.1373, "num_input_tokens_seen": 73288800, "step": 33910 }, { "epoch": 5.532626427406199, "grad_norm": 0.02547566592693329, "learning_rate": 0.0009079445392851383, "loss": 0.0475, "num_input_tokens_seen": 73300064, "step": 33915 }, { "epoch": 5.533442088091354, "grad_norm": 0.07199371606111526, "learning_rate": 0.0009079033783765914, "loss": 0.0693, "num_input_tokens_seen": 73311264, "step": 33920 }, { "epoch": 5.5342577487765094, "grad_norm": 0.10011252015829086, "learning_rate": 0.0009078622092013651, "loss": 0.1076, "num_input_tokens_seen": 73322304, "step": 33925 }, { "epoch": 5.535073409461664, "grad_norm": 0.008656290359795094, "learning_rate": 0.0009078210317602938, "loss": 0.0772, "num_input_tokens_seen": 73333760, "step": 33930 }, { "epoch": 5.535889070146819, "grad_norm": 0.11770905554294586, "learning_rate": 0.0009077798460542119, "loss": 0.2849, "num_input_tokens_seen": 73343744, "step": 33935 }, { "epoch": 5.536704730831974, "grad_norm": 0.03572523966431618, "learning_rate": 0.0009077386520839541, "loss": 0.1529, "num_input_tokens_seen": 73353824, "step": 33940 }, { "epoch": 5.537520391517129, "grad_norm": 0.05641023814678192, "learning_rate": 0.0009076974498503552, "loss": 0.1559, "num_input_tokens_seen": 73363968, "step": 33945 }, { "epoch": 5.5383360522022835, "grad_norm": 0.09805291891098022, "learning_rate": 0.0009076562393542502, "loss": 0.0444, "num_input_tokens_seen": 73375680, "step": 33950 }, { "epoch": 5.539151712887438, "grad_norm": 0.05173416808247566, "learning_rate": 0.0009076150205964746, "loss": 0.0864, "num_input_tokens_seen": 73386912, "step": 33955 }, { "epoch": 5.539967373572594, "grad_norm": 0.14410558342933655, "learning_rate": 0.0009075737935778634, "loss": 0.0858, "num_input_tokens_seen": 73399648, "step": 33960 }, { "epoch": 5.540783034257749, "grad_norm": 0.1506105214357376, "learning_rate": 0.0009075325582992522, "loss": 0.0496, "num_input_tokens_seen": 73410944, "step": 33965 }, { "epoch": 5.541598694942904, "grad_norm": 0.030392751097679138, "learning_rate": 0.0009074913147614767, "loss": 0.042, "num_input_tokens_seen": 73422016, "step": 33970 }, { "epoch": 5.5424143556280585, "grad_norm": 0.020440472289919853, "learning_rate": 0.0009074500629653728, "loss": 0.1176, "num_input_tokens_seen": 73433120, "step": 33975 }, { "epoch": 5.543230016313213, "grad_norm": 0.02089664526283741, "learning_rate": 0.0009074088029117764, "loss": 0.0972, "num_input_tokens_seen": 73443456, "step": 33980 }, { "epoch": 5.544045676998369, "grad_norm": 0.3009980022907257, "learning_rate": 0.0009073675346015239, "loss": 0.1754, "num_input_tokens_seen": 73453632, "step": 33985 }, { "epoch": 5.544861337683524, "grad_norm": 0.18772025406360626, "learning_rate": 0.0009073262580354516, "loss": 0.2225, "num_input_tokens_seen": 73464000, "step": 33990 }, { "epoch": 5.545676998368679, "grad_norm": 0.02644345909357071, "learning_rate": 0.0009072849732143957, "loss": 0.1702, "num_input_tokens_seen": 73475328, "step": 33995 }, { "epoch": 5.5464926590538335, "grad_norm": 0.013715567998588085, "learning_rate": 0.0009072436801391932, "loss": 0.1263, "num_input_tokens_seen": 73486464, "step": 34000 }, { "epoch": 5.547308319738988, "grad_norm": 0.15529221296310425, "learning_rate": 0.0009072023788106811, "loss": 0.1647, "num_input_tokens_seen": 73497440, "step": 34005 }, { "epoch": 5.548123980424144, "grad_norm": 0.2070222645998001, "learning_rate": 0.0009071610692296961, "loss": 0.1927, "num_input_tokens_seen": 73508704, "step": 34010 }, { "epoch": 5.548939641109299, "grad_norm": 0.18096806108951569, "learning_rate": 0.0009071197513970755, "loss": 0.0852, "num_input_tokens_seen": 73519264, "step": 34015 }, { "epoch": 5.549755301794454, "grad_norm": 0.0723312720656395, "learning_rate": 0.0009070784253136565, "loss": 0.0437, "num_input_tokens_seen": 73529088, "step": 34020 }, { "epoch": 5.5505709624796085, "grad_norm": 0.02088317647576332, "learning_rate": 0.0009070370909802772, "loss": 0.1904, "num_input_tokens_seen": 73540800, "step": 34025 }, { "epoch": 5.551386623164763, "grad_norm": 0.020408082753419876, "learning_rate": 0.0009069957483977747, "loss": 0.0953, "num_input_tokens_seen": 73550656, "step": 34030 }, { "epoch": 5.552202283849918, "grad_norm": 0.050628501921892166, "learning_rate": 0.0009069543975669869, "loss": 0.0705, "num_input_tokens_seen": 73561824, "step": 34035 }, { "epoch": 5.553017944535073, "grad_norm": 0.17295457422733307, "learning_rate": 0.0009069130384887521, "loss": 0.1914, "num_input_tokens_seen": 73572544, "step": 34040 }, { "epoch": 5.553833605220229, "grad_norm": 0.018812965601682663, "learning_rate": 0.0009068716711639084, "loss": 0.0508, "num_input_tokens_seen": 73583136, "step": 34045 }, { "epoch": 5.554649265905383, "grad_norm": 0.21679341793060303, "learning_rate": 0.0009068302955932939, "loss": 0.0733, "num_input_tokens_seen": 73594528, "step": 34050 }, { "epoch": 5.555464926590538, "grad_norm": 0.06878527998924255, "learning_rate": 0.0009067889117777477, "loss": 0.0795, "num_input_tokens_seen": 73606272, "step": 34055 }, { "epoch": 5.556280587275693, "grad_norm": 0.20378956198692322, "learning_rate": 0.000906747519718108, "loss": 0.1039, "num_input_tokens_seen": 73616384, "step": 34060 }, { "epoch": 5.557096247960848, "grad_norm": 0.2941659390926361, "learning_rate": 0.0009067061194152138, "loss": 0.0564, "num_input_tokens_seen": 73626624, "step": 34065 }, { "epoch": 5.557911908646004, "grad_norm": 0.022131171077489853, "learning_rate": 0.0009066647108699041, "loss": 0.0767, "num_input_tokens_seen": 73637536, "step": 34070 }, { "epoch": 5.558727569331158, "grad_norm": 0.21184739470481873, "learning_rate": 0.0009066232940830182, "loss": 0.1311, "num_input_tokens_seen": 73647744, "step": 34075 }, { "epoch": 5.559543230016313, "grad_norm": 0.020756877958774567, "learning_rate": 0.0009065818690553955, "loss": 0.3321, "num_input_tokens_seen": 73657952, "step": 34080 }, { "epoch": 5.560358890701468, "grad_norm": 0.0600406639277935, "learning_rate": 0.0009065404357878752, "loss": 0.1166, "num_input_tokens_seen": 73669376, "step": 34085 }, { "epoch": 5.561174551386623, "grad_norm": 0.020772617310285568, "learning_rate": 0.0009064989942812974, "loss": 0.106, "num_input_tokens_seen": 73679616, "step": 34090 }, { "epoch": 5.561990212071779, "grad_norm": 0.12317997217178345, "learning_rate": 0.0009064575445365019, "loss": 0.0546, "num_input_tokens_seen": 73690816, "step": 34095 }, { "epoch": 5.562805872756933, "grad_norm": 0.057949621230363846, "learning_rate": 0.0009064160865543285, "loss": 0.1079, "num_input_tokens_seen": 73701792, "step": 34100 }, { "epoch": 5.563621533442088, "grad_norm": 0.08427233248949051, "learning_rate": 0.0009063746203356176, "loss": 0.0214, "num_input_tokens_seen": 73712288, "step": 34105 }, { "epoch": 5.564437194127243, "grad_norm": 0.12674139440059662, "learning_rate": 0.0009063331458812094, "loss": 0.0537, "num_input_tokens_seen": 73721472, "step": 34110 }, { "epoch": 5.565252854812398, "grad_norm": 0.13015665113925934, "learning_rate": 0.0009062916631919445, "loss": 0.2981, "num_input_tokens_seen": 73731744, "step": 34115 }, { "epoch": 5.566068515497553, "grad_norm": 0.016723886132240295, "learning_rate": 0.0009062501722686638, "loss": 0.0735, "num_input_tokens_seen": 73743200, "step": 34120 }, { "epoch": 5.566884176182708, "grad_norm": 0.2247808575630188, "learning_rate": 0.0009062086731122079, "loss": 0.231, "num_input_tokens_seen": 73754432, "step": 34125 }, { "epoch": 5.567699836867863, "grad_norm": 0.05223708599805832, "learning_rate": 0.0009061671657234179, "loss": 0.2016, "num_input_tokens_seen": 73766432, "step": 34130 }, { "epoch": 5.568515497553018, "grad_norm": 0.07750531286001205, "learning_rate": 0.000906125650103135, "loss": 0.126, "num_input_tokens_seen": 73776512, "step": 34135 }, { "epoch": 5.569331158238173, "grad_norm": 0.07240372151136398, "learning_rate": 0.0009060841262522006, "loss": 0.1414, "num_input_tokens_seen": 73786688, "step": 34140 }, { "epoch": 5.570146818923328, "grad_norm": 0.07198721915483475, "learning_rate": 0.0009060425941714563, "loss": 0.0999, "num_input_tokens_seen": 73798016, "step": 34145 }, { "epoch": 5.5709624796084825, "grad_norm": 0.027029162272810936, "learning_rate": 0.0009060010538617437, "loss": 0.0319, "num_input_tokens_seen": 73808992, "step": 34150 }, { "epoch": 5.571778140293638, "grad_norm": 0.12103355675935745, "learning_rate": 0.0009059595053239047, "loss": 0.1308, "num_input_tokens_seen": 73819520, "step": 34155 }, { "epoch": 5.572593800978793, "grad_norm": 0.067531056702137, "learning_rate": 0.0009059179485587813, "loss": 0.1778, "num_input_tokens_seen": 73831712, "step": 34160 }, { "epoch": 5.573409461663948, "grad_norm": 0.07771392166614532, "learning_rate": 0.0009058763835672157, "loss": 0.1988, "num_input_tokens_seen": 73842464, "step": 34165 }, { "epoch": 5.574225122349103, "grad_norm": 0.04161020740866661, "learning_rate": 0.0009058348103500504, "loss": 0.0862, "num_input_tokens_seen": 73853088, "step": 34170 }, { "epoch": 5.575040783034257, "grad_norm": 0.02227962017059326, "learning_rate": 0.0009057932289081278, "loss": 0.0497, "num_input_tokens_seen": 73864512, "step": 34175 }, { "epoch": 5.575856443719413, "grad_norm": 0.02278684638440609, "learning_rate": 0.0009057516392422906, "loss": 0.0666, "num_input_tokens_seen": 73875456, "step": 34180 }, { "epoch": 5.576672104404568, "grad_norm": 0.0633942186832428, "learning_rate": 0.0009057100413533817, "loss": 0.1228, "num_input_tokens_seen": 73886048, "step": 34185 }, { "epoch": 5.577487765089723, "grad_norm": 0.012222129851579666, "learning_rate": 0.0009056684352422441, "loss": 0.0677, "num_input_tokens_seen": 73898016, "step": 34190 }, { "epoch": 5.578303425774878, "grad_norm": 0.024317584931850433, "learning_rate": 0.0009056268209097211, "loss": 0.1371, "num_input_tokens_seen": 73909600, "step": 34195 }, { "epoch": 5.579119086460032, "grad_norm": 0.02568567730486393, "learning_rate": 0.000905585198356656, "loss": 0.0815, "num_input_tokens_seen": 73920128, "step": 34200 }, { "epoch": 5.579934747145187, "grad_norm": 0.06447894871234894, "learning_rate": 0.0009055435675838923, "loss": 0.0389, "num_input_tokens_seen": 73931456, "step": 34205 }, { "epoch": 5.580750407830343, "grad_norm": 0.020777931436896324, "learning_rate": 0.0009055019285922737, "loss": 0.1695, "num_input_tokens_seen": 73942464, "step": 34210 }, { "epoch": 5.581566068515498, "grad_norm": 0.09878566116094589, "learning_rate": 0.0009054602813826441, "loss": 0.0635, "num_input_tokens_seen": 73953120, "step": 34215 }, { "epoch": 5.582381729200653, "grad_norm": 0.0664665699005127, "learning_rate": 0.0009054186259558477, "loss": 0.0914, "num_input_tokens_seen": 73963104, "step": 34220 }, { "epoch": 5.583197389885807, "grad_norm": 0.01421379018574953, "learning_rate": 0.0009053769623127284, "loss": 0.0335, "num_input_tokens_seen": 73974240, "step": 34225 }, { "epoch": 5.584013050570962, "grad_norm": 0.1316417157649994, "learning_rate": 0.0009053352904541306, "loss": 0.1655, "num_input_tokens_seen": 73984864, "step": 34230 }, { "epoch": 5.584828711256117, "grad_norm": 0.0478694885969162, "learning_rate": 0.0009052936103808991, "loss": 0.0957, "num_input_tokens_seen": 73995936, "step": 34235 }, { "epoch": 5.585644371941273, "grad_norm": 0.04798683524131775, "learning_rate": 0.0009052519220938784, "loss": 0.043, "num_input_tokens_seen": 74007008, "step": 34240 }, { "epoch": 5.5864600326264275, "grad_norm": 0.022165268659591675, "learning_rate": 0.0009052102255939134, "loss": 0.0668, "num_input_tokens_seen": 74017664, "step": 34245 }, { "epoch": 5.587275693311582, "grad_norm": 0.1620720475912094, "learning_rate": 0.000905168520881849, "loss": 0.0979, "num_input_tokens_seen": 74028928, "step": 34250 }, { "epoch": 5.588091353996737, "grad_norm": 0.18612481653690338, "learning_rate": 0.0009051268079585306, "loss": 0.2239, "num_input_tokens_seen": 74041088, "step": 34255 }, { "epoch": 5.588907014681892, "grad_norm": 0.015167465433478355, "learning_rate": 0.0009050850868248037, "loss": 0.3041, "num_input_tokens_seen": 74052896, "step": 34260 }, { "epoch": 5.589722675367048, "grad_norm": 0.05257358402013779, "learning_rate": 0.0009050433574815134, "loss": 0.0485, "num_input_tokens_seen": 74063872, "step": 34265 }, { "epoch": 5.5905383360522025, "grad_norm": 0.07323313504457474, "learning_rate": 0.0009050016199295057, "loss": 0.2594, "num_input_tokens_seen": 74073856, "step": 34270 }, { "epoch": 5.591353996737357, "grad_norm": 0.01376113761216402, "learning_rate": 0.0009049598741696263, "loss": 0.0437, "num_input_tokens_seen": 74085472, "step": 34275 }, { "epoch": 5.592169657422512, "grad_norm": 0.02173309214413166, "learning_rate": 0.0009049181202027215, "loss": 0.0443, "num_input_tokens_seen": 74094720, "step": 34280 }, { "epoch": 5.592985318107667, "grad_norm": 0.14551734924316406, "learning_rate": 0.0009048763580296373, "loss": 0.173, "num_input_tokens_seen": 74105888, "step": 34285 }, { "epoch": 5.593800978792823, "grad_norm": 0.18056020140647888, "learning_rate": 0.00090483458765122, "loss": 0.0877, "num_input_tokens_seen": 74115968, "step": 34290 }, { "epoch": 5.5946166394779775, "grad_norm": 0.21234531700611115, "learning_rate": 0.0009047928090683162, "loss": 0.1543, "num_input_tokens_seen": 74126880, "step": 34295 }, { "epoch": 5.595432300163132, "grad_norm": 0.020504070445895195, "learning_rate": 0.0009047510222817725, "loss": 0.1306, "num_input_tokens_seen": 74137952, "step": 34300 }, { "epoch": 5.596247960848287, "grad_norm": 0.2272954136133194, "learning_rate": 0.0009047092272924361, "loss": 0.1549, "num_input_tokens_seen": 74148640, "step": 34305 }, { "epoch": 5.597063621533442, "grad_norm": 0.04594704508781433, "learning_rate": 0.0009046674241011537, "loss": 0.0655, "num_input_tokens_seen": 74159616, "step": 34310 }, { "epoch": 5.597879282218597, "grad_norm": 0.18465717136859894, "learning_rate": 0.0009046256127087727, "loss": 0.1131, "num_input_tokens_seen": 74170208, "step": 34315 }, { "epoch": 5.598694942903752, "grad_norm": 0.1933651864528656, "learning_rate": 0.0009045837931161402, "loss": 0.2482, "num_input_tokens_seen": 74180992, "step": 34320 }, { "epoch": 5.599510603588907, "grad_norm": 0.01716572791337967, "learning_rate": 0.0009045419653241038, "loss": 0.0893, "num_input_tokens_seen": 74191392, "step": 34325 }, { "epoch": 5.600326264274062, "grad_norm": 0.103814996778965, "learning_rate": 0.0009045001293335115, "loss": 0.0989, "num_input_tokens_seen": 74202496, "step": 34330 }, { "epoch": 5.601141924959217, "grad_norm": 0.1775224357843399, "learning_rate": 0.0009044582851452107, "loss": 0.0965, "num_input_tokens_seen": 74213632, "step": 34335 }, { "epoch": 5.601957585644372, "grad_norm": 0.053888604044914246, "learning_rate": 0.0009044164327600499, "loss": 0.0887, "num_input_tokens_seen": 74223392, "step": 34340 }, { "epoch": 5.602773246329527, "grad_norm": 0.02769533544778824, "learning_rate": 0.000904374572178877, "loss": 0.0727, "num_input_tokens_seen": 74234368, "step": 34345 }, { "epoch": 5.603588907014682, "grad_norm": 0.07350389659404755, "learning_rate": 0.0009043327034025404, "loss": 0.0706, "num_input_tokens_seen": 74245344, "step": 34350 }, { "epoch": 5.604404567699837, "grad_norm": 0.015233350917696953, "learning_rate": 0.0009042908264318885, "loss": 0.0489, "num_input_tokens_seen": 74254368, "step": 34355 }, { "epoch": 5.605220228384992, "grad_norm": 0.02992558479309082, "learning_rate": 0.0009042489412677702, "loss": 0.1246, "num_input_tokens_seen": 74265344, "step": 34360 }, { "epoch": 5.606035889070147, "grad_norm": 0.061724767088890076, "learning_rate": 0.0009042070479110343, "loss": 0.1397, "num_input_tokens_seen": 74275264, "step": 34365 }, { "epoch": 5.6068515497553015, "grad_norm": 0.08111986517906189, "learning_rate": 0.0009041651463625298, "loss": 0.0385, "num_input_tokens_seen": 74284608, "step": 34370 }, { "epoch": 5.607667210440457, "grad_norm": 0.05526169762015343, "learning_rate": 0.0009041232366231059, "loss": 0.1293, "num_input_tokens_seen": 74296128, "step": 34375 }, { "epoch": 5.608482871125612, "grad_norm": 0.21980856359004974, "learning_rate": 0.0009040813186936119, "loss": 0.1306, "num_input_tokens_seen": 74308000, "step": 34380 }, { "epoch": 5.609298531810767, "grad_norm": 0.23230993747711182, "learning_rate": 0.0009040393925748973, "loss": 0.1128, "num_input_tokens_seen": 74318240, "step": 34385 }, { "epoch": 5.610114192495922, "grad_norm": 0.19267278909683228, "learning_rate": 0.0009039974582678121, "loss": 0.1068, "num_input_tokens_seen": 74329184, "step": 34390 }, { "epoch": 5.6109298531810765, "grad_norm": 0.029193982481956482, "learning_rate": 0.0009039555157732056, "loss": 0.1378, "num_input_tokens_seen": 74340000, "step": 34395 }, { "epoch": 5.611745513866231, "grad_norm": 0.15133799612522125, "learning_rate": 0.0009039135650919283, "loss": 0.0774, "num_input_tokens_seen": 74350624, "step": 34400 }, { "epoch": 5.612561174551386, "grad_norm": 0.161695659160614, "learning_rate": 0.0009038716062248302, "loss": 0.0662, "num_input_tokens_seen": 74360608, "step": 34405 }, { "epoch": 5.613376835236542, "grad_norm": 0.05793755128979683, "learning_rate": 0.0009038296391727616, "loss": 0.0207, "num_input_tokens_seen": 74371104, "step": 34410 }, { "epoch": 5.614192495921697, "grad_norm": 0.008591441437602043, "learning_rate": 0.0009037876639365731, "loss": 0.0934, "num_input_tokens_seen": 74382400, "step": 34415 }, { "epoch": 5.6150081566068515, "grad_norm": 0.14188680052757263, "learning_rate": 0.0009037456805171154, "loss": 0.1134, "num_input_tokens_seen": 74394048, "step": 34420 }, { "epoch": 5.615823817292006, "grad_norm": 0.05818561464548111, "learning_rate": 0.0009037036889152391, "loss": 0.0622, "num_input_tokens_seen": 74406016, "step": 34425 }, { "epoch": 5.616639477977161, "grad_norm": 0.1742202490568161, "learning_rate": 0.0009036616891317956, "loss": 0.0609, "num_input_tokens_seen": 74417152, "step": 34430 }, { "epoch": 5.617455138662317, "grad_norm": 0.13640397787094116, "learning_rate": 0.0009036196811676358, "loss": 0.1228, "num_input_tokens_seen": 74428384, "step": 34435 }, { "epoch": 5.618270799347472, "grad_norm": 0.047426458448171616, "learning_rate": 0.0009035776650236112, "loss": 0.0624, "num_input_tokens_seen": 74438880, "step": 34440 }, { "epoch": 5.6190864600326265, "grad_norm": 0.14811500906944275, "learning_rate": 0.0009035356407005732, "loss": 0.1488, "num_input_tokens_seen": 74449504, "step": 34445 }, { "epoch": 5.619902120717781, "grad_norm": 0.21113741397857666, "learning_rate": 0.0009034936081993736, "loss": 0.2132, "num_input_tokens_seen": 74460032, "step": 34450 }, { "epoch": 5.620717781402936, "grad_norm": 0.055134713649749756, "learning_rate": 0.0009034515675208641, "loss": 0.0786, "num_input_tokens_seen": 74471072, "step": 34455 }, { "epoch": 5.621533442088092, "grad_norm": 0.29549095034599304, "learning_rate": 0.0009034095186658966, "loss": 0.1138, "num_input_tokens_seen": 74483360, "step": 34460 }, { "epoch": 5.622349102773247, "grad_norm": 0.280021607875824, "learning_rate": 0.0009033674616353236, "loss": 0.0716, "num_input_tokens_seen": 74492896, "step": 34465 }, { "epoch": 5.623164763458401, "grad_norm": 0.26246124505996704, "learning_rate": 0.0009033253964299972, "loss": 0.0924, "num_input_tokens_seen": 74503200, "step": 34470 }, { "epoch": 5.623980424143556, "grad_norm": 0.050797343254089355, "learning_rate": 0.0009032833230507702, "loss": 0.2738, "num_input_tokens_seen": 74514112, "step": 34475 }, { "epoch": 5.624796084828711, "grad_norm": 0.0720033347606659, "learning_rate": 0.000903241241498495, "loss": 0.2803, "num_input_tokens_seen": 74525856, "step": 34480 }, { "epoch": 5.625611745513866, "grad_norm": 0.19244541227817535, "learning_rate": 0.0009031991517740244, "loss": 0.1479, "num_input_tokens_seen": 74535232, "step": 34485 }, { "epoch": 5.626427406199021, "grad_norm": 0.13854020833969116, "learning_rate": 0.0009031570538782115, "loss": 0.1443, "num_input_tokens_seen": 74545216, "step": 34490 }, { "epoch": 5.627243066884176, "grad_norm": 0.46975669264793396, "learning_rate": 0.0009031149478119094, "loss": 0.1608, "num_input_tokens_seen": 74556192, "step": 34495 }, { "epoch": 5.628058727569331, "grad_norm": 0.1952454298734665, "learning_rate": 0.0009030728335759716, "loss": 0.0868, "num_input_tokens_seen": 74566240, "step": 34500 }, { "epoch": 5.628874388254486, "grad_norm": 0.13439679145812988, "learning_rate": 0.0009030307111712514, "loss": 0.1536, "num_input_tokens_seen": 74576768, "step": 34505 }, { "epoch": 5.629690048939641, "grad_norm": 0.03982832282781601, "learning_rate": 0.0009029885805986027, "loss": 0.1538, "num_input_tokens_seen": 74587872, "step": 34510 }, { "epoch": 5.630505709624796, "grad_norm": 0.24063228070735931, "learning_rate": 0.0009029464418588791, "loss": 0.1306, "num_input_tokens_seen": 74598752, "step": 34515 }, { "epoch": 5.631321370309951, "grad_norm": 0.0440259650349617, "learning_rate": 0.0009029042949529347, "loss": 0.1057, "num_input_tokens_seen": 74609856, "step": 34520 }, { "epoch": 5.632137030995106, "grad_norm": 0.05447227880358696, "learning_rate": 0.0009028621398816236, "loss": 0.1447, "num_input_tokens_seen": 74620960, "step": 34525 }, { "epoch": 5.632952691680261, "grad_norm": 0.018078099936246872, "learning_rate": 0.0009028199766458002, "loss": 0.1177, "num_input_tokens_seen": 74632192, "step": 34530 }, { "epoch": 5.633768352365416, "grad_norm": 0.12170816212892532, "learning_rate": 0.000902777805246319, "loss": 0.1091, "num_input_tokens_seen": 74643424, "step": 34535 }, { "epoch": 5.634584013050571, "grad_norm": 0.151223286986351, "learning_rate": 0.0009027356256840345, "loss": 0.0751, "num_input_tokens_seen": 74654080, "step": 34540 }, { "epoch": 5.635399673735726, "grad_norm": 0.04554088041186333, "learning_rate": 0.0009026934379598018, "loss": 0.0829, "num_input_tokens_seen": 74663840, "step": 34545 }, { "epoch": 5.636215334420881, "grad_norm": 0.05020049586892128, "learning_rate": 0.0009026512420744756, "loss": 0.0606, "num_input_tokens_seen": 74674688, "step": 34550 }, { "epoch": 5.637030995106036, "grad_norm": 0.09834477305412292, "learning_rate": 0.0009026090380289111, "loss": 0.1564, "num_input_tokens_seen": 74685344, "step": 34555 }, { "epoch": 5.637846655791191, "grad_norm": 0.0076295617036521435, "learning_rate": 0.0009025668258239638, "loss": 0.1465, "num_input_tokens_seen": 74697952, "step": 34560 }, { "epoch": 5.638662316476346, "grad_norm": 0.012084455229341984, "learning_rate": 0.0009025246054604892, "loss": 0.0624, "num_input_tokens_seen": 74708864, "step": 34565 }, { "epoch": 5.6394779771615005, "grad_norm": 0.1278560906648636, "learning_rate": 0.0009024823769393427, "loss": 0.2907, "num_input_tokens_seen": 74719072, "step": 34570 }, { "epoch": 5.640293637846656, "grad_norm": 0.0414595901966095, "learning_rate": 0.0009024401402613803, "loss": 0.0604, "num_input_tokens_seen": 74729408, "step": 34575 }, { "epoch": 5.641109298531811, "grad_norm": 0.02301446720957756, "learning_rate": 0.0009023978954274579, "loss": 0.0541, "num_input_tokens_seen": 74740064, "step": 34580 }, { "epoch": 5.641924959216966, "grad_norm": 0.021538980305194855, "learning_rate": 0.0009023556424384317, "loss": 0.0544, "num_input_tokens_seen": 74750080, "step": 34585 }, { "epoch": 5.642740619902121, "grad_norm": 0.06400078535079956, "learning_rate": 0.0009023133812951581, "loss": 0.0592, "num_input_tokens_seen": 74761536, "step": 34590 }, { "epoch": 5.643556280587275, "grad_norm": 0.032709237188100815, "learning_rate": 0.0009022711119984932, "loss": 0.0412, "num_input_tokens_seen": 74772128, "step": 34595 }, { "epoch": 5.64437194127243, "grad_norm": 0.09720578044652939, "learning_rate": 0.0009022288345492941, "loss": 0.1003, "num_input_tokens_seen": 74783040, "step": 34600 }, { "epoch": 5.645187601957586, "grad_norm": 0.3814923167228699, "learning_rate": 0.0009021865489484173, "loss": 0.0675, "num_input_tokens_seen": 74793984, "step": 34605 }, { "epoch": 5.646003262642741, "grad_norm": 0.06573888659477234, "learning_rate": 0.0009021442551967198, "loss": 0.1281, "num_input_tokens_seen": 74805088, "step": 34610 }, { "epoch": 5.646818923327896, "grad_norm": 0.08702082186937332, "learning_rate": 0.000902101953295059, "loss": 0.1954, "num_input_tokens_seen": 74815520, "step": 34615 }, { "epoch": 5.64763458401305, "grad_norm": 0.03930259495973587, "learning_rate": 0.0009020596432442918, "loss": 0.049, "num_input_tokens_seen": 74826368, "step": 34620 }, { "epoch": 5.648450244698205, "grad_norm": 0.017254164442420006, "learning_rate": 0.0009020173250452761, "loss": 0.1167, "num_input_tokens_seen": 74836416, "step": 34625 }, { "epoch": 5.649265905383361, "grad_norm": 0.0806068629026413, "learning_rate": 0.0009019749986988692, "loss": 0.0189, "num_input_tokens_seen": 74846784, "step": 34630 }, { "epoch": 5.650081566068516, "grad_norm": 0.19931930303573608, "learning_rate": 0.000901932664205929, "loss": 0.106, "num_input_tokens_seen": 74857216, "step": 34635 }, { "epoch": 5.650897226753671, "grad_norm": 0.06503892689943314, "learning_rate": 0.0009018903215673135, "loss": 0.0381, "num_input_tokens_seen": 74868384, "step": 34640 }, { "epoch": 5.651712887438825, "grad_norm": 0.12235700339078903, "learning_rate": 0.0009018479707838808, "loss": 0.0673, "num_input_tokens_seen": 74878592, "step": 34645 }, { "epoch": 5.65252854812398, "grad_norm": 0.21434731781482697, "learning_rate": 0.0009018056118564893, "loss": 0.0589, "num_input_tokens_seen": 74888992, "step": 34650 }, { "epoch": 5.653344208809135, "grad_norm": 0.19774998724460602, "learning_rate": 0.0009017632447859971, "loss": 0.0817, "num_input_tokens_seen": 74900640, "step": 34655 }, { "epoch": 5.654159869494291, "grad_norm": 0.011533130891621113, "learning_rate": 0.0009017208695732633, "loss": 0.037, "num_input_tokens_seen": 74912736, "step": 34660 }, { "epoch": 5.6549755301794455, "grad_norm": 0.0663393959403038, "learning_rate": 0.0009016784862191463, "loss": 0.0922, "num_input_tokens_seen": 74923616, "step": 34665 }, { "epoch": 5.6557911908646, "grad_norm": 0.028127873316407204, "learning_rate": 0.0009016360947245053, "loss": 0.0766, "num_input_tokens_seen": 74934336, "step": 34670 }, { "epoch": 5.656606851549755, "grad_norm": 0.12323799729347229, "learning_rate": 0.0009015936950901993, "loss": 0.0224, "num_input_tokens_seen": 74945952, "step": 34675 }, { "epoch": 5.65742251223491, "grad_norm": 0.006789292208850384, "learning_rate": 0.0009015512873170877, "loss": 0.1083, "num_input_tokens_seen": 74956512, "step": 34680 }, { "epoch": 5.658238172920065, "grad_norm": 0.08701828867197037, "learning_rate": 0.0009015088714060297, "loss": 0.3225, "num_input_tokens_seen": 74967104, "step": 34685 }, { "epoch": 5.6590538336052205, "grad_norm": 0.21887104213237762, "learning_rate": 0.0009014664473578851, "loss": 0.1056, "num_input_tokens_seen": 74977344, "step": 34690 }, { "epoch": 5.659869494290375, "grad_norm": 0.3914003074169159, "learning_rate": 0.0009014240151735138, "loss": 0.1699, "num_input_tokens_seen": 74988672, "step": 34695 }, { "epoch": 5.66068515497553, "grad_norm": 0.015539568848907948, "learning_rate": 0.0009013815748537755, "loss": 0.1177, "num_input_tokens_seen": 75000384, "step": 34700 }, { "epoch": 5.661500815660685, "grad_norm": 0.06390603631734848, "learning_rate": 0.0009013391263995303, "loss": 0.1135, "num_input_tokens_seen": 75010528, "step": 34705 }, { "epoch": 5.66231647634584, "grad_norm": 0.011964190751314163, "learning_rate": 0.0009012966698116387, "loss": 0.023, "num_input_tokens_seen": 75021920, "step": 34710 }, { "epoch": 5.6631321370309955, "grad_norm": 0.03568481281399727, "learning_rate": 0.0009012542050909609, "loss": 0.0481, "num_input_tokens_seen": 75031968, "step": 34715 }, { "epoch": 5.66394779771615, "grad_norm": 0.17110410332679749, "learning_rate": 0.0009012117322383577, "loss": 0.1272, "num_input_tokens_seen": 75043200, "step": 34720 }, { "epoch": 5.664763458401305, "grad_norm": 0.04262327775359154, "learning_rate": 0.0009011692512546897, "loss": 0.058, "num_input_tokens_seen": 75054016, "step": 34725 }, { "epoch": 5.66557911908646, "grad_norm": 0.1769617199897766, "learning_rate": 0.0009011267621408179, "loss": 0.064, "num_input_tokens_seen": 75065632, "step": 34730 }, { "epoch": 5.666394779771615, "grad_norm": 0.27743223309516907, "learning_rate": 0.0009010842648976034, "loss": 0.0554, "num_input_tokens_seen": 75076384, "step": 34735 }, { "epoch": 5.6672104404567705, "grad_norm": 0.2698041796684265, "learning_rate": 0.0009010417595259077, "loss": 0.1715, "num_input_tokens_seen": 75087104, "step": 34740 }, { "epoch": 5.668026101141925, "grad_norm": 0.13711011409759521, "learning_rate": 0.0009009992460265917, "loss": 0.0883, "num_input_tokens_seen": 75097184, "step": 34745 }, { "epoch": 5.66884176182708, "grad_norm": 0.013362093828618526, "learning_rate": 0.0009009567244005174, "loss": 0.0612, "num_input_tokens_seen": 75107136, "step": 34750 }, { "epoch": 5.669657422512235, "grad_norm": 0.028613250702619553, "learning_rate": 0.0009009141946485464, "loss": 0.0546, "num_input_tokens_seen": 75117024, "step": 34755 }, { "epoch": 5.67047308319739, "grad_norm": 0.016025543212890625, "learning_rate": 0.0009008716567715406, "loss": 0.0294, "num_input_tokens_seen": 75129632, "step": 34760 }, { "epoch": 5.671288743882545, "grad_norm": 0.010202709585428238, "learning_rate": 0.0009008291107703621, "loss": 0.2316, "num_input_tokens_seen": 75140704, "step": 34765 }, { "epoch": 5.672104404567699, "grad_norm": 0.010567674413323402, "learning_rate": 0.0009007865566458733, "loss": 0.0205, "num_input_tokens_seen": 75150880, "step": 34770 }, { "epoch": 5.672920065252855, "grad_norm": 0.2892915606498718, "learning_rate": 0.0009007439943989364, "loss": 0.1478, "num_input_tokens_seen": 75161728, "step": 34775 }, { "epoch": 5.67373572593801, "grad_norm": 0.05027128756046295, "learning_rate": 0.0009007014240304143, "loss": 0.0588, "num_input_tokens_seen": 75171936, "step": 34780 }, { "epoch": 5.674551386623165, "grad_norm": 0.06722673773765564, "learning_rate": 0.0009006588455411692, "loss": 0.0681, "num_input_tokens_seen": 75182720, "step": 34785 }, { "epoch": 5.6753670473083195, "grad_norm": 0.2073163390159607, "learning_rate": 0.0009006162589320645, "loss": 0.1459, "num_input_tokens_seen": 75193344, "step": 34790 }, { "epoch": 5.676182707993474, "grad_norm": 0.10115987807512283, "learning_rate": 0.000900573664203963, "loss": 0.0308, "num_input_tokens_seen": 75203104, "step": 34795 }, { "epoch": 5.67699836867863, "grad_norm": 0.4650232195854187, "learning_rate": 0.0009005310613577282, "loss": 0.1244, "num_input_tokens_seen": 75213632, "step": 34800 }, { "epoch": 5.677814029363785, "grad_norm": 0.007083847187459469, "learning_rate": 0.0009004884503942232, "loss": 0.0641, "num_input_tokens_seen": 75224192, "step": 34805 }, { "epoch": 5.67862969004894, "grad_norm": 0.008455055765807629, "learning_rate": 0.0009004458313143118, "loss": 0.0426, "num_input_tokens_seen": 75234880, "step": 34810 }, { "epoch": 5.6794453507340945, "grad_norm": 0.10253097862005234, "learning_rate": 0.0009004032041188575, "loss": 0.1099, "num_input_tokens_seen": 75244768, "step": 34815 }, { "epoch": 5.680261011419249, "grad_norm": 0.25227639079093933, "learning_rate": 0.0009003605688087244, "loss": 0.2557, "num_input_tokens_seen": 75255264, "step": 34820 }, { "epoch": 5.681076672104405, "grad_norm": 0.040591347962617874, "learning_rate": 0.0009003179253847764, "loss": 0.2172, "num_input_tokens_seen": 75265408, "step": 34825 }, { "epoch": 5.68189233278956, "grad_norm": 0.22518138587474823, "learning_rate": 0.0009002752738478779, "loss": 0.1612, "num_input_tokens_seen": 75277504, "step": 34830 }, { "epoch": 5.682707993474715, "grad_norm": 0.06464552134275436, "learning_rate": 0.000900232614198893, "loss": 0.1623, "num_input_tokens_seen": 75288864, "step": 34835 }, { "epoch": 5.6835236541598695, "grad_norm": 0.02341165393590927, "learning_rate": 0.0009001899464386867, "loss": 0.1506, "num_input_tokens_seen": 75298784, "step": 34840 }, { "epoch": 5.684339314845024, "grad_norm": 0.0356857031583786, "learning_rate": 0.0009001472705681233, "loss": 0.0259, "num_input_tokens_seen": 75309888, "step": 34845 }, { "epoch": 5.685154975530179, "grad_norm": 0.023588141426444054, "learning_rate": 0.0009001045865880679, "loss": 0.0789, "num_input_tokens_seen": 75321216, "step": 34850 }, { "epoch": 5.685970636215334, "grad_norm": 0.005699558649212122, "learning_rate": 0.0009000618944993854, "loss": 0.073, "num_input_tokens_seen": 75333280, "step": 34855 }, { "epoch": 5.68678629690049, "grad_norm": 0.2904861569404602, "learning_rate": 0.0009000191943029412, "loss": 0.0885, "num_input_tokens_seen": 75342464, "step": 34860 }, { "epoch": 5.6876019575856445, "grad_norm": 0.07417767494916916, "learning_rate": 0.0008999764859996005, "loss": 0.0726, "num_input_tokens_seen": 75354240, "step": 34865 }, { "epoch": 5.688417618270799, "grad_norm": 0.06917222589254379, "learning_rate": 0.000899933769590229, "loss": 0.0889, "num_input_tokens_seen": 75364992, "step": 34870 }, { "epoch": 5.689233278955954, "grad_norm": 0.16416126489639282, "learning_rate": 0.0008998910450756923, "loss": 0.2155, "num_input_tokens_seen": 75375392, "step": 34875 }, { "epoch": 5.690048939641109, "grad_norm": 0.23445457220077515, "learning_rate": 0.0008998483124568561, "loss": 0.0474, "num_input_tokens_seen": 75387328, "step": 34880 }, { "epoch": 5.690864600326265, "grad_norm": 0.01251760683953762, "learning_rate": 0.0008998055717345868, "loss": 0.0442, "num_input_tokens_seen": 75397984, "step": 34885 }, { "epoch": 5.691680261011419, "grad_norm": 0.01877402886748314, "learning_rate": 0.0008997628229097503, "loss": 0.1067, "num_input_tokens_seen": 75407360, "step": 34890 }, { "epoch": 5.692495921696574, "grad_norm": 0.17165693640708923, "learning_rate": 0.0008997200659832129, "loss": 0.1041, "num_input_tokens_seen": 75417408, "step": 34895 }, { "epoch": 5.693311582381729, "grad_norm": 0.07236825674772263, "learning_rate": 0.0008996773009558416, "loss": 0.045, "num_input_tokens_seen": 75427840, "step": 34900 }, { "epoch": 5.694127243066884, "grad_norm": 0.34581971168518066, "learning_rate": 0.0008996345278285027, "loss": 0.1505, "num_input_tokens_seen": 75439424, "step": 34905 }, { "epoch": 5.69494290375204, "grad_norm": 0.22256053984165192, "learning_rate": 0.000899591746602063, "loss": 0.1045, "num_input_tokens_seen": 75450272, "step": 34910 }, { "epoch": 5.695758564437194, "grad_norm": 0.0090791629627347, "learning_rate": 0.0008995489572773896, "loss": 0.0798, "num_input_tokens_seen": 75460544, "step": 34915 }, { "epoch": 5.696574225122349, "grad_norm": 0.04982369393110275, "learning_rate": 0.0008995061598553499, "loss": 0.0309, "num_input_tokens_seen": 75471488, "step": 34920 }, { "epoch": 5.697389885807504, "grad_norm": 0.14165297150611877, "learning_rate": 0.000899463354336811, "loss": 0.1819, "num_input_tokens_seen": 75482944, "step": 34925 }, { "epoch": 5.698205546492659, "grad_norm": 0.019582441076636314, "learning_rate": 0.0008994205407226403, "loss": 0.0423, "num_input_tokens_seen": 75493984, "step": 34930 }, { "epoch": 5.699021207177814, "grad_norm": 0.00845408346503973, "learning_rate": 0.0008993777190137058, "loss": 0.1114, "num_input_tokens_seen": 75504864, "step": 34935 }, { "epoch": 5.699836867862969, "grad_norm": 0.1220518946647644, "learning_rate": 0.0008993348892108753, "loss": 0.125, "num_input_tokens_seen": 75515936, "step": 34940 }, { "epoch": 5.700652528548124, "grad_norm": 0.0652894675731659, "learning_rate": 0.0008992920513150165, "loss": 0.051, "num_input_tokens_seen": 75527520, "step": 34945 }, { "epoch": 5.701468189233279, "grad_norm": 0.02808886580169201, "learning_rate": 0.0008992492053269976, "loss": 0.0843, "num_input_tokens_seen": 75539072, "step": 34950 }, { "epoch": 5.702283849918434, "grad_norm": 0.17246325314044952, "learning_rate": 0.0008992063512476873, "loss": 0.1733, "num_input_tokens_seen": 75549792, "step": 34955 }, { "epoch": 5.703099510603589, "grad_norm": 0.18310581147670746, "learning_rate": 0.0008991634890779538, "loss": 0.0305, "num_input_tokens_seen": 75560448, "step": 34960 }, { "epoch": 5.7039151712887435, "grad_norm": 0.048973195254802704, "learning_rate": 0.0008991206188186658, "loss": 0.1631, "num_input_tokens_seen": 75571776, "step": 34965 }, { "epoch": 5.704730831973899, "grad_norm": 0.10040713101625443, "learning_rate": 0.0008990777404706922, "loss": 0.2469, "num_input_tokens_seen": 75583328, "step": 34970 }, { "epoch": 5.705546492659054, "grad_norm": 0.19471172988414764, "learning_rate": 0.0008990348540349019, "loss": 0.1636, "num_input_tokens_seen": 75593984, "step": 34975 }, { "epoch": 5.706362153344209, "grad_norm": 0.25231602787971497, "learning_rate": 0.0008989919595121641, "loss": 0.1036, "num_input_tokens_seen": 75604256, "step": 34980 }, { "epoch": 5.707177814029364, "grad_norm": 0.02728627808392048, "learning_rate": 0.000898949056903348, "loss": 0.0964, "num_input_tokens_seen": 75614848, "step": 34985 }, { "epoch": 5.7079934747145185, "grad_norm": 0.22083836793899536, "learning_rate": 0.0008989061462093233, "loss": 0.1313, "num_input_tokens_seen": 75626208, "step": 34990 }, { "epoch": 5.708809135399674, "grad_norm": 0.03731374442577362, "learning_rate": 0.0008988632274309593, "loss": 0.1389, "num_input_tokens_seen": 75637024, "step": 34995 }, { "epoch": 5.709624796084829, "grad_norm": 0.01850057952105999, "learning_rate": 0.0008988203005691262, "loss": 0.0446, "num_input_tokens_seen": 75647456, "step": 35000 }, { "epoch": 5.710440456769984, "grad_norm": 0.08934499323368073, "learning_rate": 0.0008987773656246936, "loss": 0.0551, "num_input_tokens_seen": 75658368, "step": 35005 }, { "epoch": 5.711256117455139, "grad_norm": 0.11565990746021271, "learning_rate": 0.0008987344225985319, "loss": 0.1668, "num_input_tokens_seen": 75668640, "step": 35010 }, { "epoch": 5.712071778140293, "grad_norm": 0.07857227325439453, "learning_rate": 0.0008986914714915112, "loss": 0.1319, "num_input_tokens_seen": 75679136, "step": 35015 }, { "epoch": 5.712887438825448, "grad_norm": 0.018412547186017036, "learning_rate": 0.000898648512304502, "loss": 0.0948, "num_input_tokens_seen": 75689312, "step": 35020 }, { "epoch": 5.713703099510604, "grad_norm": 0.15788637101650238, "learning_rate": 0.0008986055450383752, "loss": 0.1009, "num_input_tokens_seen": 75699360, "step": 35025 }, { "epoch": 5.714518760195759, "grad_norm": 0.059505756944417953, "learning_rate": 0.0008985625696940013, "loss": 0.0621, "num_input_tokens_seen": 75710304, "step": 35030 }, { "epoch": 5.715334420880914, "grad_norm": 0.03797432407736778, "learning_rate": 0.0008985195862722513, "loss": 0.1173, "num_input_tokens_seen": 75721408, "step": 35035 }, { "epoch": 5.716150081566068, "grad_norm": 0.08449093252420425, "learning_rate": 0.0008984765947739964, "loss": 0.154, "num_input_tokens_seen": 75732832, "step": 35040 }, { "epoch": 5.716965742251223, "grad_norm": 0.0638086199760437, "learning_rate": 0.0008984335952001075, "loss": 0.0495, "num_input_tokens_seen": 75744256, "step": 35045 }, { "epoch": 5.717781402936378, "grad_norm": 0.050827570259571075, "learning_rate": 0.0008983905875514566, "loss": 0.0169, "num_input_tokens_seen": 75754016, "step": 35050 }, { "epoch": 5.718597063621534, "grad_norm": 0.13446107506752014, "learning_rate": 0.000898347571828915, "loss": 0.0554, "num_input_tokens_seen": 75766304, "step": 35055 }, { "epoch": 5.719412724306689, "grad_norm": 0.03578329086303711, "learning_rate": 0.0008983045480333545, "loss": 0.1295, "num_input_tokens_seen": 75776544, "step": 35060 }, { "epoch": 5.720228384991843, "grad_norm": 0.11802957952022552, "learning_rate": 0.0008982615161656471, "loss": 0.2292, "num_input_tokens_seen": 75787008, "step": 35065 }, { "epoch": 5.721044045676998, "grad_norm": 0.011829815804958344, "learning_rate": 0.0008982184762266648, "loss": 0.0445, "num_input_tokens_seen": 75796128, "step": 35070 }, { "epoch": 5.721859706362153, "grad_norm": 0.09536035358905792, "learning_rate": 0.00089817542821728, "loss": 0.0644, "num_input_tokens_seen": 75808640, "step": 35075 }, { "epoch": 5.722675367047309, "grad_norm": 0.21114234626293182, "learning_rate": 0.0008981323721383649, "loss": 0.2755, "num_input_tokens_seen": 75819040, "step": 35080 }, { "epoch": 5.7234910277324635, "grad_norm": 0.1931707113981247, "learning_rate": 0.0008980893079907922, "loss": 0.0722, "num_input_tokens_seen": 75830464, "step": 35085 }, { "epoch": 5.724306688417618, "grad_norm": 0.010170339606702328, "learning_rate": 0.0008980462357754347, "loss": 0.0227, "num_input_tokens_seen": 75841440, "step": 35090 }, { "epoch": 5.725122349102773, "grad_norm": 0.11149877309799194, "learning_rate": 0.0008980031554931654, "loss": 0.1586, "num_input_tokens_seen": 75853952, "step": 35095 }, { "epoch": 5.725938009787928, "grad_norm": 0.22925962507724762, "learning_rate": 0.0008979600671448571, "loss": 0.1256, "num_input_tokens_seen": 75863776, "step": 35100 }, { "epoch": 5.726753670473083, "grad_norm": 0.15158496797084808, "learning_rate": 0.0008979169707313831, "loss": 0.0396, "num_input_tokens_seen": 75876352, "step": 35105 }, { "epoch": 5.7275693311582385, "grad_norm": 0.04836349934339523, "learning_rate": 0.000897873866253617, "loss": 0.0416, "num_input_tokens_seen": 75888000, "step": 35110 }, { "epoch": 5.728384991843393, "grad_norm": 0.011450660414993763, "learning_rate": 0.0008978307537124324, "loss": 0.0917, "num_input_tokens_seen": 75898048, "step": 35115 }, { "epoch": 5.729200652528548, "grad_norm": 0.2579789161682129, "learning_rate": 0.0008977876331087027, "loss": 0.3243, "num_input_tokens_seen": 75909344, "step": 35120 }, { "epoch": 5.730016313213703, "grad_norm": 0.1533122956752777, "learning_rate": 0.0008977445044433021, "loss": 0.108, "num_input_tokens_seen": 75919360, "step": 35125 }, { "epoch": 5.730831973898858, "grad_norm": 0.011914343573153019, "learning_rate": 0.0008977013677171045, "loss": 0.0261, "num_input_tokens_seen": 75930624, "step": 35130 }, { "epoch": 5.731647634584013, "grad_norm": 0.054159220308065414, "learning_rate": 0.0008976582229309842, "loss": 0.095, "num_input_tokens_seen": 75941728, "step": 35135 }, { "epoch": 5.732463295269168, "grad_norm": 0.22217957675457, "learning_rate": 0.0008976150700858155, "loss": 0.2075, "num_input_tokens_seen": 75952960, "step": 35140 }, { "epoch": 5.733278955954323, "grad_norm": 0.288897305727005, "learning_rate": 0.000897571909182473, "loss": 0.1507, "num_input_tokens_seen": 75964320, "step": 35145 }, { "epoch": 5.734094616639478, "grad_norm": 0.15639406442642212, "learning_rate": 0.0008975287402218314, "loss": 0.0971, "num_input_tokens_seen": 75974848, "step": 35150 }, { "epoch": 5.734910277324633, "grad_norm": 0.13085006177425385, "learning_rate": 0.0008974855632047657, "loss": 0.0652, "num_input_tokens_seen": 75985024, "step": 35155 }, { "epoch": 5.735725938009788, "grad_norm": 0.23708663880825043, "learning_rate": 0.0008974423781321506, "loss": 0.0831, "num_input_tokens_seen": 75996544, "step": 35160 }, { "epoch": 5.736541598694943, "grad_norm": 0.010799623094499111, "learning_rate": 0.0008973991850048616, "loss": 0.0199, "num_input_tokens_seen": 76007616, "step": 35165 }, { "epoch": 5.737357259380098, "grad_norm": 0.22216928005218506, "learning_rate": 0.0008973559838237739, "loss": 0.1143, "num_input_tokens_seen": 76017824, "step": 35170 }, { "epoch": 5.738172920065253, "grad_norm": 0.01401756051927805, "learning_rate": 0.0008973127745897634, "loss": 0.0905, "num_input_tokens_seen": 76029024, "step": 35175 }, { "epoch": 5.738988580750408, "grad_norm": 0.18570876121520996, "learning_rate": 0.0008972695573037052, "loss": 0.1219, "num_input_tokens_seen": 76039968, "step": 35180 }, { "epoch": 5.739804241435563, "grad_norm": 0.04568921774625778, "learning_rate": 0.0008972263319664756, "loss": 0.0498, "num_input_tokens_seen": 76051360, "step": 35185 }, { "epoch": 5.740619902120718, "grad_norm": 0.007866466417908669, "learning_rate": 0.0008971830985789504, "loss": 0.1595, "num_input_tokens_seen": 76062016, "step": 35190 }, { "epoch": 5.741435562805873, "grad_norm": 0.005754650104790926, "learning_rate": 0.0008971398571420058, "loss": 0.1148, "num_input_tokens_seen": 76073696, "step": 35195 }, { "epoch": 5.742251223491028, "grad_norm": 0.022885838523507118, "learning_rate": 0.0008970966076565183, "loss": 0.119, "num_input_tokens_seen": 76083104, "step": 35200 }, { "epoch": 5.743066884176183, "grad_norm": 0.11712540686130524, "learning_rate": 0.0008970533501233642, "loss": 0.0966, "num_input_tokens_seen": 76094144, "step": 35205 }, { "epoch": 5.7438825448613375, "grad_norm": 0.00609155697748065, "learning_rate": 0.0008970100845434204, "loss": 0.0308, "num_input_tokens_seen": 76105408, "step": 35210 }, { "epoch": 5.744698205546492, "grad_norm": 0.08641016483306885, "learning_rate": 0.0008969668109175635, "loss": 0.1631, "num_input_tokens_seen": 76116896, "step": 35215 }, { "epoch": 5.745513866231647, "grad_norm": 0.00937197171151638, "learning_rate": 0.0008969235292466706, "loss": 0.0648, "num_input_tokens_seen": 76128736, "step": 35220 }, { "epoch": 5.746329526916803, "grad_norm": 0.008261475712060928, "learning_rate": 0.0008968802395316187, "loss": 0.0116, "num_input_tokens_seen": 76138240, "step": 35225 }, { "epoch": 5.747145187601958, "grad_norm": 0.05619320645928383, "learning_rate": 0.0008968369417732855, "loss": 0.0609, "num_input_tokens_seen": 76149312, "step": 35230 }, { "epoch": 5.7479608482871125, "grad_norm": 0.08603104203939438, "learning_rate": 0.0008967936359725482, "loss": 0.1606, "num_input_tokens_seen": 76161056, "step": 35235 }, { "epoch": 5.748776508972267, "grad_norm": 0.014682306908071041, "learning_rate": 0.0008967503221302844, "loss": 0.0621, "num_input_tokens_seen": 76172288, "step": 35240 }, { "epoch": 5.749592169657422, "grad_norm": 0.008648062124848366, "learning_rate": 0.0008967070002473721, "loss": 0.1693, "num_input_tokens_seen": 76182432, "step": 35245 }, { "epoch": 5.750407830342578, "grad_norm": 0.06725231558084488, "learning_rate": 0.0008966636703246891, "loss": 0.0745, "num_input_tokens_seen": 76193216, "step": 35250 }, { "epoch": 5.751223491027733, "grad_norm": 0.051333747804164886, "learning_rate": 0.0008966203323631137, "loss": 0.1863, "num_input_tokens_seen": 76203520, "step": 35255 }, { "epoch": 5.7520391517128875, "grad_norm": 0.041993435472249985, "learning_rate": 0.000896576986363524, "loss": 0.081, "num_input_tokens_seen": 76215584, "step": 35260 }, { "epoch": 5.752854812398042, "grad_norm": 0.0039042108692228794, "learning_rate": 0.0008965336323267986, "loss": 0.0566, "num_input_tokens_seen": 76225120, "step": 35265 }, { "epoch": 5.753670473083197, "grad_norm": 0.10992413014173508, "learning_rate": 0.0008964902702538163, "loss": 0.1709, "num_input_tokens_seen": 76235776, "step": 35270 }, { "epoch": 5.754486133768353, "grad_norm": 0.02428418956696987, "learning_rate": 0.0008964469001454554, "loss": 0.0625, "num_input_tokens_seen": 76246112, "step": 35275 }, { "epoch": 5.755301794453508, "grad_norm": 0.26490381360054016, "learning_rate": 0.0008964035220025953, "loss": 0.1386, "num_input_tokens_seen": 76255872, "step": 35280 }, { "epoch": 5.7561174551386625, "grad_norm": 0.005199507810175419, "learning_rate": 0.000896360135826115, "loss": 0.0508, "num_input_tokens_seen": 76265312, "step": 35285 }, { "epoch": 5.756933115823817, "grad_norm": 0.006524212658405304, "learning_rate": 0.0008963167416168936, "loss": 0.031, "num_input_tokens_seen": 76275104, "step": 35290 }, { "epoch": 5.757748776508972, "grad_norm": 0.054473526775836945, "learning_rate": 0.0008962733393758107, "loss": 0.0554, "num_input_tokens_seen": 76286304, "step": 35295 }, { "epoch": 5.758564437194127, "grad_norm": 0.13662466406822205, "learning_rate": 0.0008962299291037459, "loss": 0.1246, "num_input_tokens_seen": 76296704, "step": 35300 }, { "epoch": 5.759380097879282, "grad_norm": 0.11444362252950668, "learning_rate": 0.000896186510801579, "loss": 0.0867, "num_input_tokens_seen": 76307328, "step": 35305 }, { "epoch": 5.760195758564437, "grad_norm": 0.010635402984917164, "learning_rate": 0.0008961430844701899, "loss": 0.0943, "num_input_tokens_seen": 76318048, "step": 35310 }, { "epoch": 5.761011419249592, "grad_norm": 0.03280719742178917, "learning_rate": 0.0008960996501104583, "loss": 0.1289, "num_input_tokens_seen": 76328768, "step": 35315 }, { "epoch": 5.761827079934747, "grad_norm": 0.01935116946697235, "learning_rate": 0.0008960562077232652, "loss": 0.045, "num_input_tokens_seen": 76339712, "step": 35320 }, { "epoch": 5.762642740619902, "grad_norm": 0.25301891565322876, "learning_rate": 0.0008960127573094904, "loss": 0.082, "num_input_tokens_seen": 76351616, "step": 35325 }, { "epoch": 5.763458401305057, "grad_norm": 0.08036404848098755, "learning_rate": 0.0008959692988700148, "loss": 0.0467, "num_input_tokens_seen": 76361408, "step": 35330 }, { "epoch": 5.764274061990212, "grad_norm": 0.03166870027780533, "learning_rate": 0.000895925832405719, "loss": 0.0762, "num_input_tokens_seen": 76372544, "step": 35335 }, { "epoch": 5.765089722675367, "grad_norm": 0.10184311866760254, "learning_rate": 0.0008958823579174839, "loss": 0.1098, "num_input_tokens_seen": 76383552, "step": 35340 }, { "epoch": 5.765905383360522, "grad_norm": 0.17572534084320068, "learning_rate": 0.0008958388754061907, "loss": 0.0737, "num_input_tokens_seen": 76393568, "step": 35345 }, { "epoch": 5.766721044045677, "grad_norm": 0.030263762921094894, "learning_rate": 0.0008957953848727205, "loss": 0.0165, "num_input_tokens_seen": 76402976, "step": 35350 }, { "epoch": 5.767536704730832, "grad_norm": 0.008022490888834, "learning_rate": 0.0008957518863179545, "loss": 0.1867, "num_input_tokens_seen": 76414176, "step": 35355 }, { "epoch": 5.768352365415987, "grad_norm": 0.003972323145717382, "learning_rate": 0.0008957083797427747, "loss": 0.0115, "num_input_tokens_seen": 76425248, "step": 35360 }, { "epoch": 5.769168026101142, "grad_norm": 0.3723903298377991, "learning_rate": 0.0008956648651480627, "loss": 0.1278, "num_input_tokens_seen": 76435104, "step": 35365 }, { "epoch": 5.769983686786297, "grad_norm": 0.29073670506477356, "learning_rate": 0.0008956213425347001, "loss": 0.1751, "num_input_tokens_seen": 76446496, "step": 35370 }, { "epoch": 5.770799347471452, "grad_norm": 0.4639027416706085, "learning_rate": 0.0008955778119035692, "loss": 0.2464, "num_input_tokens_seen": 76456000, "step": 35375 }, { "epoch": 5.771615008156607, "grad_norm": 0.01451733335852623, "learning_rate": 0.000895534273255552, "loss": 0.0313, "num_input_tokens_seen": 76466880, "step": 35380 }, { "epoch": 5.7724306688417615, "grad_norm": 0.018691841512918472, "learning_rate": 0.0008954907265915311, "loss": 0.0964, "num_input_tokens_seen": 76478880, "step": 35385 }, { "epoch": 5.773246329526917, "grad_norm": 0.20020082592964172, "learning_rate": 0.0008954471719123889, "loss": 0.2309, "num_input_tokens_seen": 76490240, "step": 35390 }, { "epoch": 5.774061990212072, "grad_norm": 0.16038750112056732, "learning_rate": 0.0008954036092190079, "loss": 0.078, "num_input_tokens_seen": 76499168, "step": 35395 }, { "epoch": 5.774877650897227, "grad_norm": 0.005479294341057539, "learning_rate": 0.0008953600385122713, "loss": 0.0832, "num_input_tokens_seen": 76511392, "step": 35400 }, { "epoch": 5.775693311582382, "grad_norm": 0.03720789775252342, "learning_rate": 0.0008953164597930621, "loss": 0.0649, "num_input_tokens_seen": 76520896, "step": 35405 }, { "epoch": 5.7765089722675365, "grad_norm": 0.1710098385810852, "learning_rate": 0.0008952728730622632, "loss": 0.126, "num_input_tokens_seen": 76531648, "step": 35410 }, { "epoch": 5.777324632952691, "grad_norm": 0.04318312928080559, "learning_rate": 0.000895229278320758, "loss": 0.0368, "num_input_tokens_seen": 76542464, "step": 35415 }, { "epoch": 5.778140293637847, "grad_norm": 0.043150052428245544, "learning_rate": 0.0008951856755694303, "loss": 0.0952, "num_input_tokens_seen": 76553824, "step": 35420 }, { "epoch": 5.778955954323002, "grad_norm": 0.20751482248306274, "learning_rate": 0.0008951420648091635, "loss": 0.1107, "num_input_tokens_seen": 76565344, "step": 35425 }, { "epoch": 5.779771615008157, "grad_norm": 0.04356337711215019, "learning_rate": 0.0008950984460408414, "loss": 0.0887, "num_input_tokens_seen": 76577600, "step": 35430 }, { "epoch": 5.780587275693311, "grad_norm": 0.0067030293866992, "learning_rate": 0.0008950548192653481, "loss": 0.2208, "num_input_tokens_seen": 76588448, "step": 35435 }, { "epoch": 5.781402936378466, "grad_norm": 0.029318923130631447, "learning_rate": 0.0008950111844835678, "loss": 0.0751, "num_input_tokens_seen": 76600704, "step": 35440 }, { "epoch": 5.782218597063622, "grad_norm": 0.004557712934911251, "learning_rate": 0.0008949675416963847, "loss": 0.1848, "num_input_tokens_seen": 76611488, "step": 35445 }, { "epoch": 5.783034257748777, "grad_norm": 0.19797858595848083, "learning_rate": 0.0008949238909046833, "loss": 0.0869, "num_input_tokens_seen": 76623904, "step": 35450 }, { "epoch": 5.783849918433932, "grad_norm": 0.12520618736743927, "learning_rate": 0.0008948802321093484, "loss": 0.1262, "num_input_tokens_seen": 76635936, "step": 35455 }, { "epoch": 5.784665579119086, "grad_norm": 0.0165663193911314, "learning_rate": 0.0008948365653112645, "loss": 0.0287, "num_input_tokens_seen": 76646720, "step": 35460 }, { "epoch": 5.785481239804241, "grad_norm": 0.20906522870063782, "learning_rate": 0.0008947928905113166, "loss": 0.1032, "num_input_tokens_seen": 76656704, "step": 35465 }, { "epoch": 5.786296900489396, "grad_norm": 0.04493867978453636, "learning_rate": 0.00089474920771039, "loss": 0.2559, "num_input_tokens_seen": 76667360, "step": 35470 }, { "epoch": 5.787112561174552, "grad_norm": 0.1427122801542282, "learning_rate": 0.0008947055169093701, "loss": 0.0507, "num_input_tokens_seen": 76677440, "step": 35475 }, { "epoch": 5.787928221859707, "grad_norm": 0.21092888712882996, "learning_rate": 0.000894661818109142, "loss": 0.155, "num_input_tokens_seen": 76688320, "step": 35480 }, { "epoch": 5.788743882544861, "grad_norm": 0.13803480565547943, "learning_rate": 0.0008946181113105915, "loss": 0.1265, "num_input_tokens_seen": 76700608, "step": 35485 }, { "epoch": 5.789559543230016, "grad_norm": 0.19523029029369354, "learning_rate": 0.0008945743965146044, "loss": 0.1149, "num_input_tokens_seen": 76712064, "step": 35490 }, { "epoch": 5.790375203915171, "grad_norm": 0.05511949956417084, "learning_rate": 0.0008945306737220669, "loss": 0.0938, "num_input_tokens_seen": 76722784, "step": 35495 }, { "epoch": 5.791190864600326, "grad_norm": 0.21019725501537323, "learning_rate": 0.0008944869429338645, "loss": 0.0816, "num_input_tokens_seen": 76734752, "step": 35500 }, { "epoch": 5.7920065252854815, "grad_norm": 0.020916135981678963, "learning_rate": 0.0008944432041508838, "loss": 0.0304, "num_input_tokens_seen": 76745504, "step": 35505 }, { "epoch": 5.792822185970636, "grad_norm": 0.21575558185577393, "learning_rate": 0.0008943994573740111, "loss": 0.1336, "num_input_tokens_seen": 76756768, "step": 35510 }, { "epoch": 5.793637846655791, "grad_norm": 0.08731380105018616, "learning_rate": 0.0008943557026041331, "loss": 0.1259, "num_input_tokens_seen": 76766400, "step": 35515 }, { "epoch": 5.794453507340946, "grad_norm": 0.19834499061107635, "learning_rate": 0.0008943119398421367, "loss": 0.077, "num_input_tokens_seen": 76778208, "step": 35520 }, { "epoch": 5.795269168026101, "grad_norm": 0.2235175371170044, "learning_rate": 0.0008942681690889084, "loss": 0.3166, "num_input_tokens_seen": 76787712, "step": 35525 }, { "epoch": 5.7960848287112565, "grad_norm": 0.10026352107524872, "learning_rate": 0.0008942243903453356, "loss": 0.1214, "num_input_tokens_seen": 76798592, "step": 35530 }, { "epoch": 5.796900489396411, "grad_norm": 0.17312408983707428, "learning_rate": 0.0008941806036123054, "loss": 0.0992, "num_input_tokens_seen": 76810688, "step": 35535 }, { "epoch": 5.797716150081566, "grad_norm": 0.07986405491828918, "learning_rate": 0.0008941368088907052, "loss": 0.0481, "num_input_tokens_seen": 76822080, "step": 35540 }, { "epoch": 5.798531810766721, "grad_norm": 0.03755347803235054, "learning_rate": 0.0008940930061814226, "loss": 0.095, "num_input_tokens_seen": 76832224, "step": 35545 }, { "epoch": 5.799347471451876, "grad_norm": 0.04478127136826515, "learning_rate": 0.0008940491954853451, "loss": 0.1006, "num_input_tokens_seen": 76843584, "step": 35550 }, { "epoch": 5.800163132137031, "grad_norm": 0.01865328848361969, "learning_rate": 0.0008940053768033609, "loss": 0.0711, "num_input_tokens_seen": 76854688, "step": 35555 }, { "epoch": 5.800978792822186, "grad_norm": 0.008900360204279423, "learning_rate": 0.0008939615501363581, "loss": 0.0727, "num_input_tokens_seen": 76866560, "step": 35560 }, { "epoch": 5.801794453507341, "grad_norm": 0.0329662561416626, "learning_rate": 0.0008939177154852245, "loss": 0.1171, "num_input_tokens_seen": 76876608, "step": 35565 }, { "epoch": 5.802610114192496, "grad_norm": 0.019964130595326424, "learning_rate": 0.0008938738728508487, "loss": 0.0625, "num_input_tokens_seen": 76886656, "step": 35570 }, { "epoch": 5.803425774877651, "grad_norm": 0.13185709714889526, "learning_rate": 0.0008938300222341192, "loss": 0.0812, "num_input_tokens_seen": 76897696, "step": 35575 }, { "epoch": 5.804241435562806, "grad_norm": 0.04914616420865059, "learning_rate": 0.0008937861636359248, "loss": 0.0342, "num_input_tokens_seen": 76908128, "step": 35580 }, { "epoch": 5.80505709624796, "grad_norm": 0.06280156224966049, "learning_rate": 0.000893742297057154, "loss": 0.039, "num_input_tokens_seen": 76918912, "step": 35585 }, { "epoch": 5.805872756933116, "grad_norm": 0.10165125131607056, "learning_rate": 0.0008936984224986962, "loss": 0.0566, "num_input_tokens_seen": 76930240, "step": 35590 }, { "epoch": 5.806688417618271, "grad_norm": 0.05567912384867668, "learning_rate": 0.0008936545399614405, "loss": 0.1717, "num_input_tokens_seen": 76941856, "step": 35595 }, { "epoch": 5.807504078303426, "grad_norm": 0.11308423429727554, "learning_rate": 0.0008936106494462761, "loss": 0.1369, "num_input_tokens_seen": 76952608, "step": 35600 }, { "epoch": 5.808319738988581, "grad_norm": 0.034917544573545456, "learning_rate": 0.0008935667509540926, "loss": 0.0723, "num_input_tokens_seen": 76963904, "step": 35605 }, { "epoch": 5.809135399673735, "grad_norm": 0.1020023375749588, "learning_rate": 0.0008935228444857795, "loss": 0.1328, "num_input_tokens_seen": 76974848, "step": 35610 }, { "epoch": 5.809951060358891, "grad_norm": 0.031795721501111984, "learning_rate": 0.0008934789300422268, "loss": 0.0606, "num_input_tokens_seen": 76984640, "step": 35615 }, { "epoch": 5.810766721044046, "grad_norm": 0.020438876003026962, "learning_rate": 0.0008934350076243245, "loss": 0.144, "num_input_tokens_seen": 76994112, "step": 35620 }, { "epoch": 5.811582381729201, "grad_norm": 0.06384740769863129, "learning_rate": 0.0008933910772329625, "loss": 0.0439, "num_input_tokens_seen": 77005792, "step": 35625 }, { "epoch": 5.8123980424143555, "grad_norm": 0.018098855391144753, "learning_rate": 0.0008933471388690314, "loss": 0.0366, "num_input_tokens_seen": 77016288, "step": 35630 }, { "epoch": 5.81321370309951, "grad_norm": 0.20901861786842346, "learning_rate": 0.0008933031925334214, "loss": 0.1645, "num_input_tokens_seen": 77027456, "step": 35635 }, { "epoch": 5.814029363784666, "grad_norm": 0.026231657713651657, "learning_rate": 0.0008932592382270235, "loss": 0.1708, "num_input_tokens_seen": 77038816, "step": 35640 }, { "epoch": 5.814845024469821, "grad_norm": 0.047216691076755524, "learning_rate": 0.0008932152759507279, "loss": 0.0307, "num_input_tokens_seen": 77050112, "step": 35645 }, { "epoch": 5.815660685154976, "grad_norm": 0.20011982321739197, "learning_rate": 0.0008931713057054263, "loss": 0.1104, "num_input_tokens_seen": 77060768, "step": 35650 }, { "epoch": 5.8164763458401305, "grad_norm": 0.2040461301803589, "learning_rate": 0.0008931273274920091, "loss": 0.0727, "num_input_tokens_seen": 77071712, "step": 35655 }, { "epoch": 5.817292006525285, "grad_norm": 0.02102004364132881, "learning_rate": 0.0008930833413113682, "loss": 0.1561, "num_input_tokens_seen": 77081824, "step": 35660 }, { "epoch": 5.81810766721044, "grad_norm": 0.03012845665216446, "learning_rate": 0.0008930393471643945, "loss": 0.0348, "num_input_tokens_seen": 77093760, "step": 35665 }, { "epoch": 5.818923327895595, "grad_norm": 0.04393388330936432, "learning_rate": 0.0008929953450519799, "loss": 0.0973, "num_input_tokens_seen": 77105056, "step": 35670 }, { "epoch": 5.819738988580751, "grad_norm": 0.0268500167876482, "learning_rate": 0.000892951334975016, "loss": 0.2219, "num_input_tokens_seen": 77116768, "step": 35675 }, { "epoch": 5.8205546492659055, "grad_norm": 0.039734356105327606, "learning_rate": 0.0008929073169343948, "loss": 0.0278, "num_input_tokens_seen": 77126080, "step": 35680 }, { "epoch": 5.82137030995106, "grad_norm": 0.00670345826074481, "learning_rate": 0.0008928632909310084, "loss": 0.0191, "num_input_tokens_seen": 77138720, "step": 35685 }, { "epoch": 5.822185970636215, "grad_norm": 0.020748404785990715, "learning_rate": 0.000892819256965749, "loss": 0.0561, "num_input_tokens_seen": 77150304, "step": 35690 }, { "epoch": 5.82300163132137, "grad_norm": 0.006461436860263348, "learning_rate": 0.0008927752150395092, "loss": 0.0242, "num_input_tokens_seen": 77161408, "step": 35695 }, { "epoch": 5.823817292006526, "grad_norm": 0.04613238573074341, "learning_rate": 0.0008927311651531813, "loss": 0.1324, "num_input_tokens_seen": 77171104, "step": 35700 }, { "epoch": 5.8246329526916805, "grad_norm": 0.03033752180635929, "learning_rate": 0.0008926871073076581, "loss": 0.0144, "num_input_tokens_seen": 77183264, "step": 35705 }, { "epoch": 5.825448613376835, "grad_norm": 0.0055288695730268955, "learning_rate": 0.0008926430415038324, "loss": 0.102, "num_input_tokens_seen": 77192608, "step": 35710 }, { "epoch": 5.82626427406199, "grad_norm": 0.3753073215484619, "learning_rate": 0.0008925989677425976, "loss": 0.1775, "num_input_tokens_seen": 77202432, "step": 35715 }, { "epoch": 5.827079934747145, "grad_norm": 0.01609216444194317, "learning_rate": 0.0008925548860248464, "loss": 0.0581, "num_input_tokens_seen": 77213376, "step": 35720 }, { "epoch": 5.827895595432301, "grad_norm": 0.12528999149799347, "learning_rate": 0.0008925107963514727, "loss": 0.0309, "num_input_tokens_seen": 77224448, "step": 35725 }, { "epoch": 5.828711256117455, "grad_norm": 0.05051087588071823, "learning_rate": 0.0008924666987233697, "loss": 0.1069, "num_input_tokens_seen": 77235264, "step": 35730 }, { "epoch": 5.82952691680261, "grad_norm": 0.03082728572189808, "learning_rate": 0.0008924225931414312, "loss": 0.0907, "num_input_tokens_seen": 77245280, "step": 35735 }, { "epoch": 5.830342577487765, "grad_norm": 0.024117425084114075, "learning_rate": 0.000892378479606551, "loss": 0.0191, "num_input_tokens_seen": 77254976, "step": 35740 }, { "epoch": 5.83115823817292, "grad_norm": 0.00413041515275836, "learning_rate": 0.0008923343581196231, "loss": 0.0402, "num_input_tokens_seen": 77264992, "step": 35745 }, { "epoch": 5.831973898858075, "grad_norm": 0.027068182826042175, "learning_rate": 0.0008922902286815417, "loss": 0.0574, "num_input_tokens_seen": 77277600, "step": 35750 }, { "epoch": 5.8327895595432295, "grad_norm": 0.003934292122721672, "learning_rate": 0.0008922460912932013, "loss": 0.0348, "num_input_tokens_seen": 77288544, "step": 35755 }, { "epoch": 5.833605220228385, "grad_norm": 0.010412490926682949, "learning_rate": 0.0008922019459554961, "loss": 0.1897, "num_input_tokens_seen": 77299648, "step": 35760 }, { "epoch": 5.83442088091354, "grad_norm": 0.15992146730422974, "learning_rate": 0.000892157792669321, "loss": 0.0811, "num_input_tokens_seen": 77311008, "step": 35765 }, { "epoch": 5.835236541598695, "grad_norm": 0.032339174300432205, "learning_rate": 0.0008921136314355706, "loss": 0.0093, "num_input_tokens_seen": 77320928, "step": 35770 }, { "epoch": 5.83605220228385, "grad_norm": 0.014086034148931503, "learning_rate": 0.0008920694622551402, "loss": 0.0587, "num_input_tokens_seen": 77331328, "step": 35775 }, { "epoch": 5.8368678629690045, "grad_norm": 0.3389575481414795, "learning_rate": 0.0008920252851289248, "loss": 0.236, "num_input_tokens_seen": 77342272, "step": 35780 }, { "epoch": 5.83768352365416, "grad_norm": 0.017857255414128304, "learning_rate": 0.0008919811000578195, "loss": 0.0768, "num_input_tokens_seen": 77353952, "step": 35785 }, { "epoch": 5.838499184339315, "grad_norm": 0.023409778252243996, "learning_rate": 0.0008919369070427201, "loss": 0.0528, "num_input_tokens_seen": 77364992, "step": 35790 }, { "epoch": 5.83931484502447, "grad_norm": 0.34260937571525574, "learning_rate": 0.000891892706084522, "loss": 0.0514, "num_input_tokens_seen": 77375424, "step": 35795 }, { "epoch": 5.840130505709625, "grad_norm": 0.15101541578769684, "learning_rate": 0.0008918484971841211, "loss": 0.0591, "num_input_tokens_seen": 77384576, "step": 35800 }, { "epoch": 5.8409461663947795, "grad_norm": 0.026871444657444954, "learning_rate": 0.0008918042803424133, "loss": 0.0332, "num_input_tokens_seen": 77395584, "step": 35805 }, { "epoch": 5.841761827079935, "grad_norm": 0.04889817163348198, "learning_rate": 0.0008917600555602947, "loss": 0.1316, "num_input_tokens_seen": 77406944, "step": 35810 }, { "epoch": 5.84257748776509, "grad_norm": 0.012206361629068851, "learning_rate": 0.0008917158228386616, "loss": 0.13, "num_input_tokens_seen": 77418240, "step": 35815 }, { "epoch": 5.843393148450245, "grad_norm": 0.005175419617444277, "learning_rate": 0.0008916715821784105, "loss": 0.018, "num_input_tokens_seen": 77429920, "step": 35820 }, { "epoch": 5.8442088091354, "grad_norm": 0.05672929808497429, "learning_rate": 0.0008916273335804377, "loss": 0.0394, "num_input_tokens_seen": 77441504, "step": 35825 }, { "epoch": 5.8450244698205545, "grad_norm": 0.0816783681511879, "learning_rate": 0.0008915830770456403, "loss": 0.038, "num_input_tokens_seen": 77452320, "step": 35830 }, { "epoch": 5.845840130505709, "grad_norm": 0.09134317189455032, "learning_rate": 0.0008915388125749152, "loss": 0.0466, "num_input_tokens_seen": 77463168, "step": 35835 }, { "epoch": 5.846655791190865, "grad_norm": 0.05557303503155708, "learning_rate": 0.0008914945401691592, "loss": 0.0353, "num_input_tokens_seen": 77474464, "step": 35840 }, { "epoch": 5.84747145187602, "grad_norm": 0.3143428564071655, "learning_rate": 0.0008914502598292698, "loss": 0.2314, "num_input_tokens_seen": 77485792, "step": 35845 }, { "epoch": 5.848287112561175, "grad_norm": 0.23155027627944946, "learning_rate": 0.0008914059715561442, "loss": 0.2272, "num_input_tokens_seen": 77497568, "step": 35850 }, { "epoch": 5.849102773246329, "grad_norm": 0.5001063346862793, "learning_rate": 0.0008913616753506801, "loss": 0.0709, "num_input_tokens_seen": 77507296, "step": 35855 }, { "epoch": 5.849918433931484, "grad_norm": 0.22688445448875427, "learning_rate": 0.0008913173712137752, "loss": 0.0571, "num_input_tokens_seen": 77517856, "step": 35860 }, { "epoch": 5.850734094616639, "grad_norm": 0.014077413827180862, "learning_rate": 0.0008912730591463274, "loss": 0.0847, "num_input_tokens_seen": 77528320, "step": 35865 }, { "epoch": 5.851549755301795, "grad_norm": 0.005804943386465311, "learning_rate": 0.0008912287391492345, "loss": 0.0677, "num_input_tokens_seen": 77539392, "step": 35870 }, { "epoch": 5.85236541598695, "grad_norm": 0.049013834446668625, "learning_rate": 0.0008911844112233951, "loss": 0.0577, "num_input_tokens_seen": 77550176, "step": 35875 }, { "epoch": 5.853181076672104, "grad_norm": 0.42374399304389954, "learning_rate": 0.0008911400753697072, "loss": 0.1369, "num_input_tokens_seen": 77560864, "step": 35880 }, { "epoch": 5.853996737357259, "grad_norm": 0.04476075619459152, "learning_rate": 0.0008910957315890695, "loss": 0.0288, "num_input_tokens_seen": 77570752, "step": 35885 }, { "epoch": 5.854812398042414, "grad_norm": 0.011624328792095184, "learning_rate": 0.0008910513798823807, "loss": 0.0218, "num_input_tokens_seen": 77581600, "step": 35890 }, { "epoch": 5.85562805872757, "grad_norm": 0.006588727701455355, "learning_rate": 0.0008910070202505396, "loss": 0.0593, "num_input_tokens_seen": 77593536, "step": 35895 }, { "epoch": 5.856443719412725, "grad_norm": 0.28029340505599976, "learning_rate": 0.0008909626526944452, "loss": 0.2462, "num_input_tokens_seen": 77603328, "step": 35900 }, { "epoch": 5.857259380097879, "grad_norm": 0.04250373691320419, "learning_rate": 0.0008909182772149966, "loss": 0.0479, "num_input_tokens_seen": 77612800, "step": 35905 }, { "epoch": 5.858075040783034, "grad_norm": 0.0644042119383812, "learning_rate": 0.0008908738938130933, "loss": 0.0841, "num_input_tokens_seen": 77622304, "step": 35910 }, { "epoch": 5.858890701468189, "grad_norm": 0.07935375720262527, "learning_rate": 0.0008908295024896346, "loss": 0.0657, "num_input_tokens_seen": 77634336, "step": 35915 }, { "epoch": 5.859706362153344, "grad_norm": 0.018218394368886948, "learning_rate": 0.0008907851032455204, "loss": 0.0737, "num_input_tokens_seen": 77645504, "step": 35920 }, { "epoch": 5.8605220228384995, "grad_norm": 0.137411966919899, "learning_rate": 0.0008907406960816502, "loss": 0.0551, "num_input_tokens_seen": 77657472, "step": 35925 }, { "epoch": 5.861337683523654, "grad_norm": 0.22196845710277557, "learning_rate": 0.0008906962809989242, "loss": 0.1278, "num_input_tokens_seen": 77669344, "step": 35930 }, { "epoch": 5.862153344208809, "grad_norm": 0.0576411709189415, "learning_rate": 0.0008906518579982423, "loss": 0.0599, "num_input_tokens_seen": 77679648, "step": 35935 }, { "epoch": 5.862969004893964, "grad_norm": 0.03521393612027168, "learning_rate": 0.000890607427080505, "loss": 0.0273, "num_input_tokens_seen": 77689440, "step": 35940 }, { "epoch": 5.863784665579119, "grad_norm": 0.034378282725811005, "learning_rate": 0.0008905629882466126, "loss": 0.0667, "num_input_tokens_seen": 77700672, "step": 35945 }, { "epoch": 5.864600326264274, "grad_norm": 0.004574810154736042, "learning_rate": 0.0008905185414974659, "loss": 0.0447, "num_input_tokens_seen": 77710368, "step": 35950 }, { "epoch": 5.865415986949429, "grad_norm": 0.07072892785072327, "learning_rate": 0.0008904740868339655, "loss": 0.1731, "num_input_tokens_seen": 77721728, "step": 35955 }, { "epoch": 5.866231647634584, "grad_norm": 0.044736456125974655, "learning_rate": 0.0008904296242570123, "loss": 0.052, "num_input_tokens_seen": 77732448, "step": 35960 }, { "epoch": 5.867047308319739, "grad_norm": 0.0900491327047348, "learning_rate": 0.0008903851537675076, "loss": 0.0902, "num_input_tokens_seen": 77743520, "step": 35965 }, { "epoch": 5.867862969004894, "grad_norm": 0.0018697967752814293, "learning_rate": 0.0008903406753663524, "loss": 0.1432, "num_input_tokens_seen": 77754656, "step": 35970 }, { "epoch": 5.868678629690049, "grad_norm": 0.07420942187309265, "learning_rate": 0.0008902961890544483, "loss": 0.1013, "num_input_tokens_seen": 77764960, "step": 35975 }, { "epoch": 5.869494290375204, "grad_norm": 0.014277939684689045, "learning_rate": 0.0008902516948326967, "loss": 0.2049, "num_input_tokens_seen": 77776672, "step": 35980 }, { "epoch": 5.870309951060359, "grad_norm": 0.1220325455069542, "learning_rate": 0.0008902071927019996, "loss": 0.1353, "num_input_tokens_seen": 77788352, "step": 35985 }, { "epoch": 5.871125611745514, "grad_norm": 0.017809653654694557, "learning_rate": 0.0008901626826632586, "loss": 0.04, "num_input_tokens_seen": 77797504, "step": 35990 }, { "epoch": 5.871941272430669, "grad_norm": 0.010331861674785614, "learning_rate": 0.000890118164717376, "loss": 0.0541, "num_input_tokens_seen": 77807136, "step": 35995 }, { "epoch": 5.872756933115824, "grad_norm": 0.33883270621299744, "learning_rate": 0.0008900736388652537, "loss": 0.205, "num_input_tokens_seen": 77817728, "step": 36000 }, { "epoch": 5.873572593800979, "grad_norm": 0.021140409633517265, "learning_rate": 0.0008900291051077944, "loss": 0.1508, "num_input_tokens_seen": 77828032, "step": 36005 }, { "epoch": 5.874388254486134, "grad_norm": 0.005671947728842497, "learning_rate": 0.0008899845634459005, "loss": 0.0407, "num_input_tokens_seen": 77838272, "step": 36010 }, { "epoch": 5.875203915171289, "grad_norm": 0.11678887158632278, "learning_rate": 0.0008899400138804748, "loss": 0.1261, "num_input_tokens_seen": 77848640, "step": 36015 }, { "epoch": 5.876019575856444, "grad_norm": 0.11311411112546921, "learning_rate": 0.0008898954564124197, "loss": 0.1584, "num_input_tokens_seen": 77860256, "step": 36020 }, { "epoch": 5.876835236541599, "grad_norm": 0.03331954777240753, "learning_rate": 0.0008898508910426388, "loss": 0.0781, "num_input_tokens_seen": 77871040, "step": 36025 }, { "epoch": 5.877650897226753, "grad_norm": 0.1294984668493271, "learning_rate": 0.0008898063177720351, "loss": 0.1216, "num_input_tokens_seen": 77882400, "step": 36030 }, { "epoch": 5.878466557911908, "grad_norm": 0.0061601377092301846, "learning_rate": 0.0008897617366015118, "loss": 0.0277, "num_input_tokens_seen": 77893216, "step": 36035 }, { "epoch": 5.879282218597064, "grad_norm": 0.03422814980149269, "learning_rate": 0.0008897171475319723, "loss": 0.069, "num_input_tokens_seen": 77905088, "step": 36040 }, { "epoch": 5.880097879282219, "grad_norm": 0.07074693590402603, "learning_rate": 0.0008896725505643206, "loss": 0.0383, "num_input_tokens_seen": 77914624, "step": 36045 }, { "epoch": 5.8809135399673735, "grad_norm": 0.2282271385192871, "learning_rate": 0.0008896279456994603, "loss": 0.0993, "num_input_tokens_seen": 77924896, "step": 36050 }, { "epoch": 5.881729200652528, "grad_norm": 0.04102031886577606, "learning_rate": 0.0008895833329382954, "loss": 0.0236, "num_input_tokens_seen": 77934976, "step": 36055 }, { "epoch": 5.882544861337683, "grad_norm": 0.0101171201094985, "learning_rate": 0.00088953871228173, "loss": 0.0472, "num_input_tokens_seen": 77945824, "step": 36060 }, { "epoch": 5.883360522022839, "grad_norm": 0.022622620686888695, "learning_rate": 0.0008894940837306685, "loss": 0.1508, "num_input_tokens_seen": 77956704, "step": 36065 }, { "epoch": 5.884176182707994, "grad_norm": 0.03825777769088745, "learning_rate": 0.000889449447286015, "loss": 0.241, "num_input_tokens_seen": 77968288, "step": 36070 }, { "epoch": 5.8849918433931485, "grad_norm": 0.027706053107976913, "learning_rate": 0.0008894048029486748, "loss": 0.0252, "num_input_tokens_seen": 77979264, "step": 36075 }, { "epoch": 5.885807504078303, "grad_norm": 0.06347054243087769, "learning_rate": 0.0008893601507195521, "loss": 0.1103, "num_input_tokens_seen": 77988448, "step": 36080 }, { "epoch": 5.886623164763458, "grad_norm": 0.08096565306186676, "learning_rate": 0.000889315490599552, "loss": 0.0732, "num_input_tokens_seen": 77999040, "step": 36085 }, { "epoch": 5.887438825448614, "grad_norm": 0.33597108721733093, "learning_rate": 0.0008892708225895796, "loss": 0.1519, "num_input_tokens_seen": 78009408, "step": 36090 }, { "epoch": 5.888254486133769, "grad_norm": 0.01979757472872734, "learning_rate": 0.0008892261466905402, "loss": 0.0937, "num_input_tokens_seen": 78019776, "step": 36095 }, { "epoch": 5.8890701468189235, "grad_norm": 0.19596102833747864, "learning_rate": 0.000889181462903339, "loss": 0.3053, "num_input_tokens_seen": 78030144, "step": 36100 }, { "epoch": 5.889885807504078, "grad_norm": 0.27742302417755127, "learning_rate": 0.0008891367712288819, "loss": 0.1255, "num_input_tokens_seen": 78041152, "step": 36105 }, { "epoch": 5.890701468189233, "grad_norm": 0.05575815960764885, "learning_rate": 0.0008890920716680744, "loss": 0.1552, "num_input_tokens_seen": 78052384, "step": 36110 }, { "epoch": 5.891517128874388, "grad_norm": 0.013160888105630875, "learning_rate": 0.0008890473642218226, "loss": 0.0322, "num_input_tokens_seen": 78062400, "step": 36115 }, { "epoch": 5.892332789559543, "grad_norm": 0.009605771861970425, "learning_rate": 0.0008890026488910323, "loss": 0.1056, "num_input_tokens_seen": 78072480, "step": 36120 }, { "epoch": 5.8931484502446985, "grad_norm": 0.0945558175444603, "learning_rate": 0.0008889579256766098, "loss": 0.0372, "num_input_tokens_seen": 78083968, "step": 36125 }, { "epoch": 5.893964110929853, "grad_norm": 0.14063893258571625, "learning_rate": 0.0008889131945794618, "loss": 0.0596, "num_input_tokens_seen": 78094176, "step": 36130 }, { "epoch": 5.894779771615008, "grad_norm": 0.02134103700518608, "learning_rate": 0.0008888684556004942, "loss": 0.0324, "num_input_tokens_seen": 78103872, "step": 36135 }, { "epoch": 5.895595432300163, "grad_norm": 0.13972730934619904, "learning_rate": 0.0008888237087406141, "loss": 0.0643, "num_input_tokens_seen": 78114656, "step": 36140 }, { "epoch": 5.896411092985318, "grad_norm": 0.06875422596931458, "learning_rate": 0.0008887789540007285, "loss": 0.1673, "num_input_tokens_seen": 78125312, "step": 36145 }, { "epoch": 5.897226753670473, "grad_norm": 0.02877797745168209, "learning_rate": 0.000888734191381744, "loss": 0.035, "num_input_tokens_seen": 78135936, "step": 36150 }, { "epoch": 5.898042414355628, "grad_norm": 0.06866522133350372, "learning_rate": 0.000888689420884568, "loss": 0.1522, "num_input_tokens_seen": 78148384, "step": 36155 }, { "epoch": 5.898858075040783, "grad_norm": 0.013201478868722916, "learning_rate": 0.0008886446425101078, "loss": 0.0963, "num_input_tokens_seen": 78158368, "step": 36160 }, { "epoch": 5.899673735725938, "grad_norm": 0.06638146191835403, "learning_rate": 0.0008885998562592709, "loss": 0.0219, "num_input_tokens_seen": 78167680, "step": 36165 }, { "epoch": 5.900489396411093, "grad_norm": 0.2783553898334503, "learning_rate": 0.0008885550621329649, "loss": 0.0815, "num_input_tokens_seen": 78178048, "step": 36170 }, { "epoch": 5.901305057096248, "grad_norm": 0.07050324976444244, "learning_rate": 0.0008885102601320976, "loss": 0.0394, "num_input_tokens_seen": 78187360, "step": 36175 }, { "epoch": 5.902120717781403, "grad_norm": 0.01027140486985445, "learning_rate": 0.0008884654502575771, "loss": 0.0568, "num_input_tokens_seen": 78198912, "step": 36180 }, { "epoch": 5.902936378466558, "grad_norm": 0.009896304458379745, "learning_rate": 0.0008884206325103115, "loss": 0.0781, "num_input_tokens_seen": 78209952, "step": 36185 }, { "epoch": 5.903752039151713, "grad_norm": 0.010709409601986408, "learning_rate": 0.000888375806891209, "loss": 0.1165, "num_input_tokens_seen": 78219712, "step": 36190 }, { "epoch": 5.904567699836868, "grad_norm": 0.04064284265041351, "learning_rate": 0.0008883309734011779, "loss": 0.0475, "num_input_tokens_seen": 78231776, "step": 36195 }, { "epoch": 5.9053833605220225, "grad_norm": 0.038544662296772, "learning_rate": 0.0008882861320411273, "loss": 0.0941, "num_input_tokens_seen": 78243648, "step": 36200 }, { "epoch": 5.906199021207177, "grad_norm": 0.05744968354701996, "learning_rate": 0.0008882412828119655, "loss": 0.0914, "num_input_tokens_seen": 78253792, "step": 36205 }, { "epoch": 5.907014681892333, "grad_norm": 0.012252528220415115, "learning_rate": 0.0008881964257146015, "loss": 0.0543, "num_input_tokens_seen": 78263616, "step": 36210 }, { "epoch": 5.907830342577488, "grad_norm": 0.18378940224647522, "learning_rate": 0.0008881515607499446, "loss": 0.1035, "num_input_tokens_seen": 78275296, "step": 36215 }, { "epoch": 5.908646003262643, "grad_norm": 0.16523931920528412, "learning_rate": 0.000888106687918904, "loss": 0.1301, "num_input_tokens_seen": 78286336, "step": 36220 }, { "epoch": 5.9094616639477975, "grad_norm": 0.05260119214653969, "learning_rate": 0.000888061807222389, "loss": 0.0246, "num_input_tokens_seen": 78296640, "step": 36225 }, { "epoch": 5.910277324632952, "grad_norm": 0.003584100864827633, "learning_rate": 0.000888016918661309, "loss": 0.0172, "num_input_tokens_seen": 78306560, "step": 36230 }, { "epoch": 5.911092985318108, "grad_norm": 0.2283678501844406, "learning_rate": 0.0008879720222365739, "loss": 0.1179, "num_input_tokens_seen": 78317248, "step": 36235 }, { "epoch": 5.911908646003263, "grad_norm": 0.08701247721910477, "learning_rate": 0.0008879271179490938, "loss": 0.133, "num_input_tokens_seen": 78328992, "step": 36240 }, { "epoch": 5.912724306688418, "grad_norm": 0.09132330864667892, "learning_rate": 0.0008878822057997784, "loss": 0.0695, "num_input_tokens_seen": 78340384, "step": 36245 }, { "epoch": 5.9135399673735725, "grad_norm": 0.2684599459171295, "learning_rate": 0.000887837285789538, "loss": 0.1982, "num_input_tokens_seen": 78352096, "step": 36250 }, { "epoch": 5.914355628058727, "grad_norm": 0.037875618785619736, "learning_rate": 0.0008877923579192831, "loss": 0.0138, "num_input_tokens_seen": 78361728, "step": 36255 }, { "epoch": 5.915171288743883, "grad_norm": 0.009383009746670723, "learning_rate": 0.0008877474221899241, "loss": 0.0733, "num_input_tokens_seen": 78373440, "step": 36260 }, { "epoch": 5.915986949429038, "grad_norm": 0.2724219858646393, "learning_rate": 0.0008877024786023718, "loss": 0.3018, "num_input_tokens_seen": 78383744, "step": 36265 }, { "epoch": 5.916802610114193, "grad_norm": 0.08910335600376129, "learning_rate": 0.0008876575271575366, "loss": 0.086, "num_input_tokens_seen": 78394080, "step": 36270 }, { "epoch": 5.917618270799347, "grad_norm": 0.08044688403606415, "learning_rate": 0.0008876125678563301, "loss": 0.1419, "num_input_tokens_seen": 78404416, "step": 36275 }, { "epoch": 5.918433931484502, "grad_norm": 0.1438187211751938, "learning_rate": 0.0008875676006996631, "loss": 0.1822, "num_input_tokens_seen": 78414624, "step": 36280 }, { "epoch": 5.919249592169657, "grad_norm": 0.5074575543403625, "learning_rate": 0.0008875226256884471, "loss": 0.1391, "num_input_tokens_seen": 78424704, "step": 36285 }, { "epoch": 5.920065252854813, "grad_norm": 0.14423705637454987, "learning_rate": 0.0008874776428235933, "loss": 0.1201, "num_input_tokens_seen": 78436064, "step": 36290 }, { "epoch": 5.920880913539968, "grad_norm": 0.21933916211128235, "learning_rate": 0.0008874326521060138, "loss": 0.0663, "num_input_tokens_seen": 78447200, "step": 36295 }, { "epoch": 5.921696574225122, "grad_norm": 0.02093925140798092, "learning_rate": 0.0008873876535366199, "loss": 0.0535, "num_input_tokens_seen": 78459552, "step": 36300 }, { "epoch": 5.922512234910277, "grad_norm": 0.06091681867837906, "learning_rate": 0.0008873426471163238, "loss": 0.0752, "num_input_tokens_seen": 78470912, "step": 36305 }, { "epoch": 5.923327895595432, "grad_norm": 0.038125790655612946, "learning_rate": 0.0008872976328460376, "loss": 0.144, "num_input_tokens_seen": 78481728, "step": 36310 }, { "epoch": 5.924143556280587, "grad_norm": 0.2037804126739502, "learning_rate": 0.0008872526107266736, "loss": 0.1306, "num_input_tokens_seen": 78493248, "step": 36315 }, { "epoch": 5.924959216965743, "grad_norm": 0.17241688072681427, "learning_rate": 0.0008872075807591442, "loss": 0.273, "num_input_tokens_seen": 78504224, "step": 36320 }, { "epoch": 5.925774877650897, "grad_norm": 0.04938659444451332, "learning_rate": 0.0008871625429443617, "loss": 0.1786, "num_input_tokens_seen": 78514944, "step": 36325 }, { "epoch": 5.926590538336052, "grad_norm": 0.13024407625198364, "learning_rate": 0.0008871174972832394, "loss": 0.0791, "num_input_tokens_seen": 78525920, "step": 36330 }, { "epoch": 5.927406199021207, "grad_norm": 0.02717706933617592, "learning_rate": 0.0008870724437766898, "loss": 0.0405, "num_input_tokens_seen": 78537888, "step": 36335 }, { "epoch": 5.928221859706362, "grad_norm": 0.009567965753376484, "learning_rate": 0.0008870273824256261, "loss": 0.0324, "num_input_tokens_seen": 78547584, "step": 36340 }, { "epoch": 5.9290375203915175, "grad_norm": 0.0030902696307748556, "learning_rate": 0.0008869823132309616, "loss": 0.0572, "num_input_tokens_seen": 78557760, "step": 36345 }, { "epoch": 5.929853181076672, "grad_norm": 0.049891430884599686, "learning_rate": 0.0008869372361936096, "loss": 0.0572, "num_input_tokens_seen": 78568640, "step": 36350 }, { "epoch": 5.930668841761827, "grad_norm": 0.01841222122311592, "learning_rate": 0.0008868921513144835, "loss": 0.0685, "num_input_tokens_seen": 78580224, "step": 36355 }, { "epoch": 5.931484502446982, "grad_norm": 0.03582854941487312, "learning_rate": 0.0008868470585944972, "loss": 0.0341, "num_input_tokens_seen": 78590432, "step": 36360 }, { "epoch": 5.932300163132137, "grad_norm": 0.15736792981624603, "learning_rate": 0.0008868019580345645, "loss": 0.0631, "num_input_tokens_seen": 78601696, "step": 36365 }, { "epoch": 5.933115823817292, "grad_norm": 0.07770948112010956, "learning_rate": 0.0008867568496355996, "loss": 0.0571, "num_input_tokens_seen": 78613024, "step": 36370 }, { "epoch": 5.933931484502447, "grad_norm": 0.010174530558288097, "learning_rate": 0.0008867117333985164, "loss": 0.18, "num_input_tokens_seen": 78624064, "step": 36375 }, { "epoch": 5.934747145187602, "grad_norm": 0.08652004599571228, "learning_rate": 0.0008866666093242292, "loss": 0.0473, "num_input_tokens_seen": 78635104, "step": 36380 }, { "epoch": 5.935562805872757, "grad_norm": 0.17718979716300964, "learning_rate": 0.0008866214774136528, "loss": 0.2155, "num_input_tokens_seen": 78646272, "step": 36385 }, { "epoch": 5.936378466557912, "grad_norm": 0.0171345341950655, "learning_rate": 0.0008865763376677017, "loss": 0.0465, "num_input_tokens_seen": 78656640, "step": 36390 }, { "epoch": 5.937194127243067, "grad_norm": 0.13406716287136078, "learning_rate": 0.0008865311900872905, "loss": 0.1356, "num_input_tokens_seen": 78667200, "step": 36395 }, { "epoch": 5.938009787928221, "grad_norm": 0.08030443638563156, "learning_rate": 0.0008864860346733346, "loss": 0.0393, "num_input_tokens_seen": 78677184, "step": 36400 }, { "epoch": 5.938825448613377, "grad_norm": 0.08462988585233688, "learning_rate": 0.0008864408714267489, "loss": 0.1079, "num_input_tokens_seen": 78689440, "step": 36405 }, { "epoch": 5.939641109298532, "grad_norm": 0.35317009687423706, "learning_rate": 0.0008863957003484486, "loss": 0.112, "num_input_tokens_seen": 78700640, "step": 36410 }, { "epoch": 5.940456769983687, "grad_norm": 0.03767653927206993, "learning_rate": 0.0008863505214393494, "loss": 0.1616, "num_input_tokens_seen": 78710976, "step": 36415 }, { "epoch": 5.941272430668842, "grad_norm": 0.0903841108083725, "learning_rate": 0.0008863053347003667, "loss": 0.1289, "num_input_tokens_seen": 78722240, "step": 36420 }, { "epoch": 5.942088091353996, "grad_norm": 0.014996036887168884, "learning_rate": 0.0008862601401324162, "loss": 0.1195, "num_input_tokens_seen": 78732704, "step": 36425 }, { "epoch": 5.942903752039152, "grad_norm": 0.06513303518295288, "learning_rate": 0.0008862149377364142, "loss": 0.1074, "num_input_tokens_seen": 78742592, "step": 36430 }, { "epoch": 5.943719412724307, "grad_norm": 0.13003186881542206, "learning_rate": 0.0008861697275132763, "loss": 0.1357, "num_input_tokens_seen": 78752064, "step": 36435 }, { "epoch": 5.944535073409462, "grad_norm": 0.19689883291721344, "learning_rate": 0.0008861245094639193, "loss": 0.1116, "num_input_tokens_seen": 78761984, "step": 36440 }, { "epoch": 5.945350734094617, "grad_norm": 0.00810596626251936, "learning_rate": 0.000886079283589259, "loss": 0.0769, "num_input_tokens_seen": 78773504, "step": 36445 }, { "epoch": 5.946166394779771, "grad_norm": 0.014026161283254623, "learning_rate": 0.0008860340498902121, "loss": 0.1225, "num_input_tokens_seen": 78784352, "step": 36450 }, { "epoch": 5.946982055464927, "grad_norm": 0.08399257808923721, "learning_rate": 0.0008859888083676958, "loss": 0.1057, "num_input_tokens_seen": 78795968, "step": 36455 }, { "epoch": 5.947797716150082, "grad_norm": 0.08424603939056396, "learning_rate": 0.0008859435590226266, "loss": 0.1024, "num_input_tokens_seen": 78805984, "step": 36460 }, { "epoch": 5.948613376835237, "grad_norm": 0.02504083514213562, "learning_rate": 0.0008858983018559214, "loss": 0.0457, "num_input_tokens_seen": 78817184, "step": 36465 }, { "epoch": 5.9494290375203915, "grad_norm": 0.19386163353919983, "learning_rate": 0.0008858530368684977, "loss": 0.2027, "num_input_tokens_seen": 78828160, "step": 36470 }, { "epoch": 5.950244698205546, "grad_norm": 0.13911594450473785, "learning_rate": 0.0008858077640612727, "loss": 0.1198, "num_input_tokens_seen": 78838944, "step": 36475 }, { "epoch": 5.951060358890701, "grad_norm": 0.020422089844942093, "learning_rate": 0.0008857624834351639, "loss": 0.0332, "num_input_tokens_seen": 78848896, "step": 36480 }, { "epoch": 5.951876019575856, "grad_norm": 0.026278553530573845, "learning_rate": 0.000885717194991089, "loss": 0.0181, "num_input_tokens_seen": 78860608, "step": 36485 }, { "epoch": 5.952691680261012, "grad_norm": 0.008417508564889431, "learning_rate": 0.0008856718987299656, "loss": 0.0644, "num_input_tokens_seen": 78871200, "step": 36490 }, { "epoch": 5.9535073409461665, "grad_norm": 0.045608025044202805, "learning_rate": 0.0008856265946527122, "loss": 0.0382, "num_input_tokens_seen": 78882464, "step": 36495 }, { "epoch": 5.954323001631321, "grad_norm": 0.020935669541358948, "learning_rate": 0.0008855812827602465, "loss": 0.0739, "num_input_tokens_seen": 78892416, "step": 36500 }, { "epoch": 5.955138662316476, "grad_norm": 0.036478910595178604, "learning_rate": 0.0008855359630534871, "loss": 0.0398, "num_input_tokens_seen": 78904128, "step": 36505 }, { "epoch": 5.955954323001631, "grad_norm": 0.025049181655049324, "learning_rate": 0.0008854906355333522, "loss": 0.0129, "num_input_tokens_seen": 78913152, "step": 36510 }, { "epoch": 5.956769983686787, "grad_norm": 0.025541089475154877, "learning_rate": 0.0008854453002007607, "loss": 0.0664, "num_input_tokens_seen": 78924672, "step": 36515 }, { "epoch": 5.9575856443719415, "grad_norm": 0.013693660497665405, "learning_rate": 0.0008853999570566311, "loss": 0.0741, "num_input_tokens_seen": 78936224, "step": 36520 }, { "epoch": 5.958401305057096, "grad_norm": 0.05598202720284462, "learning_rate": 0.0008853546061018825, "loss": 0.1703, "num_input_tokens_seen": 78947200, "step": 36525 }, { "epoch": 5.959216965742251, "grad_norm": 0.030897224321961403, "learning_rate": 0.000885309247337434, "loss": 0.0402, "num_input_tokens_seen": 78959104, "step": 36530 }, { "epoch": 5.960032626427406, "grad_norm": 0.006449823267757893, "learning_rate": 0.0008852638807642048, "loss": 0.0547, "num_input_tokens_seen": 78970240, "step": 36535 }, { "epoch": 5.960848287112562, "grad_norm": 0.01938220113515854, "learning_rate": 0.0008852185063831142, "loss": 0.0472, "num_input_tokens_seen": 78979872, "step": 36540 }, { "epoch": 5.9616639477977165, "grad_norm": 0.01463254727423191, "learning_rate": 0.000885173124195082, "loss": 0.0383, "num_input_tokens_seen": 78992032, "step": 36545 }, { "epoch": 5.962479608482871, "grad_norm": 0.015324999578297138, "learning_rate": 0.0008851277342010278, "loss": 0.0787, "num_input_tokens_seen": 79002240, "step": 36550 }, { "epoch": 5.963295269168026, "grad_norm": 0.006455022841691971, "learning_rate": 0.0008850823364018715, "loss": 0.0297, "num_input_tokens_seen": 79013632, "step": 36555 }, { "epoch": 5.964110929853181, "grad_norm": 0.2201565057039261, "learning_rate": 0.0008850369307985328, "loss": 0.0426, "num_input_tokens_seen": 79023808, "step": 36560 }, { "epoch": 5.964926590538336, "grad_norm": 0.05902295187115669, "learning_rate": 0.0008849915173919327, "loss": 0.0371, "num_input_tokens_seen": 79034560, "step": 36565 }, { "epoch": 5.9657422512234906, "grad_norm": 0.013868676498532295, "learning_rate": 0.0008849460961829909, "loss": 0.0969, "num_input_tokens_seen": 79045824, "step": 36570 }, { "epoch": 5.966557911908646, "grad_norm": 0.33222365379333496, "learning_rate": 0.0008849006671726281, "loss": 0.3332, "num_input_tokens_seen": 79055648, "step": 36575 }, { "epoch": 5.967373572593801, "grad_norm": 0.2062501609325409, "learning_rate": 0.0008848552303617651, "loss": 0.0749, "num_input_tokens_seen": 79066752, "step": 36580 }, { "epoch": 5.968189233278956, "grad_norm": 0.036664046347141266, "learning_rate": 0.0008848097857513227, "loss": 0.0406, "num_input_tokens_seen": 79077984, "step": 36585 }, { "epoch": 5.969004893964111, "grad_norm": 0.1617347002029419, "learning_rate": 0.0008847643333422216, "loss": 0.0892, "num_input_tokens_seen": 79089888, "step": 36590 }, { "epoch": 5.9698205546492655, "grad_norm": 0.048055000603199005, "learning_rate": 0.0008847188731353833, "loss": 0.2289, "num_input_tokens_seen": 79101312, "step": 36595 }, { "epoch": 5.970636215334421, "grad_norm": 0.017596082761883736, "learning_rate": 0.0008846734051317289, "loss": 0.0828, "num_input_tokens_seen": 79111616, "step": 36600 }, { "epoch": 5.971451876019576, "grad_norm": 0.005010406486690044, "learning_rate": 0.0008846279293321801, "loss": 0.1542, "num_input_tokens_seen": 79121632, "step": 36605 }, { "epoch": 5.972267536704731, "grad_norm": 0.2197558879852295, "learning_rate": 0.0008845824457376583, "loss": 0.1049, "num_input_tokens_seen": 79132672, "step": 36610 }, { "epoch": 5.973083197389886, "grad_norm": 0.12246564775705338, "learning_rate": 0.0008845369543490853, "loss": 0.1219, "num_input_tokens_seen": 79142304, "step": 36615 }, { "epoch": 5.9738988580750405, "grad_norm": 0.38152462244033813, "learning_rate": 0.0008844914551673832, "loss": 0.1163, "num_input_tokens_seen": 79153088, "step": 36620 }, { "epoch": 5.974714518760196, "grad_norm": 0.05257618799805641, "learning_rate": 0.000884445948193474, "loss": 0.1031, "num_input_tokens_seen": 79164800, "step": 36625 }, { "epoch": 5.975530179445351, "grad_norm": 0.0162787064909935, "learning_rate": 0.0008844004334282801, "loss": 0.0145, "num_input_tokens_seen": 79175072, "step": 36630 }, { "epoch": 5.976345840130506, "grad_norm": 0.2210160195827484, "learning_rate": 0.0008843549108727234, "loss": 0.1055, "num_input_tokens_seen": 79186016, "step": 36635 }, { "epoch": 5.977161500815661, "grad_norm": 0.11478012800216675, "learning_rate": 0.0008843093805277271, "loss": 0.1818, "num_input_tokens_seen": 79196224, "step": 36640 }, { "epoch": 5.9779771615008155, "grad_norm": 0.05836905911564827, "learning_rate": 0.0008842638423942136, "loss": 0.1286, "num_input_tokens_seen": 79208320, "step": 36645 }, { "epoch": 5.97879282218597, "grad_norm": 0.0437001995742321, "learning_rate": 0.0008842182964731058, "loss": 0.0551, "num_input_tokens_seen": 79218528, "step": 36650 }, { "epoch": 5.979608482871125, "grad_norm": 0.09292764961719513, "learning_rate": 0.0008841727427653269, "loss": 0.0908, "num_input_tokens_seen": 79230144, "step": 36655 }, { "epoch": 5.980424143556281, "grad_norm": 0.186200350522995, "learning_rate": 0.0008841271812717999, "loss": 0.1414, "num_input_tokens_seen": 79240960, "step": 36660 }, { "epoch": 5.981239804241436, "grad_norm": 0.30797278881073, "learning_rate": 0.0008840816119934485, "loss": 0.1893, "num_input_tokens_seen": 79251616, "step": 36665 }, { "epoch": 5.9820554649265905, "grad_norm": 0.010716347023844719, "learning_rate": 0.0008840360349311958, "loss": 0.0335, "num_input_tokens_seen": 79262560, "step": 36670 }, { "epoch": 5.982871125611745, "grad_norm": 0.014386476948857307, "learning_rate": 0.0008839904500859656, "loss": 0.0863, "num_input_tokens_seen": 79274048, "step": 36675 }, { "epoch": 5.9836867862969, "grad_norm": 0.03421509265899658, "learning_rate": 0.0008839448574586821, "loss": 0.1078, "num_input_tokens_seen": 79284736, "step": 36680 }, { "epoch": 5.984502446982056, "grad_norm": 0.00656637828797102, "learning_rate": 0.0008838992570502687, "loss": 0.0994, "num_input_tokens_seen": 79295040, "step": 36685 }, { "epoch": 5.985318107667211, "grad_norm": 0.08180084824562073, "learning_rate": 0.0008838536488616499, "loss": 0.2041, "num_input_tokens_seen": 79305536, "step": 36690 }, { "epoch": 5.986133768352365, "grad_norm": 0.13446083664894104, "learning_rate": 0.0008838080328937501, "loss": 0.0699, "num_input_tokens_seen": 79316960, "step": 36695 }, { "epoch": 5.98694942903752, "grad_norm": 0.09626548737287521, "learning_rate": 0.0008837624091474935, "loss": 0.0536, "num_input_tokens_seen": 79327584, "step": 36700 }, { "epoch": 5.987765089722675, "grad_norm": 0.07584668695926666, "learning_rate": 0.0008837167776238049, "loss": 0.0289, "num_input_tokens_seen": 79338688, "step": 36705 }, { "epoch": 5.988580750407831, "grad_norm": 0.0042076618410646915, "learning_rate": 0.0008836711383236089, "loss": 0.1248, "num_input_tokens_seen": 79349920, "step": 36710 }, { "epoch": 5.989396411092986, "grad_norm": 0.019717322662472725, "learning_rate": 0.0008836254912478308, "loss": 0.0433, "num_input_tokens_seen": 79360192, "step": 36715 }, { "epoch": 5.99021207177814, "grad_norm": 0.004804224707186222, "learning_rate": 0.0008835798363973952, "loss": 0.2098, "num_input_tokens_seen": 79368992, "step": 36720 }, { "epoch": 5.991027732463295, "grad_norm": 0.020206429064273834, "learning_rate": 0.0008835341737732276, "loss": 0.0592, "num_input_tokens_seen": 79379616, "step": 36725 }, { "epoch": 5.99184339314845, "grad_norm": 0.0091730747371912, "learning_rate": 0.0008834885033762536, "loss": 0.0136, "num_input_tokens_seen": 79389216, "step": 36730 }, { "epoch": 5.992659053833605, "grad_norm": 0.12126602977514267, "learning_rate": 0.0008834428252073986, "loss": 0.0618, "num_input_tokens_seen": 79400064, "step": 36735 }, { "epoch": 5.993474714518761, "grad_norm": 0.02198721095919609, "learning_rate": 0.0008833971392675882, "loss": 0.0269, "num_input_tokens_seen": 79411808, "step": 36740 }, { "epoch": 5.994290375203915, "grad_norm": 0.006473301909863949, "learning_rate": 0.0008833514455577485, "loss": 0.0968, "num_input_tokens_seen": 79420928, "step": 36745 }, { "epoch": 5.99510603588907, "grad_norm": 0.11051056534051895, "learning_rate": 0.0008833057440788053, "loss": 0.0488, "num_input_tokens_seen": 79432768, "step": 36750 }, { "epoch": 5.995921696574225, "grad_norm": 0.09950881451368332, "learning_rate": 0.000883260034831685, "loss": 0.0649, "num_input_tokens_seen": 79442720, "step": 36755 }, { "epoch": 5.99673735725938, "grad_norm": 0.08756718784570694, "learning_rate": 0.000883214317817314, "loss": 0.0541, "num_input_tokens_seen": 79453344, "step": 36760 }, { "epoch": 5.997553017944535, "grad_norm": 0.1397211253643036, "learning_rate": 0.0008831685930366187, "loss": 0.0768, "num_input_tokens_seen": 79463424, "step": 36765 }, { "epoch": 5.99836867862969, "grad_norm": 0.004025152884423733, "learning_rate": 0.0008831228604905257, "loss": 0.039, "num_input_tokens_seen": 79475264, "step": 36770 }, { "epoch": 5.999184339314845, "grad_norm": 0.0041638934053480625, "learning_rate": 0.0008830771201799619, "loss": 0.0379, "num_input_tokens_seen": 79486464, "step": 36775 }, { "epoch": 6.0, "grad_norm": 0.06496407091617584, "learning_rate": 0.0008830313721058543, "loss": 0.0721, "num_input_tokens_seen": 79495984, "step": 36780 }, { "epoch": 6.0, "eval_loss": 0.12523896992206573, "eval_runtime": 103.3104, "eval_samples_per_second": 26.377, "eval_steps_per_second": 6.601, "num_input_tokens_seen": 79495984, "step": 36780 }, { "epoch": 6.000815660685155, "grad_norm": 0.19501720368862152, "learning_rate": 0.00088298561626913, "loss": 0.1103, "num_input_tokens_seen": 79505904, "step": 36785 }, { "epoch": 6.00163132137031, "grad_norm": 0.0032038709614425898, "learning_rate": 0.0008829398526707164, "loss": 0.0167, "num_input_tokens_seen": 79516880, "step": 36790 }, { "epoch": 6.002446982055465, "grad_norm": 0.0025172571185976267, "learning_rate": 0.0008828940813115408, "loss": 0.1738, "num_input_tokens_seen": 79527472, "step": 36795 }, { "epoch": 6.00326264274062, "grad_norm": 0.3164461553096771, "learning_rate": 0.000882848302192531, "loss": 0.1231, "num_input_tokens_seen": 79538704, "step": 36800 }, { "epoch": 6.004078303425775, "grad_norm": 0.07731668651103973, "learning_rate": 0.0008828025153146147, "loss": 0.1044, "num_input_tokens_seen": 79550096, "step": 36805 }, { "epoch": 6.00489396411093, "grad_norm": 0.019409824162721634, "learning_rate": 0.0008827567206787197, "loss": 0.1054, "num_input_tokens_seen": 79561488, "step": 36810 }, { "epoch": 6.005709624796085, "grad_norm": 0.038065653294324875, "learning_rate": 0.0008827109182857742, "loss": 0.135, "num_input_tokens_seen": 79572048, "step": 36815 }, { "epoch": 6.006525285481239, "grad_norm": 0.04077430069446564, "learning_rate": 0.0008826651081367065, "loss": 0.0517, "num_input_tokens_seen": 79582160, "step": 36820 }, { "epoch": 6.007340946166395, "grad_norm": 0.25474321842193604, "learning_rate": 0.0008826192902324449, "loss": 0.1317, "num_input_tokens_seen": 79592304, "step": 36825 }, { "epoch": 6.00815660685155, "grad_norm": 0.23740410804748535, "learning_rate": 0.0008825734645739181, "loss": 0.1233, "num_input_tokens_seen": 79602384, "step": 36830 }, { "epoch": 6.008972267536705, "grad_norm": 0.18980100750923157, "learning_rate": 0.0008825276311620546, "loss": 0.1412, "num_input_tokens_seen": 79611440, "step": 36835 }, { "epoch": 6.00978792822186, "grad_norm": 0.08281727135181427, "learning_rate": 0.0008824817899977834, "loss": 0.0658, "num_input_tokens_seen": 79622800, "step": 36840 }, { "epoch": 6.010603588907014, "grad_norm": 0.06491725146770477, "learning_rate": 0.0008824359410820335, "loss": 0.0888, "num_input_tokens_seen": 79634608, "step": 36845 }, { "epoch": 6.011419249592169, "grad_norm": 0.10696760565042496, "learning_rate": 0.0008823900844157342, "loss": 0.1227, "num_input_tokens_seen": 79646512, "step": 36850 }, { "epoch": 6.012234910277325, "grad_norm": 0.2877226769924164, "learning_rate": 0.0008823442199998147, "loss": 0.0735, "num_input_tokens_seen": 79658160, "step": 36855 }, { "epoch": 6.01305057096248, "grad_norm": 0.04781525209546089, "learning_rate": 0.0008822983478352044, "loss": 0.1068, "num_input_tokens_seen": 79668304, "step": 36860 }, { "epoch": 6.013866231647635, "grad_norm": 0.29813289642333984, "learning_rate": 0.0008822524679228332, "loss": 0.1074, "num_input_tokens_seen": 79680048, "step": 36865 }, { "epoch": 6.014681892332789, "grad_norm": 0.11078273504972458, "learning_rate": 0.0008822065802636308, "loss": 0.0679, "num_input_tokens_seen": 79690224, "step": 36870 }, { "epoch": 6.015497553017944, "grad_norm": 0.03565460816025734, "learning_rate": 0.0008821606848585273, "loss": 0.0859, "num_input_tokens_seen": 79700688, "step": 36875 }, { "epoch": 6.0163132137031, "grad_norm": 0.02995418943464756, "learning_rate": 0.0008821147817084526, "loss": 0.0601, "num_input_tokens_seen": 79711120, "step": 36880 }, { "epoch": 6.017128874388255, "grad_norm": 0.040033359080553055, "learning_rate": 0.0008820688708143372, "loss": 0.0825, "num_input_tokens_seen": 79721360, "step": 36885 }, { "epoch": 6.0179445350734095, "grad_norm": 0.19602453708648682, "learning_rate": 0.0008820229521771112, "loss": 0.0882, "num_input_tokens_seen": 79732048, "step": 36890 }, { "epoch": 6.018760195758564, "grad_norm": 0.03513888269662857, "learning_rate": 0.0008819770257977058, "loss": 0.2078, "num_input_tokens_seen": 79742864, "step": 36895 }, { "epoch": 6.019575856443719, "grad_norm": 0.009727993980050087, "learning_rate": 0.0008819310916770511, "loss": 0.0816, "num_input_tokens_seen": 79754512, "step": 36900 }, { "epoch": 6.020391517128874, "grad_norm": 0.04514655843377113, "learning_rate": 0.0008818851498160785, "loss": 0.0333, "num_input_tokens_seen": 79766032, "step": 36905 }, { "epoch": 6.02120717781403, "grad_norm": 0.15755507349967957, "learning_rate": 0.0008818392002157188, "loss": 0.0285, "num_input_tokens_seen": 79775792, "step": 36910 }, { "epoch": 6.0220228384991845, "grad_norm": 0.07789606600999832, "learning_rate": 0.0008817932428769033, "loss": 0.0898, "num_input_tokens_seen": 79786576, "step": 36915 }, { "epoch": 6.022838499184339, "grad_norm": 0.05118430405855179, "learning_rate": 0.0008817472778005635, "loss": 0.1647, "num_input_tokens_seen": 79797456, "step": 36920 }, { "epoch": 6.023654159869494, "grad_norm": 0.030694983899593353, "learning_rate": 0.0008817013049876308, "loss": 0.1446, "num_input_tokens_seen": 79808944, "step": 36925 }, { "epoch": 6.024469820554649, "grad_norm": 0.023417538031935692, "learning_rate": 0.0008816553244390368, "loss": 0.1296, "num_input_tokens_seen": 79820592, "step": 36930 }, { "epoch": 6.025285481239805, "grad_norm": 0.03646084666252136, "learning_rate": 0.0008816093361557136, "loss": 0.0359, "num_input_tokens_seen": 79832592, "step": 36935 }, { "epoch": 6.0261011419249595, "grad_norm": 0.08928578346967697, "learning_rate": 0.0008815633401385932, "loss": 0.0778, "num_input_tokens_seen": 79843536, "step": 36940 }, { "epoch": 6.026916802610114, "grad_norm": 0.01563401333987713, "learning_rate": 0.0008815173363886075, "loss": 0.1516, "num_input_tokens_seen": 79854000, "step": 36945 }, { "epoch": 6.027732463295269, "grad_norm": 0.3969258964061737, "learning_rate": 0.000881471324906689, "loss": 0.076, "num_input_tokens_seen": 79864688, "step": 36950 }, { "epoch": 6.028548123980424, "grad_norm": 0.029993494972586632, "learning_rate": 0.0008814253056937702, "loss": 0.0336, "num_input_tokens_seen": 79876208, "step": 36955 }, { "epoch": 6.029363784665579, "grad_norm": 0.10093618929386139, "learning_rate": 0.0008813792787507837, "loss": 0.065, "num_input_tokens_seen": 79887568, "step": 36960 }, { "epoch": 6.0301794453507345, "grad_norm": 0.012898314744234085, "learning_rate": 0.0008813332440786623, "loss": 0.0167, "num_input_tokens_seen": 79897584, "step": 36965 }, { "epoch": 6.030995106035889, "grad_norm": 0.0038545397110283375, "learning_rate": 0.0008812872016783389, "loss": 0.0186, "num_input_tokens_seen": 79910096, "step": 36970 }, { "epoch": 6.031810766721044, "grad_norm": 0.3187786340713501, "learning_rate": 0.0008812411515507468, "loss": 0.2138, "num_input_tokens_seen": 79921200, "step": 36975 }, { "epoch": 6.032626427406199, "grad_norm": 0.21439680457115173, "learning_rate": 0.000881195093696819, "loss": 0.0556, "num_input_tokens_seen": 79932496, "step": 36980 }, { "epoch": 6.033442088091354, "grad_norm": 0.30229452252388, "learning_rate": 0.000881149028117489, "loss": 0.282, "num_input_tokens_seen": 79942992, "step": 36985 }, { "epoch": 6.034257748776509, "grad_norm": 0.04235079139471054, "learning_rate": 0.0008811029548136906, "loss": 0.1579, "num_input_tokens_seen": 79955504, "step": 36990 }, { "epoch": 6.035073409461664, "grad_norm": 0.1639384925365448, "learning_rate": 0.0008810568737863574, "loss": 0.1658, "num_input_tokens_seen": 79965520, "step": 36995 }, { "epoch": 6.035889070146819, "grad_norm": 0.1602403074502945, "learning_rate": 0.000881010785036423, "loss": 0.0947, "num_input_tokens_seen": 79977264, "step": 37000 }, { "epoch": 6.036704730831974, "grad_norm": 0.3046966791152954, "learning_rate": 0.0008809646885648218, "loss": 0.1439, "num_input_tokens_seen": 79986832, "step": 37005 }, { "epoch": 6.037520391517129, "grad_norm": 0.07625728845596313, "learning_rate": 0.000880918584372488, "loss": 0.041, "num_input_tokens_seen": 79998160, "step": 37010 }, { "epoch": 6.0383360522022835, "grad_norm": 0.21763290464878082, "learning_rate": 0.0008808724724603558, "loss": 0.1222, "num_input_tokens_seen": 80008720, "step": 37015 }, { "epoch": 6.039151712887439, "grad_norm": 0.031195595860481262, "learning_rate": 0.0008808263528293596, "loss": 0.0334, "num_input_tokens_seen": 80017872, "step": 37020 }, { "epoch": 6.039967373572594, "grad_norm": 0.25529634952545166, "learning_rate": 0.0008807802254804344, "loss": 0.2067, "num_input_tokens_seen": 80027664, "step": 37025 }, { "epoch": 6.040783034257749, "grad_norm": 0.12576717138290405, "learning_rate": 0.000880734090414515, "loss": 0.166, "num_input_tokens_seen": 80039152, "step": 37030 }, { "epoch": 6.041598694942904, "grad_norm": 0.013903754763305187, "learning_rate": 0.000880687947632536, "loss": 0.0329, "num_input_tokens_seen": 80049904, "step": 37035 }, { "epoch": 6.0424143556280585, "grad_norm": 0.01922302134335041, "learning_rate": 0.000880641797135433, "loss": 0.0433, "num_input_tokens_seen": 80060496, "step": 37040 }, { "epoch": 6.043230016313213, "grad_norm": 0.2248125970363617, "learning_rate": 0.000880595638924141, "loss": 0.1103, "num_input_tokens_seen": 80070992, "step": 37045 }, { "epoch": 6.044045676998369, "grad_norm": 0.04535336047410965, "learning_rate": 0.0008805494729995957, "loss": 0.0237, "num_input_tokens_seen": 80081424, "step": 37050 }, { "epoch": 6.044861337683524, "grad_norm": 0.07262910157442093, "learning_rate": 0.0008805032993627324, "loss": 0.0378, "num_input_tokens_seen": 80091824, "step": 37055 }, { "epoch": 6.045676998368679, "grad_norm": 0.1967252790927887, "learning_rate": 0.0008804571180144871, "loss": 0.1032, "num_input_tokens_seen": 80102960, "step": 37060 }, { "epoch": 6.0464926590538335, "grad_norm": 0.035517822951078415, "learning_rate": 0.0008804109289557956, "loss": 0.079, "num_input_tokens_seen": 80114096, "step": 37065 }, { "epoch": 6.047308319738988, "grad_norm": 0.02115003764629364, "learning_rate": 0.0008803647321875942, "loss": 0.0263, "num_input_tokens_seen": 80124976, "step": 37070 }, { "epoch": 6.048123980424143, "grad_norm": 0.08668390661478043, "learning_rate": 0.0008803185277108188, "loss": 0.1535, "num_input_tokens_seen": 80135376, "step": 37075 }, { "epoch": 6.048939641109299, "grad_norm": 0.014792878180742264, "learning_rate": 0.0008802723155264061, "loss": 0.0525, "num_input_tokens_seen": 80145328, "step": 37080 }, { "epoch": 6.049755301794454, "grad_norm": 0.055387213826179504, "learning_rate": 0.0008802260956352924, "loss": 0.0516, "num_input_tokens_seen": 80155856, "step": 37085 }, { "epoch": 6.0505709624796085, "grad_norm": 0.020852621644735336, "learning_rate": 0.0008801798680384145, "loss": 0.0232, "num_input_tokens_seen": 80165136, "step": 37090 }, { "epoch": 6.051386623164763, "grad_norm": 0.03771822154521942, "learning_rate": 0.0008801336327367096, "loss": 0.0203, "num_input_tokens_seen": 80175504, "step": 37095 }, { "epoch": 6.052202283849918, "grad_norm": 0.05520971119403839, "learning_rate": 0.0008800873897311141, "loss": 0.1523, "num_input_tokens_seen": 80187632, "step": 37100 }, { "epoch": 6.053017944535074, "grad_norm": 0.025719482451677322, "learning_rate": 0.0008800411390225655, "loss": 0.0728, "num_input_tokens_seen": 80198576, "step": 37105 }, { "epoch": 6.053833605220229, "grad_norm": 0.01990801841020584, "learning_rate": 0.000879994880612001, "loss": 0.03, "num_input_tokens_seen": 80209232, "step": 37110 }, { "epoch": 6.054649265905383, "grad_norm": 0.00535226333886385, "learning_rate": 0.0008799486145003583, "loss": 0.0637, "num_input_tokens_seen": 80220336, "step": 37115 }, { "epoch": 6.055464926590538, "grad_norm": 0.3502698838710785, "learning_rate": 0.0008799023406885751, "loss": 0.0758, "num_input_tokens_seen": 80230384, "step": 37120 }, { "epoch": 6.056280587275693, "grad_norm": 0.006463268771767616, "learning_rate": 0.0008798560591775889, "loss": 0.0427, "num_input_tokens_seen": 80241136, "step": 37125 }, { "epoch": 6.057096247960848, "grad_norm": 0.35583990812301636, "learning_rate": 0.0008798097699683376, "loss": 0.1532, "num_input_tokens_seen": 80252688, "step": 37130 }, { "epoch": 6.057911908646004, "grad_norm": 0.0426289327442646, "learning_rate": 0.0008797634730617598, "loss": 0.1269, "num_input_tokens_seen": 80263728, "step": 37135 }, { "epoch": 6.058727569331158, "grad_norm": 0.20918045938014984, "learning_rate": 0.0008797171684587933, "loss": 0.2027, "num_input_tokens_seen": 80273104, "step": 37140 }, { "epoch": 6.059543230016313, "grad_norm": 0.21897371113300323, "learning_rate": 0.0008796708561603766, "loss": 0.1077, "num_input_tokens_seen": 80282576, "step": 37145 }, { "epoch": 6.060358890701468, "grad_norm": 0.042841531336307526, "learning_rate": 0.0008796245361674484, "loss": 0.0861, "num_input_tokens_seen": 80293840, "step": 37150 }, { "epoch": 6.061174551386623, "grad_norm": 0.06690218299627304, "learning_rate": 0.0008795782084809473, "loss": 0.0386, "num_input_tokens_seen": 80304656, "step": 37155 }, { "epoch": 6.061990212071779, "grad_norm": 0.1094597652554512, "learning_rate": 0.0008795318731018124, "loss": 0.0313, "num_input_tokens_seen": 80315984, "step": 37160 }, { "epoch": 6.062805872756933, "grad_norm": 0.1659562736749649, "learning_rate": 0.0008794855300309827, "loss": 0.0799, "num_input_tokens_seen": 80326992, "step": 37165 }, { "epoch": 6.063621533442088, "grad_norm": 0.14440147578716278, "learning_rate": 0.0008794391792693973, "loss": 0.1157, "num_input_tokens_seen": 80337712, "step": 37170 }, { "epoch": 6.064437194127243, "grad_norm": 0.01738247647881508, "learning_rate": 0.0008793928208179955, "loss": 0.1882, "num_input_tokens_seen": 80348560, "step": 37175 }, { "epoch": 6.065252854812398, "grad_norm": 0.25447797775268555, "learning_rate": 0.000879346454677717, "loss": 0.1146, "num_input_tokens_seen": 80359664, "step": 37180 }, { "epoch": 6.066068515497553, "grad_norm": 0.3118617832660675, "learning_rate": 0.0008793000808495012, "loss": 0.0427, "num_input_tokens_seen": 80370064, "step": 37185 }, { "epoch": 6.066884176182708, "grad_norm": 0.008864902891218662, "learning_rate": 0.0008792536993342882, "loss": 0.0349, "num_input_tokens_seen": 80380880, "step": 37190 }, { "epoch": 6.067699836867863, "grad_norm": 0.10314369946718216, "learning_rate": 0.0008792073101330177, "loss": 0.212, "num_input_tokens_seen": 80392144, "step": 37195 }, { "epoch": 6.068515497553018, "grad_norm": 0.04900093749165535, "learning_rate": 0.00087916091324663, "loss": 0.0646, "num_input_tokens_seen": 80403024, "step": 37200 }, { "epoch": 6.069331158238173, "grad_norm": 0.0865040123462677, "learning_rate": 0.0008791145086760656, "loss": 0.0694, "num_input_tokens_seen": 80413488, "step": 37205 }, { "epoch": 6.070146818923328, "grad_norm": 0.1456834226846695, "learning_rate": 0.0008790680964222647, "loss": 0.1613, "num_input_tokens_seen": 80424176, "step": 37210 }, { "epoch": 6.0709624796084825, "grad_norm": 0.1410757303237915, "learning_rate": 0.000879021676486168, "loss": 0.0648, "num_input_tokens_seen": 80434256, "step": 37215 }, { "epoch": 6.071778140293638, "grad_norm": 0.02068004384636879, "learning_rate": 0.0008789752488687159, "loss": 0.0562, "num_input_tokens_seen": 80444176, "step": 37220 }, { "epoch": 6.072593800978793, "grad_norm": 0.1710319072008133, "learning_rate": 0.00087892881357085, "loss": 0.0422, "num_input_tokens_seen": 80455952, "step": 37225 }, { "epoch": 6.073409461663948, "grad_norm": 0.08538807928562164, "learning_rate": 0.0008788823705935107, "loss": 0.0306, "num_input_tokens_seen": 80467024, "step": 37230 }, { "epoch": 6.074225122349103, "grad_norm": 0.1996658742427826, "learning_rate": 0.0008788359199376396, "loss": 0.1149, "num_input_tokens_seen": 80477488, "step": 37235 }, { "epoch": 6.075040783034257, "grad_norm": 0.09354493021965027, "learning_rate": 0.0008787894616041781, "loss": 0.0556, "num_input_tokens_seen": 80488080, "step": 37240 }, { "epoch": 6.075856443719413, "grad_norm": 0.1762208789587021, "learning_rate": 0.0008787429955940675, "loss": 0.1917, "num_input_tokens_seen": 80499664, "step": 37245 }, { "epoch": 6.076672104404568, "grad_norm": 0.018932653591036797, "learning_rate": 0.0008786965219082497, "loss": 0.0719, "num_input_tokens_seen": 80510544, "step": 37250 }, { "epoch": 6.077487765089723, "grad_norm": 0.018798017874360085, "learning_rate": 0.0008786500405476664, "loss": 0.0172, "num_input_tokens_seen": 80520880, "step": 37255 }, { "epoch": 6.078303425774878, "grad_norm": 0.2359062135219574, "learning_rate": 0.0008786035515132598, "loss": 0.1641, "num_input_tokens_seen": 80532784, "step": 37260 }, { "epoch": 6.079119086460032, "grad_norm": 0.14479638636112213, "learning_rate": 0.0008785570548059718, "loss": 0.053, "num_input_tokens_seen": 80542832, "step": 37265 }, { "epoch": 6.079934747145187, "grad_norm": 0.2287491112947464, "learning_rate": 0.0008785105504267449, "loss": 0.0875, "num_input_tokens_seen": 80553744, "step": 37270 }, { "epoch": 6.080750407830343, "grad_norm": 0.1595332771539688, "learning_rate": 0.0008784640383765215, "loss": 0.0695, "num_input_tokens_seen": 80564432, "step": 37275 }, { "epoch": 6.081566068515498, "grad_norm": 0.01782151870429516, "learning_rate": 0.0008784175186562442, "loss": 0.1004, "num_input_tokens_seen": 80575856, "step": 37280 }, { "epoch": 6.082381729200653, "grad_norm": 0.1804230660200119, "learning_rate": 0.000878370991266856, "loss": 0.0825, "num_input_tokens_seen": 80586096, "step": 37285 }, { "epoch": 6.083197389885807, "grad_norm": 0.027629168704152107, "learning_rate": 0.0008783244562092996, "loss": 0.0175, "num_input_tokens_seen": 80597040, "step": 37290 }, { "epoch": 6.084013050570962, "grad_norm": 0.03521690145134926, "learning_rate": 0.0008782779134845181, "loss": 0.14, "num_input_tokens_seen": 80609168, "step": 37295 }, { "epoch": 6.084828711256117, "grad_norm": 0.02052682265639305, "learning_rate": 0.0008782313630934548, "loss": 0.1297, "num_input_tokens_seen": 80620400, "step": 37300 }, { "epoch": 6.085644371941273, "grad_norm": 0.33327436447143555, "learning_rate": 0.0008781848050370531, "loss": 0.1538, "num_input_tokens_seen": 80633104, "step": 37305 }, { "epoch": 6.0864600326264275, "grad_norm": 0.012549174949526787, "learning_rate": 0.0008781382393162566, "loss": 0.0184, "num_input_tokens_seen": 80643056, "step": 37310 }, { "epoch": 6.087275693311582, "grad_norm": 0.0040662651881575584, "learning_rate": 0.0008780916659320091, "loss": 0.1361, "num_input_tokens_seen": 80653424, "step": 37315 }, { "epoch": 6.088091353996737, "grad_norm": 0.03261783719062805, "learning_rate": 0.0008780450848852541, "loss": 0.0318, "num_input_tokens_seen": 80663376, "step": 37320 }, { "epoch": 6.088907014681892, "grad_norm": 0.0901263877749443, "learning_rate": 0.0008779984961769361, "loss": 0.0454, "num_input_tokens_seen": 80674192, "step": 37325 }, { "epoch": 6.089722675367048, "grad_norm": 0.03137005493044853, "learning_rate": 0.0008779518998079988, "loss": 0.0617, "num_input_tokens_seen": 80685840, "step": 37330 }, { "epoch": 6.0905383360522025, "grad_norm": 0.18100182712078094, "learning_rate": 0.000877905295779387, "loss": 0.0995, "num_input_tokens_seen": 80696880, "step": 37335 }, { "epoch": 6.091353996737357, "grad_norm": 0.2891119718551636, "learning_rate": 0.0008778586840920449, "loss": 0.0969, "num_input_tokens_seen": 80707952, "step": 37340 }, { "epoch": 6.092169657422512, "grad_norm": 0.14484070241451263, "learning_rate": 0.0008778120647469172, "loss": 0.0299, "num_input_tokens_seen": 80719504, "step": 37345 }, { "epoch": 6.092985318107667, "grad_norm": 0.1225421279668808, "learning_rate": 0.0008777654377449487, "loss": 0.0167, "num_input_tokens_seen": 80730448, "step": 37350 }, { "epoch": 6.093800978792822, "grad_norm": 0.23504728078842163, "learning_rate": 0.0008777188030870845, "loss": 0.1477, "num_input_tokens_seen": 80742480, "step": 37355 }, { "epoch": 6.0946166394779775, "grad_norm": 0.13727766275405884, "learning_rate": 0.0008776721607742695, "loss": 0.199, "num_input_tokens_seen": 80753488, "step": 37360 }, { "epoch": 6.095432300163132, "grad_norm": 0.014772283844649792, "learning_rate": 0.0008776255108074489, "loss": 0.2143, "num_input_tokens_seen": 80764560, "step": 37365 }, { "epoch": 6.096247960848287, "grad_norm": 0.05206260085105896, "learning_rate": 0.0008775788531875685, "loss": 0.0618, "num_input_tokens_seen": 80776496, "step": 37370 }, { "epoch": 6.097063621533442, "grad_norm": 0.08829730749130249, "learning_rate": 0.0008775321879155735, "loss": 0.0907, "num_input_tokens_seen": 80786576, "step": 37375 }, { "epoch": 6.097879282218597, "grad_norm": 0.021104643121361732, "learning_rate": 0.0008774855149924099, "loss": 0.0409, "num_input_tokens_seen": 80798256, "step": 37380 }, { "epoch": 6.0986949429037525, "grad_norm": 0.10581175982952118, "learning_rate": 0.0008774388344190234, "loss": 0.0454, "num_input_tokens_seen": 80807056, "step": 37385 }, { "epoch": 6.099510603588907, "grad_norm": 0.02822595275938511, "learning_rate": 0.0008773921461963601, "loss": 0.1279, "num_input_tokens_seen": 80818160, "step": 37390 }, { "epoch": 6.100326264274062, "grad_norm": 0.019954686984419823, "learning_rate": 0.0008773454503253662, "loss": 0.0432, "num_input_tokens_seen": 80828144, "step": 37395 }, { "epoch": 6.101141924959217, "grad_norm": 0.07640758901834488, "learning_rate": 0.0008772987468069881, "loss": 0.0477, "num_input_tokens_seen": 80839696, "step": 37400 }, { "epoch": 6.101957585644372, "grad_norm": 0.01018131896853447, "learning_rate": 0.0008772520356421723, "loss": 0.0103, "num_input_tokens_seen": 80852912, "step": 37405 }, { "epoch": 6.102773246329527, "grad_norm": 0.10560321807861328, "learning_rate": 0.0008772053168318653, "loss": 0.0675, "num_input_tokens_seen": 80864336, "step": 37410 }, { "epoch": 6.103588907014682, "grad_norm": 0.005883463192731142, "learning_rate": 0.000877158590377014, "loss": 0.0162, "num_input_tokens_seen": 80874576, "step": 37415 }, { "epoch": 6.104404567699837, "grad_norm": 0.14068001508712769, "learning_rate": 0.0008771118562785656, "loss": 0.0542, "num_input_tokens_seen": 80885904, "step": 37420 }, { "epoch": 6.105220228384992, "grad_norm": 0.23875434696674347, "learning_rate": 0.0008770651145374669, "loss": 0.1259, "num_input_tokens_seen": 80894992, "step": 37425 }, { "epoch": 6.106035889070147, "grad_norm": 0.009279138408601284, "learning_rate": 0.0008770183651546653, "loss": 0.0263, "num_input_tokens_seen": 80906448, "step": 37430 }, { "epoch": 6.1068515497553015, "grad_norm": 0.24461911618709564, "learning_rate": 0.0008769716081311083, "loss": 0.0408, "num_input_tokens_seen": 80916048, "step": 37435 }, { "epoch": 6.107667210440456, "grad_norm": 0.008548499085009098, "learning_rate": 0.0008769248434677434, "loss": 0.0109, "num_input_tokens_seen": 80926288, "step": 37440 }, { "epoch": 6.108482871125612, "grad_norm": 0.2648637890815735, "learning_rate": 0.0008768780711655185, "loss": 0.1377, "num_input_tokens_seen": 80937840, "step": 37445 }, { "epoch": 6.109298531810767, "grad_norm": 0.3084779977798462, "learning_rate": 0.0008768312912253811, "loss": 0.0928, "num_input_tokens_seen": 80947472, "step": 37450 }, { "epoch": 6.110114192495922, "grad_norm": 0.034691330045461655, "learning_rate": 0.0008767845036482798, "loss": 0.0082, "num_input_tokens_seen": 80956528, "step": 37455 }, { "epoch": 6.1109298531810765, "grad_norm": 0.04501752927899361, "learning_rate": 0.0008767377084351625, "loss": 0.0208, "num_input_tokens_seen": 80966288, "step": 37460 }, { "epoch": 6.111745513866231, "grad_norm": 0.056020502001047134, "learning_rate": 0.0008766909055869777, "loss": 0.0741, "num_input_tokens_seen": 80977456, "step": 37465 }, { "epoch": 6.112561174551387, "grad_norm": 0.012287724763154984, "learning_rate": 0.0008766440951046736, "loss": 0.0528, "num_input_tokens_seen": 80989168, "step": 37470 }, { "epoch": 6.113376835236542, "grad_norm": 0.28466975688934326, "learning_rate": 0.0008765972769891993, "loss": 0.1104, "num_input_tokens_seen": 80998256, "step": 37475 }, { "epoch": 6.114192495921697, "grad_norm": 0.03604330122470856, "learning_rate": 0.0008765504512415033, "loss": 0.0634, "num_input_tokens_seen": 81009840, "step": 37480 }, { "epoch": 6.1150081566068515, "grad_norm": 0.09890235960483551, "learning_rate": 0.0008765036178625347, "loss": 0.1132, "num_input_tokens_seen": 81020624, "step": 37485 }, { "epoch": 6.115823817292006, "grad_norm": 0.21546748280525208, "learning_rate": 0.0008764567768532427, "loss": 0.1936, "num_input_tokens_seen": 81032368, "step": 37490 }, { "epoch": 6.116639477977161, "grad_norm": 0.007293385919183493, "learning_rate": 0.0008764099282145767, "loss": 0.0463, "num_input_tokens_seen": 81043856, "step": 37495 }, { "epoch": 6.117455138662317, "grad_norm": 0.2758633494377136, "learning_rate": 0.0008763630719474857, "loss": 0.1094, "num_input_tokens_seen": 81054704, "step": 37500 }, { "epoch": 6.118270799347472, "grad_norm": 0.36068642139434814, "learning_rate": 0.0008763162080529199, "loss": 0.0931, "num_input_tokens_seen": 81066800, "step": 37505 }, { "epoch": 6.1190864600326265, "grad_norm": 0.1663595587015152, "learning_rate": 0.0008762693365318286, "loss": 0.0633, "num_input_tokens_seen": 81078192, "step": 37510 }, { "epoch": 6.119902120717781, "grad_norm": 0.2499445080757141, "learning_rate": 0.0008762224573851619, "loss": 0.167, "num_input_tokens_seen": 81089200, "step": 37515 }, { "epoch": 6.120717781402936, "grad_norm": 0.014237415045499802, "learning_rate": 0.0008761755706138698, "loss": 0.0442, "num_input_tokens_seen": 81101552, "step": 37520 }, { "epoch": 6.121533442088092, "grad_norm": 0.04967695102095604, "learning_rate": 0.0008761286762189027, "loss": 0.0679, "num_input_tokens_seen": 81112080, "step": 37525 }, { "epoch": 6.122349102773247, "grad_norm": 0.04989294335246086, "learning_rate": 0.0008760817742012106, "loss": 0.0584, "num_input_tokens_seen": 81121744, "step": 37530 }, { "epoch": 6.123164763458401, "grad_norm": 0.00509643042460084, "learning_rate": 0.0008760348645617444, "loss": 0.1544, "num_input_tokens_seen": 81132912, "step": 37535 }, { "epoch": 6.123980424143556, "grad_norm": 0.006603681482374668, "learning_rate": 0.0008759879473014545, "loss": 0.0361, "num_input_tokens_seen": 81142032, "step": 37540 }, { "epoch": 6.124796084828711, "grad_norm": 0.16137801110744476, "learning_rate": 0.000875941022421292, "loss": 0.1599, "num_input_tokens_seen": 81154160, "step": 37545 }, { "epoch": 6.125611745513866, "grad_norm": 0.10797300934791565, "learning_rate": 0.0008758940899222077, "loss": 0.0749, "num_input_tokens_seen": 81164272, "step": 37550 }, { "epoch": 6.126427406199022, "grad_norm": 0.005488535389304161, "learning_rate": 0.0008758471498051528, "loss": 0.0849, "num_input_tokens_seen": 81174544, "step": 37555 }, { "epoch": 6.127243066884176, "grad_norm": 0.08023177087306976, "learning_rate": 0.0008758002020710787, "loss": 0.2513, "num_input_tokens_seen": 81186032, "step": 37560 }, { "epoch": 6.128058727569331, "grad_norm": 0.022178582847118378, "learning_rate": 0.0008757532467209367, "loss": 0.017, "num_input_tokens_seen": 81195472, "step": 37565 }, { "epoch": 6.128874388254486, "grad_norm": 0.01314950454980135, "learning_rate": 0.0008757062837556784, "loss": 0.0286, "num_input_tokens_seen": 81206096, "step": 37570 }, { "epoch": 6.129690048939641, "grad_norm": 0.0685277059674263, "learning_rate": 0.0008756593131762557, "loss": 0.1684, "num_input_tokens_seen": 81216464, "step": 37575 }, { "epoch": 6.130505709624796, "grad_norm": 0.03624110668897629, "learning_rate": 0.0008756123349836206, "loss": 0.0953, "num_input_tokens_seen": 81227440, "step": 37580 }, { "epoch": 6.131321370309951, "grad_norm": 0.12655434012413025, "learning_rate": 0.0008755653491787249, "loss": 0.0492, "num_input_tokens_seen": 81239184, "step": 37585 }, { "epoch": 6.132137030995106, "grad_norm": 0.3611098527908325, "learning_rate": 0.000875518355762521, "loss": 0.1936, "num_input_tokens_seen": 81249008, "step": 37590 }, { "epoch": 6.132952691680261, "grad_norm": 0.16590391099452972, "learning_rate": 0.0008754713547359612, "loss": 0.0758, "num_input_tokens_seen": 81261008, "step": 37595 }, { "epoch": 6.133768352365416, "grad_norm": 0.25919094681739807, "learning_rate": 0.0008754243460999982, "loss": 0.1533, "num_input_tokens_seen": 81271024, "step": 37600 }, { "epoch": 6.134584013050571, "grad_norm": 0.021038195118308067, "learning_rate": 0.0008753773298555844, "loss": 0.1182, "num_input_tokens_seen": 81281424, "step": 37605 }, { "epoch": 6.135399673735726, "grad_norm": 0.31902235746383667, "learning_rate": 0.0008753303060036728, "loss": 0.1694, "num_input_tokens_seen": 81293296, "step": 37610 }, { "epoch": 6.136215334420881, "grad_norm": 0.07178599387407303, "learning_rate": 0.0008752832745452166, "loss": 0.0486, "num_input_tokens_seen": 81305680, "step": 37615 }, { "epoch": 6.137030995106036, "grad_norm": 0.02646786905825138, "learning_rate": 0.0008752362354811686, "loss": 0.1189, "num_input_tokens_seen": 81316976, "step": 37620 }, { "epoch": 6.137846655791191, "grad_norm": 0.0307559035718441, "learning_rate": 0.0008751891888124823, "loss": 0.0843, "num_input_tokens_seen": 81328016, "step": 37625 }, { "epoch": 6.138662316476346, "grad_norm": 0.27889934182167053, "learning_rate": 0.0008751421345401111, "loss": 0.1975, "num_input_tokens_seen": 81338736, "step": 37630 }, { "epoch": 6.1394779771615005, "grad_norm": 0.2421681433916092, "learning_rate": 0.0008750950726650089, "loss": 0.0508, "num_input_tokens_seen": 81350192, "step": 37635 }, { "epoch": 6.140293637846656, "grad_norm": 0.025474179536104202, "learning_rate": 0.0008750480031881289, "loss": 0.0527, "num_input_tokens_seen": 81360752, "step": 37640 }, { "epoch": 6.141109298531811, "grad_norm": 0.2337075024843216, "learning_rate": 0.0008750009261104255, "loss": 0.0568, "num_input_tokens_seen": 81372176, "step": 37645 }, { "epoch": 6.141924959216966, "grad_norm": 0.01117783784866333, "learning_rate": 0.0008749538414328525, "loss": 0.1526, "num_input_tokens_seen": 81383632, "step": 37650 }, { "epoch": 6.142740619902121, "grad_norm": 0.14854608476161957, "learning_rate": 0.0008749067491563643, "loss": 0.0564, "num_input_tokens_seen": 81394384, "step": 37655 }, { "epoch": 6.143556280587275, "grad_norm": 0.10728046298027039, "learning_rate": 0.0008748596492819152, "loss": 0.0813, "num_input_tokens_seen": 81404816, "step": 37660 }, { "epoch": 6.14437194127243, "grad_norm": 0.2026142179965973, "learning_rate": 0.0008748125418104598, "loss": 0.0881, "num_input_tokens_seen": 81415568, "step": 37665 }, { "epoch": 6.145187601957586, "grad_norm": 0.02100459858775139, "learning_rate": 0.0008747654267429526, "loss": 0.0187, "num_input_tokens_seen": 81427344, "step": 37670 }, { "epoch": 6.146003262642741, "grad_norm": 0.03070612996816635, "learning_rate": 0.0008747183040803488, "loss": 0.2181, "num_input_tokens_seen": 81437808, "step": 37675 }, { "epoch": 6.146818923327896, "grad_norm": 0.09371879696846008, "learning_rate": 0.000874671173823603, "loss": 0.0673, "num_input_tokens_seen": 81448912, "step": 37680 }, { "epoch": 6.14763458401305, "grad_norm": 0.09240694344043732, "learning_rate": 0.0008746240359736708, "loss": 0.1602, "num_input_tokens_seen": 81459312, "step": 37685 }, { "epoch": 6.148450244698205, "grad_norm": 0.036088816821575165, "learning_rate": 0.0008745768905315072, "loss": 0.1586, "num_input_tokens_seen": 81469328, "step": 37690 }, { "epoch": 6.149265905383361, "grad_norm": 0.06369510293006897, "learning_rate": 0.0008745297374980676, "loss": 0.0624, "num_input_tokens_seen": 81479216, "step": 37695 }, { "epoch": 6.150081566068516, "grad_norm": 0.10222480446100235, "learning_rate": 0.0008744825768743079, "loss": 0.0239, "num_input_tokens_seen": 81490032, "step": 37700 }, { "epoch": 6.150897226753671, "grad_norm": 0.06268610805273056, "learning_rate": 0.0008744354086611837, "loss": 0.1849, "num_input_tokens_seen": 81500976, "step": 37705 }, { "epoch": 6.151712887438825, "grad_norm": 0.012655073776841164, "learning_rate": 0.0008743882328596509, "loss": 0.0349, "num_input_tokens_seen": 81511632, "step": 37710 }, { "epoch": 6.15252854812398, "grad_norm": 0.06229158863425255, "learning_rate": 0.0008743410494706655, "loss": 0.1122, "num_input_tokens_seen": 81521616, "step": 37715 }, { "epoch": 6.153344208809135, "grad_norm": 0.08903874456882477, "learning_rate": 0.0008742938584951841, "loss": 0.0678, "num_input_tokens_seen": 81530864, "step": 37720 }, { "epoch": 6.154159869494291, "grad_norm": 0.005974739324301481, "learning_rate": 0.0008742466599341625, "loss": 0.0475, "num_input_tokens_seen": 81542896, "step": 37725 }, { "epoch": 6.1549755301794455, "grad_norm": 0.014743143692612648, "learning_rate": 0.0008741994537885578, "loss": 0.0512, "num_input_tokens_seen": 81553360, "step": 37730 }, { "epoch": 6.1557911908646, "grad_norm": 0.006878357846289873, "learning_rate": 0.0008741522400593265, "loss": 0.0604, "num_input_tokens_seen": 81564752, "step": 37735 }, { "epoch": 6.156606851549755, "grad_norm": 0.030033187940716743, "learning_rate": 0.0008741050187474253, "loss": 0.0392, "num_input_tokens_seen": 81575504, "step": 37740 }, { "epoch": 6.15742251223491, "grad_norm": 0.2209750860929489, "learning_rate": 0.0008740577898538114, "loss": 0.1603, "num_input_tokens_seen": 81586480, "step": 37745 }, { "epoch": 6.158238172920065, "grad_norm": 0.008535216562449932, "learning_rate": 0.0008740105533794417, "loss": 0.0455, "num_input_tokens_seen": 81597264, "step": 37750 }, { "epoch": 6.1590538336052205, "grad_norm": 0.07704164832830429, "learning_rate": 0.0008739633093252738, "loss": 0.0388, "num_input_tokens_seen": 81608592, "step": 37755 }, { "epoch": 6.159869494290375, "grad_norm": 0.07119981199502945, "learning_rate": 0.0008739160576922649, "loss": 0.0264, "num_input_tokens_seen": 81619600, "step": 37760 }, { "epoch": 6.16068515497553, "grad_norm": 0.08949284255504608, "learning_rate": 0.0008738687984813729, "loss": 0.0402, "num_input_tokens_seen": 81630160, "step": 37765 }, { "epoch": 6.161500815660685, "grad_norm": 0.0037753605283796787, "learning_rate": 0.0008738215316935554, "loss": 0.0422, "num_input_tokens_seen": 81641392, "step": 37770 }, { "epoch": 6.16231647634584, "grad_norm": 0.0014340798370540142, "learning_rate": 0.0008737742573297702, "loss": 0.1182, "num_input_tokens_seen": 81652944, "step": 37775 }, { "epoch": 6.1631321370309955, "grad_norm": 0.024722402915358543, "learning_rate": 0.0008737269753909757, "loss": 0.1752, "num_input_tokens_seen": 81664592, "step": 37780 }, { "epoch": 6.16394779771615, "grad_norm": 0.019996825605630875, "learning_rate": 0.0008736796858781297, "loss": 0.0303, "num_input_tokens_seen": 81675952, "step": 37785 }, { "epoch": 6.164763458401305, "grad_norm": 0.25875627994537354, "learning_rate": 0.0008736323887921911, "loss": 0.0558, "num_input_tokens_seen": 81685360, "step": 37790 }, { "epoch": 6.16557911908646, "grad_norm": 0.15578259527683258, "learning_rate": 0.0008735850841341179, "loss": 0.0491, "num_input_tokens_seen": 81696304, "step": 37795 }, { "epoch": 6.166394779771615, "grad_norm": 0.26379722356796265, "learning_rate": 0.0008735377719048692, "loss": 0.1074, "num_input_tokens_seen": 81706160, "step": 37800 }, { "epoch": 6.16721044045677, "grad_norm": 0.17212273180484772, "learning_rate": 0.0008734904521054037, "loss": 0.0713, "num_input_tokens_seen": 81715760, "step": 37805 }, { "epoch": 6.168026101141925, "grad_norm": 0.042617104947566986, "learning_rate": 0.0008734431247366803, "loss": 0.0534, "num_input_tokens_seen": 81726736, "step": 37810 }, { "epoch": 6.16884176182708, "grad_norm": 0.3961753249168396, "learning_rate": 0.0008733957897996583, "loss": 0.1007, "num_input_tokens_seen": 81736528, "step": 37815 }, { "epoch": 6.169657422512235, "grad_norm": 0.04119712486863136, "learning_rate": 0.0008733484472952969, "loss": 0.0921, "num_input_tokens_seen": 81747856, "step": 37820 }, { "epoch": 6.17047308319739, "grad_norm": 0.20554359257221222, "learning_rate": 0.0008733010972245554, "loss": 0.112, "num_input_tokens_seen": 81758928, "step": 37825 }, { "epoch": 6.171288743882545, "grad_norm": 0.11228428035974503, "learning_rate": 0.0008732537395883938, "loss": 0.0574, "num_input_tokens_seen": 81769392, "step": 37830 }, { "epoch": 6.1721044045677, "grad_norm": 0.021784307435154915, "learning_rate": 0.0008732063743877716, "loss": 0.0364, "num_input_tokens_seen": 81781872, "step": 37835 }, { "epoch": 6.172920065252855, "grad_norm": 0.019864261150360107, "learning_rate": 0.0008731590016236489, "loss": 0.1228, "num_input_tokens_seen": 81792976, "step": 37840 }, { "epoch": 6.17373572593801, "grad_norm": 0.05301322415471077, "learning_rate": 0.0008731116212969856, "loss": 0.0468, "num_input_tokens_seen": 81802928, "step": 37845 }, { "epoch": 6.174551386623165, "grad_norm": 0.14838729798793793, "learning_rate": 0.000873064233408742, "loss": 0.1423, "num_input_tokens_seen": 81812272, "step": 37850 }, { "epoch": 6.1753670473083195, "grad_norm": 0.03396567702293396, "learning_rate": 0.0008730168379598782, "loss": 0.0764, "num_input_tokens_seen": 81823568, "step": 37855 }, { "epoch": 6.176182707993474, "grad_norm": 0.23131468892097473, "learning_rate": 0.0008729694349513552, "loss": 0.1222, "num_input_tokens_seen": 81834000, "step": 37860 }, { "epoch": 6.17699836867863, "grad_norm": 0.041217345744371414, "learning_rate": 0.0008729220243841334, "loss": 0.3346, "num_input_tokens_seen": 81844016, "step": 37865 }, { "epoch": 6.177814029363785, "grad_norm": 0.049347419291734695, "learning_rate": 0.0008728746062591737, "loss": 0.0311, "num_input_tokens_seen": 81855600, "step": 37870 }, { "epoch": 6.17862969004894, "grad_norm": 0.03369903936982155, "learning_rate": 0.0008728271805774371, "loss": 0.0435, "num_input_tokens_seen": 81866640, "step": 37875 }, { "epoch": 6.1794453507340945, "grad_norm": 0.18959836661815643, "learning_rate": 0.0008727797473398846, "loss": 0.1385, "num_input_tokens_seen": 81876528, "step": 37880 }, { "epoch": 6.180261011419249, "grad_norm": 0.020675910636782646, "learning_rate": 0.0008727323065474778, "loss": 0.0281, "num_input_tokens_seen": 81888144, "step": 37885 }, { "epoch": 6.181076672104404, "grad_norm": 0.05461437627673149, "learning_rate": 0.000872684858201178, "loss": 0.1013, "num_input_tokens_seen": 81899344, "step": 37890 }, { "epoch": 6.18189233278956, "grad_norm": 0.09581451117992401, "learning_rate": 0.0008726374023019465, "loss": 0.1075, "num_input_tokens_seen": 81909328, "step": 37895 }, { "epoch": 6.182707993474715, "grad_norm": 0.10295901447534561, "learning_rate": 0.0008725899388507454, "loss": 0.0428, "num_input_tokens_seen": 81921232, "step": 37900 }, { "epoch": 6.1835236541598695, "grad_norm": 0.005341388285160065, "learning_rate": 0.0008725424678485366, "loss": 0.1492, "num_input_tokens_seen": 81932304, "step": 37905 }, { "epoch": 6.184339314845024, "grad_norm": 0.02357945591211319, "learning_rate": 0.0008724949892962821, "loss": 0.0893, "num_input_tokens_seen": 81943792, "step": 37910 }, { "epoch": 6.185154975530179, "grad_norm": 0.012670408934354782, "learning_rate": 0.0008724475031949441, "loss": 0.0995, "num_input_tokens_seen": 81954000, "step": 37915 }, { "epoch": 6.185970636215335, "grad_norm": 0.2212541550397873, "learning_rate": 0.0008724000095454849, "loss": 0.0837, "num_input_tokens_seen": 81964560, "step": 37920 }, { "epoch": 6.18678629690049, "grad_norm": 0.07937619090080261, "learning_rate": 0.0008723525083488671, "loss": 0.0348, "num_input_tokens_seen": 81975504, "step": 37925 }, { "epoch": 6.1876019575856445, "grad_norm": 0.21293163299560547, "learning_rate": 0.0008723049996060534, "loss": 0.1686, "num_input_tokens_seen": 81984816, "step": 37930 }, { "epoch": 6.188417618270799, "grad_norm": 0.056424580514431, "learning_rate": 0.0008722574833180065, "loss": 0.017, "num_input_tokens_seen": 81992880, "step": 37935 }, { "epoch": 6.189233278955954, "grad_norm": 0.007523713167756796, "learning_rate": 0.0008722099594856895, "loss": 0.0239, "num_input_tokens_seen": 82003216, "step": 37940 }, { "epoch": 6.190048939641109, "grad_norm": 0.09879666566848755, "learning_rate": 0.0008721624281100655, "loss": 0.0795, "num_input_tokens_seen": 82015120, "step": 37945 }, { "epoch": 6.190864600326265, "grad_norm": 0.13292758166790009, "learning_rate": 0.0008721148891920978, "loss": 0.0786, "num_input_tokens_seen": 82026512, "step": 37950 }, { "epoch": 6.191680261011419, "grad_norm": 0.01907687447965145, "learning_rate": 0.0008720673427327496, "loss": 0.0676, "num_input_tokens_seen": 82037360, "step": 37955 }, { "epoch": 6.192495921696574, "grad_norm": 0.057684943079948425, "learning_rate": 0.0008720197887329851, "loss": 0.0512, "num_input_tokens_seen": 82048048, "step": 37960 }, { "epoch": 6.193311582381729, "grad_norm": 0.011981310322880745, "learning_rate": 0.0008719722271937673, "loss": 0.0326, "num_input_tokens_seen": 82059216, "step": 37965 }, { "epoch": 6.194127243066884, "grad_norm": 0.1914396733045578, "learning_rate": 0.0008719246581160606, "loss": 0.2076, "num_input_tokens_seen": 82070160, "step": 37970 }, { "epoch": 6.19494290375204, "grad_norm": 0.051620397716760635, "learning_rate": 0.0008718770815008288, "loss": 0.085, "num_input_tokens_seen": 82081328, "step": 37975 }, { "epoch": 6.195758564437194, "grad_norm": 0.1118040457367897, "learning_rate": 0.0008718294973490362, "loss": 0.0768, "num_input_tokens_seen": 82091920, "step": 37980 }, { "epoch": 6.196574225122349, "grad_norm": 0.13779281079769135, "learning_rate": 0.0008717819056616472, "loss": 0.0242, "num_input_tokens_seen": 82103312, "step": 37985 }, { "epoch": 6.197389885807504, "grad_norm": 0.10947174578905106, "learning_rate": 0.0008717343064396262, "loss": 0.1393, "num_input_tokens_seen": 82114608, "step": 37990 }, { "epoch": 6.198205546492659, "grad_norm": 0.1319754719734192, "learning_rate": 0.0008716866996839378, "loss": 0.0664, "num_input_tokens_seen": 82125232, "step": 37995 }, { "epoch": 6.199021207177814, "grad_norm": 0.02237529121339321, "learning_rate": 0.0008716390853955472, "loss": 0.0549, "num_input_tokens_seen": 82136208, "step": 38000 }, { "epoch": 6.199836867862969, "grad_norm": 0.05175924673676491, "learning_rate": 0.0008715914635754187, "loss": 0.0199, "num_input_tokens_seen": 82147184, "step": 38005 }, { "epoch": 6.200652528548124, "grad_norm": 0.33201223611831665, "learning_rate": 0.0008715438342245181, "loss": 0.1194, "num_input_tokens_seen": 82158672, "step": 38010 }, { "epoch": 6.201468189233279, "grad_norm": 0.19700491428375244, "learning_rate": 0.0008714961973438103, "loss": 0.206, "num_input_tokens_seen": 82170448, "step": 38015 }, { "epoch": 6.202283849918434, "grad_norm": 0.0030625720974057913, "learning_rate": 0.0008714485529342606, "loss": 0.1937, "num_input_tokens_seen": 82180848, "step": 38020 }, { "epoch": 6.203099510603589, "grad_norm": 0.06964084506034851, "learning_rate": 0.0008714009009968349, "loss": 0.1514, "num_input_tokens_seen": 82191600, "step": 38025 }, { "epoch": 6.2039151712887435, "grad_norm": 0.019868431612849236, "learning_rate": 0.0008713532415324988, "loss": 0.0404, "num_input_tokens_seen": 82202384, "step": 38030 }, { "epoch": 6.204730831973899, "grad_norm": 0.00986363273113966, "learning_rate": 0.0008713055745422181, "loss": 0.0654, "num_input_tokens_seen": 82211184, "step": 38035 }, { "epoch": 6.205546492659054, "grad_norm": 0.3911730647087097, "learning_rate": 0.000871257900026959, "loss": 0.2685, "num_input_tokens_seen": 82220720, "step": 38040 }, { "epoch": 6.206362153344209, "grad_norm": 0.10592617094516754, "learning_rate": 0.0008712102179876876, "loss": 0.0356, "num_input_tokens_seen": 82232176, "step": 38045 }, { "epoch": 6.207177814029364, "grad_norm": 0.1874314248561859, "learning_rate": 0.0008711625284253701, "loss": 0.1489, "num_input_tokens_seen": 82241648, "step": 38050 }, { "epoch": 6.2079934747145185, "grad_norm": 0.008363268338143826, "learning_rate": 0.0008711148313409731, "loss": 0.3101, "num_input_tokens_seen": 82251952, "step": 38055 }, { "epoch": 6.208809135399674, "grad_norm": 0.006032775621861219, "learning_rate": 0.0008710671267354633, "loss": 0.0385, "num_input_tokens_seen": 82262800, "step": 38060 }, { "epoch": 6.209624796084829, "grad_norm": 0.018502449616789818, "learning_rate": 0.0008710194146098074, "loss": 0.1468, "num_input_tokens_seen": 82273552, "step": 38065 }, { "epoch": 6.210440456769984, "grad_norm": 0.03034134767949581, "learning_rate": 0.0008709716949649724, "loss": 0.0399, "num_input_tokens_seen": 82285552, "step": 38070 }, { "epoch": 6.211256117455139, "grad_norm": 0.20970183610916138, "learning_rate": 0.0008709239678019255, "loss": 0.1452, "num_input_tokens_seen": 82297520, "step": 38075 }, { "epoch": 6.212071778140293, "grad_norm": 0.08290934562683105, "learning_rate": 0.0008708762331216338, "loss": 0.0404, "num_input_tokens_seen": 82307856, "step": 38080 }, { "epoch": 6.212887438825448, "grad_norm": 0.06803029775619507, "learning_rate": 0.0008708284909250646, "loss": 0.0822, "num_input_tokens_seen": 82319344, "step": 38085 }, { "epoch": 6.213703099510604, "grad_norm": 0.2577921152114868, "learning_rate": 0.0008707807412131858, "loss": 0.2342, "num_input_tokens_seen": 82330672, "step": 38090 }, { "epoch": 6.214518760195759, "grad_norm": 0.08383003622293472, "learning_rate": 0.0008707329839869649, "loss": 0.0632, "num_input_tokens_seen": 82342064, "step": 38095 }, { "epoch": 6.215334420880914, "grad_norm": 0.06594287604093552, "learning_rate": 0.0008706852192473696, "loss": 0.0725, "num_input_tokens_seen": 82353232, "step": 38100 }, { "epoch": 6.216150081566068, "grad_norm": 0.09301108121871948, "learning_rate": 0.0008706374469953682, "loss": 0.023, "num_input_tokens_seen": 82364624, "step": 38105 }, { "epoch": 6.216965742251223, "grad_norm": 0.047476354986429214, "learning_rate": 0.0008705896672319286, "loss": 0.0269, "num_input_tokens_seen": 82376816, "step": 38110 }, { "epoch": 6.217781402936378, "grad_norm": 0.09446101635694504, "learning_rate": 0.0008705418799580196, "loss": 0.0419, "num_input_tokens_seen": 82386992, "step": 38115 }, { "epoch": 6.218597063621534, "grad_norm": 0.00325257726944983, "learning_rate": 0.000870494085174609, "loss": 0.0846, "num_input_tokens_seen": 82398032, "step": 38120 }, { "epoch": 6.219412724306689, "grad_norm": 0.033085815608501434, "learning_rate": 0.000870446282882666, "loss": 0.0113, "num_input_tokens_seen": 82409488, "step": 38125 }, { "epoch": 6.220228384991843, "grad_norm": 0.005405626259744167, "learning_rate": 0.0008703984730831589, "loss": 0.0081, "num_input_tokens_seen": 82420368, "step": 38130 }, { "epoch": 6.221044045676998, "grad_norm": 0.010103190317749977, "learning_rate": 0.0008703506557770571, "loss": 0.009, "num_input_tokens_seen": 82430800, "step": 38135 }, { "epoch": 6.221859706362153, "grad_norm": 0.04629164934158325, "learning_rate": 0.0008703028309653293, "loss": 0.0677, "num_input_tokens_seen": 82441680, "step": 38140 }, { "epoch": 6.222675367047309, "grad_norm": 0.009996136650443077, "learning_rate": 0.0008702549986489449, "loss": 0.0619, "num_input_tokens_seen": 82452432, "step": 38145 }, { "epoch": 6.2234910277324635, "grad_norm": 0.1453513503074646, "learning_rate": 0.0008702071588288731, "loss": 0.1359, "num_input_tokens_seen": 82463216, "step": 38150 }, { "epoch": 6.224306688417618, "grad_norm": 0.2776382863521576, "learning_rate": 0.0008701593115060837, "loss": 0.1409, "num_input_tokens_seen": 82475184, "step": 38155 }, { "epoch": 6.225122349102773, "grad_norm": 0.08298216760158539, "learning_rate": 0.0008701114566815464, "loss": 0.0307, "num_input_tokens_seen": 82484080, "step": 38160 }, { "epoch": 6.225938009787928, "grad_norm": 0.18404394388198853, "learning_rate": 0.0008700635943562308, "loss": 0.0845, "num_input_tokens_seen": 82493488, "step": 38165 }, { "epoch": 6.226753670473083, "grad_norm": 0.23019194602966309, "learning_rate": 0.0008700157245311071, "loss": 0.0372, "num_input_tokens_seen": 82504784, "step": 38170 }, { "epoch": 6.2275693311582385, "grad_norm": 0.275285929441452, "learning_rate": 0.0008699678472071453, "loss": 0.1103, "num_input_tokens_seen": 82516400, "step": 38175 }, { "epoch": 6.228384991843393, "grad_norm": 0.04369976744055748, "learning_rate": 0.0008699199623853156, "loss": 0.0648, "num_input_tokens_seen": 82526000, "step": 38180 }, { "epoch": 6.229200652528548, "grad_norm": 0.010660940781235695, "learning_rate": 0.0008698720700665888, "loss": 0.1524, "num_input_tokens_seen": 82535760, "step": 38185 }, { "epoch": 6.230016313213703, "grad_norm": 0.10546161234378815, "learning_rate": 0.0008698241702519351, "loss": 0.0292, "num_input_tokens_seen": 82547472, "step": 38190 }, { "epoch": 6.230831973898858, "grad_norm": 0.02637307532131672, "learning_rate": 0.0008697762629423254, "loss": 0.0357, "num_input_tokens_seen": 82559312, "step": 38195 }, { "epoch": 6.231647634584013, "grad_norm": 0.016469111666083336, "learning_rate": 0.0008697283481387308, "loss": 0.0954, "num_input_tokens_seen": 82570800, "step": 38200 }, { "epoch": 6.232463295269168, "grad_norm": 0.005616335663944483, "learning_rate": 0.000869680425842122, "loss": 0.0197, "num_input_tokens_seen": 82582416, "step": 38205 }, { "epoch": 6.233278955954323, "grad_norm": 0.0673634260892868, "learning_rate": 0.0008696324960534706, "loss": 0.0132, "num_input_tokens_seen": 82592400, "step": 38210 }, { "epoch": 6.234094616639478, "grad_norm": 0.02261550910770893, "learning_rate": 0.0008695845587737476, "loss": 0.0817, "num_input_tokens_seen": 82603216, "step": 38215 }, { "epoch": 6.234910277324633, "grad_norm": 0.012282857671380043, "learning_rate": 0.0008695366140039248, "loss": 0.0945, "num_input_tokens_seen": 82613648, "step": 38220 }, { "epoch": 6.235725938009788, "grad_norm": 0.01848006621003151, "learning_rate": 0.0008694886617449738, "loss": 0.0324, "num_input_tokens_seen": 82625488, "step": 38225 }, { "epoch": 6.236541598694943, "grad_norm": 0.004085164982825518, "learning_rate": 0.0008694407019978661, "loss": 0.0539, "num_input_tokens_seen": 82636880, "step": 38230 }, { "epoch": 6.237357259380098, "grad_norm": 0.412178635597229, "learning_rate": 0.0008693927347635741, "loss": 0.1095, "num_input_tokens_seen": 82648368, "step": 38235 }, { "epoch": 6.238172920065253, "grad_norm": 0.063252754509449, "learning_rate": 0.0008693447600430695, "loss": 0.1211, "num_input_tokens_seen": 82658736, "step": 38240 }, { "epoch": 6.238988580750408, "grad_norm": 0.33977410197257996, "learning_rate": 0.000869296777837325, "loss": 0.2601, "num_input_tokens_seen": 82667888, "step": 38245 }, { "epoch": 6.239804241435563, "grad_norm": 0.007731478661298752, "learning_rate": 0.0008692487881473128, "loss": 0.0068, "num_input_tokens_seen": 82678896, "step": 38250 }, { "epoch": 6.240619902120717, "grad_norm": 0.014961308799684048, "learning_rate": 0.0008692007909740054, "loss": 0.1406, "num_input_tokens_seen": 82689424, "step": 38255 }, { "epoch": 6.241435562805873, "grad_norm": 0.030552340671420097, "learning_rate": 0.0008691527863183755, "loss": 0.0516, "num_input_tokens_seen": 82700848, "step": 38260 }, { "epoch": 6.242251223491028, "grad_norm": 0.00711380410939455, "learning_rate": 0.0008691047741813963, "loss": 0.188, "num_input_tokens_seen": 82710288, "step": 38265 }, { "epoch": 6.243066884176183, "grad_norm": 0.011850893497467041, "learning_rate": 0.0008690567545640406, "loss": 0.0169, "num_input_tokens_seen": 82720240, "step": 38270 }, { "epoch": 6.2438825448613375, "grad_norm": 0.09806658327579498, "learning_rate": 0.0008690087274672814, "loss": 0.0543, "num_input_tokens_seen": 82731664, "step": 38275 }, { "epoch": 6.244698205546492, "grad_norm": 0.11136013269424438, "learning_rate": 0.0008689606928920923, "loss": 0.0897, "num_input_tokens_seen": 82742832, "step": 38280 }, { "epoch": 6.245513866231648, "grad_norm": 0.3061741292476654, "learning_rate": 0.0008689126508394467, "loss": 0.1712, "num_input_tokens_seen": 82752496, "step": 38285 }, { "epoch": 6.246329526916803, "grad_norm": 0.04090377315878868, "learning_rate": 0.0008688646013103183, "loss": 0.0762, "num_input_tokens_seen": 82764016, "step": 38290 }, { "epoch": 6.247145187601958, "grad_norm": 0.02585625648498535, "learning_rate": 0.0008688165443056808, "loss": 0.0554, "num_input_tokens_seen": 82773904, "step": 38295 }, { "epoch": 6.2479608482871125, "grad_norm": 0.12480328232049942, "learning_rate": 0.0008687684798265081, "loss": 0.0233, "num_input_tokens_seen": 82783376, "step": 38300 }, { "epoch": 6.248776508972267, "grad_norm": 0.34098994731903076, "learning_rate": 0.0008687204078737744, "loss": 0.1455, "num_input_tokens_seen": 82794256, "step": 38305 }, { "epoch": 6.249592169657422, "grad_norm": 0.2570803165435791, "learning_rate": 0.0008686723284484538, "loss": 0.2018, "num_input_tokens_seen": 82802288, "step": 38310 }, { "epoch": 6.250407830342578, "grad_norm": 0.009916471317410469, "learning_rate": 0.0008686242415515209, "loss": 0.0138, "num_input_tokens_seen": 82813584, "step": 38315 }, { "epoch": 6.251223491027733, "grad_norm": 0.2923734188079834, "learning_rate": 0.00086857614718395, "loss": 0.2181, "num_input_tokens_seen": 82823824, "step": 38320 }, { "epoch": 6.2520391517128875, "grad_norm": 0.04271473363041878, "learning_rate": 0.0008685280453467159, "loss": 0.208, "num_input_tokens_seen": 82834032, "step": 38325 }, { "epoch": 6.252854812398042, "grad_norm": 0.04432328790426254, "learning_rate": 0.0008684799360407935, "loss": 0.0405, "num_input_tokens_seen": 82845456, "step": 38330 }, { "epoch": 6.253670473083197, "grad_norm": 0.10046076774597168, "learning_rate": 0.0008684318192671576, "loss": 0.1017, "num_input_tokens_seen": 82856144, "step": 38335 }, { "epoch": 6.254486133768353, "grad_norm": 0.2138485610485077, "learning_rate": 0.0008683836950267838, "loss": 0.1575, "num_input_tokens_seen": 82867728, "step": 38340 }, { "epoch": 6.255301794453508, "grad_norm": 0.022035308182239532, "learning_rate": 0.0008683355633206469, "loss": 0.1219, "num_input_tokens_seen": 82878768, "step": 38345 }, { "epoch": 6.2561174551386625, "grad_norm": 0.05334000289440155, "learning_rate": 0.0008682874241497225, "loss": 0.0254, "num_input_tokens_seen": 82889872, "step": 38350 }, { "epoch": 6.256933115823817, "grad_norm": 0.005959376692771912, "learning_rate": 0.0008682392775149863, "loss": 0.0455, "num_input_tokens_seen": 82901616, "step": 38355 }, { "epoch": 6.257748776508972, "grad_norm": 0.007209327537566423, "learning_rate": 0.000868191123417414, "loss": 0.03, "num_input_tokens_seen": 82911792, "step": 38360 }, { "epoch": 6.258564437194127, "grad_norm": 0.11563768237829208, "learning_rate": 0.0008681429618579815, "loss": 0.0332, "num_input_tokens_seen": 82922256, "step": 38365 }, { "epoch": 6.259380097879283, "grad_norm": 0.05262453109025955, "learning_rate": 0.0008680947928376648, "loss": 0.0458, "num_input_tokens_seen": 82933072, "step": 38370 }, { "epoch": 6.260195758564437, "grad_norm": 0.10289740562438965, "learning_rate": 0.0008680466163574402, "loss": 0.0744, "num_input_tokens_seen": 82943984, "step": 38375 }, { "epoch": 6.261011419249592, "grad_norm": 0.0066475640051066875, "learning_rate": 0.000867998432418284, "loss": 0.0083, "num_input_tokens_seen": 82953072, "step": 38380 }, { "epoch": 6.261827079934747, "grad_norm": 0.21525736153125763, "learning_rate": 0.0008679502410211728, "loss": 0.0413, "num_input_tokens_seen": 82962576, "step": 38385 }, { "epoch": 6.262642740619902, "grad_norm": 0.4553488492965698, "learning_rate": 0.0008679020421670831, "loss": 0.1761, "num_input_tokens_seen": 82974192, "step": 38390 }, { "epoch": 6.263458401305057, "grad_norm": 0.06272874027490616, "learning_rate": 0.0008678538358569918, "loss": 0.0753, "num_input_tokens_seen": 82985104, "step": 38395 }, { "epoch": 6.264274061990212, "grad_norm": 0.10946105420589447, "learning_rate": 0.000867805622091876, "loss": 0.0305, "num_input_tokens_seen": 82995920, "step": 38400 }, { "epoch": 6.265089722675367, "grad_norm": 0.30534982681274414, "learning_rate": 0.0008677574008727126, "loss": 0.1317, "num_input_tokens_seen": 83007632, "step": 38405 }, { "epoch": 6.265905383360522, "grad_norm": 0.22424903512001038, "learning_rate": 0.0008677091722004788, "loss": 0.205, "num_input_tokens_seen": 83018768, "step": 38410 }, { "epoch": 6.266721044045677, "grad_norm": 0.04151112586259842, "learning_rate": 0.0008676609360761524, "loss": 0.0292, "num_input_tokens_seen": 83028976, "step": 38415 }, { "epoch": 6.267536704730832, "grad_norm": 0.24260827898979187, "learning_rate": 0.0008676126925007107, "loss": 0.1155, "num_input_tokens_seen": 83039760, "step": 38420 }, { "epoch": 6.268352365415987, "grad_norm": 0.07921306043863297, "learning_rate": 0.0008675644414751311, "loss": 0.1389, "num_input_tokens_seen": 83050512, "step": 38425 }, { "epoch": 6.269168026101142, "grad_norm": 0.3923218846321106, "learning_rate": 0.0008675161830003921, "loss": 0.0957, "num_input_tokens_seen": 83062384, "step": 38430 }, { "epoch": 6.269983686786297, "grad_norm": 0.037347909063100815, "learning_rate": 0.0008674679170774713, "loss": 0.1005, "num_input_tokens_seen": 83073680, "step": 38435 }, { "epoch": 6.270799347471452, "grad_norm": 0.08266767114400864, "learning_rate": 0.0008674196437073472, "loss": 0.0624, "num_input_tokens_seen": 83085552, "step": 38440 }, { "epoch": 6.271615008156607, "grad_norm": 0.1442742645740509, "learning_rate": 0.0008673713628909978, "loss": 0.0605, "num_input_tokens_seen": 83095792, "step": 38445 }, { "epoch": 6.2724306688417615, "grad_norm": 0.3053017854690552, "learning_rate": 0.0008673230746294016, "loss": 0.1411, "num_input_tokens_seen": 83107760, "step": 38450 }, { "epoch": 6.273246329526917, "grad_norm": 0.1329246461391449, "learning_rate": 0.0008672747789235373, "loss": 0.0365, "num_input_tokens_seen": 83118704, "step": 38455 }, { "epoch": 6.274061990212072, "grad_norm": 0.1079804003238678, "learning_rate": 0.0008672264757743838, "loss": 0.0188, "num_input_tokens_seen": 83129936, "step": 38460 }, { "epoch": 6.274877650897227, "grad_norm": 0.09248155355453491, "learning_rate": 0.0008671781651829198, "loss": 0.0307, "num_input_tokens_seen": 83139888, "step": 38465 }, { "epoch": 6.275693311582382, "grad_norm": 0.13056586682796478, "learning_rate": 0.0008671298471501246, "loss": 0.0293, "num_input_tokens_seen": 83151152, "step": 38470 }, { "epoch": 6.2765089722675365, "grad_norm": 0.12060708552598953, "learning_rate": 0.0008670815216769771, "loss": 0.0555, "num_input_tokens_seen": 83163344, "step": 38475 }, { "epoch": 6.277324632952691, "grad_norm": 0.0019625083077698946, "learning_rate": 0.0008670331887644571, "loss": 0.0822, "num_input_tokens_seen": 83175568, "step": 38480 }, { "epoch": 6.278140293637847, "grad_norm": 0.0041697206906974316, "learning_rate": 0.0008669848484135439, "loss": 0.096, "num_input_tokens_seen": 83186064, "step": 38485 }, { "epoch": 6.278955954323002, "grad_norm": 0.003180544590577483, "learning_rate": 0.0008669365006252172, "loss": 0.0647, "num_input_tokens_seen": 83195792, "step": 38490 }, { "epoch": 6.279771615008157, "grad_norm": 0.039838630706071854, "learning_rate": 0.0008668881454004567, "loss": 0.0173, "num_input_tokens_seen": 83206800, "step": 38495 }, { "epoch": 6.280587275693311, "grad_norm": 0.20914457738399506, "learning_rate": 0.0008668397827402425, "loss": 0.188, "num_input_tokens_seen": 83217168, "step": 38500 }, { "epoch": 6.281402936378466, "grad_norm": 0.12957248091697693, "learning_rate": 0.000866791412645555, "loss": 0.2052, "num_input_tokens_seen": 83228496, "step": 38505 }, { "epoch": 6.282218597063622, "grad_norm": 0.198208287358284, "learning_rate": 0.000866743035117374, "loss": 0.0561, "num_input_tokens_seen": 83238704, "step": 38510 }, { "epoch": 6.283034257748777, "grad_norm": 0.012076416984200478, "learning_rate": 0.0008666946501566801, "loss": 0.0393, "num_input_tokens_seen": 83248400, "step": 38515 }, { "epoch": 6.283849918433932, "grad_norm": 0.03215594217181206, "learning_rate": 0.000866646257764454, "loss": 0.0604, "num_input_tokens_seen": 83260048, "step": 38520 }, { "epoch": 6.284665579119086, "grad_norm": 0.011157657951116562, "learning_rate": 0.0008665978579416763, "loss": 0.0095, "num_input_tokens_seen": 83272208, "step": 38525 }, { "epoch": 6.285481239804241, "grad_norm": 0.014190180227160454, "learning_rate": 0.000866549450689328, "loss": 0.0604, "num_input_tokens_seen": 83281808, "step": 38530 }, { "epoch": 6.286296900489396, "grad_norm": 0.37947651743888855, "learning_rate": 0.0008665010360083902, "loss": 0.1194, "num_input_tokens_seen": 83291792, "step": 38535 }, { "epoch": 6.287112561174552, "grad_norm": 0.008268352597951889, "learning_rate": 0.0008664526138998438, "loss": 0.1653, "num_input_tokens_seen": 83301648, "step": 38540 }, { "epoch": 6.287928221859707, "grad_norm": 0.2546350061893463, "learning_rate": 0.0008664041843646704, "loss": 0.0901, "num_input_tokens_seen": 83312560, "step": 38545 }, { "epoch": 6.288743882544861, "grad_norm": 0.1817859411239624, "learning_rate": 0.0008663557474038512, "loss": 0.0509, "num_input_tokens_seen": 83323728, "step": 38550 }, { "epoch": 6.289559543230016, "grad_norm": 0.019393671303987503, "learning_rate": 0.0008663073030183683, "loss": 0.102, "num_input_tokens_seen": 83335600, "step": 38555 }, { "epoch": 6.290375203915171, "grad_norm": 0.005289722234010696, "learning_rate": 0.000866258851209203, "loss": 0.0309, "num_input_tokens_seen": 83345872, "step": 38560 }, { "epoch": 6.291190864600326, "grad_norm": 0.08684537559747696, "learning_rate": 0.0008662103919773375, "loss": 0.178, "num_input_tokens_seen": 83357232, "step": 38565 }, { "epoch": 6.2920065252854815, "grad_norm": 0.009959384799003601, "learning_rate": 0.0008661619253237538, "loss": 0.0118, "num_input_tokens_seen": 83368560, "step": 38570 }, { "epoch": 6.292822185970636, "grad_norm": 0.007728797383606434, "learning_rate": 0.0008661134512494343, "loss": 0.1884, "num_input_tokens_seen": 83378416, "step": 38575 }, { "epoch": 6.293637846655791, "grad_norm": 0.04982810467481613, "learning_rate": 0.0008660649697553612, "loss": 0.2022, "num_input_tokens_seen": 83389712, "step": 38580 }, { "epoch": 6.294453507340946, "grad_norm": 0.0082283029332757, "learning_rate": 0.000866016480842517, "loss": 0.0575, "num_input_tokens_seen": 83399920, "step": 38585 }, { "epoch": 6.295269168026101, "grad_norm": 0.06599834561347961, "learning_rate": 0.0008659679845118847, "loss": 0.0333, "num_input_tokens_seen": 83411504, "step": 38590 }, { "epoch": 6.2960848287112565, "grad_norm": 0.021498100832104683, "learning_rate": 0.0008659194807644468, "loss": 0.1437, "num_input_tokens_seen": 83421296, "step": 38595 }, { "epoch": 6.296900489396411, "grad_norm": 0.016979070380330086, "learning_rate": 0.0008658709696011864, "loss": 0.0244, "num_input_tokens_seen": 83432016, "step": 38600 }, { "epoch": 6.297716150081566, "grad_norm": 0.01811056397855282, "learning_rate": 0.0008658224510230867, "loss": 0.0568, "num_input_tokens_seen": 83442448, "step": 38605 }, { "epoch": 6.298531810766721, "grad_norm": 0.07393760234117508, "learning_rate": 0.0008657739250311309, "loss": 0.0306, "num_input_tokens_seen": 83453744, "step": 38610 }, { "epoch": 6.299347471451876, "grad_norm": 0.01685251295566559, "learning_rate": 0.0008657253916263026, "loss": 0.0194, "num_input_tokens_seen": 83464528, "step": 38615 }, { "epoch": 6.300163132137031, "grad_norm": 0.009738907217979431, "learning_rate": 0.0008656768508095852, "loss": 0.0169, "num_input_tokens_seen": 83474928, "step": 38620 }, { "epoch": 6.300978792822186, "grad_norm": 0.048500582575798035, "learning_rate": 0.0008656283025819626, "loss": 0.1431, "num_input_tokens_seen": 83485584, "step": 38625 }, { "epoch": 6.301794453507341, "grad_norm": 0.01439825538545847, "learning_rate": 0.0008655797469444186, "loss": 0.0978, "num_input_tokens_seen": 83496752, "step": 38630 }, { "epoch": 6.302610114192496, "grad_norm": 0.2932996451854706, "learning_rate": 0.0008655311838979371, "loss": 0.1392, "num_input_tokens_seen": 83506736, "step": 38635 }, { "epoch": 6.303425774877651, "grad_norm": 0.08580674976110458, "learning_rate": 0.0008654826134435028, "loss": 0.0396, "num_input_tokens_seen": 83518576, "step": 38640 }, { "epoch": 6.304241435562806, "grad_norm": 0.021468764171004295, "learning_rate": 0.0008654340355820993, "loss": 0.1472, "num_input_tokens_seen": 83528944, "step": 38645 }, { "epoch": 6.30505709624796, "grad_norm": 0.027125662192702293, "learning_rate": 0.0008653854503147117, "loss": 0.154, "num_input_tokens_seen": 83540560, "step": 38650 }, { "epoch": 6.305872756933116, "grad_norm": 0.004373315721750259, "learning_rate": 0.0008653368576423244, "loss": 0.0116, "num_input_tokens_seen": 83551984, "step": 38655 }, { "epoch": 6.306688417618271, "grad_norm": 0.020719408988952637, "learning_rate": 0.0008652882575659222, "loss": 0.0158, "num_input_tokens_seen": 83561712, "step": 38660 }, { "epoch": 6.307504078303426, "grad_norm": 0.05750217288732529, "learning_rate": 0.00086523965008649, "loss": 0.0459, "num_input_tokens_seen": 83571888, "step": 38665 }, { "epoch": 6.308319738988581, "grad_norm": 0.018178651109337807, "learning_rate": 0.0008651910352050129, "loss": 0.0802, "num_input_tokens_seen": 83582928, "step": 38670 }, { "epoch": 6.309135399673735, "grad_norm": 0.04456610232591629, "learning_rate": 0.0008651424129224764, "loss": 0.055, "num_input_tokens_seen": 83594672, "step": 38675 }, { "epoch": 6.309951060358891, "grad_norm": 0.1513279229402542, "learning_rate": 0.0008650937832398656, "loss": 0.1175, "num_input_tokens_seen": 83604400, "step": 38680 }, { "epoch": 6.310766721044046, "grad_norm": 0.34819474816322327, "learning_rate": 0.0008650451461581661, "loss": 0.0755, "num_input_tokens_seen": 83614320, "step": 38685 }, { "epoch": 6.311582381729201, "grad_norm": 0.030669698491692543, "learning_rate": 0.0008649965016783636, "loss": 0.0355, "num_input_tokens_seen": 83624784, "step": 38690 }, { "epoch": 6.3123980424143555, "grad_norm": 0.030591286718845367, "learning_rate": 0.0008649478498014441, "loss": 0.1491, "num_input_tokens_seen": 83635408, "step": 38695 }, { "epoch": 6.31321370309951, "grad_norm": 0.001337647088803351, "learning_rate": 0.0008648991905283931, "loss": 0.0464, "num_input_tokens_seen": 83647568, "step": 38700 }, { "epoch": 6.314029363784665, "grad_norm": 0.030285224318504333, "learning_rate": 0.0008648505238601974, "loss": 0.0486, "num_input_tokens_seen": 83658480, "step": 38705 }, { "epoch": 6.314845024469821, "grad_norm": 0.015253371559083462, "learning_rate": 0.0008648018497978429, "loss": 0.086, "num_input_tokens_seen": 83669552, "step": 38710 }, { "epoch": 6.315660685154976, "grad_norm": 0.19649551808834076, "learning_rate": 0.0008647531683423162, "loss": 0.0902, "num_input_tokens_seen": 83681392, "step": 38715 }, { "epoch": 6.3164763458401305, "grad_norm": 0.03983687609434128, "learning_rate": 0.0008647044794946038, "loss": 0.2075, "num_input_tokens_seen": 83691920, "step": 38720 }, { "epoch": 6.317292006525285, "grad_norm": 0.014638083986938, "learning_rate": 0.0008646557832556925, "loss": 0.0456, "num_input_tokens_seen": 83702576, "step": 38725 }, { "epoch": 6.31810766721044, "grad_norm": 0.01107320748269558, "learning_rate": 0.000864607079626569, "loss": 0.0201, "num_input_tokens_seen": 83715088, "step": 38730 }, { "epoch": 6.318923327895595, "grad_norm": 0.011112612672150135, "learning_rate": 0.0008645583686082206, "loss": 0.1986, "num_input_tokens_seen": 83725264, "step": 38735 }, { "epoch": 6.319738988580751, "grad_norm": 0.028126679360866547, "learning_rate": 0.0008645096502016346, "loss": 0.1116, "num_input_tokens_seen": 83736400, "step": 38740 }, { "epoch": 6.3205546492659055, "grad_norm": 0.103696309030056, "learning_rate": 0.0008644609244077978, "loss": 0.0249, "num_input_tokens_seen": 83748144, "step": 38745 }, { "epoch": 6.32137030995106, "grad_norm": 0.22747750580310822, "learning_rate": 0.0008644121912276981, "loss": 0.1537, "num_input_tokens_seen": 83758704, "step": 38750 }, { "epoch": 6.322185970636215, "grad_norm": 0.04633212834596634, "learning_rate": 0.000864363450662323, "loss": 0.0403, "num_input_tokens_seen": 83769488, "step": 38755 }, { "epoch": 6.32300163132137, "grad_norm": 0.15548771619796753, "learning_rate": 0.0008643147027126604, "loss": 0.0712, "num_input_tokens_seen": 83780080, "step": 38760 }, { "epoch": 6.323817292006526, "grad_norm": 0.053673189133405685, "learning_rate": 0.0008642659473796984, "loss": 0.1654, "num_input_tokens_seen": 83792144, "step": 38765 }, { "epoch": 6.3246329526916805, "grad_norm": 0.23594816029071808, "learning_rate": 0.0008642171846644245, "loss": 0.0615, "num_input_tokens_seen": 83803440, "step": 38770 }, { "epoch": 6.325448613376835, "grad_norm": 0.11045833677053452, "learning_rate": 0.0008641684145678275, "loss": 0.1299, "num_input_tokens_seen": 83812912, "step": 38775 }, { "epoch": 6.32626427406199, "grad_norm": 0.017149219289422035, "learning_rate": 0.0008641196370908956, "loss": 0.0582, "num_input_tokens_seen": 83822960, "step": 38780 }, { "epoch": 6.327079934747145, "grad_norm": 0.40087974071502686, "learning_rate": 0.0008640708522346173, "loss": 0.1819, "num_input_tokens_seen": 83832304, "step": 38785 }, { "epoch": 6.327895595432301, "grad_norm": 0.016898376867175102, "learning_rate": 0.0008640220599999813, "loss": 0.0217, "num_input_tokens_seen": 83841136, "step": 38790 }, { "epoch": 6.328711256117455, "grad_norm": 0.051673293113708496, "learning_rate": 0.0008639732603879766, "loss": 0.0734, "num_input_tokens_seen": 83851600, "step": 38795 }, { "epoch": 6.32952691680261, "grad_norm": 0.031015006825327873, "learning_rate": 0.0008639244533995919, "loss": 0.1093, "num_input_tokens_seen": 83862896, "step": 38800 }, { "epoch": 6.330342577487765, "grad_norm": 0.20969587564468384, "learning_rate": 0.0008638756390358164, "loss": 0.1846, "num_input_tokens_seen": 83873552, "step": 38805 }, { "epoch": 6.33115823817292, "grad_norm": 0.032477930188179016, "learning_rate": 0.0008638268172976398, "loss": 0.027, "num_input_tokens_seen": 83884720, "step": 38810 }, { "epoch": 6.331973898858075, "grad_norm": 0.0873703733086586, "learning_rate": 0.0008637779881860509, "loss": 0.1324, "num_input_tokens_seen": 83895088, "step": 38815 }, { "epoch": 6.33278955954323, "grad_norm": 0.034240808337926865, "learning_rate": 0.0008637291517020397, "loss": 0.0311, "num_input_tokens_seen": 83906640, "step": 38820 }, { "epoch": 6.333605220228385, "grad_norm": 0.03294326364994049, "learning_rate": 0.0008636803078465958, "loss": 0.0705, "num_input_tokens_seen": 83916976, "step": 38825 }, { "epoch": 6.33442088091354, "grad_norm": 0.2754383683204651, "learning_rate": 0.000863631456620709, "loss": 0.2057, "num_input_tokens_seen": 83928432, "step": 38830 }, { "epoch": 6.335236541598695, "grad_norm": 0.01746748387813568, "learning_rate": 0.0008635825980253696, "loss": 0.0447, "num_input_tokens_seen": 83939440, "step": 38835 }, { "epoch": 6.33605220228385, "grad_norm": 0.059651441872119904, "learning_rate": 0.0008635337320615675, "loss": 0.1236, "num_input_tokens_seen": 83950032, "step": 38840 }, { "epoch": 6.3368678629690045, "grad_norm": 0.12536899745464325, "learning_rate": 0.0008634848587302932, "loss": 0.1126, "num_input_tokens_seen": 83962000, "step": 38845 }, { "epoch": 6.33768352365416, "grad_norm": 0.024651646614074707, "learning_rate": 0.0008634359780325372, "loss": 0.0204, "num_input_tokens_seen": 83973360, "step": 38850 }, { "epoch": 6.338499184339315, "grad_norm": 0.20898933708667755, "learning_rate": 0.0008633870899692899, "loss": 0.0862, "num_input_tokens_seen": 83984464, "step": 38855 }, { "epoch": 6.33931484502447, "grad_norm": 0.004932452458888292, "learning_rate": 0.0008633381945415422, "loss": 0.0445, "num_input_tokens_seen": 83993808, "step": 38860 }, { "epoch": 6.340130505709625, "grad_norm": 0.06426247209310532, "learning_rate": 0.0008632892917502852, "loss": 0.023, "num_input_tokens_seen": 84004880, "step": 38865 }, { "epoch": 6.3409461663947795, "grad_norm": 0.011022510938346386, "learning_rate": 0.0008632403815965099, "loss": 0.0385, "num_input_tokens_seen": 84016240, "step": 38870 }, { "epoch": 6.341761827079935, "grad_norm": 0.36207377910614014, "learning_rate": 0.0008631914640812073, "loss": 0.1861, "num_input_tokens_seen": 84027920, "step": 38875 }, { "epoch": 6.34257748776509, "grad_norm": 0.02720283530652523, "learning_rate": 0.000863142539205369, "loss": 0.14, "num_input_tokens_seen": 84038000, "step": 38880 }, { "epoch": 6.343393148450245, "grad_norm": 0.03938665613532066, "learning_rate": 0.0008630936069699864, "loss": 0.041, "num_input_tokens_seen": 84049136, "step": 38885 }, { "epoch": 6.3442088091354, "grad_norm": 0.008454914204776287, "learning_rate": 0.0008630446673760513, "loss": 0.0379, "num_input_tokens_seen": 84059024, "step": 38890 }, { "epoch": 6.3450244698205545, "grad_norm": 0.24689482152462006, "learning_rate": 0.0008629957204245555, "loss": 0.0959, "num_input_tokens_seen": 84069616, "step": 38895 }, { "epoch": 6.345840130505709, "grad_norm": 0.16050291061401367, "learning_rate": 0.000862946766116491, "loss": 0.1066, "num_input_tokens_seen": 84079984, "step": 38900 }, { "epoch": 6.346655791190865, "grad_norm": 0.008206437341868877, "learning_rate": 0.0008628978044528496, "loss": 0.0314, "num_input_tokens_seen": 84090768, "step": 38905 }, { "epoch": 6.34747145187602, "grad_norm": 0.04334701597690582, "learning_rate": 0.000862848835434624, "loss": 0.1137, "num_input_tokens_seen": 84102352, "step": 38910 }, { "epoch": 6.348287112561175, "grad_norm": 0.025094132870435715, "learning_rate": 0.0008627998590628065, "loss": 0.0465, "num_input_tokens_seen": 84113328, "step": 38915 }, { "epoch": 6.349102773246329, "grad_norm": 0.017059357836842537, "learning_rate": 0.0008627508753383895, "loss": 0.0132, "num_input_tokens_seen": 84124592, "step": 38920 }, { "epoch": 6.349918433931484, "grad_norm": 0.00839939247816801, "learning_rate": 0.0008627018842623657, "loss": 0.0675, "num_input_tokens_seen": 84134544, "step": 38925 }, { "epoch": 6.350734094616639, "grad_norm": 0.1650896817445755, "learning_rate": 0.0008626528858357283, "loss": 0.0926, "num_input_tokens_seen": 84144944, "step": 38930 }, { "epoch": 6.351549755301795, "grad_norm": 0.12865620851516724, "learning_rate": 0.0008626038800594703, "loss": 0.079, "num_input_tokens_seen": 84155312, "step": 38935 }, { "epoch": 6.35236541598695, "grad_norm": 0.030272645875811577, "learning_rate": 0.0008625548669345842, "loss": 0.0434, "num_input_tokens_seen": 84166096, "step": 38940 }, { "epoch": 6.353181076672104, "grad_norm": 0.018891897052526474, "learning_rate": 0.0008625058464620641, "loss": 0.1183, "num_input_tokens_seen": 84177264, "step": 38945 }, { "epoch": 6.353996737357259, "grad_norm": 0.14031964540481567, "learning_rate": 0.0008624568186429031, "loss": 0.056, "num_input_tokens_seen": 84188176, "step": 38950 }, { "epoch": 6.354812398042414, "grad_norm": 0.13088087737560272, "learning_rate": 0.0008624077834780948, "loss": 0.1551, "num_input_tokens_seen": 84199792, "step": 38955 }, { "epoch": 6.35562805872757, "grad_norm": 0.4480366110801697, "learning_rate": 0.000862358740968633, "loss": 0.2585, "num_input_tokens_seen": 84210832, "step": 38960 }, { "epoch": 6.356443719412725, "grad_norm": 0.07605596631765366, "learning_rate": 0.0008623096911155117, "loss": 0.0494, "num_input_tokens_seen": 84220496, "step": 38965 }, { "epoch": 6.357259380097879, "grad_norm": 0.0019360106671229005, "learning_rate": 0.000862260633919725, "loss": 0.1394, "num_input_tokens_seen": 84231472, "step": 38970 }, { "epoch": 6.358075040783034, "grad_norm": 0.011364354752004147, "learning_rate": 0.0008622115693822668, "loss": 0.2661, "num_input_tokens_seen": 84242608, "step": 38975 }, { "epoch": 6.358890701468189, "grad_norm": 0.053338050842285156, "learning_rate": 0.0008621624975041316, "loss": 0.1578, "num_input_tokens_seen": 84254000, "step": 38980 }, { "epoch": 6.359706362153344, "grad_norm": 0.017331453040242195, "learning_rate": 0.0008621134182863142, "loss": 0.0381, "num_input_tokens_seen": 84263952, "step": 38985 }, { "epoch": 6.3605220228384995, "grad_norm": 0.24667911231517792, "learning_rate": 0.0008620643317298088, "loss": 0.1638, "num_input_tokens_seen": 84274832, "step": 38990 }, { "epoch": 6.361337683523654, "grad_norm": 0.12548623979091644, "learning_rate": 0.0008620152378356105, "loss": 0.0421, "num_input_tokens_seen": 84285552, "step": 38995 }, { "epoch": 6.362153344208809, "grad_norm": 0.12679541110992432, "learning_rate": 0.0008619661366047141, "loss": 0.0568, "num_input_tokens_seen": 84296368, "step": 39000 }, { "epoch": 6.362969004893964, "grad_norm": 0.13178208470344543, "learning_rate": 0.0008619170280381148, "loss": 0.0628, "num_input_tokens_seen": 84307792, "step": 39005 }, { "epoch": 6.363784665579119, "grad_norm": 0.051386695355176926, "learning_rate": 0.0008618679121368078, "loss": 0.0598, "num_input_tokens_seen": 84318000, "step": 39010 }, { "epoch": 6.364600326264274, "grad_norm": 0.017814617604017258, "learning_rate": 0.0008618187889017886, "loss": 0.0297, "num_input_tokens_seen": 84328368, "step": 39015 }, { "epoch": 6.365415986949429, "grad_norm": 0.04465307667851448, "learning_rate": 0.0008617696583340524, "loss": 0.0384, "num_input_tokens_seen": 84340528, "step": 39020 }, { "epoch": 6.366231647634584, "grad_norm": 0.003912598360329866, "learning_rate": 0.0008617205204345952, "loss": 0.2345, "num_input_tokens_seen": 84351344, "step": 39025 }, { "epoch": 6.367047308319739, "grad_norm": 0.15951602160930634, "learning_rate": 0.000861671375204413, "loss": 0.1794, "num_input_tokens_seen": 84361968, "step": 39030 }, { "epoch": 6.367862969004894, "grad_norm": 0.011443092487752438, "learning_rate": 0.0008616222226445014, "loss": 0.0899, "num_input_tokens_seen": 84372432, "step": 39035 }, { "epoch": 6.368678629690049, "grad_norm": 0.21486307680606842, "learning_rate": 0.0008615730627558566, "loss": 0.0626, "num_input_tokens_seen": 84382896, "step": 39040 }, { "epoch": 6.369494290375204, "grad_norm": 0.10696162283420563, "learning_rate": 0.0008615238955394753, "loss": 0.1678, "num_input_tokens_seen": 84392464, "step": 39045 }, { "epoch": 6.370309951060359, "grad_norm": 0.21196526288986206, "learning_rate": 0.0008614747209963534, "loss": 0.1264, "num_input_tokens_seen": 84403504, "step": 39050 }, { "epoch": 6.371125611745514, "grad_norm": 0.012137340381741524, "learning_rate": 0.0008614255391274877, "loss": 0.1791, "num_input_tokens_seen": 84413904, "step": 39055 }, { "epoch": 6.371941272430669, "grad_norm": 0.04827677458524704, "learning_rate": 0.0008613763499338751, "loss": 0.157, "num_input_tokens_seen": 84425392, "step": 39060 }, { "epoch": 6.372756933115824, "grad_norm": 0.010995679534971714, "learning_rate": 0.0008613271534165121, "loss": 0.0305, "num_input_tokens_seen": 84436624, "step": 39065 }, { "epoch": 6.373572593800978, "grad_norm": 0.11564923822879791, "learning_rate": 0.0008612779495763963, "loss": 0.0988, "num_input_tokens_seen": 84446352, "step": 39070 }, { "epoch": 6.374388254486134, "grad_norm": 0.1190088763833046, "learning_rate": 0.0008612287384145243, "loss": 0.0846, "num_input_tokens_seen": 84457648, "step": 39075 }, { "epoch": 6.375203915171289, "grad_norm": 0.01807386428117752, "learning_rate": 0.0008611795199318937, "loss": 0.0507, "num_input_tokens_seen": 84467792, "step": 39080 }, { "epoch": 6.376019575856444, "grad_norm": 0.17465393245220184, "learning_rate": 0.000861130294129502, "loss": 0.0798, "num_input_tokens_seen": 84478448, "step": 39085 }, { "epoch": 6.376835236541599, "grad_norm": 0.067719466984272, "learning_rate": 0.0008610810610083466, "loss": 0.0597, "num_input_tokens_seen": 84489200, "step": 39090 }, { "epoch": 6.377650897226753, "grad_norm": 0.05747228488326073, "learning_rate": 0.0008610318205694256, "loss": 0.2849, "num_input_tokens_seen": 84499536, "step": 39095 }, { "epoch": 6.378466557911908, "grad_norm": 0.028629053384065628, "learning_rate": 0.0008609825728137366, "loss": 0.1891, "num_input_tokens_seen": 84510960, "step": 39100 }, { "epoch": 6.379282218597064, "grad_norm": 0.08141916245222092, "learning_rate": 0.000860933317742278, "loss": 0.0877, "num_input_tokens_seen": 84522704, "step": 39105 }, { "epoch": 6.380097879282219, "grad_norm": 0.06588222086429596, "learning_rate": 0.0008608840553560478, "loss": 0.056, "num_input_tokens_seen": 84532688, "step": 39110 }, { "epoch": 6.3809135399673735, "grad_norm": 0.20890022814273834, "learning_rate": 0.0008608347856560443, "loss": 0.0917, "num_input_tokens_seen": 84543312, "step": 39115 }, { "epoch": 6.381729200652528, "grad_norm": 0.0732296034693718, "learning_rate": 0.0008607855086432663, "loss": 0.0376, "num_input_tokens_seen": 84555024, "step": 39120 }, { "epoch": 6.382544861337683, "grad_norm": 0.04963723570108414, "learning_rate": 0.0008607362243187121, "loss": 0.0424, "num_input_tokens_seen": 84565456, "step": 39125 }, { "epoch": 6.383360522022839, "grad_norm": 0.03612956404685974, "learning_rate": 0.0008606869326833809, "loss": 0.1472, "num_input_tokens_seen": 84576880, "step": 39130 }, { "epoch": 6.384176182707994, "grad_norm": 0.012030171230435371, "learning_rate": 0.0008606376337382711, "loss": 0.0992, "num_input_tokens_seen": 84588176, "step": 39135 }, { "epoch": 6.3849918433931485, "grad_norm": 0.02331584133207798, "learning_rate": 0.0008605883274843824, "loss": 0.0305, "num_input_tokens_seen": 84598448, "step": 39140 }, { "epoch": 6.385807504078303, "grad_norm": 0.19398914277553558, "learning_rate": 0.0008605390139227137, "loss": 0.1123, "num_input_tokens_seen": 84609904, "step": 39145 }, { "epoch": 6.386623164763458, "grad_norm": 0.21747860312461853, "learning_rate": 0.0008604896930542645, "loss": 0.0518, "num_input_tokens_seen": 84621008, "step": 39150 }, { "epoch": 6.387438825448613, "grad_norm": 0.14797638356685638, "learning_rate": 0.0008604403648800346, "loss": 0.0327, "num_input_tokens_seen": 84632048, "step": 39155 }, { "epoch": 6.388254486133769, "grad_norm": 0.024937812238931656, "learning_rate": 0.0008603910294010231, "loss": 0.1336, "num_input_tokens_seen": 84641488, "step": 39160 }, { "epoch": 6.3890701468189235, "grad_norm": 0.027049263939261436, "learning_rate": 0.0008603416866182305, "loss": 0.0497, "num_input_tokens_seen": 84651664, "step": 39165 }, { "epoch": 6.389885807504078, "grad_norm": 0.20507164299488068, "learning_rate": 0.0008602923365326563, "loss": 0.0912, "num_input_tokens_seen": 84662000, "step": 39170 }, { "epoch": 6.390701468189233, "grad_norm": 0.016058241948485374, "learning_rate": 0.000860242979145301, "loss": 0.1774, "num_input_tokens_seen": 84674000, "step": 39175 }, { "epoch": 6.391517128874388, "grad_norm": 0.07409879565238953, "learning_rate": 0.0008601936144571646, "loss": 0.0802, "num_input_tokens_seen": 84685200, "step": 39180 }, { "epoch": 6.392332789559543, "grad_norm": 0.22600983083248138, "learning_rate": 0.0008601442424692476, "loss": 0.1863, "num_input_tokens_seen": 84695984, "step": 39185 }, { "epoch": 6.3931484502446985, "grad_norm": 0.015394588001072407, "learning_rate": 0.0008600948631825508, "loss": 0.0487, "num_input_tokens_seen": 84705936, "step": 39190 }, { "epoch": 6.393964110929853, "grad_norm": 0.22620944678783417, "learning_rate": 0.0008600454765980747, "loss": 0.0313, "num_input_tokens_seen": 84716144, "step": 39195 }, { "epoch": 6.394779771615008, "grad_norm": 0.01256847195327282, "learning_rate": 0.0008599960827168204, "loss": 0.0432, "num_input_tokens_seen": 84726704, "step": 39200 }, { "epoch": 6.395595432300163, "grad_norm": 0.01900371164083481, "learning_rate": 0.0008599466815397886, "loss": 0.1159, "num_input_tokens_seen": 84736784, "step": 39205 }, { "epoch": 6.396411092985318, "grad_norm": 0.32982227206230164, "learning_rate": 0.0008598972730679809, "loss": 0.2601, "num_input_tokens_seen": 84746672, "step": 39210 }, { "epoch": 6.397226753670473, "grad_norm": 0.06382697820663452, "learning_rate": 0.0008598478573023982, "loss": 0.0882, "num_input_tokens_seen": 84756560, "step": 39215 }, { "epoch": 6.398042414355628, "grad_norm": 0.009049140848219395, "learning_rate": 0.0008597984342440421, "loss": 0.0119, "num_input_tokens_seen": 84768592, "step": 39220 }, { "epoch": 6.398858075040783, "grad_norm": 0.06087180972099304, "learning_rate": 0.0008597490038939145, "loss": 0.0424, "num_input_tokens_seen": 84778736, "step": 39225 }, { "epoch": 6.399673735725938, "grad_norm": 0.021845834329724312, "learning_rate": 0.0008596995662530169, "loss": 0.0358, "num_input_tokens_seen": 84789008, "step": 39230 }, { "epoch": 6.400489396411093, "grad_norm": 0.005335748661309481, "learning_rate": 0.0008596501213223514, "loss": 0.1087, "num_input_tokens_seen": 84800816, "step": 39235 }, { "epoch": 6.401305057096248, "grad_norm": 0.054845456033945084, "learning_rate": 0.0008596006691029196, "loss": 0.0228, "num_input_tokens_seen": 84811152, "step": 39240 }, { "epoch": 6.402120717781403, "grad_norm": 0.03387339413166046, "learning_rate": 0.0008595512095957244, "loss": 0.0684, "num_input_tokens_seen": 84822032, "step": 39245 }, { "epoch": 6.402936378466558, "grad_norm": 0.10483232140541077, "learning_rate": 0.0008595017428017677, "loss": 0.0402, "num_input_tokens_seen": 84832144, "step": 39250 }, { "epoch": 6.403752039151713, "grad_norm": 0.015923313796520233, "learning_rate": 0.000859452268722052, "loss": 0.0227, "num_input_tokens_seen": 84843440, "step": 39255 }, { "epoch": 6.404567699836868, "grad_norm": 0.0959169939160347, "learning_rate": 0.0008594027873575803, "loss": 0.1808, "num_input_tokens_seen": 84852688, "step": 39260 }, { "epoch": 6.4053833605220225, "grad_norm": 0.2450348436832428, "learning_rate": 0.0008593532987093551, "loss": 0.1963, "num_input_tokens_seen": 84862544, "step": 39265 }, { "epoch": 6.406199021207178, "grad_norm": 0.015201598405838013, "learning_rate": 0.0008593038027783793, "loss": 0.068, "num_input_tokens_seen": 84873744, "step": 39270 }, { "epoch": 6.407014681892333, "grad_norm": 0.19144880771636963, "learning_rate": 0.0008592542995656563, "loss": 0.2351, "num_input_tokens_seen": 84883856, "step": 39275 }, { "epoch": 6.407830342577488, "grad_norm": 0.021460549905896187, "learning_rate": 0.000859204789072189, "loss": 0.044, "num_input_tokens_seen": 84895536, "step": 39280 }, { "epoch": 6.408646003262643, "grad_norm": 0.025125499814748764, "learning_rate": 0.0008591552712989812, "loss": 0.0558, "num_input_tokens_seen": 84905840, "step": 39285 }, { "epoch": 6.4094616639477975, "grad_norm": 0.007889053784310818, "learning_rate": 0.0008591057462470359, "loss": 0.1521, "num_input_tokens_seen": 84917008, "step": 39290 }, { "epoch": 6.410277324632952, "grad_norm": 0.07312604784965515, "learning_rate": 0.0008590562139173573, "loss": 0.0741, "num_input_tokens_seen": 84929424, "step": 39295 }, { "epoch": 6.411092985318108, "grad_norm": 0.1872720569372177, "learning_rate": 0.000859006674310949, "loss": 0.048, "num_input_tokens_seen": 84940400, "step": 39300 }, { "epoch": 6.411908646003263, "grad_norm": 0.024740254506468773, "learning_rate": 0.000858957127428815, "loss": 0.0569, "num_input_tokens_seen": 84951888, "step": 39305 }, { "epoch": 6.412724306688418, "grad_norm": 0.2020983099937439, "learning_rate": 0.0008589075732719594, "loss": 0.1, "num_input_tokens_seen": 84961232, "step": 39310 }, { "epoch": 6.4135399673735725, "grad_norm": 0.021523285657167435, "learning_rate": 0.0008588580118413867, "loss": 0.0291, "num_input_tokens_seen": 84972240, "step": 39315 }, { "epoch": 6.414355628058727, "grad_norm": 0.046101946383714676, "learning_rate": 0.0008588084431381009, "loss": 0.0816, "num_input_tokens_seen": 84983408, "step": 39320 }, { "epoch": 6.415171288743883, "grad_norm": 0.02815198339521885, "learning_rate": 0.000858758867163107, "loss": 0.0823, "num_input_tokens_seen": 84994128, "step": 39325 }, { "epoch": 6.415986949429038, "grad_norm": 0.13036702573299408, "learning_rate": 0.0008587092839174096, "loss": 0.0584, "num_input_tokens_seen": 85005904, "step": 39330 }, { "epoch": 6.416802610114193, "grad_norm": 0.004371070768684149, "learning_rate": 0.0008586596934020132, "loss": 0.0292, "num_input_tokens_seen": 85016624, "step": 39335 }, { "epoch": 6.417618270799347, "grad_norm": 0.015720317140221596, "learning_rate": 0.0008586100956179234, "loss": 0.1512, "num_input_tokens_seen": 85027152, "step": 39340 }, { "epoch": 6.418433931484502, "grad_norm": 0.21012809872627258, "learning_rate": 0.000858560490566145, "loss": 0.0698, "num_input_tokens_seen": 85036784, "step": 39345 }, { "epoch": 6.419249592169657, "grad_norm": 0.013738599605858326, "learning_rate": 0.0008585108782476834, "loss": 0.0278, "num_input_tokens_seen": 85047440, "step": 39350 }, { "epoch": 6.420065252854813, "grad_norm": 0.1178101897239685, "learning_rate": 0.000858461258663544, "loss": 0.0739, "num_input_tokens_seen": 85057744, "step": 39355 }, { "epoch": 6.420880913539968, "grad_norm": 0.007661431562155485, "learning_rate": 0.0008584116318147324, "loss": 0.0287, "num_input_tokens_seen": 85069392, "step": 39360 }, { "epoch": 6.421696574225122, "grad_norm": 0.06654492020606995, "learning_rate": 0.0008583619977022546, "loss": 0.0798, "num_input_tokens_seen": 85080560, "step": 39365 }, { "epoch": 6.422512234910277, "grad_norm": 0.043904032558202744, "learning_rate": 0.000858312356327116, "loss": 0.0626, "num_input_tokens_seen": 85091344, "step": 39370 }, { "epoch": 6.423327895595432, "grad_norm": 0.04809953272342682, "learning_rate": 0.0008582627076903232, "loss": 0.1128, "num_input_tokens_seen": 85102256, "step": 39375 }, { "epoch": 6.424143556280587, "grad_norm": 0.3738861680030823, "learning_rate": 0.0008582130517928821, "loss": 0.2677, "num_input_tokens_seen": 85113168, "step": 39380 }, { "epoch": 6.424959216965743, "grad_norm": 0.010011561214923859, "learning_rate": 0.000858163388635799, "loss": 0.0636, "num_input_tokens_seen": 85123504, "step": 39385 }, { "epoch": 6.425774877650897, "grad_norm": 0.028765080496668816, "learning_rate": 0.0008581137182200806, "loss": 0.0107, "num_input_tokens_seen": 85133584, "step": 39390 }, { "epoch": 6.426590538336052, "grad_norm": 0.2487831860780716, "learning_rate": 0.0008580640405467333, "loss": 0.0799, "num_input_tokens_seen": 85143344, "step": 39395 }, { "epoch": 6.427406199021207, "grad_norm": 0.1759326159954071, "learning_rate": 0.0008580143556167638, "loss": 0.0973, "num_input_tokens_seen": 85154000, "step": 39400 }, { "epoch": 6.428221859706362, "grad_norm": 0.02199672721326351, "learning_rate": 0.0008579646634311795, "loss": 0.0562, "num_input_tokens_seen": 85164432, "step": 39405 }, { "epoch": 6.4290375203915175, "grad_norm": 0.16176863014698029, "learning_rate": 0.0008579149639909872, "loss": 0.045, "num_input_tokens_seen": 85175152, "step": 39410 }, { "epoch": 6.429853181076672, "grad_norm": 0.05018671602010727, "learning_rate": 0.0008578652572971939, "loss": 0.0616, "num_input_tokens_seen": 85187248, "step": 39415 }, { "epoch": 6.430668841761827, "grad_norm": 0.005079567898064852, "learning_rate": 0.0008578155433508073, "loss": 0.2459, "num_input_tokens_seen": 85199120, "step": 39420 }, { "epoch": 6.431484502446982, "grad_norm": 0.016508281230926514, "learning_rate": 0.0008577658221528349, "loss": 0.0143, "num_input_tokens_seen": 85209840, "step": 39425 }, { "epoch": 6.432300163132137, "grad_norm": 0.001730692689307034, "learning_rate": 0.000857716093704284, "loss": 0.1407, "num_input_tokens_seen": 85220592, "step": 39430 }, { "epoch": 6.433115823817292, "grad_norm": 0.013995840214192867, "learning_rate": 0.0008576663580061628, "loss": 0.0532, "num_input_tokens_seen": 85232656, "step": 39435 }, { "epoch": 6.433931484502447, "grad_norm": 0.27595826983451843, "learning_rate": 0.0008576166150594792, "loss": 0.1849, "num_input_tokens_seen": 85244368, "step": 39440 }, { "epoch": 6.434747145187602, "grad_norm": 0.025783469900488853, "learning_rate": 0.0008575668648652411, "loss": 0.1106, "num_input_tokens_seen": 85252976, "step": 39445 }, { "epoch": 6.435562805872757, "grad_norm": 0.32373544573783875, "learning_rate": 0.0008575171074244568, "loss": 0.1423, "num_input_tokens_seen": 85263984, "step": 39450 }, { "epoch": 6.436378466557912, "grad_norm": 0.025260545313358307, "learning_rate": 0.000857467342738135, "loss": 0.0328, "num_input_tokens_seen": 85274000, "step": 39455 }, { "epoch": 6.437194127243067, "grad_norm": 0.020506108179688454, "learning_rate": 0.000857417570807284, "loss": 0.1123, "num_input_tokens_seen": 85285584, "step": 39460 }, { "epoch": 6.438009787928221, "grad_norm": 0.013910512439906597, "learning_rate": 0.0008573677916329124, "loss": 0.0458, "num_input_tokens_seen": 85297072, "step": 39465 }, { "epoch": 6.438825448613377, "grad_norm": 0.011981474235653877, "learning_rate": 0.0008573180052160291, "loss": 0.0254, "num_input_tokens_seen": 85307248, "step": 39470 }, { "epoch": 6.439641109298532, "grad_norm": 0.15529952943325043, "learning_rate": 0.0008572682115576433, "loss": 0.121, "num_input_tokens_seen": 85318384, "step": 39475 }, { "epoch": 6.440456769983687, "grad_norm": 0.010499502532184124, "learning_rate": 0.0008572184106587638, "loss": 0.09, "num_input_tokens_seen": 85328208, "step": 39480 }, { "epoch": 6.441272430668842, "grad_norm": 0.1622689962387085, "learning_rate": 0.0008571686025204002, "loss": 0.1659, "num_input_tokens_seen": 85338960, "step": 39485 }, { "epoch": 6.442088091353996, "grad_norm": 0.18635737895965576, "learning_rate": 0.0008571187871435616, "loss": 0.1749, "num_input_tokens_seen": 85349904, "step": 39490 }, { "epoch": 6.442903752039152, "grad_norm": 0.3250514566898346, "learning_rate": 0.0008570689645292579, "loss": 0.1153, "num_input_tokens_seen": 85361008, "step": 39495 }, { "epoch": 6.443719412724307, "grad_norm": 0.012182512320578098, "learning_rate": 0.0008570191346784986, "loss": 0.0405, "num_input_tokens_seen": 85372336, "step": 39500 }, { "epoch": 6.444535073409462, "grad_norm": 0.009086497128009796, "learning_rate": 0.0008569692975922935, "loss": 0.0852, "num_input_tokens_seen": 85384112, "step": 39505 }, { "epoch": 6.445350734094617, "grad_norm": 0.27018457651138306, "learning_rate": 0.0008569194532716529, "loss": 0.0806, "num_input_tokens_seen": 85394192, "step": 39510 }, { "epoch": 6.446166394779771, "grad_norm": 0.17779019474983215, "learning_rate": 0.0008568696017175868, "loss": 0.0703, "num_input_tokens_seen": 85405744, "step": 39515 }, { "epoch": 6.446982055464926, "grad_norm": 0.020411750301718712, "learning_rate": 0.0008568197429311054, "loss": 0.0819, "num_input_tokens_seen": 85417968, "step": 39520 }, { "epoch": 6.447797716150082, "grad_norm": 0.013875674456357956, "learning_rate": 0.0008567698769132193, "loss": 0.0579, "num_input_tokens_seen": 85428112, "step": 39525 }, { "epoch": 6.448613376835237, "grad_norm": 0.031358882784843445, "learning_rate": 0.0008567200036649391, "loss": 0.1543, "num_input_tokens_seen": 85438800, "step": 39530 }, { "epoch": 6.4494290375203915, "grad_norm": 0.21101713180541992, "learning_rate": 0.0008566701231872753, "loss": 0.1086, "num_input_tokens_seen": 85450640, "step": 39535 }, { "epoch": 6.450244698205546, "grad_norm": 0.375827431678772, "learning_rate": 0.0008566202354812392, "loss": 0.1573, "num_input_tokens_seen": 85461712, "step": 39540 }, { "epoch": 6.451060358890701, "grad_norm": 0.15580013394355774, "learning_rate": 0.0008565703405478415, "loss": 0.171, "num_input_tokens_seen": 85472464, "step": 39545 }, { "epoch": 6.451876019575856, "grad_norm": 0.019443074241280556, "learning_rate": 0.0008565204383880937, "loss": 0.0262, "num_input_tokens_seen": 85483472, "step": 39550 }, { "epoch": 6.452691680261012, "grad_norm": 0.01120977383106947, "learning_rate": 0.0008564705290030068, "loss": 0.0563, "num_input_tokens_seen": 85492976, "step": 39555 }, { "epoch": 6.4535073409461665, "grad_norm": 0.015544314868748188, "learning_rate": 0.0008564206123935924, "loss": 0.0935, "num_input_tokens_seen": 85503216, "step": 39560 }, { "epoch": 6.454323001631321, "grad_norm": 0.042205292731523514, "learning_rate": 0.0008563706885608622, "loss": 0.0113, "num_input_tokens_seen": 85513616, "step": 39565 }, { "epoch": 6.455138662316476, "grad_norm": 0.04667263105511665, "learning_rate": 0.0008563207575058279, "loss": 0.1049, "num_input_tokens_seen": 85524624, "step": 39570 }, { "epoch": 6.455954323001631, "grad_norm": 0.21057234704494476, "learning_rate": 0.0008562708192295012, "loss": 0.1132, "num_input_tokens_seen": 85535760, "step": 39575 }, { "epoch": 6.456769983686787, "grad_norm": 0.007653217297047377, "learning_rate": 0.0008562208737328947, "loss": 0.0387, "num_input_tokens_seen": 85546576, "step": 39580 }, { "epoch": 6.4575856443719415, "grad_norm": 0.14348191022872925, "learning_rate": 0.0008561709210170201, "loss": 0.0989, "num_input_tokens_seen": 85556592, "step": 39585 }, { "epoch": 6.458401305057096, "grad_norm": 0.1331503540277481, "learning_rate": 0.00085612096108289, "loss": 0.0442, "num_input_tokens_seen": 85566640, "step": 39590 }, { "epoch": 6.459216965742251, "grad_norm": 0.10012613236904144, "learning_rate": 0.0008560709939315169, "loss": 0.032, "num_input_tokens_seen": 85577552, "step": 39595 }, { "epoch": 6.460032626427406, "grad_norm": 0.011143765412271023, "learning_rate": 0.0008560210195639133, "loss": 0.1716, "num_input_tokens_seen": 85588560, "step": 39600 }, { "epoch": 6.460848287112561, "grad_norm": 0.08696023374795914, "learning_rate": 0.0008559710379810922, "loss": 0.0537, "num_input_tokens_seen": 85599440, "step": 39605 }, { "epoch": 6.4616639477977165, "grad_norm": 0.01324189268052578, "learning_rate": 0.0008559210491840664, "loss": 0.0355, "num_input_tokens_seen": 85610192, "step": 39610 }, { "epoch": 6.462479608482871, "grad_norm": 0.02257055602967739, "learning_rate": 0.0008558710531738489, "loss": 0.2433, "num_input_tokens_seen": 85620112, "step": 39615 }, { "epoch": 6.463295269168026, "grad_norm": 0.016799110919237137, "learning_rate": 0.0008558210499514532, "loss": 0.0371, "num_input_tokens_seen": 85630832, "step": 39620 }, { "epoch": 6.464110929853181, "grad_norm": 0.028752895072102547, "learning_rate": 0.0008557710395178926, "loss": 0.1295, "num_input_tokens_seen": 85641840, "step": 39625 }, { "epoch": 6.464926590538336, "grad_norm": 0.04469837620854378, "learning_rate": 0.0008557210218741805, "loss": 0.0474, "num_input_tokens_seen": 85652112, "step": 39630 }, { "epoch": 6.465742251223491, "grad_norm": 0.007485832553356886, "learning_rate": 0.0008556709970213305, "loss": 0.0592, "num_input_tokens_seen": 85662352, "step": 39635 }, { "epoch": 6.466557911908646, "grad_norm": 0.014441374689340591, "learning_rate": 0.0008556209649603566, "loss": 0.1465, "num_input_tokens_seen": 85674576, "step": 39640 }, { "epoch": 6.467373572593801, "grad_norm": 0.0882072001695633, "learning_rate": 0.0008555709256922728, "loss": 0.0681, "num_input_tokens_seen": 85685008, "step": 39645 }, { "epoch": 6.468189233278956, "grad_norm": 0.22026503086090088, "learning_rate": 0.0008555208792180931, "loss": 0.1859, "num_input_tokens_seen": 85695600, "step": 39650 }, { "epoch": 6.469004893964111, "grad_norm": 0.30590370297431946, "learning_rate": 0.0008554708255388317, "loss": 0.0633, "num_input_tokens_seen": 85706896, "step": 39655 }, { "epoch": 6.4698205546492655, "grad_norm": 0.010739093646407127, "learning_rate": 0.0008554207646555032, "loss": 0.0633, "num_input_tokens_seen": 85718640, "step": 39660 }, { "epoch": 6.470636215334421, "grad_norm": 0.034509677439928055, "learning_rate": 0.0008553706965691218, "loss": 0.0709, "num_input_tokens_seen": 85728592, "step": 39665 }, { "epoch": 6.471451876019576, "grad_norm": 0.1682896912097931, "learning_rate": 0.0008553206212807026, "loss": 0.1079, "num_input_tokens_seen": 85739024, "step": 39670 }, { "epoch": 6.472267536704731, "grad_norm": 0.16085843741893768, "learning_rate": 0.0008552705387912602, "loss": 0.1775, "num_input_tokens_seen": 85749872, "step": 39675 }, { "epoch": 6.473083197389886, "grad_norm": 0.18290142714977264, "learning_rate": 0.0008552204491018096, "loss": 0.0961, "num_input_tokens_seen": 85760208, "step": 39680 }, { "epoch": 6.4738988580750405, "grad_norm": 0.21753232181072235, "learning_rate": 0.000855170352213366, "loss": 0.2405, "num_input_tokens_seen": 85770448, "step": 39685 }, { "epoch": 6.474714518760196, "grad_norm": 0.037998493760824203, "learning_rate": 0.0008551202481269446, "loss": 0.1286, "num_input_tokens_seen": 85782288, "step": 39690 }, { "epoch": 6.475530179445351, "grad_norm": 0.32717499136924744, "learning_rate": 0.000855070136843561, "loss": 0.1742, "num_input_tokens_seen": 85793264, "step": 39695 }, { "epoch": 6.476345840130506, "grad_norm": 0.0370999239385128, "learning_rate": 0.0008550200183642304, "loss": 0.1615, "num_input_tokens_seen": 85803952, "step": 39700 }, { "epoch": 6.477161500815661, "grad_norm": 0.025982806459069252, "learning_rate": 0.000854969892689969, "loss": 0.026, "num_input_tokens_seen": 85814576, "step": 39705 }, { "epoch": 6.4779771615008155, "grad_norm": 0.4047028124332428, "learning_rate": 0.0008549197598217923, "loss": 0.1137, "num_input_tokens_seen": 85824560, "step": 39710 }, { "epoch": 6.47879282218597, "grad_norm": 0.01007707417011261, "learning_rate": 0.0008548696197607165, "loss": 0.055, "num_input_tokens_seen": 85834896, "step": 39715 }, { "epoch": 6.479608482871126, "grad_norm": 0.22528640925884247, "learning_rate": 0.0008548194725077576, "loss": 0.0809, "num_input_tokens_seen": 85845840, "step": 39720 }, { "epoch": 6.480424143556281, "grad_norm": 0.05834813788533211, "learning_rate": 0.000854769318063932, "loss": 0.0625, "num_input_tokens_seen": 85856464, "step": 39725 }, { "epoch": 6.481239804241436, "grad_norm": 0.03998285531997681, "learning_rate": 0.0008547191564302561, "loss": 0.0396, "num_input_tokens_seen": 85868272, "step": 39730 }, { "epoch": 6.4820554649265905, "grad_norm": 0.003781791077926755, "learning_rate": 0.0008546689876077464, "loss": 0.0394, "num_input_tokens_seen": 85877936, "step": 39735 }, { "epoch": 6.482871125611745, "grad_norm": 0.012051782570779324, "learning_rate": 0.0008546188115974198, "loss": 0.1194, "num_input_tokens_seen": 85889296, "step": 39740 }, { "epoch": 6.4836867862969, "grad_norm": 0.040543217211961746, "learning_rate": 0.0008545686284002932, "loss": 0.0667, "num_input_tokens_seen": 85900496, "step": 39745 }, { "epoch": 6.484502446982056, "grad_norm": 0.03690031170845032, "learning_rate": 0.0008545184380173835, "loss": 0.0351, "num_input_tokens_seen": 85912976, "step": 39750 }, { "epoch": 6.485318107667211, "grad_norm": 0.06059866026043892, "learning_rate": 0.0008544682404497079, "loss": 0.0299, "num_input_tokens_seen": 85924112, "step": 39755 }, { "epoch": 6.486133768352365, "grad_norm": 0.008430173620581627, "learning_rate": 0.0008544180356982838, "loss": 0.0773, "num_input_tokens_seen": 85935152, "step": 39760 }, { "epoch": 6.48694942903752, "grad_norm": 0.016898339614272118, "learning_rate": 0.0008543678237641284, "loss": 0.0268, "num_input_tokens_seen": 85945616, "step": 39765 }, { "epoch": 6.487765089722675, "grad_norm": 0.24281612038612366, "learning_rate": 0.0008543176046482597, "loss": 0.1337, "num_input_tokens_seen": 85956304, "step": 39770 }, { "epoch": 6.488580750407831, "grad_norm": 0.17431525886058807, "learning_rate": 0.0008542673783516952, "loss": 0.0515, "num_input_tokens_seen": 85968752, "step": 39775 }, { "epoch": 6.489396411092986, "grad_norm": 0.10620342195034027, "learning_rate": 0.0008542171448754528, "loss": 0.0484, "num_input_tokens_seen": 85979600, "step": 39780 }, { "epoch": 6.49021207177814, "grad_norm": 0.2232169657945633, "learning_rate": 0.0008541669042205507, "loss": 0.0962, "num_input_tokens_seen": 85990416, "step": 39785 }, { "epoch": 6.491027732463295, "grad_norm": 0.11217696219682693, "learning_rate": 0.0008541166563880069, "loss": 0.0293, "num_input_tokens_seen": 86002064, "step": 39790 }, { "epoch": 6.49184339314845, "grad_norm": 0.17561346292495728, "learning_rate": 0.00085406640137884, "loss": 0.2404, "num_input_tokens_seen": 86012240, "step": 39795 }, { "epoch": 6.492659053833605, "grad_norm": 0.3116014003753662, "learning_rate": 0.0008540161391940681, "loss": 0.1079, "num_input_tokens_seen": 86022224, "step": 39800 }, { "epoch": 6.493474714518761, "grad_norm": 0.24169039726257324, "learning_rate": 0.0008539658698347102, "loss": 0.1294, "num_input_tokens_seen": 86032176, "step": 39805 }, { "epoch": 6.494290375203915, "grad_norm": 0.026461800560355186, "learning_rate": 0.0008539155933017848, "loss": 0.0391, "num_input_tokens_seen": 86043088, "step": 39810 }, { "epoch": 6.49510603588907, "grad_norm": 0.012956095859408379, "learning_rate": 0.0008538653095963109, "loss": 0.0505, "num_input_tokens_seen": 86053360, "step": 39815 }, { "epoch": 6.495921696574225, "grad_norm": 0.05258989706635475, "learning_rate": 0.0008538150187193076, "loss": 0.1171, "num_input_tokens_seen": 86063888, "step": 39820 }, { "epoch": 6.49673735725938, "grad_norm": 0.01811002753674984, "learning_rate": 0.0008537647206717942, "loss": 0.0675, "num_input_tokens_seen": 86074448, "step": 39825 }, { "epoch": 6.497553017944535, "grad_norm": 0.09665459394454956, "learning_rate": 0.00085371441545479, "loss": 0.0486, "num_input_tokens_seen": 86085392, "step": 39830 }, { "epoch": 6.49836867862969, "grad_norm": 0.020809080451726913, "learning_rate": 0.0008536641030693143, "loss": 0.0123, "num_input_tokens_seen": 86095120, "step": 39835 }, { "epoch": 6.499184339314845, "grad_norm": 0.033111944794654846, "learning_rate": 0.000853613783516387, "loss": 0.074, "num_input_tokens_seen": 86106000, "step": 39840 }, { "epoch": 6.5, "grad_norm": 0.01744101569056511, "learning_rate": 0.0008535634567970277, "loss": 0.0766, "num_input_tokens_seen": 86117456, "step": 39845 }, { "epoch": 6.500815660685155, "grad_norm": 0.05731053650379181, "learning_rate": 0.0008535131229122565, "loss": 0.1427, "num_input_tokens_seen": 86127984, "step": 39850 }, { "epoch": 6.50163132137031, "grad_norm": 0.0589575469493866, "learning_rate": 0.0008534627818630933, "loss": 0.0319, "num_input_tokens_seen": 86138928, "step": 39855 }, { "epoch": 6.502446982055465, "grad_norm": 0.08219664543867111, "learning_rate": 0.0008534124336505585, "loss": 0.0133, "num_input_tokens_seen": 86149424, "step": 39860 }, { "epoch": 6.50326264274062, "grad_norm": 0.2619974911212921, "learning_rate": 0.0008533620782756724, "loss": 0.2123, "num_input_tokens_seen": 86159376, "step": 39865 }, { "epoch": 6.504078303425775, "grad_norm": 0.08538439869880676, "learning_rate": 0.0008533117157394556, "loss": 0.0734, "num_input_tokens_seen": 86170320, "step": 39870 }, { "epoch": 6.50489396411093, "grad_norm": 0.011978470720350742, "learning_rate": 0.0008532613460429285, "loss": 0.0706, "num_input_tokens_seen": 86181136, "step": 39875 }, { "epoch": 6.505709624796085, "grad_norm": 0.03728099912405014, "learning_rate": 0.0008532109691871122, "loss": 0.0537, "num_input_tokens_seen": 86189808, "step": 39880 }, { "epoch": 6.506525285481239, "grad_norm": 0.05977741256356239, "learning_rate": 0.0008531605851730275, "loss": 0.0707, "num_input_tokens_seen": 86200752, "step": 39885 }, { "epoch": 6.507340946166395, "grad_norm": 0.005612279754132032, "learning_rate": 0.0008531101940016954, "loss": 0.0123, "num_input_tokens_seen": 86211088, "step": 39890 }, { "epoch": 6.50815660685155, "grad_norm": 0.012287049554288387, "learning_rate": 0.0008530597956741374, "loss": 0.0329, "num_input_tokens_seen": 86222576, "step": 39895 }, { "epoch": 6.508972267536705, "grad_norm": 0.07354990392923355, "learning_rate": 0.0008530093901913748, "loss": 0.0792, "num_input_tokens_seen": 86233008, "step": 39900 }, { "epoch": 6.50978792822186, "grad_norm": 0.0029094265773892403, "learning_rate": 0.000852958977554429, "loss": 0.0263, "num_input_tokens_seen": 86245296, "step": 39905 }, { "epoch": 6.510603588907014, "grad_norm": 0.019716400653123856, "learning_rate": 0.0008529085577643217, "loss": 0.0412, "num_input_tokens_seen": 86256016, "step": 39910 }, { "epoch": 6.511419249592169, "grad_norm": 0.0031939074397087097, "learning_rate": 0.0008528581308220748, "loss": 0.0833, "num_input_tokens_seen": 86267088, "step": 39915 }, { "epoch": 6.512234910277325, "grad_norm": 0.22552239894866943, "learning_rate": 0.0008528076967287103, "loss": 0.0648, "num_input_tokens_seen": 86278640, "step": 39920 }, { "epoch": 6.51305057096248, "grad_norm": 0.22357866168022156, "learning_rate": 0.0008527572554852502, "loss": 0.2424, "num_input_tokens_seen": 86289616, "step": 39925 }, { "epoch": 6.513866231647635, "grad_norm": 0.004282618407160044, "learning_rate": 0.0008527068070927169, "loss": 0.1507, "num_input_tokens_seen": 86300048, "step": 39930 }, { "epoch": 6.514681892332789, "grad_norm": 0.16823726892471313, "learning_rate": 0.0008526563515521327, "loss": 0.2235, "num_input_tokens_seen": 86312496, "step": 39935 }, { "epoch": 6.515497553017944, "grad_norm": 0.22176630795001984, "learning_rate": 0.0008526058888645202, "loss": 0.0328, "num_input_tokens_seen": 86322768, "step": 39940 }, { "epoch": 6.5163132137031, "grad_norm": 0.204921692609787, "learning_rate": 0.000852555419030902, "loss": 0.0765, "num_input_tokens_seen": 86333392, "step": 39945 }, { "epoch": 6.517128874388255, "grad_norm": 0.06411628425121307, "learning_rate": 0.000852504942052301, "loss": 0.0981, "num_input_tokens_seen": 86345456, "step": 39950 }, { "epoch": 6.5179445350734095, "grad_norm": 0.0035396197345107794, "learning_rate": 0.0008524544579297402, "loss": 0.0942, "num_input_tokens_seen": 86356464, "step": 39955 }, { "epoch": 6.518760195758564, "grad_norm": 0.28199630975723267, "learning_rate": 0.0008524039666642424, "loss": 0.2473, "num_input_tokens_seen": 86367120, "step": 39960 }, { "epoch": 6.519575856443719, "grad_norm": 0.025316933169960976, "learning_rate": 0.0008523534682568315, "loss": 0.0578, "num_input_tokens_seen": 86377712, "step": 39965 }, { "epoch": 6.520391517128875, "grad_norm": 0.04194442555308342, "learning_rate": 0.0008523029627085306, "loss": 0.0543, "num_input_tokens_seen": 86387760, "step": 39970 }, { "epoch": 6.52120717781403, "grad_norm": 0.10103817284107208, "learning_rate": 0.000852252450020363, "loss": 0.1271, "num_input_tokens_seen": 86398736, "step": 39975 }, { "epoch": 6.5220228384991845, "grad_norm": 0.018434442579746246, "learning_rate": 0.0008522019301933528, "loss": 0.0569, "num_input_tokens_seen": 86409648, "step": 39980 }, { "epoch": 6.522838499184339, "grad_norm": 0.22668053209781647, "learning_rate": 0.0008521514032285236, "loss": 0.099, "num_input_tokens_seen": 86420432, "step": 39985 }, { "epoch": 6.523654159869494, "grad_norm": 0.07056698203086853, "learning_rate": 0.0008521008691268994, "loss": 0.0622, "num_input_tokens_seen": 86431536, "step": 39990 }, { "epoch": 6.524469820554649, "grad_norm": 0.17081086337566376, "learning_rate": 0.0008520503278895045, "loss": 0.0583, "num_input_tokens_seen": 86442800, "step": 39995 }, { "epoch": 6.525285481239804, "grad_norm": 0.07947465777397156, "learning_rate": 0.0008519997795173632, "loss": 0.041, "num_input_tokens_seen": 86454224, "step": 40000 }, { "epoch": 6.5261011419249595, "grad_norm": 0.027177680283784866, "learning_rate": 0.0008519492240114996, "loss": 0.1133, "num_input_tokens_seen": 86464656, "step": 40005 }, { "epoch": 6.526916802610114, "grad_norm": 0.0470040962100029, "learning_rate": 0.0008518986613729387, "loss": 0.0338, "num_input_tokens_seen": 86476336, "step": 40010 }, { "epoch": 6.527732463295269, "grad_norm": 0.2636547386646271, "learning_rate": 0.0008518480916027049, "loss": 0.0809, "num_input_tokens_seen": 86486864, "step": 40015 }, { "epoch": 6.528548123980424, "grad_norm": 0.047732576727867126, "learning_rate": 0.0008517975147018233, "loss": 0.0607, "num_input_tokens_seen": 86497424, "step": 40020 }, { "epoch": 6.529363784665579, "grad_norm": 0.025507677346467972, "learning_rate": 0.0008517469306713187, "loss": 0.0947, "num_input_tokens_seen": 86508080, "step": 40025 }, { "epoch": 6.5301794453507345, "grad_norm": 0.026355070993304253, "learning_rate": 0.0008516963395122163, "loss": 0.1351, "num_input_tokens_seen": 86518608, "step": 40030 }, { "epoch": 6.530995106035889, "grad_norm": 0.004582028370350599, "learning_rate": 0.0008516457412255414, "loss": 0.0083, "num_input_tokens_seen": 86530160, "step": 40035 }, { "epoch": 6.531810766721044, "grad_norm": 0.24956628680229187, "learning_rate": 0.0008515951358123195, "loss": 0.0364, "num_input_tokens_seen": 86540592, "step": 40040 }, { "epoch": 6.532626427406199, "grad_norm": 0.013599184341728687, "learning_rate": 0.0008515445232735761, "loss": 0.2517, "num_input_tokens_seen": 86551152, "step": 40045 }, { "epoch": 6.533442088091354, "grad_norm": 0.006095684599131346, "learning_rate": 0.0008514939036103371, "loss": 0.2383, "num_input_tokens_seen": 86561456, "step": 40050 }, { "epoch": 6.5342577487765094, "grad_norm": 0.031160254031419754, "learning_rate": 0.0008514432768236282, "loss": 0.02, "num_input_tokens_seen": 86573648, "step": 40055 }, { "epoch": 6.535073409461664, "grad_norm": 0.12235338240861893, "learning_rate": 0.0008513926429144754, "loss": 0.1384, "num_input_tokens_seen": 86585552, "step": 40060 }, { "epoch": 6.535889070146819, "grad_norm": 0.16848307847976685, "learning_rate": 0.0008513420018839049, "loss": 0.0781, "num_input_tokens_seen": 86595632, "step": 40065 }, { "epoch": 6.536704730831974, "grad_norm": 0.195616215467453, "learning_rate": 0.0008512913537329431, "loss": 0.0789, "num_input_tokens_seen": 86606512, "step": 40070 }, { "epoch": 6.537520391517129, "grad_norm": 0.011958951130509377, "learning_rate": 0.0008512406984626162, "loss": 0.0838, "num_input_tokens_seen": 86616688, "step": 40075 }, { "epoch": 6.5383360522022835, "grad_norm": 0.013706839643418789, "learning_rate": 0.0008511900360739512, "loss": 0.1612, "num_input_tokens_seen": 86626736, "step": 40080 }, { "epoch": 6.539151712887438, "grad_norm": 0.03659415990114212, "learning_rate": 0.0008511393665679745, "loss": 0.0696, "num_input_tokens_seen": 86638576, "step": 40085 }, { "epoch": 6.539967373572594, "grad_norm": 0.06943966448307037, "learning_rate": 0.000851088689945713, "loss": 0.0754, "num_input_tokens_seen": 86649360, "step": 40090 }, { "epoch": 6.540783034257749, "grad_norm": 0.02747497893869877, "learning_rate": 0.0008510380062081939, "loss": 0.1278, "num_input_tokens_seen": 86659536, "step": 40095 }, { "epoch": 6.541598694942904, "grad_norm": 0.07961980253458023, "learning_rate": 0.0008509873153564443, "loss": 0.0776, "num_input_tokens_seen": 86670512, "step": 40100 }, { "epoch": 6.5424143556280585, "grad_norm": 0.0976494625210762, "learning_rate": 0.0008509366173914914, "loss": 0.0581, "num_input_tokens_seen": 86681264, "step": 40105 }, { "epoch": 6.543230016313213, "grad_norm": 0.004687887150794268, "learning_rate": 0.0008508859123143628, "loss": 0.149, "num_input_tokens_seen": 86692368, "step": 40110 }, { "epoch": 6.544045676998369, "grad_norm": 0.014002878218889236, "learning_rate": 0.0008508352001260861, "loss": 0.093, "num_input_tokens_seen": 86702608, "step": 40115 }, { "epoch": 6.544861337683524, "grad_norm": 0.03292868286371231, "learning_rate": 0.000850784480827689, "loss": 0.0346, "num_input_tokens_seen": 86713776, "step": 40120 }, { "epoch": 6.545676998368679, "grad_norm": 0.05072006955742836, "learning_rate": 0.0008507337544201994, "loss": 0.1226, "num_input_tokens_seen": 86724624, "step": 40125 }, { "epoch": 6.5464926590538335, "grad_norm": 0.017592573538422585, "learning_rate": 0.0008506830209046453, "loss": 0.0755, "num_input_tokens_seen": 86736048, "step": 40130 }, { "epoch": 6.547308319738988, "grad_norm": 0.020308518782258034, "learning_rate": 0.000850632280282055, "loss": 0.0187, "num_input_tokens_seen": 86747216, "step": 40135 }, { "epoch": 6.548123980424144, "grad_norm": 0.09209541976451874, "learning_rate": 0.0008505815325534565, "loss": 0.0561, "num_input_tokens_seen": 86757296, "step": 40140 }, { "epoch": 6.548939641109299, "grad_norm": 0.06564757227897644, "learning_rate": 0.0008505307777198788, "loss": 0.0243, "num_input_tokens_seen": 86769072, "step": 40145 }, { "epoch": 6.549755301794454, "grad_norm": 0.052758727222681046, "learning_rate": 0.0008504800157823501, "loss": 0.0323, "num_input_tokens_seen": 86780816, "step": 40150 }, { "epoch": 6.5505709624796085, "grad_norm": 0.023643581196665764, "learning_rate": 0.000850429246741899, "loss": 0.0383, "num_input_tokens_seen": 86791280, "step": 40155 }, { "epoch": 6.551386623164763, "grad_norm": 0.2505358159542084, "learning_rate": 0.0008503784705995549, "loss": 0.0601, "num_input_tokens_seen": 86800912, "step": 40160 }, { "epoch": 6.552202283849918, "grad_norm": 0.007761516608297825, "learning_rate": 0.0008503276873563465, "loss": 0.0122, "num_input_tokens_seen": 86812592, "step": 40165 }, { "epoch": 6.553017944535073, "grad_norm": 0.029418349266052246, "learning_rate": 0.0008502768970133032, "loss": 0.17, "num_input_tokens_seen": 86824048, "step": 40170 }, { "epoch": 6.553833605220229, "grad_norm": 0.22073465585708618, "learning_rate": 0.0008502260995714543, "loss": 0.0459, "num_input_tokens_seen": 86834096, "step": 40175 }, { "epoch": 6.554649265905383, "grad_norm": 0.003069857368245721, "learning_rate": 0.0008501752950318292, "loss": 0.0489, "num_input_tokens_seen": 86844752, "step": 40180 }, { "epoch": 6.555464926590538, "grad_norm": 0.008434565737843513, "learning_rate": 0.0008501244833954573, "loss": 0.1582, "num_input_tokens_seen": 86855056, "step": 40185 }, { "epoch": 6.556280587275693, "grad_norm": 0.32387346029281616, "learning_rate": 0.0008500736646633686, "loss": 0.1796, "num_input_tokens_seen": 86866128, "step": 40190 }, { "epoch": 6.557096247960848, "grad_norm": 0.2440408319234848, "learning_rate": 0.0008500228388365933, "loss": 0.1675, "num_input_tokens_seen": 86877328, "step": 40195 }, { "epoch": 6.557911908646004, "grad_norm": 0.0660281777381897, "learning_rate": 0.0008499720059161608, "loss": 0.0356, "num_input_tokens_seen": 86888240, "step": 40200 }, { "epoch": 6.558727569331158, "grad_norm": 0.013883471488952637, "learning_rate": 0.0008499211659031018, "loss": 0.08, "num_input_tokens_seen": 86899664, "step": 40205 }, { "epoch": 6.559543230016313, "grad_norm": 0.16646042466163635, "learning_rate": 0.0008498703187984465, "loss": 0.1258, "num_input_tokens_seen": 86910448, "step": 40210 }, { "epoch": 6.560358890701468, "grad_norm": 0.01823389157652855, "learning_rate": 0.0008498194646032253, "loss": 0.0364, "num_input_tokens_seen": 86921168, "step": 40215 }, { "epoch": 6.561174551386623, "grad_norm": 0.010996226221323013, "learning_rate": 0.0008497686033184687, "loss": 0.0631, "num_input_tokens_seen": 86931632, "step": 40220 }, { "epoch": 6.561990212071779, "grad_norm": 0.021542511880397797, "learning_rate": 0.0008497177349452077, "loss": 0.1433, "num_input_tokens_seen": 86942608, "step": 40225 }, { "epoch": 6.562805872756933, "grad_norm": 0.04288684204220772, "learning_rate": 0.0008496668594844733, "loss": 0.0376, "num_input_tokens_seen": 86954352, "step": 40230 }, { "epoch": 6.563621533442088, "grad_norm": 0.1718064695596695, "learning_rate": 0.0008496159769372964, "loss": 0.0615, "num_input_tokens_seen": 86964816, "step": 40235 }, { "epoch": 6.564437194127243, "grad_norm": 0.15476419031620026, "learning_rate": 0.0008495650873047081, "loss": 0.0596, "num_input_tokens_seen": 86975760, "step": 40240 }, { "epoch": 6.565252854812398, "grad_norm": 0.29840338230133057, "learning_rate": 0.0008495141905877398, "loss": 0.0854, "num_input_tokens_seen": 86986672, "step": 40245 }, { "epoch": 6.566068515497553, "grad_norm": 0.318273663520813, "learning_rate": 0.0008494632867874232, "loss": 0.1865, "num_input_tokens_seen": 86996560, "step": 40250 }, { "epoch": 6.566884176182708, "grad_norm": 0.15846861898899078, "learning_rate": 0.0008494123759047897, "loss": 0.0864, "num_input_tokens_seen": 87008080, "step": 40255 }, { "epoch": 6.567699836867863, "grad_norm": 0.013275109231472015, "learning_rate": 0.0008493614579408712, "loss": 0.0272, "num_input_tokens_seen": 87019920, "step": 40260 }, { "epoch": 6.568515497553018, "grad_norm": 0.6214556097984314, "learning_rate": 0.0008493105328966995, "loss": 0.1214, "num_input_tokens_seen": 87031056, "step": 40265 }, { "epoch": 6.569331158238173, "grad_norm": 0.2053581178188324, "learning_rate": 0.0008492596007733066, "loss": 0.0857, "num_input_tokens_seen": 87041872, "step": 40270 }, { "epoch": 6.570146818923328, "grad_norm": 0.22130665183067322, "learning_rate": 0.0008492086615717251, "loss": 0.049, "num_input_tokens_seen": 87051888, "step": 40275 }, { "epoch": 6.5709624796084825, "grad_norm": 0.010671776719391346, "learning_rate": 0.0008491577152929867, "loss": 0.1461, "num_input_tokens_seen": 87061744, "step": 40280 }, { "epoch": 6.571778140293638, "grad_norm": 0.007222020998597145, "learning_rate": 0.0008491067619381247, "loss": 0.1972, "num_input_tokens_seen": 87073008, "step": 40285 }, { "epoch": 6.572593800978793, "grad_norm": 0.22576162219047546, "learning_rate": 0.0008490558015081711, "loss": 0.1103, "num_input_tokens_seen": 87084336, "step": 40290 }, { "epoch": 6.573409461663948, "grad_norm": 0.24591238796710968, "learning_rate": 0.0008490048340041587, "loss": 0.1464, "num_input_tokens_seen": 87095088, "step": 40295 }, { "epoch": 6.574225122349103, "grad_norm": 0.002666117623448372, "learning_rate": 0.0008489538594271209, "loss": 0.1626, "num_input_tokens_seen": 87105744, "step": 40300 }, { "epoch": 6.575040783034257, "grad_norm": 0.004681061487644911, "learning_rate": 0.0008489028777780901, "loss": 0.0815, "num_input_tokens_seen": 87115984, "step": 40305 }, { "epoch": 6.575856443719413, "grad_norm": 0.1317053884267807, "learning_rate": 0.0008488518890581002, "loss": 0.0587, "num_input_tokens_seen": 87127344, "step": 40310 }, { "epoch": 6.576672104404568, "grad_norm": 0.44375360012054443, "learning_rate": 0.0008488008932681841, "loss": 0.1411, "num_input_tokens_seen": 87138512, "step": 40315 }, { "epoch": 6.577487765089723, "grad_norm": 0.00884460099041462, "learning_rate": 0.0008487498904093753, "loss": 0.1103, "num_input_tokens_seen": 87149776, "step": 40320 }, { "epoch": 6.578303425774878, "grad_norm": 0.054936353117227554, "learning_rate": 0.0008486988804827077, "loss": 0.0426, "num_input_tokens_seen": 87160848, "step": 40325 }, { "epoch": 6.579119086460032, "grad_norm": 0.3017171621322632, "learning_rate": 0.0008486478634892149, "loss": 0.0796, "num_input_tokens_seen": 87171984, "step": 40330 }, { "epoch": 6.579934747145187, "grad_norm": 0.1470268815755844, "learning_rate": 0.0008485968394299308, "loss": 0.1392, "num_input_tokens_seen": 87182736, "step": 40335 }, { "epoch": 6.580750407830343, "grad_norm": 0.03465595841407776, "learning_rate": 0.0008485458083058896, "loss": 0.0727, "num_input_tokens_seen": 87193872, "step": 40340 }, { "epoch": 6.581566068515498, "grad_norm": 0.09754090011119843, "learning_rate": 0.0008484947701181254, "loss": 0.1589, "num_input_tokens_seen": 87204912, "step": 40345 }, { "epoch": 6.582381729200653, "grad_norm": 0.029379529878497124, "learning_rate": 0.0008484437248676726, "loss": 0.0285, "num_input_tokens_seen": 87215120, "step": 40350 }, { "epoch": 6.583197389885807, "grad_norm": 0.02292765863239765, "learning_rate": 0.0008483926725555655, "loss": 0.0411, "num_input_tokens_seen": 87227024, "step": 40355 }, { "epoch": 6.584013050570962, "grad_norm": 0.01272663101553917, "learning_rate": 0.0008483416131828392, "loss": 0.0231, "num_input_tokens_seen": 87238352, "step": 40360 }, { "epoch": 6.584828711256117, "grad_norm": 0.29345691204071045, "learning_rate": 0.000848290546750528, "loss": 0.1485, "num_input_tokens_seen": 87248656, "step": 40365 }, { "epoch": 6.585644371941273, "grad_norm": 0.017028363421559334, "learning_rate": 0.0008482394732596672, "loss": 0.0282, "num_input_tokens_seen": 87259696, "step": 40370 }, { "epoch": 6.5864600326264275, "grad_norm": 0.06555546075105667, "learning_rate": 0.0008481883927112917, "loss": 0.0583, "num_input_tokens_seen": 87269776, "step": 40375 }, { "epoch": 6.587275693311582, "grad_norm": 0.03757341951131821, "learning_rate": 0.0008481373051064365, "loss": 0.0747, "num_input_tokens_seen": 87280112, "step": 40380 }, { "epoch": 6.588091353996737, "grad_norm": 0.11471926420927048, "learning_rate": 0.0008480862104461374, "loss": 0.0875, "num_input_tokens_seen": 87290256, "step": 40385 }, { "epoch": 6.588907014681892, "grad_norm": 0.022442886605858803, "learning_rate": 0.0008480351087314295, "loss": 0.0722, "num_input_tokens_seen": 87301488, "step": 40390 }, { "epoch": 6.589722675367048, "grad_norm": 0.02271287702023983, "learning_rate": 0.0008479839999633487, "loss": 0.069, "num_input_tokens_seen": 87311088, "step": 40395 }, { "epoch": 6.5905383360522025, "grad_norm": 0.16364285349845886, "learning_rate": 0.0008479328841429306, "loss": 0.1194, "num_input_tokens_seen": 87321776, "step": 40400 }, { "epoch": 6.591353996737357, "grad_norm": 0.17481482028961182, "learning_rate": 0.0008478817612712113, "loss": 0.0324, "num_input_tokens_seen": 87333328, "step": 40405 }, { "epoch": 6.592169657422512, "grad_norm": 0.0992930606007576, "learning_rate": 0.0008478306313492267, "loss": 0.0716, "num_input_tokens_seen": 87344112, "step": 40410 }, { "epoch": 6.592985318107667, "grad_norm": 0.015793804079294205, "learning_rate": 0.0008477794943780132, "loss": 0.0133, "num_input_tokens_seen": 87355888, "step": 40415 }, { "epoch": 6.593800978792823, "grad_norm": 0.24398945271968842, "learning_rate": 0.0008477283503586072, "loss": 0.0351, "num_input_tokens_seen": 87367088, "step": 40420 }, { "epoch": 6.5946166394779775, "grad_norm": 0.0020906298886984587, "learning_rate": 0.0008476771992920449, "loss": 0.0301, "num_input_tokens_seen": 87377904, "step": 40425 }, { "epoch": 6.595432300163132, "grad_norm": 0.2970251142978668, "learning_rate": 0.0008476260411793631, "loss": 0.045, "num_input_tokens_seen": 87387920, "step": 40430 }, { "epoch": 6.596247960848287, "grad_norm": 0.07806608080863953, "learning_rate": 0.0008475748760215984, "loss": 0.1143, "num_input_tokens_seen": 87397392, "step": 40435 }, { "epoch": 6.597063621533442, "grad_norm": 0.08590800315141678, "learning_rate": 0.0008475237038197882, "loss": 0.4011, "num_input_tokens_seen": 87407952, "step": 40440 }, { "epoch": 6.597879282218597, "grad_norm": 0.014154416508972645, "learning_rate": 0.0008474725245749691, "loss": 0.0905, "num_input_tokens_seen": 87418576, "step": 40445 }, { "epoch": 6.598694942903752, "grad_norm": 0.2728196978569031, "learning_rate": 0.0008474213382881786, "loss": 0.0507, "num_input_tokens_seen": 87429456, "step": 40450 }, { "epoch": 6.599510603588907, "grad_norm": 0.04241441190242767, "learning_rate": 0.0008473701449604539, "loss": 0.0273, "num_input_tokens_seen": 87439248, "step": 40455 }, { "epoch": 6.600326264274062, "grad_norm": 0.15882225334644318, "learning_rate": 0.0008473189445928325, "loss": 0.2533, "num_input_tokens_seen": 87449936, "step": 40460 }, { "epoch": 6.601141924959217, "grad_norm": 0.36263370513916016, "learning_rate": 0.0008472677371863521, "loss": 0.2535, "num_input_tokens_seen": 87460432, "step": 40465 }, { "epoch": 6.601957585644372, "grad_norm": 0.09968896955251694, "learning_rate": 0.0008472165227420505, "loss": 0.251, "num_input_tokens_seen": 87470288, "step": 40470 }, { "epoch": 6.602773246329527, "grad_norm": 0.24596227705478668, "learning_rate": 0.0008471653012609655, "loss": 0.0642, "num_input_tokens_seen": 87480208, "step": 40475 }, { "epoch": 6.603588907014682, "grad_norm": 0.010824929922819138, "learning_rate": 0.0008471140727441353, "loss": 0.0965, "num_input_tokens_seen": 87490768, "step": 40480 }, { "epoch": 6.604404567699837, "grad_norm": 0.012988324277102947, "learning_rate": 0.0008470628371925981, "loss": 0.0368, "num_input_tokens_seen": 87501040, "step": 40485 }, { "epoch": 6.605220228384992, "grad_norm": 0.11417032778263092, "learning_rate": 0.0008470115946073922, "loss": 0.1614, "num_input_tokens_seen": 87511824, "step": 40490 }, { "epoch": 6.606035889070147, "grad_norm": 0.09179277718067169, "learning_rate": 0.0008469603449895562, "loss": 0.1003, "num_input_tokens_seen": 87522928, "step": 40495 }, { "epoch": 6.6068515497553015, "grad_norm": 0.03256848081946373, "learning_rate": 0.0008469090883401286, "loss": 0.0587, "num_input_tokens_seen": 87533200, "step": 40500 }, { "epoch": 6.607667210440457, "grad_norm": 0.09992846846580505, "learning_rate": 0.0008468578246601482, "loss": 0.1608, "num_input_tokens_seen": 87541808, "step": 40505 }, { "epoch": 6.608482871125612, "grad_norm": 0.19650889933109283, "learning_rate": 0.000846806553950654, "loss": 0.0926, "num_input_tokens_seen": 87552656, "step": 40510 }, { "epoch": 6.609298531810767, "grad_norm": 0.33135658502578735, "learning_rate": 0.0008467552762126851, "loss": 0.1255, "num_input_tokens_seen": 87564048, "step": 40515 }, { "epoch": 6.610114192495922, "grad_norm": 0.01710195280611515, "learning_rate": 0.0008467039914472805, "loss": 0.0421, "num_input_tokens_seen": 87574832, "step": 40520 }, { "epoch": 6.6109298531810765, "grad_norm": 0.024433651939034462, "learning_rate": 0.0008466526996554797, "loss": 0.078, "num_input_tokens_seen": 87585040, "step": 40525 }, { "epoch": 6.611745513866231, "grad_norm": 0.1268104910850525, "learning_rate": 0.0008466014008383224, "loss": 0.0534, "num_input_tokens_seen": 87595312, "step": 40530 }, { "epoch": 6.612561174551386, "grad_norm": 0.02771225944161415, "learning_rate": 0.0008465500949968479, "loss": 0.0801, "num_input_tokens_seen": 87606224, "step": 40535 }, { "epoch": 6.613376835236542, "grad_norm": 0.01759571023285389, "learning_rate": 0.000846498782132096, "loss": 0.0336, "num_input_tokens_seen": 87617520, "step": 40540 }, { "epoch": 6.614192495921697, "grad_norm": 0.019269373267889023, "learning_rate": 0.0008464474622451067, "loss": 0.0567, "num_input_tokens_seen": 87629008, "step": 40545 }, { "epoch": 6.6150081566068515, "grad_norm": 0.017509333789348602, "learning_rate": 0.0008463961353369202, "loss": 0.0433, "num_input_tokens_seen": 87639824, "step": 40550 }, { "epoch": 6.615823817292006, "grad_norm": 0.06460079550743103, "learning_rate": 0.0008463448014085765, "loss": 0.0763, "num_input_tokens_seen": 87649040, "step": 40555 }, { "epoch": 6.616639477977161, "grad_norm": 0.0920075848698616, "learning_rate": 0.000846293460461116, "loss": 0.185, "num_input_tokens_seen": 87659536, "step": 40560 }, { "epoch": 6.617455138662317, "grad_norm": 0.1951504349708557, "learning_rate": 0.0008462421124955792, "loss": 0.2377, "num_input_tokens_seen": 87670256, "step": 40565 }, { "epoch": 6.618270799347472, "grad_norm": 0.016024351119995117, "learning_rate": 0.0008461907575130069, "loss": 0.1435, "num_input_tokens_seen": 87681008, "step": 40570 }, { "epoch": 6.6190864600326265, "grad_norm": 0.104428231716156, "learning_rate": 0.0008461393955144397, "loss": 0.1053, "num_input_tokens_seen": 87692400, "step": 40575 }, { "epoch": 6.619902120717781, "grad_norm": 0.02700626105070114, "learning_rate": 0.0008460880265009185, "loss": 0.1724, "num_input_tokens_seen": 87703888, "step": 40580 }, { "epoch": 6.620717781402936, "grad_norm": 0.08827092498540878, "learning_rate": 0.0008460366504734843, "loss": 0.0314, "num_input_tokens_seen": 87714096, "step": 40585 }, { "epoch": 6.621533442088092, "grad_norm": 0.03388110548257828, "learning_rate": 0.0008459852674331785, "loss": 0.0655, "num_input_tokens_seen": 87723760, "step": 40590 }, { "epoch": 6.622349102773247, "grad_norm": 0.27784040570259094, "learning_rate": 0.0008459338773810424, "loss": 0.2896, "num_input_tokens_seen": 87734160, "step": 40595 }, { "epoch": 6.623164763458401, "grad_norm": 0.018514566123485565, "learning_rate": 0.0008458824803181174, "loss": 0.0615, "num_input_tokens_seen": 87744496, "step": 40600 }, { "epoch": 6.623980424143556, "grad_norm": 0.19091467559337616, "learning_rate": 0.0008458310762454451, "loss": 0.153, "num_input_tokens_seen": 87755376, "step": 40605 }, { "epoch": 6.624796084828711, "grad_norm": 0.01676258258521557, "learning_rate": 0.0008457796651640672, "loss": 0.0911, "num_input_tokens_seen": 87765552, "step": 40610 }, { "epoch": 6.625611745513866, "grad_norm": 0.14877432584762573, "learning_rate": 0.0008457282470750259, "loss": 0.0568, "num_input_tokens_seen": 87777552, "step": 40615 }, { "epoch": 6.626427406199021, "grad_norm": 0.0045336890034377575, "learning_rate": 0.0008456768219793631, "loss": 0.1728, "num_input_tokens_seen": 87788176, "step": 40620 }, { "epoch": 6.627243066884176, "grad_norm": 0.047274842858314514, "learning_rate": 0.000845625389878121, "loss": 0.0987, "num_input_tokens_seen": 87800240, "step": 40625 }, { "epoch": 6.628058727569331, "grad_norm": 0.26113563776016235, "learning_rate": 0.0008455739507723418, "loss": 0.0529, "num_input_tokens_seen": 87811248, "step": 40630 }, { "epoch": 6.628874388254486, "grad_norm": 0.046045806258916855, "learning_rate": 0.0008455225046630681, "loss": 0.1242, "num_input_tokens_seen": 87823280, "step": 40635 }, { "epoch": 6.629690048939641, "grad_norm": 0.007317631971091032, "learning_rate": 0.0008454710515513426, "loss": 0.0486, "num_input_tokens_seen": 87833456, "step": 40640 }, { "epoch": 6.630505709624796, "grad_norm": 0.033978912979364395, "learning_rate": 0.0008454195914382079, "loss": 0.1417, "num_input_tokens_seen": 87844016, "step": 40645 }, { "epoch": 6.631321370309951, "grad_norm": 0.01450091227889061, "learning_rate": 0.0008453681243247071, "loss": 0.0215, "num_input_tokens_seen": 87855216, "step": 40650 }, { "epoch": 6.632137030995106, "grad_norm": 0.10272349417209625, "learning_rate": 0.000845316650211883, "loss": 0.1389, "num_input_tokens_seen": 87865392, "step": 40655 }, { "epoch": 6.632952691680261, "grad_norm": 0.26933956146240234, "learning_rate": 0.0008452651691007789, "loss": 0.0634, "num_input_tokens_seen": 87876496, "step": 40660 }, { "epoch": 6.633768352365416, "grad_norm": 0.2286217361688614, "learning_rate": 0.0008452136809924384, "loss": 0.1909, "num_input_tokens_seen": 87888112, "step": 40665 }, { "epoch": 6.634584013050571, "grad_norm": 0.026156388223171234, "learning_rate": 0.0008451621858879043, "loss": 0.0643, "num_input_tokens_seen": 87898160, "step": 40670 }, { "epoch": 6.635399673735726, "grad_norm": 0.010627840645611286, "learning_rate": 0.000845110683788221, "loss": 0.2, "num_input_tokens_seen": 87909200, "step": 40675 }, { "epoch": 6.636215334420881, "grad_norm": 0.003927143756300211, "learning_rate": 0.0008450591746944319, "loss": 0.0798, "num_input_tokens_seen": 87919856, "step": 40680 }, { "epoch": 6.637030995106036, "grad_norm": 0.25393131375312805, "learning_rate": 0.0008450076586075805, "loss": 0.2177, "num_input_tokens_seen": 87931088, "step": 40685 }, { "epoch": 6.637846655791191, "grad_norm": 0.09210921078920364, "learning_rate": 0.0008449561355287116, "loss": 0.1009, "num_input_tokens_seen": 87942096, "step": 40690 }, { "epoch": 6.638662316476346, "grad_norm": 0.06515488773584366, "learning_rate": 0.000844904605458869, "loss": 0.0668, "num_input_tokens_seen": 87952784, "step": 40695 }, { "epoch": 6.6394779771615005, "grad_norm": 0.1152672991156578, "learning_rate": 0.0008448530683990968, "loss": 0.0984, "num_input_tokens_seen": 87962096, "step": 40700 }, { "epoch": 6.640293637846656, "grad_norm": 0.011901522055268288, "learning_rate": 0.0008448015243504398, "loss": 0.0648, "num_input_tokens_seen": 87972144, "step": 40705 }, { "epoch": 6.641109298531811, "grad_norm": 0.010754693299531937, "learning_rate": 0.0008447499733139426, "loss": 0.0753, "num_input_tokens_seen": 87983920, "step": 40710 }, { "epoch": 6.641924959216966, "grad_norm": 0.526595950126648, "learning_rate": 0.0008446984152906496, "loss": 0.1452, "num_input_tokens_seen": 87993360, "step": 40715 }, { "epoch": 6.642740619902121, "grad_norm": 0.0625261515378952, "learning_rate": 0.0008446468502816061, "loss": 0.2296, "num_input_tokens_seen": 88002320, "step": 40720 }, { "epoch": 6.643556280587275, "grad_norm": 0.04473881796002388, "learning_rate": 0.000844595278287857, "loss": 0.0903, "num_input_tokens_seen": 88013200, "step": 40725 }, { "epoch": 6.64437194127243, "grad_norm": 0.21388088166713715, "learning_rate": 0.0008445436993104473, "loss": 0.0913, "num_input_tokens_seen": 88024368, "step": 40730 }, { "epoch": 6.645187601957586, "grad_norm": 0.009727729484438896, "learning_rate": 0.0008444921133504225, "loss": 0.0832, "num_input_tokens_seen": 88035984, "step": 40735 }, { "epoch": 6.646003262642741, "grad_norm": 0.11908382922410965, "learning_rate": 0.0008444405204088281, "loss": 0.0735, "num_input_tokens_seen": 88047344, "step": 40740 }, { "epoch": 6.646818923327896, "grad_norm": 0.010134859941899776, "learning_rate": 0.0008443889204867095, "loss": 0.0272, "num_input_tokens_seen": 88058608, "step": 40745 }, { "epoch": 6.64763458401305, "grad_norm": 0.03795737773180008, "learning_rate": 0.0008443373135851125, "loss": 0.133, "num_input_tokens_seen": 88069648, "step": 40750 }, { "epoch": 6.648450244698205, "grad_norm": 0.02392597869038582, "learning_rate": 0.0008442856997050832, "loss": 0.0323, "num_input_tokens_seen": 88080368, "step": 40755 }, { "epoch": 6.649265905383361, "grad_norm": 0.0146209467202425, "learning_rate": 0.0008442340788476672, "loss": 0.0301, "num_input_tokens_seen": 88090832, "step": 40760 }, { "epoch": 6.650081566068516, "grad_norm": 0.042282216250896454, "learning_rate": 0.0008441824510139111, "loss": 0.1166, "num_input_tokens_seen": 88101456, "step": 40765 }, { "epoch": 6.650897226753671, "grad_norm": 0.1621234118938446, "learning_rate": 0.0008441308162048609, "loss": 0.0638, "num_input_tokens_seen": 88112624, "step": 40770 }, { "epoch": 6.651712887438825, "grad_norm": 0.01184393372386694, "learning_rate": 0.0008440791744215632, "loss": 0.03, "num_input_tokens_seen": 88124080, "step": 40775 }, { "epoch": 6.65252854812398, "grad_norm": 0.10488130897283554, "learning_rate": 0.0008440275256650644, "loss": 0.1571, "num_input_tokens_seen": 88135088, "step": 40780 }, { "epoch": 6.653344208809135, "grad_norm": 0.009288913570344448, "learning_rate": 0.0008439758699364115, "loss": 0.0498, "num_input_tokens_seen": 88145296, "step": 40785 }, { "epoch": 6.654159869494291, "grad_norm": 0.17210085690021515, "learning_rate": 0.0008439242072366511, "loss": 0.1509, "num_input_tokens_seen": 88154864, "step": 40790 }, { "epoch": 6.6549755301794455, "grad_norm": 0.03770684078335762, "learning_rate": 0.0008438725375668305, "loss": 0.2683, "num_input_tokens_seen": 88164528, "step": 40795 }, { "epoch": 6.6557911908646, "grad_norm": 0.030847519636154175, "learning_rate": 0.0008438208609279967, "loss": 0.0984, "num_input_tokens_seen": 88177136, "step": 40800 }, { "epoch": 6.656606851549755, "grad_norm": 0.02910693734884262, "learning_rate": 0.0008437691773211969, "loss": 0.0326, "num_input_tokens_seen": 88187824, "step": 40805 }, { "epoch": 6.65742251223491, "grad_norm": 0.013066194951534271, "learning_rate": 0.0008437174867474786, "loss": 0.1456, "num_input_tokens_seen": 88198288, "step": 40810 }, { "epoch": 6.658238172920065, "grad_norm": 0.005027337931096554, "learning_rate": 0.0008436657892078895, "loss": 0.0227, "num_input_tokens_seen": 88208720, "step": 40815 }, { "epoch": 6.6590538336052205, "grad_norm": 0.1727529913187027, "learning_rate": 0.0008436140847034772, "loss": 0.1289, "num_input_tokens_seen": 88219344, "step": 40820 }, { "epoch": 6.659869494290375, "grad_norm": 0.13390670716762543, "learning_rate": 0.0008435623732352895, "loss": 0.1687, "num_input_tokens_seen": 88229392, "step": 40825 }, { "epoch": 6.66068515497553, "grad_norm": 0.09145081788301468, "learning_rate": 0.0008435106548043745, "loss": 0.0726, "num_input_tokens_seen": 88240624, "step": 40830 }, { "epoch": 6.661500815660685, "grad_norm": 0.18923735618591309, "learning_rate": 0.0008434589294117802, "loss": 0.0453, "num_input_tokens_seen": 88250640, "step": 40835 }, { "epoch": 6.66231647634584, "grad_norm": 0.12857620418071747, "learning_rate": 0.0008434071970585551, "loss": 0.103, "num_input_tokens_seen": 88262224, "step": 40840 }, { "epoch": 6.6631321370309955, "grad_norm": 0.04811937361955643, "learning_rate": 0.0008433554577457475, "loss": 0.0248, "num_input_tokens_seen": 88271856, "step": 40845 }, { "epoch": 6.66394779771615, "grad_norm": 0.0026477025821805, "learning_rate": 0.000843303711474406, "loss": 0.155, "num_input_tokens_seen": 88283344, "step": 40850 }, { "epoch": 6.664763458401305, "grad_norm": 0.04366447031497955, "learning_rate": 0.0008432519582455792, "loss": 0.0734, "num_input_tokens_seen": 88294064, "step": 40855 }, { "epoch": 6.66557911908646, "grad_norm": 0.09165249764919281, "learning_rate": 0.0008432001980603161, "loss": 0.2132, "num_input_tokens_seen": 88304656, "step": 40860 }, { "epoch": 6.666394779771615, "grad_norm": 0.09211524575948715, "learning_rate": 0.0008431484309196656, "loss": 0.1458, "num_input_tokens_seen": 88314192, "step": 40865 }, { "epoch": 6.6672104404567705, "grad_norm": 0.019264977425336838, "learning_rate": 0.0008430966568246768, "loss": 0.0255, "num_input_tokens_seen": 88324784, "step": 40870 }, { "epoch": 6.668026101141925, "grad_norm": 0.03427863493561745, "learning_rate": 0.0008430448757763989, "loss": 0.0462, "num_input_tokens_seen": 88335280, "step": 40875 }, { "epoch": 6.66884176182708, "grad_norm": 0.34995800256729126, "learning_rate": 0.0008429930877758814, "loss": 0.1108, "num_input_tokens_seen": 88346096, "step": 40880 }, { "epoch": 6.669657422512235, "grad_norm": 0.012390978634357452, "learning_rate": 0.000842941292824174, "loss": 0.0087, "num_input_tokens_seen": 88357968, "step": 40885 }, { "epoch": 6.67047308319739, "grad_norm": 0.25816184282302856, "learning_rate": 0.0008428894909223261, "loss": 0.0435, "num_input_tokens_seen": 88368816, "step": 40890 }, { "epoch": 6.671288743882545, "grad_norm": 0.027464818209409714, "learning_rate": 0.0008428376820713879, "loss": 0.1062, "num_input_tokens_seen": 88378256, "step": 40895 }, { "epoch": 6.672104404567699, "grad_norm": 0.009099733084440231, "learning_rate": 0.000842785866272409, "loss": 0.0545, "num_input_tokens_seen": 88387888, "step": 40900 }, { "epoch": 6.672920065252855, "grad_norm": 0.5091248750686646, "learning_rate": 0.0008427340435264397, "loss": 0.0938, "num_input_tokens_seen": 88397360, "step": 40905 }, { "epoch": 6.67373572593801, "grad_norm": 0.021805984899401665, "learning_rate": 0.0008426822138345302, "loss": 0.2499, "num_input_tokens_seen": 88408880, "step": 40910 }, { "epoch": 6.674551386623165, "grad_norm": 0.055580854415893555, "learning_rate": 0.0008426303771977311, "loss": 0.0147, "num_input_tokens_seen": 88420784, "step": 40915 }, { "epoch": 6.6753670473083195, "grad_norm": 0.09892192482948303, "learning_rate": 0.0008425785336170925, "loss": 0.1976, "num_input_tokens_seen": 88432432, "step": 40920 }, { "epoch": 6.676182707993474, "grad_norm": 0.08244457095861435, "learning_rate": 0.0008425266830936654, "loss": 0.149, "num_input_tokens_seen": 88442960, "step": 40925 }, { "epoch": 6.67699836867863, "grad_norm": 0.007770029827952385, "learning_rate": 0.0008424748256285005, "loss": 0.1115, "num_input_tokens_seen": 88453936, "step": 40930 }, { "epoch": 6.677814029363785, "grad_norm": 0.0658392608165741, "learning_rate": 0.0008424229612226488, "loss": 0.0897, "num_input_tokens_seen": 88464304, "step": 40935 }, { "epoch": 6.67862969004894, "grad_norm": 0.17187225818634033, "learning_rate": 0.0008423710898771614, "loss": 0.0562, "num_input_tokens_seen": 88475280, "step": 40940 }, { "epoch": 6.6794453507340945, "grad_norm": 0.3012283146381378, "learning_rate": 0.0008423192115930897, "loss": 0.1038, "num_input_tokens_seen": 88485424, "step": 40945 }, { "epoch": 6.680261011419249, "grad_norm": 0.08645543456077576, "learning_rate": 0.0008422673263714848, "loss": 0.0516, "num_input_tokens_seen": 88495504, "step": 40950 }, { "epoch": 6.681076672104405, "grad_norm": 0.015715450048446655, "learning_rate": 0.0008422154342133983, "loss": 0.0286, "num_input_tokens_seen": 88506960, "step": 40955 }, { "epoch": 6.68189233278956, "grad_norm": 0.01696091704070568, "learning_rate": 0.0008421635351198819, "loss": 0.0101, "num_input_tokens_seen": 88519088, "step": 40960 }, { "epoch": 6.682707993474715, "grad_norm": 0.14385735988616943, "learning_rate": 0.0008421116290919875, "loss": 0.1959, "num_input_tokens_seen": 88529584, "step": 40965 }, { "epoch": 6.6835236541598695, "grad_norm": 0.040347855538129807, "learning_rate": 0.0008420597161307668, "loss": 0.0822, "num_input_tokens_seen": 88540976, "step": 40970 }, { "epoch": 6.684339314845024, "grad_norm": 0.0037138413172215223, "learning_rate": 0.0008420077962372721, "loss": 0.0141, "num_input_tokens_seen": 88551952, "step": 40975 }, { "epoch": 6.685154975530179, "grad_norm": 0.41363850235939026, "learning_rate": 0.0008419558694125555, "loss": 0.113, "num_input_tokens_seen": 88563408, "step": 40980 }, { "epoch": 6.685970636215334, "grad_norm": 0.009157951921224594, "learning_rate": 0.0008419039356576695, "loss": 0.2461, "num_input_tokens_seen": 88574384, "step": 40985 }, { "epoch": 6.68678629690049, "grad_norm": 0.19476893544197083, "learning_rate": 0.0008418519949736664, "loss": 0.0781, "num_input_tokens_seen": 88585200, "step": 40990 }, { "epoch": 6.6876019575856445, "grad_norm": 0.03723384067416191, "learning_rate": 0.000841800047361599, "loss": 0.0361, "num_input_tokens_seen": 88596272, "step": 40995 }, { "epoch": 6.688417618270799, "grad_norm": 0.02606227621436119, "learning_rate": 0.00084174809282252, "loss": 0.1431, "num_input_tokens_seen": 88605904, "step": 41000 }, { "epoch": 6.689233278955954, "grad_norm": 0.018963932991027832, "learning_rate": 0.0008416961313574824, "loss": 0.1336, "num_input_tokens_seen": 88615888, "step": 41005 }, { "epoch": 6.690048939641109, "grad_norm": 0.057104069739580154, "learning_rate": 0.0008416441629675391, "loss": 0.0611, "num_input_tokens_seen": 88627792, "step": 41010 }, { "epoch": 6.690864600326265, "grad_norm": 0.01525149866938591, "learning_rate": 0.0008415921876537436, "loss": 0.1299, "num_input_tokens_seen": 88639632, "step": 41015 }, { "epoch": 6.691680261011419, "grad_norm": 0.12123435735702515, "learning_rate": 0.000841540205417149, "loss": 0.0662, "num_input_tokens_seen": 88650384, "step": 41020 }, { "epoch": 6.692495921696574, "grad_norm": 0.056442294269800186, "learning_rate": 0.0008414882162588089, "loss": 0.0486, "num_input_tokens_seen": 88660880, "step": 41025 }, { "epoch": 6.693311582381729, "grad_norm": 0.0264727883040905, "learning_rate": 0.0008414362201797768, "loss": 0.0384, "num_input_tokens_seen": 88671312, "step": 41030 }, { "epoch": 6.694127243066884, "grad_norm": 0.13660025596618652, "learning_rate": 0.0008413842171811066, "loss": 0.0359, "num_input_tokens_seen": 88681232, "step": 41035 }, { "epoch": 6.69494290375204, "grad_norm": 0.03457217290997505, "learning_rate": 0.0008413322072638523, "loss": 0.022, "num_input_tokens_seen": 88691472, "step": 41040 }, { "epoch": 6.695758564437194, "grad_norm": 0.03694334253668785, "learning_rate": 0.0008412801904290677, "loss": 0.0197, "num_input_tokens_seen": 88702544, "step": 41045 }, { "epoch": 6.696574225122349, "grad_norm": 0.03526332601904869, "learning_rate": 0.000841228166677807, "loss": 0.1232, "num_input_tokens_seen": 88713648, "step": 41050 }, { "epoch": 6.697389885807504, "grad_norm": 0.009007184766232967, "learning_rate": 0.0008411761360111248, "loss": 0.0462, "num_input_tokens_seen": 88724464, "step": 41055 }, { "epoch": 6.698205546492659, "grad_norm": 0.10562512278556824, "learning_rate": 0.0008411240984300752, "loss": 0.0197, "num_input_tokens_seen": 88735280, "step": 41060 }, { "epoch": 6.699021207177814, "grad_norm": 0.0942317321896553, "learning_rate": 0.0008410720539357132, "loss": 0.1576, "num_input_tokens_seen": 88746448, "step": 41065 }, { "epoch": 6.699836867862969, "grad_norm": 0.02955956757068634, "learning_rate": 0.0008410200025290933, "loss": 0.0215, "num_input_tokens_seen": 88757200, "step": 41070 }, { "epoch": 6.700652528548124, "grad_norm": 0.02018778957426548, "learning_rate": 0.0008409679442112703, "loss": 0.1164, "num_input_tokens_seen": 88768208, "step": 41075 }, { "epoch": 6.701468189233279, "grad_norm": 0.019691454246640205, "learning_rate": 0.0008409158789832994, "loss": 0.0729, "num_input_tokens_seen": 88779152, "step": 41080 }, { "epoch": 6.702283849918434, "grad_norm": 0.0125938281416893, "learning_rate": 0.0008408638068462357, "loss": 0.1236, "num_input_tokens_seen": 88790256, "step": 41085 }, { "epoch": 6.703099510603589, "grad_norm": 0.2372901290655136, "learning_rate": 0.0008408117278011347, "loss": 0.1925, "num_input_tokens_seen": 88800976, "step": 41090 }, { "epoch": 6.7039151712887435, "grad_norm": 0.24161028861999512, "learning_rate": 0.0008407596418490515, "loss": 0.0759, "num_input_tokens_seen": 88811312, "step": 41095 }, { "epoch": 6.704730831973899, "grad_norm": 0.034654729068279266, "learning_rate": 0.0008407075489910421, "loss": 0.0624, "num_input_tokens_seen": 88821968, "step": 41100 }, { "epoch": 6.705546492659054, "grad_norm": 0.062173739075660706, "learning_rate": 0.0008406554492281618, "loss": 0.0306, "num_input_tokens_seen": 88832784, "step": 41105 }, { "epoch": 6.706362153344209, "grad_norm": 0.01632249914109707, "learning_rate": 0.0008406033425614667, "loss": 0.0783, "num_input_tokens_seen": 88843152, "step": 41110 }, { "epoch": 6.707177814029364, "grad_norm": 0.04637160152196884, "learning_rate": 0.0008405512289920129, "loss": 0.2796, "num_input_tokens_seen": 88853264, "step": 41115 }, { "epoch": 6.7079934747145185, "grad_norm": 0.05950487405061722, "learning_rate": 0.0008404991085208562, "loss": 0.0801, "num_input_tokens_seen": 88863440, "step": 41120 }, { "epoch": 6.708809135399674, "grad_norm": 0.04365135356783867, "learning_rate": 0.0008404469811490534, "loss": 0.0398, "num_input_tokens_seen": 88873616, "step": 41125 }, { "epoch": 6.709624796084829, "grad_norm": 0.19524741172790527, "learning_rate": 0.0008403948468776604, "loss": 0.0655, "num_input_tokens_seen": 88884816, "step": 41130 }, { "epoch": 6.710440456769984, "grad_norm": 0.2746712863445282, "learning_rate": 0.0008403427057077342, "loss": 0.1683, "num_input_tokens_seen": 88896080, "step": 41135 }, { "epoch": 6.711256117455139, "grad_norm": 0.3242705464363098, "learning_rate": 0.0008402905576403312, "loss": 0.1226, "num_input_tokens_seen": 88907152, "step": 41140 }, { "epoch": 6.712071778140293, "grad_norm": 0.022281266748905182, "learning_rate": 0.0008402384026765084, "loss": 0.0701, "num_input_tokens_seen": 88918640, "step": 41145 }, { "epoch": 6.712887438825448, "grad_norm": 0.0167181808501482, "learning_rate": 0.0008401862408173226, "loss": 0.0659, "num_input_tokens_seen": 88928880, "step": 41150 }, { "epoch": 6.713703099510604, "grad_norm": 0.039158862084150314, "learning_rate": 0.0008401340720638313, "loss": 0.1253, "num_input_tokens_seen": 88941296, "step": 41155 }, { "epoch": 6.714518760195759, "grad_norm": 0.2806652784347534, "learning_rate": 0.0008400818964170913, "loss": 0.1515, "num_input_tokens_seen": 88951888, "step": 41160 }, { "epoch": 6.715334420880914, "grad_norm": 0.02568766102194786, "learning_rate": 0.0008400297138781605, "loss": 0.0607, "num_input_tokens_seen": 88962352, "step": 41165 }, { "epoch": 6.716150081566068, "grad_norm": 0.07870490849018097, "learning_rate": 0.0008399775244480961, "loss": 0.0313, "num_input_tokens_seen": 88974512, "step": 41170 }, { "epoch": 6.716965742251223, "grad_norm": 0.018581105396151543, "learning_rate": 0.0008399253281279557, "loss": 0.0362, "num_input_tokens_seen": 88985168, "step": 41175 }, { "epoch": 6.717781402936378, "grad_norm": 0.24575214087963104, "learning_rate": 0.0008398731249187975, "loss": 0.2494, "num_input_tokens_seen": 88996592, "step": 41180 }, { "epoch": 6.718597063621534, "grad_norm": 0.023241423070430756, "learning_rate": 0.0008398209148216793, "loss": 0.02, "num_input_tokens_seen": 89008912, "step": 41185 }, { "epoch": 6.719412724306689, "grad_norm": 0.2728913128376007, "learning_rate": 0.000839768697837659, "loss": 0.0942, "num_input_tokens_seen": 89020240, "step": 41190 }, { "epoch": 6.720228384991843, "grad_norm": 0.004862621426582336, "learning_rate": 0.0008397164739677951, "loss": 0.1391, "num_input_tokens_seen": 89031792, "step": 41195 }, { "epoch": 6.721044045676998, "grad_norm": 0.2995845079421997, "learning_rate": 0.0008396642432131459, "loss": 0.1093, "num_input_tokens_seen": 89042928, "step": 41200 }, { "epoch": 6.721859706362153, "grad_norm": 0.01482780184596777, "learning_rate": 0.0008396120055747698, "loss": 0.1983, "num_input_tokens_seen": 89052432, "step": 41205 }, { "epoch": 6.722675367047309, "grad_norm": 0.1203823983669281, "learning_rate": 0.0008395597610537257, "loss": 0.0408, "num_input_tokens_seen": 89063792, "step": 41210 }, { "epoch": 6.7234910277324635, "grad_norm": 0.05856647342443466, "learning_rate": 0.0008395075096510723, "loss": 0.1183, "num_input_tokens_seen": 89074384, "step": 41215 }, { "epoch": 6.724306688417618, "grad_norm": 0.07476924359798431, "learning_rate": 0.0008394552513678684, "loss": 0.0963, "num_input_tokens_seen": 89085488, "step": 41220 }, { "epoch": 6.725122349102773, "grad_norm": 0.03770218417048454, "learning_rate": 0.0008394029862051733, "loss": 0.0795, "num_input_tokens_seen": 89095728, "step": 41225 }, { "epoch": 6.725938009787928, "grad_norm": 0.019872894510626793, "learning_rate": 0.0008393507141640461, "loss": 0.0649, "num_input_tokens_seen": 89106096, "step": 41230 }, { "epoch": 6.726753670473083, "grad_norm": 0.004693881142884493, "learning_rate": 0.0008392984352455461, "loss": 0.0563, "num_input_tokens_seen": 89116048, "step": 41235 }, { "epoch": 6.7275693311582385, "grad_norm": 0.07502961158752441, "learning_rate": 0.0008392461494507331, "loss": 0.0422, "num_input_tokens_seen": 89127472, "step": 41240 }, { "epoch": 6.728384991843393, "grad_norm": 0.0028481758199632168, "learning_rate": 0.0008391938567806663, "loss": 0.0258, "num_input_tokens_seen": 89138288, "step": 41245 }, { "epoch": 6.729200652528548, "grad_norm": 0.006228649523109198, "learning_rate": 0.0008391415572364058, "loss": 0.0626, "num_input_tokens_seen": 89149456, "step": 41250 }, { "epoch": 6.730016313213703, "grad_norm": 0.2872016429901123, "learning_rate": 0.0008390892508190113, "loss": 0.0579, "num_input_tokens_seen": 89161008, "step": 41255 }, { "epoch": 6.730831973898858, "grad_norm": 0.12412890046834946, "learning_rate": 0.000839036937529543, "loss": 0.0991, "num_input_tokens_seen": 89171856, "step": 41260 }, { "epoch": 6.731647634584013, "grad_norm": 0.02800234593451023, "learning_rate": 0.0008389846173690611, "loss": 0.0191, "num_input_tokens_seen": 89183024, "step": 41265 }, { "epoch": 6.732463295269168, "grad_norm": 0.012140207923948765, "learning_rate": 0.0008389322903386261, "loss": 0.0071, "num_input_tokens_seen": 89192880, "step": 41270 }, { "epoch": 6.733278955954323, "grad_norm": 0.02547260746359825, "learning_rate": 0.0008388799564392979, "loss": 0.0876, "num_input_tokens_seen": 89203888, "step": 41275 }, { "epoch": 6.734094616639478, "grad_norm": 0.267605185508728, "learning_rate": 0.0008388276156721377, "loss": 0.0582, "num_input_tokens_seen": 89214192, "step": 41280 }, { "epoch": 6.734910277324633, "grad_norm": 0.03963426128029823, "learning_rate": 0.0008387752680382062, "loss": 0.1477, "num_input_tokens_seen": 89225424, "step": 41285 }, { "epoch": 6.735725938009788, "grad_norm": 0.2313862442970276, "learning_rate": 0.0008387229135385638, "loss": 0.0645, "num_input_tokens_seen": 89235600, "step": 41290 }, { "epoch": 6.736541598694943, "grad_norm": 0.06140168383717537, "learning_rate": 0.0008386705521742719, "loss": 0.1396, "num_input_tokens_seen": 89246544, "step": 41295 }, { "epoch": 6.737357259380098, "grad_norm": 0.23113910853862762, "learning_rate": 0.0008386181839463918, "loss": 0.1171, "num_input_tokens_seen": 89257040, "step": 41300 }, { "epoch": 6.738172920065253, "grad_norm": 0.011116042733192444, "learning_rate": 0.0008385658088559845, "loss": 0.164, "num_input_tokens_seen": 89267792, "step": 41305 }, { "epoch": 6.738988580750408, "grad_norm": 0.12740331888198853, "learning_rate": 0.0008385134269041116, "loss": 0.0459, "num_input_tokens_seen": 89278832, "step": 41310 }, { "epoch": 6.739804241435563, "grad_norm": 0.0157408956438303, "learning_rate": 0.0008384610380918347, "loss": 0.0332, "num_input_tokens_seen": 89290000, "step": 41315 }, { "epoch": 6.740619902120718, "grad_norm": 0.3017098605632782, "learning_rate": 0.0008384086424202156, "loss": 0.0939, "num_input_tokens_seen": 89300080, "step": 41320 }, { "epoch": 6.741435562805873, "grad_norm": 0.01924244500696659, "learning_rate": 0.0008383562398903157, "loss": 0.0786, "num_input_tokens_seen": 89310960, "step": 41325 }, { "epoch": 6.742251223491028, "grad_norm": 0.06611377000808716, "learning_rate": 0.0008383038305031976, "loss": 0.1008, "num_input_tokens_seen": 89320784, "step": 41330 }, { "epoch": 6.743066884176183, "grad_norm": 0.26710790395736694, "learning_rate": 0.0008382514142599234, "loss": 0.1293, "num_input_tokens_seen": 89330416, "step": 41335 }, { "epoch": 6.7438825448613375, "grad_norm": 0.005136616062372923, "learning_rate": 0.0008381989911615548, "loss": 0.1704, "num_input_tokens_seen": 89340976, "step": 41340 }, { "epoch": 6.744698205546492, "grad_norm": 0.37061432003974915, "learning_rate": 0.0008381465612091549, "loss": 0.1181, "num_input_tokens_seen": 89352240, "step": 41345 }, { "epoch": 6.745513866231647, "grad_norm": 0.0033552530221641064, "learning_rate": 0.0008380941244037858, "loss": 0.0418, "num_input_tokens_seen": 89362704, "step": 41350 }, { "epoch": 6.746329526916803, "grad_norm": 0.003381171729415655, "learning_rate": 0.0008380416807465106, "loss": 0.0364, "num_input_tokens_seen": 89373392, "step": 41355 }, { "epoch": 6.747145187601958, "grad_norm": 0.0964924544095993, "learning_rate": 0.0008379892302383916, "loss": 0.021, "num_input_tokens_seen": 89384368, "step": 41360 }, { "epoch": 6.7479608482871125, "grad_norm": 0.021633053198456764, "learning_rate": 0.0008379367728804923, "loss": 0.0493, "num_input_tokens_seen": 89394736, "step": 41365 }, { "epoch": 6.748776508972267, "grad_norm": 0.10366320610046387, "learning_rate": 0.0008378843086738755, "loss": 0.0979, "num_input_tokens_seen": 89405296, "step": 41370 }, { "epoch": 6.749592169657422, "grad_norm": 0.0048525105230510235, "learning_rate": 0.0008378318376196046, "loss": 0.036, "num_input_tokens_seen": 89416624, "step": 41375 }, { "epoch": 6.750407830342578, "grad_norm": 0.3233185112476349, "learning_rate": 0.0008377793597187428, "loss": 0.1037, "num_input_tokens_seen": 89426928, "step": 41380 }, { "epoch": 6.751223491027733, "grad_norm": 0.01112865749746561, "learning_rate": 0.000837726874972354, "loss": 0.0211, "num_input_tokens_seen": 89439024, "step": 41385 }, { "epoch": 6.7520391517128875, "grad_norm": 0.28287333250045776, "learning_rate": 0.0008376743833815015, "loss": 0.1352, "num_input_tokens_seen": 89447472, "step": 41390 }, { "epoch": 6.752854812398042, "grad_norm": 0.3251228928565979, "learning_rate": 0.0008376218849472493, "loss": 0.201, "num_input_tokens_seen": 89458192, "step": 41395 }, { "epoch": 6.753670473083197, "grad_norm": 0.03416355699300766, "learning_rate": 0.0008375693796706613, "loss": 0.0452, "num_input_tokens_seen": 89468912, "step": 41400 }, { "epoch": 6.754486133768353, "grad_norm": 0.049930673092603683, "learning_rate": 0.0008375168675528016, "loss": 0.0615, "num_input_tokens_seen": 89479856, "step": 41405 }, { "epoch": 6.755301794453508, "grad_norm": 0.18245936930179596, "learning_rate": 0.0008374643485947342, "loss": 0.2387, "num_input_tokens_seen": 89490736, "step": 41410 }, { "epoch": 6.7561174551386625, "grad_norm": 0.09749633818864822, "learning_rate": 0.0008374118227975238, "loss": 0.0335, "num_input_tokens_seen": 89501392, "step": 41415 }, { "epoch": 6.756933115823817, "grad_norm": 0.022130804136395454, "learning_rate": 0.0008373592901622349, "loss": 0.0518, "num_input_tokens_seen": 89512656, "step": 41420 }, { "epoch": 6.757748776508972, "grad_norm": 0.02885841391980648, "learning_rate": 0.0008373067506899319, "loss": 0.0253, "num_input_tokens_seen": 89523088, "step": 41425 }, { "epoch": 6.758564437194127, "grad_norm": 0.09676162898540497, "learning_rate": 0.0008372542043816797, "loss": 0.1792, "num_input_tokens_seen": 89534288, "step": 41430 }, { "epoch": 6.759380097879282, "grad_norm": 0.00921018235385418, "learning_rate": 0.0008372016512385432, "loss": 0.0125, "num_input_tokens_seen": 89545872, "step": 41435 }, { "epoch": 6.760195758564437, "grad_norm": 0.06077948957681656, "learning_rate": 0.0008371490912615875, "loss": 0.0715, "num_input_tokens_seen": 89556336, "step": 41440 }, { "epoch": 6.761011419249592, "grad_norm": 0.031869012862443924, "learning_rate": 0.0008370965244518778, "loss": 0.0465, "num_input_tokens_seen": 89566768, "step": 41445 }, { "epoch": 6.761827079934747, "grad_norm": 0.02272254228591919, "learning_rate": 0.0008370439508104794, "loss": 0.0356, "num_input_tokens_seen": 89577456, "step": 41450 }, { "epoch": 6.762642740619902, "grad_norm": 0.09011733531951904, "learning_rate": 0.0008369913703384576, "loss": 0.1329, "num_input_tokens_seen": 89589136, "step": 41455 }, { "epoch": 6.763458401305057, "grad_norm": 0.22700747847557068, "learning_rate": 0.0008369387830368785, "loss": 0.1296, "num_input_tokens_seen": 89599312, "step": 41460 }, { "epoch": 6.764274061990212, "grad_norm": 0.24166239798069, "learning_rate": 0.0008368861889068071, "loss": 0.1623, "num_input_tokens_seen": 89610960, "step": 41465 }, { "epoch": 6.765089722675367, "grad_norm": 0.05804312974214554, "learning_rate": 0.0008368335879493099, "loss": 0.0187, "num_input_tokens_seen": 89620784, "step": 41470 }, { "epoch": 6.765905383360522, "grad_norm": 0.0035739641170948744, "learning_rate": 0.0008367809801654529, "loss": 0.1717, "num_input_tokens_seen": 89632272, "step": 41475 }, { "epoch": 6.766721044045677, "grad_norm": 0.23069559037685394, "learning_rate": 0.0008367283655563018, "loss": 0.1694, "num_input_tokens_seen": 89643120, "step": 41480 }, { "epoch": 6.767536704730832, "grad_norm": 0.15718932449817657, "learning_rate": 0.0008366757441229235, "loss": 0.0793, "num_input_tokens_seen": 89653584, "step": 41485 }, { "epoch": 6.768352365415987, "grad_norm": 0.08193394541740417, "learning_rate": 0.000836623115866384, "loss": 0.0785, "num_input_tokens_seen": 89665264, "step": 41490 }, { "epoch": 6.769168026101142, "grad_norm": 0.010544022545218468, "learning_rate": 0.00083657048078775, "loss": 0.0782, "num_input_tokens_seen": 89675216, "step": 41495 }, { "epoch": 6.769983686786297, "grad_norm": 0.16942720115184784, "learning_rate": 0.0008365178388880883, "loss": 0.2512, "num_input_tokens_seen": 89686480, "step": 41500 }, { "epoch": 6.770799347471452, "grad_norm": 0.024143319576978683, "learning_rate": 0.0008364651901684657, "loss": 0.117, "num_input_tokens_seen": 89697232, "step": 41505 }, { "epoch": 6.771615008156607, "grad_norm": 0.18397311866283417, "learning_rate": 0.0008364125346299492, "loss": 0.0661, "num_input_tokens_seen": 89708080, "step": 41510 }, { "epoch": 6.7724306688417615, "grad_norm": 0.02165348269045353, "learning_rate": 0.0008363598722736057, "loss": 0.1404, "num_input_tokens_seen": 89718416, "step": 41515 }, { "epoch": 6.773246329526917, "grad_norm": 0.07183000445365906, "learning_rate": 0.0008363072031005028, "loss": 0.0314, "num_input_tokens_seen": 89728688, "step": 41520 }, { "epoch": 6.774061990212072, "grad_norm": 0.0437711663544178, "learning_rate": 0.0008362545271117079, "loss": 0.1382, "num_input_tokens_seen": 89740784, "step": 41525 }, { "epoch": 6.774877650897227, "grad_norm": 0.1806856095790863, "learning_rate": 0.0008362018443082884, "loss": 0.0989, "num_input_tokens_seen": 89751632, "step": 41530 }, { "epoch": 6.775693311582382, "grad_norm": 0.08479801565408707, "learning_rate": 0.000836149154691312, "loss": 0.0721, "num_input_tokens_seen": 89762224, "step": 41535 }, { "epoch": 6.7765089722675365, "grad_norm": 0.013879367150366306, "learning_rate": 0.0008360964582618465, "loss": 0.1656, "num_input_tokens_seen": 89772624, "step": 41540 }, { "epoch": 6.777324632952691, "grad_norm": 0.014607289806008339, "learning_rate": 0.0008360437550209599, "loss": 0.0195, "num_input_tokens_seen": 89782832, "step": 41545 }, { "epoch": 6.778140293637847, "grad_norm": 0.3534904420375824, "learning_rate": 0.0008359910449697203, "loss": 0.1276, "num_input_tokens_seen": 89793072, "step": 41550 }, { "epoch": 6.778955954323002, "grad_norm": 0.01215518917888403, "learning_rate": 0.0008359383281091961, "loss": 0.055, "num_input_tokens_seen": 89804080, "step": 41555 }, { "epoch": 6.779771615008157, "grad_norm": 0.028247803449630737, "learning_rate": 0.0008358856044404553, "loss": 0.1613, "num_input_tokens_seen": 89815888, "step": 41560 }, { "epoch": 6.780587275693311, "grad_norm": 0.010149382054805756, "learning_rate": 0.0008358328739645668, "loss": 0.0612, "num_input_tokens_seen": 89827344, "step": 41565 }, { "epoch": 6.781402936378466, "grad_norm": 0.281253457069397, "learning_rate": 0.000835780136682599, "loss": 0.2119, "num_input_tokens_seen": 89837328, "step": 41570 }, { "epoch": 6.782218597063622, "grad_norm": 0.05083395168185234, "learning_rate": 0.0008357273925956208, "loss": 0.0712, "num_input_tokens_seen": 89847056, "step": 41575 }, { "epoch": 6.783034257748777, "grad_norm": 0.06286054849624634, "learning_rate": 0.000835674641704701, "loss": 0.0331, "num_input_tokens_seen": 89857488, "step": 41580 }, { "epoch": 6.783849918433932, "grad_norm": 0.01152315828949213, "learning_rate": 0.0008356218840109089, "loss": 0.0839, "num_input_tokens_seen": 89867920, "step": 41585 }, { "epoch": 6.784665579119086, "grad_norm": 0.025084182620048523, "learning_rate": 0.0008355691195153134, "loss": 0.0258, "num_input_tokens_seen": 89878768, "step": 41590 }, { "epoch": 6.785481239804241, "grad_norm": 0.046773433685302734, "learning_rate": 0.000835516348218984, "loss": 0.1176, "num_input_tokens_seen": 89890768, "step": 41595 }, { "epoch": 6.786296900489396, "grad_norm": 0.039644379168748856, "learning_rate": 0.0008354635701229902, "loss": 0.083, "num_input_tokens_seen": 89901712, "step": 41600 }, { "epoch": 6.787112561174552, "grad_norm": 0.14292879402637482, "learning_rate": 0.0008354107852284016, "loss": 0.0583, "num_input_tokens_seen": 89913104, "step": 41605 }, { "epoch": 6.787928221859707, "grad_norm": 0.2356540560722351, "learning_rate": 0.0008353579935362881, "loss": 0.1072, "num_input_tokens_seen": 89923120, "step": 41610 }, { "epoch": 6.788743882544861, "grad_norm": 0.2634557783603668, "learning_rate": 0.0008353051950477192, "loss": 0.0516, "num_input_tokens_seen": 89934736, "step": 41615 }, { "epoch": 6.789559543230016, "grad_norm": 0.10258938372135162, "learning_rate": 0.0008352523897637652, "loss": 0.0387, "num_input_tokens_seen": 89945808, "step": 41620 }, { "epoch": 6.790375203915171, "grad_norm": 0.014087699353694916, "learning_rate": 0.0008351995776854962, "loss": 0.1091, "num_input_tokens_seen": 89956880, "step": 41625 }, { "epoch": 6.791190864600326, "grad_norm": 0.01874430850148201, "learning_rate": 0.0008351467588139827, "loss": 0.0937, "num_input_tokens_seen": 89967504, "step": 41630 }, { "epoch": 6.7920065252854815, "grad_norm": 0.029873300343751907, "learning_rate": 0.0008350939331502949, "loss": 0.0384, "num_input_tokens_seen": 89978448, "step": 41635 }, { "epoch": 6.792822185970636, "grad_norm": 0.011422554962337017, "learning_rate": 0.0008350411006955033, "loss": 0.0058, "num_input_tokens_seen": 89990160, "step": 41640 }, { "epoch": 6.793637846655791, "grad_norm": 0.11676127463579178, "learning_rate": 0.0008349882614506789, "loss": 0.049, "num_input_tokens_seen": 90000592, "step": 41645 }, { "epoch": 6.794453507340946, "grad_norm": 0.1809616982936859, "learning_rate": 0.0008349354154168924, "loss": 0.1131, "num_input_tokens_seen": 90011152, "step": 41650 }, { "epoch": 6.795269168026101, "grad_norm": 0.19138872623443604, "learning_rate": 0.0008348825625952148, "loss": 0.0849, "num_input_tokens_seen": 90020432, "step": 41655 }, { "epoch": 6.7960848287112565, "grad_norm": 0.278010755777359, "learning_rate": 0.0008348297029867172, "loss": 0.135, "num_input_tokens_seen": 90031120, "step": 41660 }, { "epoch": 6.796900489396411, "grad_norm": 0.06747753918170929, "learning_rate": 0.0008347768365924709, "loss": 0.0888, "num_input_tokens_seen": 90042160, "step": 41665 }, { "epoch": 6.797716150081566, "grad_norm": 0.14137335121631622, "learning_rate": 0.0008347239634135474, "loss": 0.1913, "num_input_tokens_seen": 90052976, "step": 41670 }, { "epoch": 6.798531810766721, "grad_norm": 0.021791979670524597, "learning_rate": 0.0008346710834510181, "loss": 0.0335, "num_input_tokens_seen": 90063728, "step": 41675 }, { "epoch": 6.799347471451876, "grad_norm": 0.10690966993570328, "learning_rate": 0.0008346181967059548, "loss": 0.041, "num_input_tokens_seen": 90076272, "step": 41680 }, { "epoch": 6.800163132137031, "grad_norm": 0.22786171734333038, "learning_rate": 0.0008345653031794292, "loss": 0.079, "num_input_tokens_seen": 90087056, "step": 41685 }, { "epoch": 6.800978792822186, "grad_norm": 0.41516733169555664, "learning_rate": 0.0008345124028725133, "loss": 0.1288, "num_input_tokens_seen": 90096944, "step": 41690 }, { "epoch": 6.801794453507341, "grad_norm": 0.007719927933067083, "learning_rate": 0.0008344594957862792, "loss": 0.0907, "num_input_tokens_seen": 90108752, "step": 41695 }, { "epoch": 6.802610114192496, "grad_norm": 0.012921489775180817, "learning_rate": 0.000834406581921799, "loss": 0.1094, "num_input_tokens_seen": 90119856, "step": 41700 }, { "epoch": 6.803425774877651, "grad_norm": 0.12971089780330658, "learning_rate": 0.0008343536612801454, "loss": 0.0577, "num_input_tokens_seen": 90130896, "step": 41705 }, { "epoch": 6.804241435562806, "grad_norm": 0.010836289264261723, "learning_rate": 0.0008343007338623906, "loss": 0.1355, "num_input_tokens_seen": 90139632, "step": 41710 }, { "epoch": 6.80505709624796, "grad_norm": 0.006006123032420874, "learning_rate": 0.0008342477996696074, "loss": 0.1392, "num_input_tokens_seen": 90150096, "step": 41715 }, { "epoch": 6.805872756933116, "grad_norm": 0.15588663518428802, "learning_rate": 0.0008341948587028684, "loss": 0.0615, "num_input_tokens_seen": 90161360, "step": 41720 }, { "epoch": 6.806688417618271, "grad_norm": 0.18570052087306976, "learning_rate": 0.0008341419109632466, "loss": 0.14, "num_input_tokens_seen": 90172144, "step": 41725 }, { "epoch": 6.807504078303426, "grad_norm": 0.07356259226799011, "learning_rate": 0.0008340889564518153, "loss": 0.0617, "num_input_tokens_seen": 90183536, "step": 41730 }, { "epoch": 6.808319738988581, "grad_norm": 0.10233187675476074, "learning_rate": 0.0008340359951696472, "loss": 0.1075, "num_input_tokens_seen": 90194224, "step": 41735 }, { "epoch": 6.809135399673735, "grad_norm": 0.027137896046042442, "learning_rate": 0.0008339830271178162, "loss": 0.0263, "num_input_tokens_seen": 90205200, "step": 41740 }, { "epoch": 6.809951060358891, "grad_norm": 0.13853560388088226, "learning_rate": 0.0008339300522973952, "loss": 0.1033, "num_input_tokens_seen": 90214224, "step": 41745 }, { "epoch": 6.810766721044046, "grad_norm": 0.03210921958088875, "learning_rate": 0.0008338770707094583, "loss": 0.0439, "num_input_tokens_seen": 90225552, "step": 41750 }, { "epoch": 6.811582381729201, "grad_norm": 0.0702987015247345, "learning_rate": 0.0008338240823550789, "loss": 0.2171, "num_input_tokens_seen": 90236048, "step": 41755 }, { "epoch": 6.8123980424143555, "grad_norm": 0.35574427247047424, "learning_rate": 0.000833771087235331, "loss": 0.094, "num_input_tokens_seen": 90247344, "step": 41760 }, { "epoch": 6.81321370309951, "grad_norm": 0.2279064953327179, "learning_rate": 0.0008337180853512885, "loss": 0.0731, "num_input_tokens_seen": 90258320, "step": 41765 }, { "epoch": 6.814029363784666, "grad_norm": 0.025548186153173447, "learning_rate": 0.0008336650767040258, "loss": 0.0733, "num_input_tokens_seen": 90268336, "step": 41770 }, { "epoch": 6.814845024469821, "grad_norm": 0.20487023890018463, "learning_rate": 0.000833612061294617, "loss": 0.1404, "num_input_tokens_seen": 90279120, "step": 41775 }, { "epoch": 6.815660685154976, "grad_norm": 0.10038571059703827, "learning_rate": 0.0008335590391241365, "loss": 0.0751, "num_input_tokens_seen": 90290064, "step": 41780 }, { "epoch": 6.8164763458401305, "grad_norm": 0.41231435537338257, "learning_rate": 0.000833506010193659, "loss": 0.125, "num_input_tokens_seen": 90299952, "step": 41785 }, { "epoch": 6.817292006525285, "grad_norm": 0.20405316352844238, "learning_rate": 0.000833452974504259, "loss": 0.0788, "num_input_tokens_seen": 90310640, "step": 41790 }, { "epoch": 6.81810766721044, "grad_norm": 0.03132156655192375, "learning_rate": 0.0008333999320570116, "loss": 0.0719, "num_input_tokens_seen": 90320656, "step": 41795 }, { "epoch": 6.818923327895595, "grad_norm": 0.012432006187736988, "learning_rate": 0.0008333468828529916, "loss": 0.1274, "num_input_tokens_seen": 90331824, "step": 41800 }, { "epoch": 6.819738988580751, "grad_norm": 0.021729158237576485, "learning_rate": 0.0008332938268932742, "loss": 0.06, "num_input_tokens_seen": 90344016, "step": 41805 }, { "epoch": 6.8205546492659055, "grad_norm": 0.04156330227851868, "learning_rate": 0.0008332407641789344, "loss": 0.0443, "num_input_tokens_seen": 90356048, "step": 41810 }, { "epoch": 6.82137030995106, "grad_norm": 0.14998769760131836, "learning_rate": 0.0008331876947110478, "loss": 0.1661, "num_input_tokens_seen": 90367792, "step": 41815 }, { "epoch": 6.822185970636215, "grad_norm": 0.05917227268218994, "learning_rate": 0.00083313461849069, "loss": 0.0593, "num_input_tokens_seen": 90377840, "step": 41820 }, { "epoch": 6.82300163132137, "grad_norm": 0.012237275019288063, "learning_rate": 0.0008330815355189365, "loss": 0.0382, "num_input_tokens_seen": 90387920, "step": 41825 }, { "epoch": 6.823817292006526, "grad_norm": 0.19908970594406128, "learning_rate": 0.0008330284457968631, "loss": 0.0849, "num_input_tokens_seen": 90399760, "step": 41830 }, { "epoch": 6.8246329526916805, "grad_norm": 0.12390464544296265, "learning_rate": 0.0008329753493255458, "loss": 0.1322, "num_input_tokens_seen": 90409840, "step": 41835 }, { "epoch": 6.825448613376835, "grad_norm": 0.0513911172747612, "learning_rate": 0.0008329222461060606, "loss": 0.1119, "num_input_tokens_seen": 90421136, "step": 41840 }, { "epoch": 6.82626427406199, "grad_norm": 0.22825922071933746, "learning_rate": 0.0008328691361394838, "loss": 0.1985, "num_input_tokens_seen": 90430640, "step": 41845 }, { "epoch": 6.827079934747145, "grad_norm": 0.01640220545232296, "learning_rate": 0.0008328160194268916, "loss": 0.0438, "num_input_tokens_seen": 90442064, "step": 41850 }, { "epoch": 6.827895595432301, "grad_norm": 0.09551920741796494, "learning_rate": 0.0008327628959693606, "loss": 0.046, "num_input_tokens_seen": 90452336, "step": 41855 }, { "epoch": 6.828711256117455, "grad_norm": 0.25587597489356995, "learning_rate": 0.0008327097657679674, "loss": 0.1737, "num_input_tokens_seen": 90463600, "step": 41860 }, { "epoch": 6.82952691680261, "grad_norm": 0.04232428967952728, "learning_rate": 0.0008326566288237887, "loss": 0.0204, "num_input_tokens_seen": 90474576, "step": 41865 }, { "epoch": 6.830342577487765, "grad_norm": 0.03149278461933136, "learning_rate": 0.0008326034851379014, "loss": 0.1502, "num_input_tokens_seen": 90484976, "step": 41870 }, { "epoch": 6.83115823817292, "grad_norm": 0.28385284543037415, "learning_rate": 0.0008325503347113826, "loss": 0.1137, "num_input_tokens_seen": 90494576, "step": 41875 }, { "epoch": 6.831973898858075, "grad_norm": 0.2156359702348709, "learning_rate": 0.0008324971775453094, "loss": 0.2996, "num_input_tokens_seen": 90504592, "step": 41880 }, { "epoch": 6.8327895595432295, "grad_norm": 0.01356032956391573, "learning_rate": 0.0008324440136407591, "loss": 0.0573, "num_input_tokens_seen": 90515120, "step": 41885 }, { "epoch": 6.833605220228385, "grad_norm": 0.1261776089668274, "learning_rate": 0.000832390842998809, "loss": 0.0498, "num_input_tokens_seen": 90526000, "step": 41890 }, { "epoch": 6.83442088091354, "grad_norm": 0.1900995522737503, "learning_rate": 0.0008323376656205369, "loss": 0.1378, "num_input_tokens_seen": 90537552, "step": 41895 }, { "epoch": 6.835236541598695, "grad_norm": 0.31141209602355957, "learning_rate": 0.0008322844815070204, "loss": 0.1889, "num_input_tokens_seen": 90548272, "step": 41900 }, { "epoch": 6.83605220228385, "grad_norm": 0.2049756795167923, "learning_rate": 0.0008322312906593373, "loss": 0.1449, "num_input_tokens_seen": 90558064, "step": 41905 }, { "epoch": 6.8368678629690045, "grad_norm": 0.03546799719333649, "learning_rate": 0.0008321780930785657, "loss": 0.0302, "num_input_tokens_seen": 90569712, "step": 41910 }, { "epoch": 6.83768352365416, "grad_norm": 0.09230761975049973, "learning_rate": 0.0008321248887657836, "loss": 0.1765, "num_input_tokens_seen": 90580656, "step": 41915 }, { "epoch": 6.838499184339315, "grad_norm": 0.052474021911621094, "learning_rate": 0.0008320716777220694, "loss": 0.0439, "num_input_tokens_seen": 90590512, "step": 41920 }, { "epoch": 6.83931484502447, "grad_norm": 0.05721645429730415, "learning_rate": 0.0008320184599485012, "loss": 0.0613, "num_input_tokens_seen": 90600784, "step": 41925 }, { "epoch": 6.840130505709625, "grad_norm": 0.015794144943356514, "learning_rate": 0.0008319652354461577, "loss": 0.0657, "num_input_tokens_seen": 90611984, "step": 41930 }, { "epoch": 6.8409461663947795, "grad_norm": 0.024517951533198357, "learning_rate": 0.0008319120042161179, "loss": 0.0459, "num_input_tokens_seen": 90623152, "step": 41935 }, { "epoch": 6.841761827079935, "grad_norm": 0.24330684542655945, "learning_rate": 0.00083185876625946, "loss": 0.1472, "num_input_tokens_seen": 90634736, "step": 41940 }, { "epoch": 6.84257748776509, "grad_norm": 0.015622702427208424, "learning_rate": 0.0008318055215772633, "loss": 0.1043, "num_input_tokens_seen": 90644976, "step": 41945 }, { "epoch": 6.843393148450245, "grad_norm": 0.1785089373588562, "learning_rate": 0.0008317522701706066, "loss": 0.1045, "num_input_tokens_seen": 90656656, "step": 41950 }, { "epoch": 6.8442088091354, "grad_norm": 0.16877882182598114, "learning_rate": 0.0008316990120405695, "loss": 0.0717, "num_input_tokens_seen": 90668592, "step": 41955 }, { "epoch": 6.8450244698205545, "grad_norm": 0.009568187408149242, "learning_rate": 0.0008316457471882311, "loss": 0.0108, "num_input_tokens_seen": 90678800, "step": 41960 }, { "epoch": 6.845840130505709, "grad_norm": 0.012613932602107525, "learning_rate": 0.0008315924756146708, "loss": 0.0715, "num_input_tokens_seen": 90689584, "step": 41965 }, { "epoch": 6.846655791190865, "grad_norm": 0.20978093147277832, "learning_rate": 0.0008315391973209685, "loss": 0.2168, "num_input_tokens_seen": 90699536, "step": 41970 }, { "epoch": 6.84747145187602, "grad_norm": 0.04319089278578758, "learning_rate": 0.0008314859123082037, "loss": 0.0733, "num_input_tokens_seen": 90710800, "step": 41975 }, { "epoch": 6.848287112561175, "grad_norm": 0.17170238494873047, "learning_rate": 0.0008314326205774563, "loss": 0.0658, "num_input_tokens_seen": 90720976, "step": 41980 }, { "epoch": 6.849102773246329, "grad_norm": 0.1302386075258255, "learning_rate": 0.0008313793221298065, "loss": 0.1116, "num_input_tokens_seen": 90730832, "step": 41985 }, { "epoch": 6.849918433931484, "grad_norm": 0.01649455539882183, "learning_rate": 0.0008313260169663343, "loss": 0.0702, "num_input_tokens_seen": 90742320, "step": 41990 }, { "epoch": 6.850734094616639, "grad_norm": 0.046188920736312866, "learning_rate": 0.00083127270508812, "loss": 0.0986, "num_input_tokens_seen": 90753680, "step": 41995 }, { "epoch": 6.851549755301795, "grad_norm": 0.02995547652244568, "learning_rate": 0.0008312193864962442, "loss": 0.0453, "num_input_tokens_seen": 90764272, "step": 42000 }, { "epoch": 6.85236541598695, "grad_norm": 0.023149535059928894, "learning_rate": 0.0008311660611917873, "loss": 0.026, "num_input_tokens_seen": 90774672, "step": 42005 }, { "epoch": 6.853181076672104, "grad_norm": 0.1374523937702179, "learning_rate": 0.00083111272917583, "loss": 0.1136, "num_input_tokens_seen": 90786416, "step": 42010 }, { "epoch": 6.853996737357259, "grad_norm": 0.25843703746795654, "learning_rate": 0.0008310593904494532, "loss": 0.1107, "num_input_tokens_seen": 90795472, "step": 42015 }, { "epoch": 6.854812398042414, "grad_norm": 0.01731134206056595, "learning_rate": 0.000831006045013738, "loss": 0.0317, "num_input_tokens_seen": 90807248, "step": 42020 }, { "epoch": 6.85562805872757, "grad_norm": 0.10636473447084427, "learning_rate": 0.0008309526928697653, "loss": 0.0349, "num_input_tokens_seen": 90818544, "step": 42025 }, { "epoch": 6.856443719412725, "grad_norm": 0.01988835819065571, "learning_rate": 0.0008308993340186164, "loss": 0.1655, "num_input_tokens_seen": 90829264, "step": 42030 }, { "epoch": 6.857259380097879, "grad_norm": 0.03717343881726265, "learning_rate": 0.0008308459684613727, "loss": 0.0217, "num_input_tokens_seen": 90840048, "step": 42035 }, { "epoch": 6.858075040783034, "grad_norm": 0.16305583715438843, "learning_rate": 0.0008307925961991158, "loss": 0.1935, "num_input_tokens_seen": 90850512, "step": 42040 }, { "epoch": 6.858890701468189, "grad_norm": 0.14231441915035248, "learning_rate": 0.0008307392172329273, "loss": 0.1166, "num_input_tokens_seen": 90862064, "step": 42045 }, { "epoch": 6.859706362153344, "grad_norm": 0.012785021215677261, "learning_rate": 0.000830685831563889, "loss": 0.0339, "num_input_tokens_seen": 90872592, "step": 42050 }, { "epoch": 6.8605220228384995, "grad_norm": 0.07883328944444656, "learning_rate": 0.0008306324391930827, "loss": 0.0422, "num_input_tokens_seen": 90883024, "step": 42055 }, { "epoch": 6.861337683523654, "grad_norm": 0.04878608137369156, "learning_rate": 0.0008305790401215906, "loss": 0.0309, "num_input_tokens_seen": 90893392, "step": 42060 }, { "epoch": 6.862153344208809, "grad_norm": 0.13074623048305511, "learning_rate": 0.000830525634350495, "loss": 0.1722, "num_input_tokens_seen": 90904400, "step": 42065 }, { "epoch": 6.862969004893964, "grad_norm": 0.07179665565490723, "learning_rate": 0.0008304722218808782, "loss": 0.1888, "num_input_tokens_seen": 90916560, "step": 42070 }, { "epoch": 6.863784665579119, "grad_norm": 0.12156625837087631, "learning_rate": 0.0008304188027138225, "loss": 0.0426, "num_input_tokens_seen": 90928368, "step": 42075 }, { "epoch": 6.864600326264274, "grad_norm": 0.15231293439865112, "learning_rate": 0.0008303653768504105, "loss": 0.0825, "num_input_tokens_seen": 90939600, "step": 42080 }, { "epoch": 6.865415986949429, "grad_norm": 0.1693643033504486, "learning_rate": 0.000830311944291725, "loss": 0.084, "num_input_tokens_seen": 90950256, "step": 42085 }, { "epoch": 6.866231647634584, "grad_norm": 0.2293866127729416, "learning_rate": 0.0008302585050388491, "loss": 0.0804, "num_input_tokens_seen": 90960656, "step": 42090 }, { "epoch": 6.867047308319739, "grad_norm": 0.011171751655638218, "learning_rate": 0.0008302050590928656, "loss": 0.0496, "num_input_tokens_seen": 90971376, "step": 42095 }, { "epoch": 6.867862969004894, "grad_norm": 0.01649610511958599, "learning_rate": 0.0008301516064548577, "loss": 0.0216, "num_input_tokens_seen": 90980144, "step": 42100 }, { "epoch": 6.868678629690049, "grad_norm": 0.14276473224163055, "learning_rate": 0.0008300981471259086, "loss": 0.165, "num_input_tokens_seen": 90990960, "step": 42105 }, { "epoch": 6.869494290375204, "grad_norm": 0.017909109592437744, "learning_rate": 0.0008300446811071018, "loss": 0.0642, "num_input_tokens_seen": 91002864, "step": 42110 }, { "epoch": 6.870309951060359, "grad_norm": 0.013232512399554253, "learning_rate": 0.0008299912083995208, "loss": 0.0539, "num_input_tokens_seen": 91013968, "step": 42115 }, { "epoch": 6.871125611745514, "grad_norm": 0.01880715787410736, "learning_rate": 0.0008299377290042493, "loss": 0.0368, "num_input_tokens_seen": 91025136, "step": 42120 }, { "epoch": 6.871941272430669, "grad_norm": 0.14953921735286713, "learning_rate": 0.0008298842429223714, "loss": 0.0699, "num_input_tokens_seen": 91035184, "step": 42125 }, { "epoch": 6.872756933115824, "grad_norm": 0.22939430177211761, "learning_rate": 0.0008298307501549706, "loss": 0.069, "num_input_tokens_seen": 91046544, "step": 42130 }, { "epoch": 6.873572593800979, "grad_norm": 0.005941577255725861, "learning_rate": 0.0008297772507031314, "loss": 0.0412, "num_input_tokens_seen": 91057488, "step": 42135 }, { "epoch": 6.874388254486134, "grad_norm": 0.23242725431919098, "learning_rate": 0.0008297237445679378, "loss": 0.1376, "num_input_tokens_seen": 91069040, "step": 42140 }, { "epoch": 6.875203915171289, "grad_norm": 0.04065469652414322, "learning_rate": 0.0008296702317504741, "loss": 0.0106, "num_input_tokens_seen": 91078352, "step": 42145 }, { "epoch": 6.876019575856444, "grad_norm": 0.06341571360826492, "learning_rate": 0.0008296167122518252, "loss": 0.1237, "num_input_tokens_seen": 91089008, "step": 42150 }, { "epoch": 6.876835236541599, "grad_norm": 0.058492325246334076, "learning_rate": 0.0008295631860730752, "loss": 0.1141, "num_input_tokens_seen": 91099536, "step": 42155 }, { "epoch": 6.877650897226753, "grad_norm": 0.036050185561180115, "learning_rate": 0.0008295096532153093, "loss": 0.0745, "num_input_tokens_seen": 91110160, "step": 42160 }, { "epoch": 6.878466557911908, "grad_norm": 0.006050860974937677, "learning_rate": 0.0008294561136796122, "loss": 0.0198, "num_input_tokens_seen": 91121136, "step": 42165 }, { "epoch": 6.879282218597064, "grad_norm": 0.10723251849412918, "learning_rate": 0.000829402567467069, "loss": 0.0796, "num_input_tokens_seen": 91131568, "step": 42170 }, { "epoch": 6.880097879282219, "grad_norm": 0.040584757924079895, "learning_rate": 0.000829349014578765, "loss": 0.0434, "num_input_tokens_seen": 91142192, "step": 42175 }, { "epoch": 6.8809135399673735, "grad_norm": 0.012464815750718117, "learning_rate": 0.0008292954550157853, "loss": 0.0365, "num_input_tokens_seen": 91153424, "step": 42180 }, { "epoch": 6.881729200652528, "grad_norm": 0.29857343435287476, "learning_rate": 0.0008292418887792155, "loss": 0.1503, "num_input_tokens_seen": 91163088, "step": 42185 }, { "epoch": 6.882544861337683, "grad_norm": 0.013671220280230045, "learning_rate": 0.0008291883158701413, "loss": 0.1858, "num_input_tokens_seen": 91174320, "step": 42190 }, { "epoch": 6.883360522022839, "grad_norm": 0.08105769008398056, "learning_rate": 0.000829134736289648, "loss": 0.1649, "num_input_tokens_seen": 91183792, "step": 42195 }, { "epoch": 6.884176182707994, "grad_norm": 0.026280393823981285, "learning_rate": 0.0008290811500388219, "loss": 0.0126, "num_input_tokens_seen": 91194608, "step": 42200 }, { "epoch": 6.8849918433931485, "grad_norm": 0.056491460651159286, "learning_rate": 0.0008290275571187488, "loss": 0.0839, "num_input_tokens_seen": 91204784, "step": 42205 }, { "epoch": 6.885807504078303, "grad_norm": 0.13008785247802734, "learning_rate": 0.0008289739575305148, "loss": 0.0596, "num_input_tokens_seen": 91215152, "step": 42210 }, { "epoch": 6.886623164763458, "grad_norm": 0.25845256447792053, "learning_rate": 0.0008289203512752063, "loss": 0.1248, "num_input_tokens_seen": 91225232, "step": 42215 }, { "epoch": 6.887438825448614, "grad_norm": 0.12239914387464523, "learning_rate": 0.0008288667383539097, "loss": 0.0507, "num_input_tokens_seen": 91236528, "step": 42220 }, { "epoch": 6.888254486133769, "grad_norm": 0.10594190657138824, "learning_rate": 0.0008288131187677112, "loss": 0.0668, "num_input_tokens_seen": 91247600, "step": 42225 }, { "epoch": 6.8890701468189235, "grad_norm": 0.11915894597768784, "learning_rate": 0.000828759492517698, "loss": 0.1642, "num_input_tokens_seen": 91257744, "step": 42230 }, { "epoch": 6.889885807504078, "grad_norm": 0.03344777226448059, "learning_rate": 0.0008287058596049563, "loss": 0.1174, "num_input_tokens_seen": 91268624, "step": 42235 }, { "epoch": 6.890701468189233, "grad_norm": 0.006844738032668829, "learning_rate": 0.0008286522200305738, "loss": 0.0567, "num_input_tokens_seen": 91279984, "step": 42240 }, { "epoch": 6.891517128874388, "grad_norm": 0.20053116977214813, "learning_rate": 0.0008285985737956367, "loss": 0.0718, "num_input_tokens_seen": 91290896, "step": 42245 }, { "epoch": 6.892332789559543, "grad_norm": 0.026900721713900566, "learning_rate": 0.0008285449209012328, "loss": 0.0506, "num_input_tokens_seen": 91302608, "step": 42250 }, { "epoch": 6.8931484502446985, "grad_norm": 0.30847597122192383, "learning_rate": 0.0008284912613484493, "loss": 0.1317, "num_input_tokens_seen": 91313104, "step": 42255 }, { "epoch": 6.893964110929853, "grad_norm": 0.12379782646894455, "learning_rate": 0.0008284375951383738, "loss": 0.0368, "num_input_tokens_seen": 91322416, "step": 42260 }, { "epoch": 6.894779771615008, "grad_norm": 0.015151728875935078, "learning_rate": 0.0008283839222720935, "loss": 0.1379, "num_input_tokens_seen": 91333328, "step": 42265 }, { "epoch": 6.895595432300163, "grad_norm": 0.3223811089992523, "learning_rate": 0.0008283302427506966, "loss": 0.0628, "num_input_tokens_seen": 91345264, "step": 42270 }, { "epoch": 6.896411092985318, "grad_norm": 0.01046650018543005, "learning_rate": 0.0008282765565752708, "loss": 0.1245, "num_input_tokens_seen": 91356528, "step": 42275 }, { "epoch": 6.897226753670473, "grad_norm": 0.13600951433181763, "learning_rate": 0.0008282228637469042, "loss": 0.1784, "num_input_tokens_seen": 91365904, "step": 42280 }, { "epoch": 6.898042414355628, "grad_norm": 0.11428900063037872, "learning_rate": 0.0008281691642666848, "loss": 0.0718, "num_input_tokens_seen": 91376528, "step": 42285 }, { "epoch": 6.898858075040783, "grad_norm": 0.02441113069653511, "learning_rate": 0.000828115458135701, "loss": 0.1084, "num_input_tokens_seen": 91387376, "step": 42290 }, { "epoch": 6.899673735725938, "grad_norm": 0.05587480589747429, "learning_rate": 0.0008280617453550412, "loss": 0.0298, "num_input_tokens_seen": 91398160, "step": 42295 }, { "epoch": 6.900489396411093, "grad_norm": 0.1454855501651764, "learning_rate": 0.0008280080259257939, "loss": 0.2052, "num_input_tokens_seen": 91409872, "step": 42300 }, { "epoch": 6.901305057096248, "grad_norm": 0.046002261340618134, "learning_rate": 0.0008279542998490479, "loss": 0.0353, "num_input_tokens_seen": 91420752, "step": 42305 }, { "epoch": 6.902120717781403, "grad_norm": 0.010383290238678455, "learning_rate": 0.000827900567125892, "loss": 0.1391, "num_input_tokens_seen": 91431632, "step": 42310 }, { "epoch": 6.902936378466558, "grad_norm": 0.005426608491688967, "learning_rate": 0.0008278468277574152, "loss": 0.0978, "num_input_tokens_seen": 91441488, "step": 42315 }, { "epoch": 6.903752039151713, "grad_norm": 0.1538834422826767, "learning_rate": 0.0008277930817447063, "loss": 0.0675, "num_input_tokens_seen": 91453040, "step": 42320 }, { "epoch": 6.904567699836868, "grad_norm": 0.06816184520721436, "learning_rate": 0.000827739329088855, "loss": 0.0395, "num_input_tokens_seen": 91463952, "step": 42325 }, { "epoch": 6.9053833605220225, "grad_norm": 0.018162427470088005, "learning_rate": 0.0008276855697909502, "loss": 0.058, "num_input_tokens_seen": 91475216, "step": 42330 }, { "epoch": 6.906199021207177, "grad_norm": 0.26269209384918213, "learning_rate": 0.0008276318038520818, "loss": 0.0946, "num_input_tokens_seen": 91486160, "step": 42335 }, { "epoch": 6.907014681892333, "grad_norm": 0.041546259075403214, "learning_rate": 0.0008275780312733392, "loss": 0.1047, "num_input_tokens_seen": 91496208, "step": 42340 }, { "epoch": 6.907830342577488, "grad_norm": 0.037615060806274414, "learning_rate": 0.0008275242520558124, "loss": 0.0692, "num_input_tokens_seen": 91505520, "step": 42345 }, { "epoch": 6.908646003262643, "grad_norm": 0.03843872621655464, "learning_rate": 0.000827470466200591, "loss": 0.03, "num_input_tokens_seen": 91515312, "step": 42350 }, { "epoch": 6.9094616639477975, "grad_norm": 0.023156536743044853, "learning_rate": 0.0008274166737087652, "loss": 0.4206, "num_input_tokens_seen": 91525808, "step": 42355 }, { "epoch": 6.910277324632952, "grad_norm": 0.04622017592191696, "learning_rate": 0.000827362874581425, "loss": 0.0939, "num_input_tokens_seen": 91536016, "step": 42360 }, { "epoch": 6.911092985318108, "grad_norm": 0.08515045791864395, "learning_rate": 0.000827309068819661, "loss": 0.135, "num_input_tokens_seen": 91546704, "step": 42365 }, { "epoch": 6.911908646003263, "grad_norm": 0.034274887293577194, "learning_rate": 0.0008272552564245635, "loss": 0.068, "num_input_tokens_seen": 91557552, "step": 42370 }, { "epoch": 6.912724306688418, "grad_norm": 0.22742615640163422, "learning_rate": 0.000827201437397223, "loss": 0.0693, "num_input_tokens_seen": 91568080, "step": 42375 }, { "epoch": 6.9135399673735725, "grad_norm": 0.06458600610494614, "learning_rate": 0.0008271476117387303, "loss": 0.0532, "num_input_tokens_seen": 91578384, "step": 42380 }, { "epoch": 6.914355628058727, "grad_norm": 0.20055918395519257, "learning_rate": 0.0008270937794501763, "loss": 0.1223, "num_input_tokens_seen": 91589936, "step": 42385 }, { "epoch": 6.915171288743883, "grad_norm": 0.07637903094291687, "learning_rate": 0.0008270399405326519, "loss": 0.0543, "num_input_tokens_seen": 91601392, "step": 42390 }, { "epoch": 6.915986949429038, "grad_norm": 0.13277378678321838, "learning_rate": 0.0008269860949872484, "loss": 0.1121, "num_input_tokens_seen": 91613808, "step": 42395 }, { "epoch": 6.916802610114193, "grad_norm": 0.1605086475610733, "learning_rate": 0.0008269322428150565, "loss": 0.1092, "num_input_tokens_seen": 91624400, "step": 42400 }, { "epoch": 6.917618270799347, "grad_norm": 0.05308017507195473, "learning_rate": 0.0008268783840171682, "loss": 0.081, "num_input_tokens_seen": 91635888, "step": 42405 }, { "epoch": 6.918433931484502, "grad_norm": 0.027449732646346092, "learning_rate": 0.0008268245185946748, "loss": 0.0868, "num_input_tokens_seen": 91645488, "step": 42410 }, { "epoch": 6.919249592169657, "grad_norm": 0.03316226229071617, "learning_rate": 0.0008267706465486677, "loss": 0.0518, "num_input_tokens_seen": 91655632, "step": 42415 }, { "epoch": 6.920065252854813, "grad_norm": 0.10044834017753601, "learning_rate": 0.000826716767880239, "loss": 0.0434, "num_input_tokens_seen": 91666608, "step": 42420 }, { "epoch": 6.920880913539968, "grad_norm": 0.33315449953079224, "learning_rate": 0.0008266628825904807, "loss": 0.1557, "num_input_tokens_seen": 91676656, "step": 42425 }, { "epoch": 6.921696574225122, "grad_norm": 0.21813486516475677, "learning_rate": 0.0008266089906804845, "loss": 0.2951, "num_input_tokens_seen": 91688240, "step": 42430 }, { "epoch": 6.922512234910277, "grad_norm": 0.002060960978269577, "learning_rate": 0.0008265550921513428, "loss": 0.1761, "num_input_tokens_seen": 91699632, "step": 42435 }, { "epoch": 6.923327895595432, "grad_norm": 0.12456963211297989, "learning_rate": 0.000826501187004148, "loss": 0.0603, "num_input_tokens_seen": 91709744, "step": 42440 }, { "epoch": 6.924143556280587, "grad_norm": 0.003388361306861043, "learning_rate": 0.0008264472752399923, "loss": 0.105, "num_input_tokens_seen": 91719472, "step": 42445 }, { "epoch": 6.924959216965743, "grad_norm": 0.4648321568965912, "learning_rate": 0.0008263933568599687, "loss": 0.298, "num_input_tokens_seen": 91729776, "step": 42450 }, { "epoch": 6.925774877650897, "grad_norm": 0.19051282107830048, "learning_rate": 0.0008263394318651693, "loss": 0.1332, "num_input_tokens_seen": 91740176, "step": 42455 }, { "epoch": 6.926590538336052, "grad_norm": 0.07513487339019775, "learning_rate": 0.0008262855002566876, "loss": 0.0346, "num_input_tokens_seen": 91750288, "step": 42460 }, { "epoch": 6.927406199021207, "grad_norm": 0.0671396404504776, "learning_rate": 0.0008262315620356163, "loss": 0.1645, "num_input_tokens_seen": 91762000, "step": 42465 }, { "epoch": 6.928221859706362, "grad_norm": 0.04479534178972244, "learning_rate": 0.0008261776172030484, "loss": 0.049, "num_input_tokens_seen": 91771760, "step": 42470 }, { "epoch": 6.9290375203915175, "grad_norm": 0.007520393934100866, "learning_rate": 0.0008261236657600773, "loss": 0.075, "num_input_tokens_seen": 91781936, "step": 42475 }, { "epoch": 6.929853181076672, "grad_norm": 0.019585467875003815, "learning_rate": 0.0008260697077077964, "loss": 0.1323, "num_input_tokens_seen": 91793264, "step": 42480 }, { "epoch": 6.930668841761827, "grad_norm": 0.13702279329299927, "learning_rate": 0.0008260157430472992, "loss": 0.1284, "num_input_tokens_seen": 91803824, "step": 42485 }, { "epoch": 6.931484502446982, "grad_norm": 0.01728702522814274, "learning_rate": 0.0008259617717796795, "loss": 0.1255, "num_input_tokens_seen": 91814384, "step": 42490 }, { "epoch": 6.932300163132137, "grad_norm": 0.042796917259693146, "learning_rate": 0.0008259077939060309, "loss": 0.1423, "num_input_tokens_seen": 91823920, "step": 42495 }, { "epoch": 6.933115823817292, "grad_norm": 0.02835630439221859, "learning_rate": 0.0008258538094274475, "loss": 0.0458, "num_input_tokens_seen": 91834640, "step": 42500 }, { "epoch": 6.933931484502447, "grad_norm": 0.0981273278594017, "learning_rate": 0.0008257998183450233, "loss": 0.0381, "num_input_tokens_seen": 91845968, "step": 42505 }, { "epoch": 6.934747145187602, "grad_norm": 0.02087876945734024, "learning_rate": 0.0008257458206598524, "loss": 0.1194, "num_input_tokens_seen": 91856464, "step": 42510 }, { "epoch": 6.935562805872757, "grad_norm": 0.04924190044403076, "learning_rate": 0.0008256918163730291, "loss": 0.0882, "num_input_tokens_seen": 91867984, "step": 42515 }, { "epoch": 6.936378466557912, "grad_norm": 0.23019491136074066, "learning_rate": 0.0008256378054856482, "loss": 0.1184, "num_input_tokens_seen": 91878576, "step": 42520 }, { "epoch": 6.937194127243067, "grad_norm": 0.025902308523654938, "learning_rate": 0.000825583787998804, "loss": 0.1507, "num_input_tokens_seen": 91888176, "step": 42525 }, { "epoch": 6.938009787928221, "grad_norm": 0.02752247266471386, "learning_rate": 0.0008255297639135912, "loss": 0.176, "num_input_tokens_seen": 91901104, "step": 42530 }, { "epoch": 6.938825448613377, "grad_norm": 0.1430131047964096, "learning_rate": 0.000825475733231105, "loss": 0.05, "num_input_tokens_seen": 91911920, "step": 42535 }, { "epoch": 6.939641109298532, "grad_norm": 0.1026284396648407, "learning_rate": 0.0008254216959524399, "loss": 0.1036, "num_input_tokens_seen": 91922512, "step": 42540 }, { "epoch": 6.940456769983687, "grad_norm": 0.035261936485767365, "learning_rate": 0.0008253676520786914, "loss": 0.0472, "num_input_tokens_seen": 91934224, "step": 42545 }, { "epoch": 6.941272430668842, "grad_norm": 0.02183767594397068, "learning_rate": 0.0008253136016109547, "loss": 0.0493, "num_input_tokens_seen": 91944592, "step": 42550 }, { "epoch": 6.942088091353996, "grad_norm": 0.09058975428342819, "learning_rate": 0.0008252595445503253, "loss": 0.0245, "num_input_tokens_seen": 91955024, "step": 42555 }, { "epoch": 6.942903752039152, "grad_norm": 0.1628194898366928, "learning_rate": 0.0008252054808978984, "loss": 0.0704, "num_input_tokens_seen": 91963696, "step": 42560 }, { "epoch": 6.943719412724307, "grad_norm": 0.07157375663518906, "learning_rate": 0.0008251514106547698, "loss": 0.0774, "num_input_tokens_seen": 91973936, "step": 42565 }, { "epoch": 6.944535073409462, "grad_norm": 0.06799112260341644, "learning_rate": 0.0008250973338220356, "loss": 0.1247, "num_input_tokens_seen": 91984080, "step": 42570 }, { "epoch": 6.945350734094617, "grad_norm": 0.06843625754117966, "learning_rate": 0.0008250432504007914, "loss": 0.0725, "num_input_tokens_seen": 91994800, "step": 42575 }, { "epoch": 6.946166394779771, "grad_norm": 0.07280784845352173, "learning_rate": 0.0008249891603921334, "loss": 0.0485, "num_input_tokens_seen": 92006160, "step": 42580 }, { "epoch": 6.946982055464927, "grad_norm": 0.016119126230478287, "learning_rate": 0.0008249350637971577, "loss": 0.0951, "num_input_tokens_seen": 92017520, "step": 42585 }, { "epoch": 6.947797716150082, "grad_norm": 0.26134583353996277, "learning_rate": 0.0008248809606169609, "loss": 0.1383, "num_input_tokens_seen": 92028400, "step": 42590 }, { "epoch": 6.948613376835237, "grad_norm": 0.30048561096191406, "learning_rate": 0.0008248268508526393, "loss": 0.0728, "num_input_tokens_seen": 92040368, "step": 42595 }, { "epoch": 6.9494290375203915, "grad_norm": 0.0028530319686979055, "learning_rate": 0.0008247727345052894, "loss": 0.054, "num_input_tokens_seen": 92049968, "step": 42600 }, { "epoch": 6.950244698205546, "grad_norm": 0.08610218018293381, "learning_rate": 0.000824718611576008, "loss": 0.0521, "num_input_tokens_seen": 92059536, "step": 42605 }, { "epoch": 6.951060358890701, "grad_norm": 0.01567983254790306, "learning_rate": 0.0008246644820658922, "loss": 0.0264, "num_input_tokens_seen": 92070352, "step": 42610 }, { "epoch": 6.951876019575856, "grad_norm": 0.022035297006368637, "learning_rate": 0.0008246103459760385, "loss": 0.054, "num_input_tokens_seen": 92081584, "step": 42615 }, { "epoch": 6.952691680261012, "grad_norm": 0.2599090337753296, "learning_rate": 0.0008245562033075446, "loss": 0.1472, "num_input_tokens_seen": 92092880, "step": 42620 }, { "epoch": 6.9535073409461665, "grad_norm": 0.0031842731405049562, "learning_rate": 0.0008245020540615074, "loss": 0.0356, "num_input_tokens_seen": 92104368, "step": 42625 }, { "epoch": 6.954323001631321, "grad_norm": 0.11851377040147781, "learning_rate": 0.0008244478982390245, "loss": 0.141, "num_input_tokens_seen": 92115152, "step": 42630 }, { "epoch": 6.955138662316476, "grad_norm": 0.03890161216259003, "learning_rate": 0.0008243937358411933, "loss": 0.1503, "num_input_tokens_seen": 92126352, "step": 42635 }, { "epoch": 6.955954323001631, "grad_norm": 0.012479268014431, "learning_rate": 0.0008243395668691113, "loss": 0.0521, "num_input_tokens_seen": 92137488, "step": 42640 }, { "epoch": 6.956769983686787, "grad_norm": 0.21884118020534515, "learning_rate": 0.0008242853913238769, "loss": 0.1481, "num_input_tokens_seen": 92148976, "step": 42645 }, { "epoch": 6.9575856443719415, "grad_norm": 0.044424448162317276, "learning_rate": 0.0008242312092065873, "loss": 0.1143, "num_input_tokens_seen": 92158896, "step": 42650 }, { "epoch": 6.958401305057096, "grad_norm": 0.008710110560059547, "learning_rate": 0.0008241770205183412, "loss": 0.0932, "num_input_tokens_seen": 92169552, "step": 42655 }, { "epoch": 6.959216965742251, "grad_norm": 0.12920400500297546, "learning_rate": 0.0008241228252602364, "loss": 0.0838, "num_input_tokens_seen": 92179792, "step": 42660 }, { "epoch": 6.960032626427406, "grad_norm": 0.11128882318735123, "learning_rate": 0.0008240686234333714, "loss": 0.0433, "num_input_tokens_seen": 92190416, "step": 42665 }, { "epoch": 6.960848287112562, "grad_norm": 0.11121262609958649, "learning_rate": 0.0008240144150388446, "loss": 0.0997, "num_input_tokens_seen": 92201456, "step": 42670 }, { "epoch": 6.9616639477977165, "grad_norm": 0.019484156742691994, "learning_rate": 0.0008239602000777548, "loss": 0.1558, "num_input_tokens_seen": 92213072, "step": 42675 }, { "epoch": 6.962479608482871, "grad_norm": 0.01814747042953968, "learning_rate": 0.0008239059785512005, "loss": 0.0192, "num_input_tokens_seen": 92223632, "step": 42680 }, { "epoch": 6.963295269168026, "grad_norm": 0.33034294843673706, "learning_rate": 0.0008238517504602805, "loss": 0.0463, "num_input_tokens_seen": 92234960, "step": 42685 }, { "epoch": 6.964110929853181, "grad_norm": 0.026794755831360817, "learning_rate": 0.0008237975158060939, "loss": 0.0358, "num_input_tokens_seen": 92245712, "step": 42690 }, { "epoch": 6.964926590538336, "grad_norm": 0.22729530930519104, "learning_rate": 0.0008237432745897402, "loss": 0.0682, "num_input_tokens_seen": 92256784, "step": 42695 }, { "epoch": 6.9657422512234906, "grad_norm": 0.009565351530909538, "learning_rate": 0.000823689026812318, "loss": 0.0174, "num_input_tokens_seen": 92265936, "step": 42700 }, { "epoch": 6.966557911908646, "grad_norm": 0.05226115137338638, "learning_rate": 0.0008236347724749274, "loss": 0.2109, "num_input_tokens_seen": 92276464, "step": 42705 }, { "epoch": 6.967373572593801, "grad_norm": 0.2930293083190918, "learning_rate": 0.0008235805115786672, "loss": 0.1611, "num_input_tokens_seen": 92287664, "step": 42710 }, { "epoch": 6.968189233278956, "grad_norm": 0.1569661796092987, "learning_rate": 0.0008235262441246376, "loss": 0.2314, "num_input_tokens_seen": 92296976, "step": 42715 }, { "epoch": 6.969004893964111, "grad_norm": 0.2670922875404358, "learning_rate": 0.0008234719701139384, "loss": 0.0767, "num_input_tokens_seen": 92307184, "step": 42720 }, { "epoch": 6.9698205546492655, "grad_norm": 0.004752719309180975, "learning_rate": 0.0008234176895476692, "loss": 0.0497, "num_input_tokens_seen": 92318128, "step": 42725 }, { "epoch": 6.970636215334421, "grad_norm": 0.012683387845754623, "learning_rate": 0.0008233634024269302, "loss": 0.0872, "num_input_tokens_seen": 92329744, "step": 42730 }, { "epoch": 6.971451876019576, "grad_norm": 0.032268162816762924, "learning_rate": 0.0008233091087528217, "loss": 0.098, "num_input_tokens_seen": 92340720, "step": 42735 }, { "epoch": 6.972267536704731, "grad_norm": 0.19561608135700226, "learning_rate": 0.000823254808526444, "loss": 0.0382, "num_input_tokens_seen": 92351248, "step": 42740 }, { "epoch": 6.973083197389886, "grad_norm": 0.021456921473145485, "learning_rate": 0.0008232005017488975, "loss": 0.0165, "num_input_tokens_seen": 92361680, "step": 42745 }, { "epoch": 6.9738988580750405, "grad_norm": 0.11003857105970383, "learning_rate": 0.0008231461884212828, "loss": 0.0841, "num_input_tokens_seen": 92373616, "step": 42750 }, { "epoch": 6.974714518760196, "grad_norm": 0.07811323553323746, "learning_rate": 0.0008230918685447006, "loss": 0.0345, "num_input_tokens_seen": 92384464, "step": 42755 }, { "epoch": 6.975530179445351, "grad_norm": 0.02601797506213188, "learning_rate": 0.000823037542120252, "loss": 0.0467, "num_input_tokens_seen": 92395984, "step": 42760 }, { "epoch": 6.976345840130506, "grad_norm": 0.01498804334551096, "learning_rate": 0.0008229832091490377, "loss": 0.1297, "num_input_tokens_seen": 92407120, "step": 42765 }, { "epoch": 6.977161500815661, "grad_norm": 0.09473436325788498, "learning_rate": 0.0008229288696321588, "loss": 0.0315, "num_input_tokens_seen": 92417296, "step": 42770 }, { "epoch": 6.9779771615008155, "grad_norm": 0.05961019545793533, "learning_rate": 0.0008228745235707169, "loss": 0.0963, "num_input_tokens_seen": 92428240, "step": 42775 }, { "epoch": 6.97879282218597, "grad_norm": 0.012499609962105751, "learning_rate": 0.000822820170965813, "loss": 0.2952, "num_input_tokens_seen": 92438864, "step": 42780 }, { "epoch": 6.979608482871125, "grad_norm": 0.03288532793521881, "learning_rate": 0.0008227658118185491, "loss": 0.1119, "num_input_tokens_seen": 92449424, "step": 42785 }, { "epoch": 6.980424143556281, "grad_norm": 0.16200962662696838, "learning_rate": 0.0008227114461300262, "loss": 0.092, "num_input_tokens_seen": 92460016, "step": 42790 }, { "epoch": 6.981239804241436, "grad_norm": 0.03325765207409859, "learning_rate": 0.0008226570739013466, "loss": 0.0655, "num_input_tokens_seen": 92472336, "step": 42795 }, { "epoch": 6.9820554649265905, "grad_norm": 0.06443525105714798, "learning_rate": 0.0008226026951336121, "loss": 0.1035, "num_input_tokens_seen": 92482576, "step": 42800 }, { "epoch": 6.982871125611745, "grad_norm": 0.09831217676401138, "learning_rate": 0.0008225483098279247, "loss": 0.0297, "num_input_tokens_seen": 92494160, "step": 42805 }, { "epoch": 6.9836867862969, "grad_norm": 0.061349742114543915, "learning_rate": 0.0008224939179853868, "loss": 0.0775, "num_input_tokens_seen": 92504432, "step": 42810 }, { "epoch": 6.984502446982056, "grad_norm": 0.01365916058421135, "learning_rate": 0.0008224395196071003, "loss": 0.0592, "num_input_tokens_seen": 92514576, "step": 42815 }, { "epoch": 6.985318107667211, "grad_norm": 0.06067565083503723, "learning_rate": 0.000822385114694168, "loss": 0.0316, "num_input_tokens_seen": 92525648, "step": 42820 }, { "epoch": 6.986133768352365, "grad_norm": 0.0380895771086216, "learning_rate": 0.0008223307032476923, "loss": 0.0947, "num_input_tokens_seen": 92535248, "step": 42825 }, { "epoch": 6.98694942903752, "grad_norm": 0.008909545838832855, "learning_rate": 0.0008222762852687762, "loss": 0.0511, "num_input_tokens_seen": 92546800, "step": 42830 }, { "epoch": 6.987765089722675, "grad_norm": 0.04149286076426506, "learning_rate": 0.0008222218607585221, "loss": 0.056, "num_input_tokens_seen": 92557584, "step": 42835 }, { "epoch": 6.988580750407831, "grad_norm": 0.02532074600458145, "learning_rate": 0.0008221674297180334, "loss": 0.0636, "num_input_tokens_seen": 92567184, "step": 42840 }, { "epoch": 6.989396411092986, "grad_norm": 0.28405627608299255, "learning_rate": 0.000822112992148413, "loss": 0.0643, "num_input_tokens_seen": 92576880, "step": 42845 }, { "epoch": 6.99021207177814, "grad_norm": 0.08372216671705246, "learning_rate": 0.000822058548050764, "loss": 0.1165, "num_input_tokens_seen": 92588368, "step": 42850 }, { "epoch": 6.991027732463295, "grad_norm": 0.01633065938949585, "learning_rate": 0.0008220040974261901, "loss": 0.0704, "num_input_tokens_seen": 92598992, "step": 42855 }, { "epoch": 6.99184339314845, "grad_norm": 0.004168565850704908, "learning_rate": 0.0008219496402757948, "loss": 0.0553, "num_input_tokens_seen": 92609904, "step": 42860 }, { "epoch": 6.992659053833605, "grad_norm": 0.030385682359337807, "learning_rate": 0.0008218951766006815, "loss": 0.0593, "num_input_tokens_seen": 92621360, "step": 42865 }, { "epoch": 6.993474714518761, "grad_norm": 0.044751256704330444, "learning_rate": 0.0008218407064019541, "loss": 0.1026, "num_input_tokens_seen": 92631312, "step": 42870 }, { "epoch": 6.994290375203915, "grad_norm": 0.004565827082842588, "learning_rate": 0.0008217862296807165, "loss": 0.0222, "num_input_tokens_seen": 92641584, "step": 42875 }, { "epoch": 6.99510603588907, "grad_norm": 0.019839083775877953, "learning_rate": 0.0008217317464380727, "loss": 0.1736, "num_input_tokens_seen": 92651824, "step": 42880 }, { "epoch": 6.995921696574225, "grad_norm": 0.013745338656008244, "learning_rate": 0.0008216772566751269, "loss": 0.0456, "num_input_tokens_seen": 92662384, "step": 42885 }, { "epoch": 6.99673735725938, "grad_norm": 0.012423294596374035, "learning_rate": 0.0008216227603929835, "loss": 0.1445, "num_input_tokens_seen": 92673008, "step": 42890 }, { "epoch": 6.997553017944535, "grad_norm": 0.1849156767129898, "learning_rate": 0.0008215682575927468, "loss": 0.056, "num_input_tokens_seen": 92683568, "step": 42895 }, { "epoch": 6.99836867862969, "grad_norm": 0.08922215551137924, "learning_rate": 0.0008215137482755215, "loss": 0.0186, "num_input_tokens_seen": 92693392, "step": 42900 }, { "epoch": 6.999184339314845, "grad_norm": 0.04565683752298355, "learning_rate": 0.0008214592324424122, "loss": 0.0873, "num_input_tokens_seen": 92704240, "step": 42905 }, { "epoch": 7.0, "grad_norm": 0.013504397124052048, "learning_rate": 0.0008214047100945236, "loss": 0.1325, "num_input_tokens_seen": 92713360, "step": 42910 }, { "epoch": 7.0, "eval_loss": 0.13006359338760376, "eval_runtime": 103.5374, "eval_samples_per_second": 26.319, "eval_steps_per_second": 6.587, "num_input_tokens_seen": 92713360, "step": 42910 }, { "epoch": 7.000815660685155, "grad_norm": 0.3289978504180908, "learning_rate": 0.0008213501812329609, "loss": 0.1642, "num_input_tokens_seen": 92724208, "step": 42915 }, { "epoch": 7.00163132137031, "grad_norm": 0.3054993748664856, "learning_rate": 0.0008212956458588292, "loss": 0.1031, "num_input_tokens_seen": 92735600, "step": 42920 }, { "epoch": 7.002446982055465, "grad_norm": 0.027686649933457375, "learning_rate": 0.0008212411039732336, "loss": 0.1013, "num_input_tokens_seen": 92746544, "step": 42925 }, { "epoch": 7.00326264274062, "grad_norm": 0.08517606556415558, "learning_rate": 0.0008211865555772795, "loss": 0.0472, "num_input_tokens_seen": 92756912, "step": 42930 }, { "epoch": 7.004078303425775, "grad_norm": 0.006800774950534105, "learning_rate": 0.0008211320006720723, "loss": 0.0581, "num_input_tokens_seen": 92767504, "step": 42935 }, { "epoch": 7.00489396411093, "grad_norm": 0.02842605859041214, "learning_rate": 0.000821077439258718, "loss": 0.0366, "num_input_tokens_seen": 92778064, "step": 42940 }, { "epoch": 7.005709624796085, "grad_norm": 0.005044872872531414, "learning_rate": 0.0008210228713383218, "loss": 0.0951, "num_input_tokens_seen": 92788784, "step": 42945 }, { "epoch": 7.006525285481239, "grad_norm": 0.06589864194393158, "learning_rate": 0.00082096829691199, "loss": 0.1049, "num_input_tokens_seen": 92800144, "step": 42950 }, { "epoch": 7.007340946166395, "grad_norm": 0.11859538406133652, "learning_rate": 0.0008209137159808284, "loss": 0.0534, "num_input_tokens_seen": 92809744, "step": 42955 }, { "epoch": 7.00815660685155, "grad_norm": 0.0624762699007988, "learning_rate": 0.0008208591285459434, "loss": 0.0615, "num_input_tokens_seen": 92820240, "step": 42960 }, { "epoch": 7.008972267536705, "grad_norm": 0.35461515188217163, "learning_rate": 0.0008208045346084409, "loss": 0.2508, "num_input_tokens_seen": 92831984, "step": 42965 }, { "epoch": 7.00978792822186, "grad_norm": 0.18568821251392365, "learning_rate": 0.0008207499341694278, "loss": 0.1936, "num_input_tokens_seen": 92842928, "step": 42970 }, { "epoch": 7.010603588907014, "grad_norm": 0.09701191633939743, "learning_rate": 0.0008206953272300102, "loss": 0.0443, "num_input_tokens_seen": 92853776, "step": 42975 }, { "epoch": 7.011419249592169, "grad_norm": 0.010077468119561672, "learning_rate": 0.000820640713791295, "loss": 0.0585, "num_input_tokens_seen": 92863856, "step": 42980 }, { "epoch": 7.012234910277325, "grad_norm": 0.43398287892341614, "learning_rate": 0.000820586093854389, "loss": 0.129, "num_input_tokens_seen": 92874992, "step": 42985 }, { "epoch": 7.01305057096248, "grad_norm": 0.028587957844138145, "learning_rate": 0.0008205314674203989, "loss": 0.0651, "num_input_tokens_seen": 92886704, "step": 42990 }, { "epoch": 7.013866231647635, "grad_norm": 0.03041784279048443, "learning_rate": 0.0008204768344904323, "loss": 0.0336, "num_input_tokens_seen": 92897328, "step": 42995 }, { "epoch": 7.014681892332789, "grad_norm": 0.008256292901933193, "learning_rate": 0.0008204221950655959, "loss": 0.0472, "num_input_tokens_seen": 92909616, "step": 43000 }, { "epoch": 7.015497553017944, "grad_norm": 0.028076890856027603, "learning_rate": 0.0008203675491469973, "loss": 0.123, "num_input_tokens_seen": 92920976, "step": 43005 }, { "epoch": 7.0163132137031, "grad_norm": 0.0032557565718889236, "learning_rate": 0.0008203128967357438, "loss": 0.0525, "num_input_tokens_seen": 92933200, "step": 43010 }, { "epoch": 7.017128874388255, "grad_norm": 0.006004045717418194, "learning_rate": 0.0008202582378329433, "loss": 0.0474, "num_input_tokens_seen": 92944688, "step": 43015 }, { "epoch": 7.0179445350734095, "grad_norm": 0.025199543684720993, "learning_rate": 0.0008202035724397032, "loss": 0.0692, "num_input_tokens_seen": 92954704, "step": 43020 }, { "epoch": 7.018760195758564, "grad_norm": 0.008000586181879044, "learning_rate": 0.0008201489005571316, "loss": 0.0496, "num_input_tokens_seen": 92966096, "step": 43025 }, { "epoch": 7.019575856443719, "grad_norm": 0.17397743463516235, "learning_rate": 0.0008200942221863363, "loss": 0.0542, "num_input_tokens_seen": 92977360, "step": 43030 }, { "epoch": 7.020391517128874, "grad_norm": 0.0518217608332634, "learning_rate": 0.0008200395373284255, "loss": 0.0628, "num_input_tokens_seen": 92988400, "step": 43035 }, { "epoch": 7.02120717781403, "grad_norm": 0.02842766046524048, "learning_rate": 0.0008199848459845077, "loss": 0.0218, "num_input_tokens_seen": 92998480, "step": 43040 }, { "epoch": 7.0220228384991845, "grad_norm": 0.1324327141046524, "learning_rate": 0.0008199301481556907, "loss": 0.0617, "num_input_tokens_seen": 93009616, "step": 43045 }, { "epoch": 7.022838499184339, "grad_norm": 0.07451920211315155, "learning_rate": 0.0008198754438430836, "loss": 0.0133, "num_input_tokens_seen": 93019312, "step": 43050 }, { "epoch": 7.023654159869494, "grad_norm": 0.12650872766971588, "learning_rate": 0.000819820733047795, "loss": 0.0332, "num_input_tokens_seen": 93030160, "step": 43055 }, { "epoch": 7.024469820554649, "grad_norm": 0.004953624680638313, "learning_rate": 0.0008197660157709333, "loss": 0.0893, "num_input_tokens_seen": 93040112, "step": 43060 }, { "epoch": 7.025285481239805, "grad_norm": 0.40456417202949524, "learning_rate": 0.0008197112920136076, "loss": 0.0968, "num_input_tokens_seen": 93051760, "step": 43065 }, { "epoch": 7.0261011419249595, "grad_norm": 0.2973378598690033, "learning_rate": 0.000819656561776927, "loss": 0.0515, "num_input_tokens_seen": 93063024, "step": 43070 }, { "epoch": 7.026916802610114, "grad_norm": 0.14938867092132568, "learning_rate": 0.0008196018250620008, "loss": 0.081, "num_input_tokens_seen": 93073008, "step": 43075 }, { "epoch": 7.027732463295269, "grad_norm": 0.22964538633823395, "learning_rate": 0.0008195470818699381, "loss": 0.1137, "num_input_tokens_seen": 93083664, "step": 43080 }, { "epoch": 7.028548123980424, "grad_norm": 0.28837212920188904, "learning_rate": 0.0008194923322018484, "loss": 0.1966, "num_input_tokens_seen": 93092880, "step": 43085 }, { "epoch": 7.029363784665579, "grad_norm": 0.2682326138019562, "learning_rate": 0.0008194375760588413, "loss": 0.1459, "num_input_tokens_seen": 93103504, "step": 43090 }, { "epoch": 7.0301794453507345, "grad_norm": 0.04698742553591728, "learning_rate": 0.0008193828134420265, "loss": 0.1205, "num_input_tokens_seen": 93116016, "step": 43095 }, { "epoch": 7.030995106035889, "grad_norm": 0.007682493422180414, "learning_rate": 0.0008193280443525138, "loss": 0.0109, "num_input_tokens_seen": 93128048, "step": 43100 }, { "epoch": 7.031810766721044, "grad_norm": 0.13157765567302704, "learning_rate": 0.0008192732687914131, "loss": 0.0206, "num_input_tokens_seen": 93139440, "step": 43105 }, { "epoch": 7.032626427406199, "grad_norm": 0.20668326318264008, "learning_rate": 0.0008192184867598347, "loss": 0.1582, "num_input_tokens_seen": 93150352, "step": 43110 }, { "epoch": 7.033442088091354, "grad_norm": 0.005746053997427225, "learning_rate": 0.0008191636982588887, "loss": 0.1208, "num_input_tokens_seen": 93160432, "step": 43115 }, { "epoch": 7.034257748776509, "grad_norm": 0.14109240472316742, "learning_rate": 0.0008191089032896855, "loss": 0.11, "num_input_tokens_seen": 93169936, "step": 43120 }, { "epoch": 7.035073409461664, "grad_norm": 0.03715949505567551, "learning_rate": 0.0008190541018533353, "loss": 0.0124, "num_input_tokens_seen": 93181264, "step": 43125 }, { "epoch": 7.035889070146819, "grad_norm": 0.027663471177220345, "learning_rate": 0.0008189992939509491, "loss": 0.0626, "num_input_tokens_seen": 93192624, "step": 43130 }, { "epoch": 7.036704730831974, "grad_norm": 0.14800049364566803, "learning_rate": 0.0008189444795836377, "loss": 0.0593, "num_input_tokens_seen": 93202608, "step": 43135 }, { "epoch": 7.037520391517129, "grad_norm": 0.024344148114323616, "learning_rate": 0.0008188896587525118, "loss": 0.0277, "num_input_tokens_seen": 93214096, "step": 43140 }, { "epoch": 7.0383360522022835, "grad_norm": 0.1449653059244156, "learning_rate": 0.0008188348314586823, "loss": 0.1141, "num_input_tokens_seen": 93223856, "step": 43145 }, { "epoch": 7.039151712887439, "grad_norm": 0.017205238342285156, "learning_rate": 0.0008187799977032605, "loss": 0.0296, "num_input_tokens_seen": 93234576, "step": 43150 }, { "epoch": 7.039967373572594, "grad_norm": 0.3828336298465729, "learning_rate": 0.0008187251574873576, "loss": 0.2549, "num_input_tokens_seen": 93245968, "step": 43155 }, { "epoch": 7.040783034257749, "grad_norm": 0.40320709347724915, "learning_rate": 0.0008186703108120852, "loss": 0.0536, "num_input_tokens_seen": 93255728, "step": 43160 }, { "epoch": 7.041598694942904, "grad_norm": 0.03069511242210865, "learning_rate": 0.0008186154576785545, "loss": 0.1759, "num_input_tokens_seen": 93265328, "step": 43165 }, { "epoch": 7.0424143556280585, "grad_norm": 0.0060121663846075535, "learning_rate": 0.0008185605980878775, "loss": 0.0751, "num_input_tokens_seen": 93276048, "step": 43170 }, { "epoch": 7.043230016313213, "grad_norm": 0.18002556264400482, "learning_rate": 0.0008185057320411658, "loss": 0.0342, "num_input_tokens_seen": 93286736, "step": 43175 }, { "epoch": 7.044045676998369, "grad_norm": 0.02318798191845417, "learning_rate": 0.0008184508595395314, "loss": 0.0313, "num_input_tokens_seen": 93297040, "step": 43180 }, { "epoch": 7.044861337683524, "grad_norm": 0.02688850648701191, "learning_rate": 0.0008183959805840863, "loss": 0.0638, "num_input_tokens_seen": 93307472, "step": 43185 }, { "epoch": 7.045676998368679, "grad_norm": 0.23760437965393066, "learning_rate": 0.0008183410951759429, "loss": 0.1342, "num_input_tokens_seen": 93319280, "step": 43190 }, { "epoch": 7.0464926590538335, "grad_norm": 0.016625650227069855, "learning_rate": 0.0008182862033162131, "loss": 0.1159, "num_input_tokens_seen": 93330128, "step": 43195 }, { "epoch": 7.047308319738988, "grad_norm": 0.004090285860002041, "learning_rate": 0.0008182313050060098, "loss": 0.0634, "num_input_tokens_seen": 93341776, "step": 43200 }, { "epoch": 7.048123980424143, "grad_norm": 0.14525869488716125, "learning_rate": 0.0008181764002464454, "loss": 0.1078, "num_input_tokens_seen": 93353008, "step": 43205 }, { "epoch": 7.048939641109299, "grad_norm": 0.1771831512451172, "learning_rate": 0.0008181214890386326, "loss": 0.0713, "num_input_tokens_seen": 93363664, "step": 43210 }, { "epoch": 7.049755301794454, "grad_norm": 0.11869829148054123, "learning_rate": 0.0008180665713836842, "loss": 0.0565, "num_input_tokens_seen": 93374000, "step": 43215 }, { "epoch": 7.0505709624796085, "grad_norm": 0.012297256849706173, "learning_rate": 0.0008180116472827133, "loss": 0.0368, "num_input_tokens_seen": 93384368, "step": 43220 }, { "epoch": 7.051386623164763, "grad_norm": 0.07448185235261917, "learning_rate": 0.000817956716736833, "loss": 0.0206, "num_input_tokens_seen": 93396080, "step": 43225 }, { "epoch": 7.052202283849918, "grad_norm": 0.019102323800325394, "learning_rate": 0.0008179017797471562, "loss": 0.0733, "num_input_tokens_seen": 93407728, "step": 43230 }, { "epoch": 7.053017944535074, "grad_norm": 0.04259370639920235, "learning_rate": 0.0008178468363147968, "loss": 0.0183, "num_input_tokens_seen": 93419024, "step": 43235 }, { "epoch": 7.053833605220229, "grad_norm": 0.0029630782082676888, "learning_rate": 0.000817791886440868, "loss": 0.0074, "num_input_tokens_seen": 93429040, "step": 43240 }, { "epoch": 7.054649265905383, "grad_norm": 0.01488267257809639, "learning_rate": 0.0008177369301264834, "loss": 0.0293, "num_input_tokens_seen": 93439312, "step": 43245 }, { "epoch": 7.055464926590538, "grad_norm": 0.032309968024492264, "learning_rate": 0.0008176819673727569, "loss": 0.1367, "num_input_tokens_seen": 93451312, "step": 43250 }, { "epoch": 7.056280587275693, "grad_norm": 0.18661397695541382, "learning_rate": 0.0008176269981808023, "loss": 0.0899, "num_input_tokens_seen": 93462576, "step": 43255 }, { "epoch": 7.057096247960848, "grad_norm": 0.31488245725631714, "learning_rate": 0.0008175720225517337, "loss": 0.1092, "num_input_tokens_seen": 93472656, "step": 43260 }, { "epoch": 7.057911908646004, "grad_norm": 0.2543412148952484, "learning_rate": 0.0008175170404866652, "loss": 0.0472, "num_input_tokens_seen": 93483024, "step": 43265 }, { "epoch": 7.058727569331158, "grad_norm": 0.00109146349132061, "learning_rate": 0.0008174620519867109, "loss": 0.074, "num_input_tokens_seen": 93493712, "step": 43270 }, { "epoch": 7.059543230016313, "grad_norm": 0.021653829142451286, "learning_rate": 0.0008174070570529854, "loss": 0.098, "num_input_tokens_seen": 93505040, "step": 43275 }, { "epoch": 7.060358890701468, "grad_norm": 0.2788853943347931, "learning_rate": 0.0008173520556866035, "loss": 0.1521, "num_input_tokens_seen": 93516528, "step": 43280 }, { "epoch": 7.061174551386623, "grad_norm": 0.01834874600172043, "learning_rate": 0.0008172970478886794, "loss": 0.0338, "num_input_tokens_seen": 93528688, "step": 43285 }, { "epoch": 7.061990212071779, "grad_norm": 0.020214732736349106, "learning_rate": 0.0008172420336603281, "loss": 0.0116, "num_input_tokens_seen": 93540112, "step": 43290 }, { "epoch": 7.062805872756933, "grad_norm": 0.21407492458820343, "learning_rate": 0.0008171870130026646, "loss": 0.2408, "num_input_tokens_seen": 93550608, "step": 43295 }, { "epoch": 7.063621533442088, "grad_norm": 0.04193660989403725, "learning_rate": 0.000817131985916804, "loss": 0.012, "num_input_tokens_seen": 93561232, "step": 43300 }, { "epoch": 7.064437194127243, "grad_norm": 0.1706007868051529, "learning_rate": 0.0008170769524038613, "loss": 0.0536, "num_input_tokens_seen": 93572272, "step": 43305 }, { "epoch": 7.065252854812398, "grad_norm": 0.007401083130389452, "learning_rate": 0.0008170219124649518, "loss": 0.1607, "num_input_tokens_seen": 93583376, "step": 43310 }, { "epoch": 7.066068515497553, "grad_norm": 0.014208367094397545, "learning_rate": 0.0008169668661011912, "loss": 0.0789, "num_input_tokens_seen": 93592624, "step": 43315 }, { "epoch": 7.066884176182708, "grad_norm": 0.11837077885866165, "learning_rate": 0.0008169118133136951, "loss": 0.0174, "num_input_tokens_seen": 93602864, "step": 43320 }, { "epoch": 7.067699836867863, "grad_norm": 0.021076519042253494, "learning_rate": 0.0008168567541035788, "loss": 0.24, "num_input_tokens_seen": 93613072, "step": 43325 }, { "epoch": 7.068515497553018, "grad_norm": 0.30501484870910645, "learning_rate": 0.0008168016884719585, "loss": 0.1192, "num_input_tokens_seen": 93624016, "step": 43330 }, { "epoch": 7.069331158238173, "grad_norm": 0.02154276892542839, "learning_rate": 0.0008167466164199499, "loss": 0.0635, "num_input_tokens_seen": 93635216, "step": 43335 }, { "epoch": 7.070146818923328, "grad_norm": 0.006805689074099064, "learning_rate": 0.0008166915379486697, "loss": 0.0354, "num_input_tokens_seen": 93646320, "step": 43340 }, { "epoch": 7.0709624796084825, "grad_norm": 0.05280419811606407, "learning_rate": 0.0008166364530592334, "loss": 0.0714, "num_input_tokens_seen": 93655600, "step": 43345 }, { "epoch": 7.071778140293638, "grad_norm": 0.011205635033547878, "learning_rate": 0.0008165813617527579, "loss": 0.2762, "num_input_tokens_seen": 93666544, "step": 43350 }, { "epoch": 7.072593800978793, "grad_norm": 0.23616188764572144, "learning_rate": 0.0008165262640303595, "loss": 0.1106, "num_input_tokens_seen": 93677776, "step": 43355 }, { "epoch": 7.073409461663948, "grad_norm": 0.02595806121826172, "learning_rate": 0.0008164711598931546, "loss": 0.0192, "num_input_tokens_seen": 93688368, "step": 43360 }, { "epoch": 7.074225122349103, "grad_norm": 0.23422791063785553, "learning_rate": 0.0008164160493422604, "loss": 0.0558, "num_input_tokens_seen": 93698256, "step": 43365 }, { "epoch": 7.075040783034257, "grad_norm": 0.025671793147921562, "learning_rate": 0.0008163609323787934, "loss": 0.0263, "num_input_tokens_seen": 93708048, "step": 43370 }, { "epoch": 7.075856443719413, "grad_norm": 0.03166002407670021, "learning_rate": 0.0008163058090038709, "loss": 0.0987, "num_input_tokens_seen": 93717712, "step": 43375 }, { "epoch": 7.076672104404568, "grad_norm": 0.016035977751016617, "learning_rate": 0.0008162506792186099, "loss": 0.0638, "num_input_tokens_seen": 93728848, "step": 43380 }, { "epoch": 7.077487765089723, "grad_norm": 0.05759859457612038, "learning_rate": 0.0008161955430241276, "loss": 0.0724, "num_input_tokens_seen": 93740688, "step": 43385 }, { "epoch": 7.078303425774878, "grad_norm": 0.057698123157024384, "learning_rate": 0.0008161404004215415, "loss": 0.1149, "num_input_tokens_seen": 93751696, "step": 43390 }, { "epoch": 7.079119086460032, "grad_norm": 0.0237799733877182, "learning_rate": 0.0008160852514119692, "loss": 0.0246, "num_input_tokens_seen": 93762480, "step": 43395 }, { "epoch": 7.079934747145187, "grad_norm": 0.16691306233406067, "learning_rate": 0.0008160300959965284, "loss": 0.0571, "num_input_tokens_seen": 93772592, "step": 43400 }, { "epoch": 7.080750407830343, "grad_norm": 0.43984997272491455, "learning_rate": 0.0008159749341763367, "loss": 0.113, "num_input_tokens_seen": 93782640, "step": 43405 }, { "epoch": 7.081566068515498, "grad_norm": 0.01615430787205696, "learning_rate": 0.000815919765952512, "loss": 0.0495, "num_input_tokens_seen": 93793552, "step": 43410 }, { "epoch": 7.082381729200653, "grad_norm": 0.011169064790010452, "learning_rate": 0.0008158645913261726, "loss": 0.2136, "num_input_tokens_seen": 93803952, "step": 43415 }, { "epoch": 7.083197389885807, "grad_norm": 0.018460217863321304, "learning_rate": 0.0008158094102984366, "loss": 0.025, "num_input_tokens_seen": 93816048, "step": 43420 }, { "epoch": 7.084013050570962, "grad_norm": 0.06511392444372177, "learning_rate": 0.0008157542228704221, "loss": 0.0282, "num_input_tokens_seen": 93827088, "step": 43425 }, { "epoch": 7.084828711256117, "grad_norm": 0.010363047942519188, "learning_rate": 0.0008156990290432478, "loss": 0.0142, "num_input_tokens_seen": 93837968, "step": 43430 }, { "epoch": 7.085644371941273, "grad_norm": 0.3792910575866699, "learning_rate": 0.0008156438288180321, "loss": 0.0871, "num_input_tokens_seen": 93849808, "step": 43435 }, { "epoch": 7.0864600326264275, "grad_norm": 0.00462770601734519, "learning_rate": 0.0008155886221958939, "loss": 0.1214, "num_input_tokens_seen": 93860816, "step": 43440 }, { "epoch": 7.087275693311582, "grad_norm": 0.004159488715231419, "learning_rate": 0.0008155334091779518, "loss": 0.077, "num_input_tokens_seen": 93871024, "step": 43445 }, { "epoch": 7.088091353996737, "grad_norm": 0.010639780201017857, "learning_rate": 0.0008154781897653251, "loss": 0.0076, "num_input_tokens_seen": 93883280, "step": 43450 }, { "epoch": 7.088907014681892, "grad_norm": 0.23465438187122345, "learning_rate": 0.0008154229639591324, "loss": 0.0977, "num_input_tokens_seen": 93894928, "step": 43455 }, { "epoch": 7.089722675367048, "grad_norm": 0.008510327897965908, "learning_rate": 0.0008153677317604935, "loss": 0.1191, "num_input_tokens_seen": 93906320, "step": 43460 }, { "epoch": 7.0905383360522025, "grad_norm": 0.02426774427294731, "learning_rate": 0.0008153124931705271, "loss": 0.0701, "num_input_tokens_seen": 93917520, "step": 43465 }, { "epoch": 7.091353996737357, "grad_norm": 0.08482329547405243, "learning_rate": 0.0008152572481903533, "loss": 0.101, "num_input_tokens_seen": 93928560, "step": 43470 }, { "epoch": 7.092169657422512, "grad_norm": 0.25256073474884033, "learning_rate": 0.0008152019968210913, "loss": 0.1663, "num_input_tokens_seen": 93939536, "step": 43475 }, { "epoch": 7.092985318107667, "grad_norm": 0.08296633511781693, "learning_rate": 0.0008151467390638611, "loss": 0.1023, "num_input_tokens_seen": 93950032, "step": 43480 }, { "epoch": 7.093800978792822, "grad_norm": 0.08412051200866699, "learning_rate": 0.0008150914749197823, "loss": 0.0835, "num_input_tokens_seen": 93961744, "step": 43485 }, { "epoch": 7.0946166394779775, "grad_norm": 0.004528961610049009, "learning_rate": 0.0008150362043899751, "loss": 0.0824, "num_input_tokens_seen": 93972656, "step": 43490 }, { "epoch": 7.095432300163132, "grad_norm": 0.05135902389883995, "learning_rate": 0.0008149809274755595, "loss": 0.0755, "num_input_tokens_seen": 93983248, "step": 43495 }, { "epoch": 7.096247960848287, "grad_norm": 0.09717827290296555, "learning_rate": 0.0008149256441776559, "loss": 0.0423, "num_input_tokens_seen": 93993904, "step": 43500 }, { "epoch": 7.097063621533442, "grad_norm": 0.054743360728025436, "learning_rate": 0.0008148703544973846, "loss": 0.2156, "num_input_tokens_seen": 94004784, "step": 43505 }, { "epoch": 7.097879282218597, "grad_norm": 0.05458596348762512, "learning_rate": 0.000814815058435866, "loss": 0.1561, "num_input_tokens_seen": 94015472, "step": 43510 }, { "epoch": 7.0986949429037525, "grad_norm": 0.022231120616197586, "learning_rate": 0.0008147597559942211, "loss": 0.0298, "num_input_tokens_seen": 94026000, "step": 43515 }, { "epoch": 7.099510603588907, "grad_norm": 0.2611452639102936, "learning_rate": 0.0008147044471735703, "loss": 0.1484, "num_input_tokens_seen": 94037680, "step": 43520 }, { "epoch": 7.100326264274062, "grad_norm": 0.12025828659534454, "learning_rate": 0.0008146491319750346, "loss": 0.1683, "num_input_tokens_seen": 94048528, "step": 43525 }, { "epoch": 7.101141924959217, "grad_norm": 0.09317224472761154, "learning_rate": 0.0008145938103997352, "loss": 0.0316, "num_input_tokens_seen": 94059248, "step": 43530 }, { "epoch": 7.101957585644372, "grad_norm": 0.09532984346151352, "learning_rate": 0.0008145384824487931, "loss": 0.12, "num_input_tokens_seen": 94069680, "step": 43535 }, { "epoch": 7.102773246329527, "grad_norm": 0.2917781472206116, "learning_rate": 0.0008144831481233296, "loss": 0.184, "num_input_tokens_seen": 94079632, "step": 43540 }, { "epoch": 7.103588907014682, "grad_norm": 0.18950186669826508, "learning_rate": 0.0008144278074244662, "loss": 0.0446, "num_input_tokens_seen": 94090288, "step": 43545 }, { "epoch": 7.104404567699837, "grad_norm": 0.00261941971257329, "learning_rate": 0.0008143724603533243, "loss": 0.0615, "num_input_tokens_seen": 94101168, "step": 43550 }, { "epoch": 7.105220228384992, "grad_norm": 0.04184950143098831, "learning_rate": 0.0008143171069110258, "loss": 0.0339, "num_input_tokens_seen": 94111088, "step": 43555 }, { "epoch": 7.106035889070147, "grad_norm": 0.009390073828399181, "learning_rate": 0.0008142617470986924, "loss": 0.1462, "num_input_tokens_seen": 94121872, "step": 43560 }, { "epoch": 7.1068515497553015, "grad_norm": 0.10224417597055435, "learning_rate": 0.000814206380917446, "loss": 0.0273, "num_input_tokens_seen": 94133776, "step": 43565 }, { "epoch": 7.107667210440456, "grad_norm": 0.004597066435962915, "learning_rate": 0.0008141510083684087, "loss": 0.0346, "num_input_tokens_seen": 94143600, "step": 43570 }, { "epoch": 7.108482871125612, "grad_norm": 0.042119644582271576, "learning_rate": 0.0008140956294527026, "loss": 0.0294, "num_input_tokens_seen": 94154704, "step": 43575 }, { "epoch": 7.109298531810767, "grad_norm": 0.2623211145401001, "learning_rate": 0.00081404024417145, "loss": 0.1258, "num_input_tokens_seen": 94164592, "step": 43580 }, { "epoch": 7.110114192495922, "grad_norm": 0.006643175147473812, "learning_rate": 0.0008139848525257737, "loss": 0.0998, "num_input_tokens_seen": 94174960, "step": 43585 }, { "epoch": 7.1109298531810765, "grad_norm": 0.14614737033843994, "learning_rate": 0.000813929454516796, "loss": 0.0271, "num_input_tokens_seen": 94185328, "step": 43590 }, { "epoch": 7.111745513866231, "grad_norm": 0.2504687011241913, "learning_rate": 0.0008138740501456396, "loss": 0.1811, "num_input_tokens_seen": 94194896, "step": 43595 }, { "epoch": 7.112561174551387, "grad_norm": 0.19602181017398834, "learning_rate": 0.0008138186394134275, "loss": 0.1734, "num_input_tokens_seen": 94203920, "step": 43600 }, { "epoch": 7.113376835236542, "grad_norm": 0.06594168394804001, "learning_rate": 0.0008137632223212824, "loss": 0.094, "num_input_tokens_seen": 94215408, "step": 43605 }, { "epoch": 7.114192495921697, "grad_norm": 0.3192061185836792, "learning_rate": 0.0008137077988703276, "loss": 0.3059, "num_input_tokens_seen": 94225840, "step": 43610 }, { "epoch": 7.1150081566068515, "grad_norm": 0.028179455548524857, "learning_rate": 0.0008136523690616864, "loss": 0.0297, "num_input_tokens_seen": 94235120, "step": 43615 }, { "epoch": 7.115823817292006, "grad_norm": 0.01822386495769024, "learning_rate": 0.000813596932896482, "loss": 0.1139, "num_input_tokens_seen": 94247312, "step": 43620 }, { "epoch": 7.116639477977161, "grad_norm": 0.06899786740541458, "learning_rate": 0.000813541490375838, "loss": 0.0928, "num_input_tokens_seen": 94258128, "step": 43625 }, { "epoch": 7.117455138662317, "grad_norm": 0.19595955312252045, "learning_rate": 0.0008134860415008778, "loss": 0.0488, "num_input_tokens_seen": 94268048, "step": 43630 }, { "epoch": 7.118270799347472, "grad_norm": 0.08394555747509003, "learning_rate": 0.0008134305862727253, "loss": 0.0358, "num_input_tokens_seen": 94279184, "step": 43635 }, { "epoch": 7.1190864600326265, "grad_norm": 0.3096727132797241, "learning_rate": 0.0008133751246925046, "loss": 0.0491, "num_input_tokens_seen": 94289744, "step": 43640 }, { "epoch": 7.119902120717781, "grad_norm": 0.21318064630031586, "learning_rate": 0.0008133196567613391, "loss": 0.0773, "num_input_tokens_seen": 94300656, "step": 43645 }, { "epoch": 7.120717781402936, "grad_norm": 0.2673552632331848, "learning_rate": 0.0008132641824803534, "loss": 0.1315, "num_input_tokens_seen": 94310608, "step": 43650 }, { "epoch": 7.121533442088092, "grad_norm": 0.0654841959476471, "learning_rate": 0.0008132087018506716, "loss": 0.0438, "num_input_tokens_seen": 94321584, "step": 43655 }, { "epoch": 7.122349102773247, "grad_norm": 0.010458219796419144, "learning_rate": 0.0008131532148734182, "loss": 0.0208, "num_input_tokens_seen": 94331056, "step": 43660 }, { "epoch": 7.123164763458401, "grad_norm": 0.03026103600859642, "learning_rate": 0.0008130977215497177, "loss": 0.0341, "num_input_tokens_seen": 94341840, "step": 43665 }, { "epoch": 7.123980424143556, "grad_norm": 0.27282023429870605, "learning_rate": 0.0008130422218806945, "loss": 0.1214, "num_input_tokens_seen": 94353232, "step": 43670 }, { "epoch": 7.124796084828711, "grad_norm": 0.006859856657683849, "learning_rate": 0.0008129867158674737, "loss": 0.0763, "num_input_tokens_seen": 94364368, "step": 43675 }, { "epoch": 7.125611745513866, "grad_norm": 0.23025669157505035, "learning_rate": 0.00081293120351118, "loss": 0.1177, "num_input_tokens_seen": 94375216, "step": 43680 }, { "epoch": 7.126427406199022, "grad_norm": 0.10104181617498398, "learning_rate": 0.0008128756848129386, "loss": 0.0252, "num_input_tokens_seen": 94384144, "step": 43685 }, { "epoch": 7.127243066884176, "grad_norm": 0.005190078169107437, "learning_rate": 0.0008128201597738744, "loss": 0.0214, "num_input_tokens_seen": 94394288, "step": 43690 }, { "epoch": 7.128058727569331, "grad_norm": 0.020754588767886162, "learning_rate": 0.0008127646283951129, "loss": 0.1413, "num_input_tokens_seen": 94405808, "step": 43695 }, { "epoch": 7.128874388254486, "grad_norm": 0.2567387521266937, "learning_rate": 0.0008127090906777793, "loss": 0.0685, "num_input_tokens_seen": 94417680, "step": 43700 }, { "epoch": 7.129690048939641, "grad_norm": 0.23241105675697327, "learning_rate": 0.0008126535466229993, "loss": 0.0605, "num_input_tokens_seen": 94428912, "step": 43705 }, { "epoch": 7.130505709624796, "grad_norm": 0.031064637005329132, "learning_rate": 0.0008125979962318987, "loss": 0.1147, "num_input_tokens_seen": 94439344, "step": 43710 }, { "epoch": 7.131321370309951, "grad_norm": 0.11722356826066971, "learning_rate": 0.000812542439505603, "loss": 0.0777, "num_input_tokens_seen": 94450992, "step": 43715 }, { "epoch": 7.132137030995106, "grad_norm": 0.04877146705985069, "learning_rate": 0.0008124868764452384, "loss": 0.0381, "num_input_tokens_seen": 94462064, "step": 43720 }, { "epoch": 7.132952691680261, "grad_norm": 0.262928307056427, "learning_rate": 0.0008124313070519307, "loss": 0.0774, "num_input_tokens_seen": 94472880, "step": 43725 }, { "epoch": 7.133768352365416, "grad_norm": 0.20131583511829376, "learning_rate": 0.0008123757313268064, "loss": 0.0411, "num_input_tokens_seen": 94483600, "step": 43730 }, { "epoch": 7.134584013050571, "grad_norm": 0.03627254441380501, "learning_rate": 0.0008123201492709915, "loss": 0.2043, "num_input_tokens_seen": 94494160, "step": 43735 }, { "epoch": 7.135399673735726, "grad_norm": 0.0354943573474884, "learning_rate": 0.0008122645608856125, "loss": 0.0844, "num_input_tokens_seen": 94505104, "step": 43740 }, { "epoch": 7.136215334420881, "grad_norm": 0.2703050374984741, "learning_rate": 0.0008122089661717961, "loss": 0.1285, "num_input_tokens_seen": 94513680, "step": 43745 }, { "epoch": 7.137030995106036, "grad_norm": 0.00605939282104373, "learning_rate": 0.000812153365130669, "loss": 0.0212, "num_input_tokens_seen": 94523792, "step": 43750 }, { "epoch": 7.137846655791191, "grad_norm": 0.33780086040496826, "learning_rate": 0.0008120977577633578, "loss": 0.1763, "num_input_tokens_seen": 94534896, "step": 43755 }, { "epoch": 7.138662316476346, "grad_norm": 0.29291245341300964, "learning_rate": 0.0008120421440709897, "loss": 0.1751, "num_input_tokens_seen": 94544592, "step": 43760 }, { "epoch": 7.1394779771615005, "grad_norm": 0.024935364723205566, "learning_rate": 0.0008119865240546918, "loss": 0.0174, "num_input_tokens_seen": 94555056, "step": 43765 }, { "epoch": 7.140293637846656, "grad_norm": 0.06435410678386688, "learning_rate": 0.000811930897715591, "loss": 0.04, "num_input_tokens_seen": 94564272, "step": 43770 }, { "epoch": 7.141109298531811, "grad_norm": 0.01669169031083584, "learning_rate": 0.0008118752650548151, "loss": 0.0208, "num_input_tokens_seen": 94574832, "step": 43775 }, { "epoch": 7.141924959216966, "grad_norm": 0.17030321061611176, "learning_rate": 0.0008118196260734911, "loss": 0.1151, "num_input_tokens_seen": 94585616, "step": 43780 }, { "epoch": 7.142740619902121, "grad_norm": 0.015497151762247086, "learning_rate": 0.000811763980772747, "loss": 0.1636, "num_input_tokens_seen": 94596432, "step": 43785 }, { "epoch": 7.143556280587275, "grad_norm": 0.17114491760730743, "learning_rate": 0.0008117083291537102, "loss": 0.2757, "num_input_tokens_seen": 94607600, "step": 43790 }, { "epoch": 7.14437194127243, "grad_norm": 0.12084033340215683, "learning_rate": 0.0008116526712175087, "loss": 0.0772, "num_input_tokens_seen": 94618288, "step": 43795 }, { "epoch": 7.145187601957586, "grad_norm": 0.04583222046494484, "learning_rate": 0.0008115970069652705, "loss": 0.0244, "num_input_tokens_seen": 94628656, "step": 43800 }, { "epoch": 7.146003262642741, "grad_norm": 0.029846591874957085, "learning_rate": 0.0008115413363981237, "loss": 0.018, "num_input_tokens_seen": 94639696, "step": 43805 }, { "epoch": 7.146818923327896, "grad_norm": 0.05696532502770424, "learning_rate": 0.0008114856595171963, "loss": 0.0417, "num_input_tokens_seen": 94651216, "step": 43810 }, { "epoch": 7.14763458401305, "grad_norm": 0.015203659422695637, "learning_rate": 0.000811429976323617, "loss": 0.0945, "num_input_tokens_seen": 94661328, "step": 43815 }, { "epoch": 7.148450244698205, "grad_norm": 0.020137041807174683, "learning_rate": 0.0008113742868185142, "loss": 0.0442, "num_input_tokens_seen": 94672464, "step": 43820 }, { "epoch": 7.149265905383361, "grad_norm": 0.017115939408540726, "learning_rate": 0.0008113185910030163, "loss": 0.0405, "num_input_tokens_seen": 94683888, "step": 43825 }, { "epoch": 7.150081566068516, "grad_norm": 0.2196558266878128, "learning_rate": 0.0008112628888782523, "loss": 0.179, "num_input_tokens_seen": 94695504, "step": 43830 }, { "epoch": 7.150897226753671, "grad_norm": 0.011017675511538982, "learning_rate": 0.0008112071804453511, "loss": 0.0671, "num_input_tokens_seen": 94706768, "step": 43835 }, { "epoch": 7.151712887438825, "grad_norm": 0.2845572233200073, "learning_rate": 0.0008111514657054415, "loss": 0.199, "num_input_tokens_seen": 94718288, "step": 43840 }, { "epoch": 7.15252854812398, "grad_norm": 0.009154774248600006, "learning_rate": 0.0008110957446596527, "loss": 0.1263, "num_input_tokens_seen": 94728752, "step": 43845 }, { "epoch": 7.153344208809135, "grad_norm": 0.015152211301028728, "learning_rate": 0.0008110400173091142, "loss": 0.0405, "num_input_tokens_seen": 94740464, "step": 43850 }, { "epoch": 7.154159869494291, "grad_norm": 0.3790530562400818, "learning_rate": 0.0008109842836549549, "loss": 0.1817, "num_input_tokens_seen": 94751152, "step": 43855 }, { "epoch": 7.1549755301794455, "grad_norm": 0.13960027694702148, "learning_rate": 0.0008109285436983047, "loss": 0.0275, "num_input_tokens_seen": 94761584, "step": 43860 }, { "epoch": 7.1557911908646, "grad_norm": 0.22438912093639374, "learning_rate": 0.000810872797440293, "loss": 0.1195, "num_input_tokens_seen": 94772816, "step": 43865 }, { "epoch": 7.156606851549755, "grad_norm": 0.007492088247090578, "learning_rate": 0.0008108170448820498, "loss": 0.0219, "num_input_tokens_seen": 94784112, "step": 43870 }, { "epoch": 7.15742251223491, "grad_norm": 0.1964537799358368, "learning_rate": 0.0008107612860247049, "loss": 0.0462, "num_input_tokens_seen": 94796240, "step": 43875 }, { "epoch": 7.158238172920065, "grad_norm": 0.013916724361479282, "learning_rate": 0.0008107055208693882, "loss": 0.1627, "num_input_tokens_seen": 94807280, "step": 43880 }, { "epoch": 7.1590538336052205, "grad_norm": 0.04685168340802193, "learning_rate": 0.00081064974941723, "loss": 0.0447, "num_input_tokens_seen": 94816720, "step": 43885 }, { "epoch": 7.159869494290375, "grad_norm": 0.03049752674996853, "learning_rate": 0.0008105939716693606, "loss": 0.0282, "num_input_tokens_seen": 94827984, "step": 43890 }, { "epoch": 7.16068515497553, "grad_norm": 0.006316207814961672, "learning_rate": 0.0008105381876269104, "loss": 0.099, "num_input_tokens_seen": 94839632, "step": 43895 }, { "epoch": 7.161500815660685, "grad_norm": 0.0397644080221653, "learning_rate": 0.0008104823972910098, "loss": 0.0905, "num_input_tokens_seen": 94849168, "step": 43900 }, { "epoch": 7.16231647634584, "grad_norm": 0.025401227176189423, "learning_rate": 0.0008104266006627895, "loss": 0.0366, "num_input_tokens_seen": 94859664, "step": 43905 }, { "epoch": 7.1631321370309955, "grad_norm": 0.3657297194004059, "learning_rate": 0.0008103707977433804, "loss": 0.0846, "num_input_tokens_seen": 94870736, "step": 43910 }, { "epoch": 7.16394779771615, "grad_norm": 0.10411134362220764, "learning_rate": 0.0008103149885339134, "loss": 0.0258, "num_input_tokens_seen": 94881456, "step": 43915 }, { "epoch": 7.164763458401305, "grad_norm": 0.014874082058668137, "learning_rate": 0.0008102591730355193, "loss": 0.0352, "num_input_tokens_seen": 94892976, "step": 43920 }, { "epoch": 7.16557911908646, "grad_norm": 0.03545144945383072, "learning_rate": 0.0008102033512493297, "loss": 0.0273, "num_input_tokens_seen": 94904464, "step": 43925 }, { "epoch": 7.166394779771615, "grad_norm": 0.21843662858009338, "learning_rate": 0.0008101475231764756, "loss": 0.0497, "num_input_tokens_seen": 94917008, "step": 43930 }, { "epoch": 7.16721044045677, "grad_norm": 0.13957096636295319, "learning_rate": 0.0008100916888180884, "loss": 0.061, "num_input_tokens_seen": 94928560, "step": 43935 }, { "epoch": 7.168026101141925, "grad_norm": 0.26728448271751404, "learning_rate": 0.0008100358481752998, "loss": 0.0351, "num_input_tokens_seen": 94939536, "step": 43940 }, { "epoch": 7.16884176182708, "grad_norm": 0.006182427518069744, "learning_rate": 0.0008099800012492415, "loss": 0.0131, "num_input_tokens_seen": 94950640, "step": 43945 }, { "epoch": 7.169657422512235, "grad_norm": 0.004266462288796902, "learning_rate": 0.0008099241480410451, "loss": 0.1088, "num_input_tokens_seen": 94961968, "step": 43950 }, { "epoch": 7.17047308319739, "grad_norm": 0.17784513533115387, "learning_rate": 0.0008098682885518427, "loss": 0.1978, "num_input_tokens_seen": 94972272, "step": 43955 }, { "epoch": 7.171288743882545, "grad_norm": 0.008746275678277016, "learning_rate": 0.0008098124227827663, "loss": 0.0554, "num_input_tokens_seen": 94983088, "step": 43960 }, { "epoch": 7.1721044045677, "grad_norm": 0.009621957316994667, "learning_rate": 0.0008097565507349482, "loss": 0.166, "num_input_tokens_seen": 94992560, "step": 43965 }, { "epoch": 7.172920065252855, "grad_norm": 0.34376588463783264, "learning_rate": 0.0008097006724095208, "loss": 0.1315, "num_input_tokens_seen": 95003568, "step": 43970 }, { "epoch": 7.17373572593801, "grad_norm": 0.11799240112304688, "learning_rate": 0.0008096447878076161, "loss": 0.1875, "num_input_tokens_seen": 95014768, "step": 43975 }, { "epoch": 7.174551386623165, "grad_norm": 0.2644333839416504, "learning_rate": 0.0008095888969303672, "loss": 0.2692, "num_input_tokens_seen": 95025296, "step": 43980 }, { "epoch": 7.1753670473083195, "grad_norm": 0.052062440663576126, "learning_rate": 0.0008095329997789063, "loss": 0.0439, "num_input_tokens_seen": 95036464, "step": 43985 }, { "epoch": 7.176182707993474, "grad_norm": 0.012716952711343765, "learning_rate": 0.0008094770963543667, "loss": 0.0288, "num_input_tokens_seen": 95047344, "step": 43990 }, { "epoch": 7.17699836867863, "grad_norm": 0.03887278586626053, "learning_rate": 0.0008094211866578812, "loss": 0.0392, "num_input_tokens_seen": 95058320, "step": 43995 }, { "epoch": 7.177814029363785, "grad_norm": 0.14700531959533691, "learning_rate": 0.0008093652706905827, "loss": 0.0444, "num_input_tokens_seen": 95069488, "step": 44000 }, { "epoch": 7.17862969004894, "grad_norm": 0.018018363043665886, "learning_rate": 0.0008093093484536045, "loss": 0.0374, "num_input_tokens_seen": 95079792, "step": 44005 }, { "epoch": 7.1794453507340945, "grad_norm": 0.03422345593571663, "learning_rate": 0.0008092534199480801, "loss": 0.0164, "num_input_tokens_seen": 95090832, "step": 44010 }, { "epoch": 7.180261011419249, "grad_norm": 0.10804907977581024, "learning_rate": 0.0008091974851751427, "loss": 0.0491, "num_input_tokens_seen": 95102736, "step": 44015 }, { "epoch": 7.181076672104404, "grad_norm": 0.20800091326236725, "learning_rate": 0.0008091415441359261, "loss": 0.1226, "num_input_tokens_seen": 95112496, "step": 44020 }, { "epoch": 7.18189233278956, "grad_norm": 0.0028511809650808573, "learning_rate": 0.000809085596831564, "loss": 0.0309, "num_input_tokens_seen": 95122896, "step": 44025 }, { "epoch": 7.182707993474715, "grad_norm": 0.25854846835136414, "learning_rate": 0.0008090296432631901, "loss": 0.0932, "num_input_tokens_seen": 95133808, "step": 44030 }, { "epoch": 7.1835236541598695, "grad_norm": 0.017109766602516174, "learning_rate": 0.0008089736834319384, "loss": 0.0283, "num_input_tokens_seen": 95143952, "step": 44035 }, { "epoch": 7.184339314845024, "grad_norm": 0.07196343690156937, "learning_rate": 0.0008089177173389431, "loss": 0.0225, "num_input_tokens_seen": 95154096, "step": 44040 }, { "epoch": 7.185154975530179, "grad_norm": 0.2786267101764679, "learning_rate": 0.0008088617449853382, "loss": 0.0926, "num_input_tokens_seen": 95164816, "step": 44045 }, { "epoch": 7.185970636215335, "grad_norm": 0.0031654785852879286, "learning_rate": 0.0008088057663722583, "loss": 0.0361, "num_input_tokens_seen": 95174608, "step": 44050 }, { "epoch": 7.18678629690049, "grad_norm": 0.006744810845702887, "learning_rate": 0.000808749781500838, "loss": 0.0053, "num_input_tokens_seen": 95185552, "step": 44055 }, { "epoch": 7.1876019575856445, "grad_norm": 0.23967309296131134, "learning_rate": 0.0008086937903722114, "loss": 0.0432, "num_input_tokens_seen": 95196816, "step": 44060 }, { "epoch": 7.188417618270799, "grad_norm": 0.060102108865976334, "learning_rate": 0.0008086377929875137, "loss": 0.1255, "num_input_tokens_seen": 95207856, "step": 44065 }, { "epoch": 7.189233278955954, "grad_norm": 0.012995628640055656, "learning_rate": 0.0008085817893478797, "loss": 0.0434, "num_input_tokens_seen": 95217968, "step": 44070 }, { "epoch": 7.190048939641109, "grad_norm": 0.3781433403491974, "learning_rate": 0.0008085257794544441, "loss": 0.0734, "num_input_tokens_seen": 95229488, "step": 44075 }, { "epoch": 7.190864600326265, "grad_norm": 0.47650331258773804, "learning_rate": 0.0008084697633083422, "loss": 0.2665, "num_input_tokens_seen": 95239312, "step": 44080 }, { "epoch": 7.191680261011419, "grad_norm": 0.05169449746608734, "learning_rate": 0.0008084137409107093, "loss": 0.0848, "num_input_tokens_seen": 95248784, "step": 44085 }, { "epoch": 7.192495921696574, "grad_norm": 0.22721447050571442, "learning_rate": 0.0008083577122626806, "loss": 0.1741, "num_input_tokens_seen": 95260176, "step": 44090 }, { "epoch": 7.193311582381729, "grad_norm": 0.004407059401273727, "learning_rate": 0.0008083016773653917, "loss": 0.0358, "num_input_tokens_seen": 95271248, "step": 44095 }, { "epoch": 7.194127243066884, "grad_norm": 0.004881150089204311, "learning_rate": 0.0008082456362199783, "loss": 0.0105, "num_input_tokens_seen": 95281104, "step": 44100 }, { "epoch": 7.19494290375204, "grad_norm": 0.1279035061597824, "learning_rate": 0.000808189588827576, "loss": 0.0558, "num_input_tokens_seen": 95291344, "step": 44105 }, { "epoch": 7.195758564437194, "grad_norm": 0.07432336360216141, "learning_rate": 0.0008081335351893206, "loss": 0.0268, "num_input_tokens_seen": 95301904, "step": 44110 }, { "epoch": 7.196574225122349, "grad_norm": 0.02257407084107399, "learning_rate": 0.0008080774753063485, "loss": 0.0824, "num_input_tokens_seen": 95312144, "step": 44115 }, { "epoch": 7.197389885807504, "grad_norm": 0.22624976933002472, "learning_rate": 0.0008080214091797953, "loss": 0.1775, "num_input_tokens_seen": 95321360, "step": 44120 }, { "epoch": 7.198205546492659, "grad_norm": 0.005118417553603649, "learning_rate": 0.0008079653368107975, "loss": 0.0346, "num_input_tokens_seen": 95331888, "step": 44125 }, { "epoch": 7.199021207177814, "grad_norm": 0.006295321509242058, "learning_rate": 0.0008079092582004915, "loss": 0.1056, "num_input_tokens_seen": 95343152, "step": 44130 }, { "epoch": 7.199836867862969, "grad_norm": 0.02929351106286049, "learning_rate": 0.0008078531733500137, "loss": 0.0619, "num_input_tokens_seen": 95353936, "step": 44135 }, { "epoch": 7.200652528548124, "grad_norm": 0.16848796606063843, "learning_rate": 0.000807797082260501, "loss": 0.0424, "num_input_tokens_seen": 95364240, "step": 44140 }, { "epoch": 7.201468189233279, "grad_norm": 0.015281510539352894, "learning_rate": 0.0008077409849330898, "loss": 0.0405, "num_input_tokens_seen": 95374448, "step": 44145 }, { "epoch": 7.202283849918434, "grad_norm": 0.3676411807537079, "learning_rate": 0.0008076848813689171, "loss": 0.155, "num_input_tokens_seen": 95384528, "step": 44150 }, { "epoch": 7.203099510603589, "grad_norm": 0.035579223185777664, "learning_rate": 0.0008076287715691201, "loss": 0.0363, "num_input_tokens_seen": 95395728, "step": 44155 }, { "epoch": 7.2039151712887435, "grad_norm": 0.31866884231567383, "learning_rate": 0.0008075726555348357, "loss": 0.1915, "num_input_tokens_seen": 95407120, "step": 44160 }, { "epoch": 7.204730831973899, "grad_norm": 0.2983294725418091, "learning_rate": 0.0008075165332672013, "loss": 0.0841, "num_input_tokens_seen": 95417008, "step": 44165 }, { "epoch": 7.205546492659054, "grad_norm": 0.2822973430156708, "learning_rate": 0.0008074604047673542, "loss": 0.0586, "num_input_tokens_seen": 95427632, "step": 44170 }, { "epoch": 7.206362153344209, "grad_norm": 0.3057224154472351, "learning_rate": 0.000807404270036432, "loss": 0.0935, "num_input_tokens_seen": 95437424, "step": 44175 }, { "epoch": 7.207177814029364, "grad_norm": 0.009443351998925209, "learning_rate": 0.0008073481290755723, "loss": 0.1251, "num_input_tokens_seen": 95449424, "step": 44180 }, { "epoch": 7.2079934747145185, "grad_norm": 0.13818103075027466, "learning_rate": 0.0008072919818859128, "loss": 0.0718, "num_input_tokens_seen": 95460624, "step": 44185 }, { "epoch": 7.208809135399674, "grad_norm": 0.022867241874337196, "learning_rate": 0.0008072358284685915, "loss": 0.1683, "num_input_tokens_seen": 95470672, "step": 44190 }, { "epoch": 7.209624796084829, "grad_norm": 0.3809795379638672, "learning_rate": 0.0008071796688247463, "loss": 0.0934, "num_input_tokens_seen": 95481936, "step": 44195 }, { "epoch": 7.210440456769984, "grad_norm": 0.05303087458014488, "learning_rate": 0.0008071235029555155, "loss": 0.0491, "num_input_tokens_seen": 95492368, "step": 44200 }, { "epoch": 7.211256117455139, "grad_norm": 0.30895814299583435, "learning_rate": 0.0008070673308620373, "loss": 0.1043, "num_input_tokens_seen": 95503280, "step": 44205 }, { "epoch": 7.212071778140293, "grad_norm": 0.05848781764507294, "learning_rate": 0.0008070111525454501, "loss": 0.0508, "num_input_tokens_seen": 95514544, "step": 44210 }, { "epoch": 7.212887438825448, "grad_norm": 0.016617875546216965, "learning_rate": 0.0008069549680068923, "loss": 0.037, "num_input_tokens_seen": 95525200, "step": 44215 }, { "epoch": 7.213703099510604, "grad_norm": 0.010751327499747276, "learning_rate": 0.0008068987772475029, "loss": 0.1975, "num_input_tokens_seen": 95536144, "step": 44220 }, { "epoch": 7.214518760195759, "grad_norm": 0.3213596045970917, "learning_rate": 0.0008068425802684204, "loss": 0.1174, "num_input_tokens_seen": 95547664, "step": 44225 }, { "epoch": 7.215334420880914, "grad_norm": 0.012682809494435787, "learning_rate": 0.0008067863770707838, "loss": 0.0114, "num_input_tokens_seen": 95559024, "step": 44230 }, { "epoch": 7.216150081566068, "grad_norm": 0.02761760540306568, "learning_rate": 0.0008067301676557319, "loss": 0.0405, "num_input_tokens_seen": 95569680, "step": 44235 }, { "epoch": 7.216965742251223, "grad_norm": 0.22118832170963287, "learning_rate": 0.0008066739520244042, "loss": 0.1241, "num_input_tokens_seen": 95580944, "step": 44240 }, { "epoch": 7.217781402936378, "grad_norm": 0.44345036149024963, "learning_rate": 0.0008066177301779396, "loss": 0.116, "num_input_tokens_seen": 95590928, "step": 44245 }, { "epoch": 7.218597063621534, "grad_norm": 0.07468734681606293, "learning_rate": 0.0008065615021174779, "loss": 0.0334, "num_input_tokens_seen": 95602128, "step": 44250 }, { "epoch": 7.219412724306689, "grad_norm": 0.2577841281890869, "learning_rate": 0.0008065052678441584, "loss": 0.0726, "num_input_tokens_seen": 95613264, "step": 44255 }, { "epoch": 7.220228384991843, "grad_norm": 0.04845619201660156, "learning_rate": 0.0008064490273591209, "loss": 0.0159, "num_input_tokens_seen": 95623600, "step": 44260 }, { "epoch": 7.221044045676998, "grad_norm": 0.3292473256587982, "learning_rate": 0.000806392780663505, "loss": 0.0465, "num_input_tokens_seen": 95635696, "step": 44265 }, { "epoch": 7.221859706362153, "grad_norm": 0.11419453471899033, "learning_rate": 0.0008063365277584508, "loss": 0.0303, "num_input_tokens_seen": 95647088, "step": 44270 }, { "epoch": 7.222675367047309, "grad_norm": 0.01460292749106884, "learning_rate": 0.0008062802686450982, "loss": 0.0346, "num_input_tokens_seen": 95657936, "step": 44275 }, { "epoch": 7.2234910277324635, "grad_norm": 0.03610467538237572, "learning_rate": 0.0008062240033245875, "loss": 0.0664, "num_input_tokens_seen": 95668400, "step": 44280 }, { "epoch": 7.224306688417618, "grad_norm": 0.01440652459859848, "learning_rate": 0.0008061677317980587, "loss": 0.1672, "num_input_tokens_seen": 95678256, "step": 44285 }, { "epoch": 7.225122349102773, "grad_norm": 0.002639458980411291, "learning_rate": 0.0008061114540666525, "loss": 0.0697, "num_input_tokens_seen": 95688112, "step": 44290 }, { "epoch": 7.225938009787928, "grad_norm": 0.0032658553682267666, "learning_rate": 0.0008060551701315093, "loss": 0.0458, "num_input_tokens_seen": 95699088, "step": 44295 }, { "epoch": 7.226753670473083, "grad_norm": 0.09494752436876297, "learning_rate": 0.00080599887999377, "loss": 0.081, "num_input_tokens_seen": 95709392, "step": 44300 }, { "epoch": 7.2275693311582385, "grad_norm": 0.0071384357288479805, "learning_rate": 0.0008059425836545751, "loss": 0.0594, "num_input_tokens_seen": 95719952, "step": 44305 }, { "epoch": 7.228384991843393, "grad_norm": 0.010850159451365471, "learning_rate": 0.0008058862811150657, "loss": 0.0724, "num_input_tokens_seen": 95730032, "step": 44310 }, { "epoch": 7.229200652528548, "grad_norm": 0.1265016794204712, "learning_rate": 0.0008058299723763826, "loss": 0.0379, "num_input_tokens_seen": 95740752, "step": 44315 }, { "epoch": 7.230016313213703, "grad_norm": 0.2655118703842163, "learning_rate": 0.0008057736574396673, "loss": 0.048, "num_input_tokens_seen": 95751888, "step": 44320 }, { "epoch": 7.230831973898858, "grad_norm": 0.025311551988124847, "learning_rate": 0.000805717336306061, "loss": 0.1106, "num_input_tokens_seen": 95762512, "step": 44325 }, { "epoch": 7.231647634584013, "grad_norm": 0.0299469456076622, "learning_rate": 0.000805661008976705, "loss": 0.261, "num_input_tokens_seen": 95773424, "step": 44330 }, { "epoch": 7.232463295269168, "grad_norm": 0.09730672836303711, "learning_rate": 0.0008056046754527406, "loss": 0.0398, "num_input_tokens_seen": 95783888, "step": 44335 }, { "epoch": 7.233278955954323, "grad_norm": 0.2526463568210602, "learning_rate": 0.00080554833573531, "loss": 0.0983, "num_input_tokens_seen": 95794992, "step": 44340 }, { "epoch": 7.234094616639478, "grad_norm": 0.03998822346329689, "learning_rate": 0.0008054919898255548, "loss": 0.0873, "num_input_tokens_seen": 95806224, "step": 44345 }, { "epoch": 7.234910277324633, "grad_norm": 0.252896785736084, "learning_rate": 0.0008054356377246168, "loss": 0.0704, "num_input_tokens_seen": 95817168, "step": 44350 }, { "epoch": 7.235725938009788, "grad_norm": 0.01774667389690876, "learning_rate": 0.0008053792794336381, "loss": 0.1547, "num_input_tokens_seen": 95827792, "step": 44355 }, { "epoch": 7.236541598694943, "grad_norm": 0.015287657268345356, "learning_rate": 0.0008053229149537611, "loss": 0.0861, "num_input_tokens_seen": 95837488, "step": 44360 }, { "epoch": 7.237357259380098, "grad_norm": 0.05477646738290787, "learning_rate": 0.0008052665442861278, "loss": 0.0597, "num_input_tokens_seen": 95849744, "step": 44365 }, { "epoch": 7.238172920065253, "grad_norm": 0.22003793716430664, "learning_rate": 0.0008052101674318805, "loss": 0.056, "num_input_tokens_seen": 95861328, "step": 44370 }, { "epoch": 7.238988580750408, "grad_norm": 0.017607053741812706, "learning_rate": 0.0008051537843921623, "loss": 0.153, "num_input_tokens_seen": 95872752, "step": 44375 }, { "epoch": 7.239804241435563, "grad_norm": 0.015592445619404316, "learning_rate": 0.0008050973951681153, "loss": 0.0481, "num_input_tokens_seen": 95883984, "step": 44380 }, { "epoch": 7.240619902120717, "grad_norm": 0.10997194796800613, "learning_rate": 0.0008050409997608827, "loss": 0.2259, "num_input_tokens_seen": 95895056, "step": 44385 }, { "epoch": 7.241435562805873, "grad_norm": 0.025174317881464958, "learning_rate": 0.0008049845981716072, "loss": 0.0652, "num_input_tokens_seen": 95905104, "step": 44390 }, { "epoch": 7.242251223491028, "grad_norm": 0.2000531107187271, "learning_rate": 0.0008049281904014318, "loss": 0.0521, "num_input_tokens_seen": 95915216, "step": 44395 }, { "epoch": 7.243066884176183, "grad_norm": 0.12257369607686996, "learning_rate": 0.0008048717764514999, "loss": 0.033, "num_input_tokens_seen": 95924560, "step": 44400 }, { "epoch": 7.2438825448613375, "grad_norm": 0.27642714977264404, "learning_rate": 0.0008048153563229548, "loss": 0.083, "num_input_tokens_seen": 95935024, "step": 44405 }, { "epoch": 7.244698205546492, "grad_norm": 0.113319993019104, "learning_rate": 0.0008047589300169398, "loss": 0.0861, "num_input_tokens_seen": 95944464, "step": 44410 }, { "epoch": 7.245513866231648, "grad_norm": 0.007153376936912537, "learning_rate": 0.0008047024975345983, "loss": 0.1051, "num_input_tokens_seen": 95954704, "step": 44415 }, { "epoch": 7.246329526916803, "grad_norm": 0.01275695487856865, "learning_rate": 0.0008046460588770743, "loss": 0.0642, "num_input_tokens_seen": 95965456, "step": 44420 }, { "epoch": 7.247145187601958, "grad_norm": 0.14486052095890045, "learning_rate": 0.0008045896140455114, "loss": 0.1648, "num_input_tokens_seen": 95976752, "step": 44425 }, { "epoch": 7.2479608482871125, "grad_norm": 0.24699799716472626, "learning_rate": 0.0008045331630410535, "loss": 0.065, "num_input_tokens_seen": 95988400, "step": 44430 }, { "epoch": 7.248776508972267, "grad_norm": 0.015178795903921127, "learning_rate": 0.0008044767058648448, "loss": 0.0229, "num_input_tokens_seen": 95999280, "step": 44435 }, { "epoch": 7.249592169657422, "grad_norm": 0.04336842894554138, "learning_rate": 0.0008044202425180293, "loss": 0.0238, "num_input_tokens_seen": 96008144, "step": 44440 }, { "epoch": 7.250407830342578, "grad_norm": 0.11812159419059753, "learning_rate": 0.0008043637730017515, "loss": 0.2407, "num_input_tokens_seen": 96018768, "step": 44445 }, { "epoch": 7.251223491027733, "grad_norm": 0.25787487626075745, "learning_rate": 0.0008043072973171557, "loss": 0.0677, "num_input_tokens_seen": 96028688, "step": 44450 }, { "epoch": 7.2520391517128875, "grad_norm": 0.04251531511545181, "learning_rate": 0.0008042508154653865, "loss": 0.068, "num_input_tokens_seen": 96040368, "step": 44455 }, { "epoch": 7.252854812398042, "grad_norm": 0.019232304766774178, "learning_rate": 0.0008041943274475886, "loss": 0.0173, "num_input_tokens_seen": 96051120, "step": 44460 }, { "epoch": 7.253670473083197, "grad_norm": 0.3413325548171997, "learning_rate": 0.0008041378332649067, "loss": 0.1559, "num_input_tokens_seen": 96062224, "step": 44465 }, { "epoch": 7.254486133768353, "grad_norm": 0.006722092628479004, "learning_rate": 0.0008040813329184857, "loss": 0.102, "num_input_tokens_seen": 96073072, "step": 44470 }, { "epoch": 7.255301794453508, "grad_norm": 0.053375717252492905, "learning_rate": 0.000804024826409471, "loss": 0.1075, "num_input_tokens_seen": 96082992, "step": 44475 }, { "epoch": 7.2561174551386625, "grad_norm": 0.14441102743148804, "learning_rate": 0.0008039683137390073, "loss": 0.1015, "num_input_tokens_seen": 96093136, "step": 44480 }, { "epoch": 7.256933115823817, "grad_norm": 0.22683408856391907, "learning_rate": 0.0008039117949082401, "loss": 0.0542, "num_input_tokens_seen": 96103376, "step": 44485 }, { "epoch": 7.257748776508972, "grad_norm": 0.2603529989719391, "learning_rate": 0.0008038552699183148, "loss": 0.049, "num_input_tokens_seen": 96114672, "step": 44490 }, { "epoch": 7.258564437194127, "grad_norm": 0.03789392486214638, "learning_rate": 0.0008037987387703771, "loss": 0.1357, "num_input_tokens_seen": 96125136, "step": 44495 }, { "epoch": 7.259380097879283, "grad_norm": 0.008358441293239594, "learning_rate": 0.0008037422014655725, "loss": 0.1643, "num_input_tokens_seen": 96135856, "step": 44500 }, { "epoch": 7.260195758564437, "grad_norm": 0.3523517847061157, "learning_rate": 0.0008036856580050469, "loss": 0.2079, "num_input_tokens_seen": 96145712, "step": 44505 }, { "epoch": 7.261011419249592, "grad_norm": 0.07002881169319153, "learning_rate": 0.000803629108389946, "loss": 0.1167, "num_input_tokens_seen": 96156304, "step": 44510 }, { "epoch": 7.261827079934747, "grad_norm": 0.05014079064130783, "learning_rate": 0.0008035725526214164, "loss": 0.0708, "num_input_tokens_seen": 96166864, "step": 44515 }, { "epoch": 7.262642740619902, "grad_norm": 0.2250353842973709, "learning_rate": 0.0008035159907006037, "loss": 0.0655, "num_input_tokens_seen": 96177552, "step": 44520 }, { "epoch": 7.263458401305057, "grad_norm": 0.2552667260169983, "learning_rate": 0.0008034594226286545, "loss": 0.0755, "num_input_tokens_seen": 96188592, "step": 44525 }, { "epoch": 7.264274061990212, "grad_norm": 0.22745175659656525, "learning_rate": 0.0008034028484067149, "loss": 0.1418, "num_input_tokens_seen": 96199376, "step": 44530 }, { "epoch": 7.265089722675367, "grad_norm": 0.1307612508535385, "learning_rate": 0.0008033462680359319, "loss": 0.0757, "num_input_tokens_seen": 96210800, "step": 44535 }, { "epoch": 7.265905383360522, "grad_norm": 0.015104546211659908, "learning_rate": 0.000803289681517452, "loss": 0.018, "num_input_tokens_seen": 96221488, "step": 44540 }, { "epoch": 7.266721044045677, "grad_norm": 0.2224571257829666, "learning_rate": 0.0008032330888524217, "loss": 0.1082, "num_input_tokens_seen": 96231024, "step": 44545 }, { "epoch": 7.267536704730832, "grad_norm": 0.05627712979912758, "learning_rate": 0.0008031764900419885, "loss": 0.0475, "num_input_tokens_seen": 96241328, "step": 44550 }, { "epoch": 7.268352365415987, "grad_norm": 0.12514592707157135, "learning_rate": 0.000803119885087299, "loss": 0.0325, "num_input_tokens_seen": 96253072, "step": 44555 }, { "epoch": 7.269168026101142, "grad_norm": 0.052119843661785126, "learning_rate": 0.0008030632739895004, "loss": 0.0786, "num_input_tokens_seen": 96262928, "step": 44560 }, { "epoch": 7.269983686786297, "grad_norm": 0.030936799943447113, "learning_rate": 0.0008030066567497401, "loss": 0.0753, "num_input_tokens_seen": 96274640, "step": 44565 }, { "epoch": 7.270799347471452, "grad_norm": 0.00539214164018631, "learning_rate": 0.0008029500333691656, "loss": 0.0261, "num_input_tokens_seen": 96285104, "step": 44570 }, { "epoch": 7.271615008156607, "grad_norm": 0.012339390814304352, "learning_rate": 0.0008028934038489243, "loss": 0.0689, "num_input_tokens_seen": 96296720, "step": 44575 }, { "epoch": 7.2724306688417615, "grad_norm": 0.08805263042449951, "learning_rate": 0.000802836768190164, "loss": 0.0618, "num_input_tokens_seen": 96307280, "step": 44580 }, { "epoch": 7.273246329526917, "grad_norm": 0.03065626323223114, "learning_rate": 0.0008027801263940322, "loss": 0.0505, "num_input_tokens_seen": 96317424, "step": 44585 }, { "epoch": 7.274061990212072, "grad_norm": 0.2728951871395111, "learning_rate": 0.0008027234784616773, "loss": 0.1392, "num_input_tokens_seen": 96328528, "step": 44590 }, { "epoch": 7.274877650897227, "grad_norm": 0.198155015707016, "learning_rate": 0.0008026668243942469, "loss": 0.0426, "num_input_tokens_seen": 96338864, "step": 44595 }, { "epoch": 7.275693311582382, "grad_norm": 0.1933411955833435, "learning_rate": 0.0008026101641928895, "loss": 0.0569, "num_input_tokens_seen": 96349936, "step": 44600 }, { "epoch": 7.2765089722675365, "grad_norm": 0.04269864410161972, "learning_rate": 0.000802553497858753, "loss": 0.0611, "num_input_tokens_seen": 96360944, "step": 44605 }, { "epoch": 7.277324632952691, "grad_norm": 0.02413656748831272, "learning_rate": 0.0008024968253929861, "loss": 0.0228, "num_input_tokens_seen": 96372016, "step": 44610 }, { "epoch": 7.278140293637847, "grad_norm": 0.0752306878566742, "learning_rate": 0.0008024401467967375, "loss": 0.0947, "num_input_tokens_seen": 96382064, "step": 44615 }, { "epoch": 7.278955954323002, "grad_norm": 0.20823276042938232, "learning_rate": 0.0008023834620711555, "loss": 0.1745, "num_input_tokens_seen": 96392592, "step": 44620 }, { "epoch": 7.279771615008157, "grad_norm": 0.0009308802546001971, "learning_rate": 0.000802326771217389, "loss": 0.0337, "num_input_tokens_seen": 96403760, "step": 44625 }, { "epoch": 7.280587275693311, "grad_norm": 0.06344801187515259, "learning_rate": 0.0008022700742365871, "loss": 0.0351, "num_input_tokens_seen": 96415600, "step": 44630 }, { "epoch": 7.281402936378466, "grad_norm": 0.02278982475399971, "learning_rate": 0.0008022133711298987, "loss": 0.0133, "num_input_tokens_seen": 96424592, "step": 44635 }, { "epoch": 7.282218597063622, "grad_norm": 0.2168564349412918, "learning_rate": 0.0008021566618984728, "loss": 0.0382, "num_input_tokens_seen": 96435376, "step": 44640 }, { "epoch": 7.283034257748777, "grad_norm": 0.014572087675333023, "learning_rate": 0.0008020999465434589, "loss": 0.0164, "num_input_tokens_seen": 96445680, "step": 44645 }, { "epoch": 7.283849918433932, "grad_norm": 0.003936320077627897, "learning_rate": 0.0008020432250660063, "loss": 0.1148, "num_input_tokens_seen": 96455440, "step": 44650 }, { "epoch": 7.284665579119086, "grad_norm": 0.422513872385025, "learning_rate": 0.0008019864974672646, "loss": 0.077, "num_input_tokens_seen": 96465328, "step": 44655 }, { "epoch": 7.285481239804241, "grad_norm": 0.006895182654261589, "learning_rate": 0.0008019297637483836, "loss": 0.1485, "num_input_tokens_seen": 96476304, "step": 44660 }, { "epoch": 7.286296900489396, "grad_norm": 0.021543532609939575, "learning_rate": 0.0008018730239105127, "loss": 0.031, "num_input_tokens_seen": 96486928, "step": 44665 }, { "epoch": 7.287112561174552, "grad_norm": 0.08389617502689362, "learning_rate": 0.000801816277954802, "loss": 0.0707, "num_input_tokens_seen": 96496592, "step": 44670 }, { "epoch": 7.287928221859707, "grad_norm": 0.0032530969474464655, "learning_rate": 0.0008017595258824016, "loss": 0.0888, "num_input_tokens_seen": 96507216, "step": 44675 }, { "epoch": 7.288743882544861, "grad_norm": 0.052641693502664566, "learning_rate": 0.0008017027676944617, "loss": 0.1027, "num_input_tokens_seen": 96518160, "step": 44680 }, { "epoch": 7.289559543230016, "grad_norm": 0.02181733213365078, "learning_rate": 0.0008016460033921323, "loss": 0.1183, "num_input_tokens_seen": 96531568, "step": 44685 }, { "epoch": 7.290375203915171, "grad_norm": 0.14702098071575165, "learning_rate": 0.0008015892329765642, "loss": 0.199, "num_input_tokens_seen": 96541840, "step": 44690 }, { "epoch": 7.291190864600326, "grad_norm": 0.016916362568736076, "learning_rate": 0.0008015324564489075, "loss": 0.1729, "num_input_tokens_seen": 96553072, "step": 44695 }, { "epoch": 7.2920065252854815, "grad_norm": 0.02293366566300392, "learning_rate": 0.0008014756738103132, "loss": 0.0379, "num_input_tokens_seen": 96563568, "step": 44700 }, { "epoch": 7.292822185970636, "grad_norm": 0.031679704785346985, "learning_rate": 0.0008014188850619318, "loss": 0.0292, "num_input_tokens_seen": 96573392, "step": 44705 }, { "epoch": 7.293637846655791, "grad_norm": 0.06691129505634308, "learning_rate": 0.0008013620902049143, "loss": 0.0507, "num_input_tokens_seen": 96584464, "step": 44710 }, { "epoch": 7.294453507340946, "grad_norm": 0.632112443447113, "learning_rate": 0.0008013052892404118, "loss": 0.1124, "num_input_tokens_seen": 96595824, "step": 44715 }, { "epoch": 7.295269168026101, "grad_norm": 0.01686936616897583, "learning_rate": 0.0008012484821695754, "loss": 0.1156, "num_input_tokens_seen": 96607312, "step": 44720 }, { "epoch": 7.2960848287112565, "grad_norm": 0.008439785800874233, "learning_rate": 0.0008011916689935563, "loss": 0.0843, "num_input_tokens_seen": 96617680, "step": 44725 }, { "epoch": 7.296900489396411, "grad_norm": 0.020563535392284393, "learning_rate": 0.000801134849713506, "loss": 0.0913, "num_input_tokens_seen": 96627440, "step": 44730 }, { "epoch": 7.297716150081566, "grad_norm": 0.012274558655917645, "learning_rate": 0.0008010780243305758, "loss": 0.054, "num_input_tokens_seen": 96638064, "step": 44735 }, { "epoch": 7.298531810766721, "grad_norm": 0.03836611658334732, "learning_rate": 0.0008010211928459177, "loss": 0.0708, "num_input_tokens_seen": 96649840, "step": 44740 }, { "epoch": 7.299347471451876, "grad_norm": 0.02701750211417675, "learning_rate": 0.0008009643552606831, "loss": 0.008, "num_input_tokens_seen": 96660784, "step": 44745 }, { "epoch": 7.300163132137031, "grad_norm": 0.06201131269335747, "learning_rate": 0.0008009075115760243, "loss": 0.0744, "num_input_tokens_seen": 96672144, "step": 44750 }, { "epoch": 7.300978792822186, "grad_norm": 0.004208603873848915, "learning_rate": 0.0008008506617930926, "loss": 0.1152, "num_input_tokens_seen": 96684784, "step": 44755 }, { "epoch": 7.301794453507341, "grad_norm": 0.022887179628014565, "learning_rate": 0.000800793805913041, "loss": 0.0218, "num_input_tokens_seen": 96694576, "step": 44760 }, { "epoch": 7.302610114192496, "grad_norm": 0.014200643636286259, "learning_rate": 0.0008007369439370211, "loss": 0.0331, "num_input_tokens_seen": 96705072, "step": 44765 }, { "epoch": 7.303425774877651, "grad_norm": 0.0050778696313500404, "learning_rate": 0.0008006800758661856, "loss": 0.0181, "num_input_tokens_seen": 96716368, "step": 44770 }, { "epoch": 7.304241435562806, "grad_norm": 0.09507616609334946, "learning_rate": 0.000800623201701687, "loss": 0.0259, "num_input_tokens_seen": 96726576, "step": 44775 }, { "epoch": 7.30505709624796, "grad_norm": 0.009630247950553894, "learning_rate": 0.0008005663214446777, "loss": 0.0309, "num_input_tokens_seen": 96735952, "step": 44780 }, { "epoch": 7.305872756933116, "grad_norm": 0.0041391802951693535, "learning_rate": 0.0008005094350963107, "loss": 0.0134, "num_input_tokens_seen": 96746064, "step": 44785 }, { "epoch": 7.306688417618271, "grad_norm": 0.09530620276927948, "learning_rate": 0.0008004525426577387, "loss": 0.1831, "num_input_tokens_seen": 96755504, "step": 44790 }, { "epoch": 7.307504078303426, "grad_norm": 0.010489806532859802, "learning_rate": 0.0008003956441301149, "loss": 0.0492, "num_input_tokens_seen": 96766800, "step": 44795 }, { "epoch": 7.308319738988581, "grad_norm": 0.05959814041852951, "learning_rate": 0.0008003387395145922, "loss": 0.0286, "num_input_tokens_seen": 96776944, "step": 44800 }, { "epoch": 7.309135399673735, "grad_norm": 0.017102934420108795, "learning_rate": 0.0008002818288123239, "loss": 0.0684, "num_input_tokens_seen": 96788528, "step": 44805 }, { "epoch": 7.309951060358891, "grad_norm": 0.06432080268859863, "learning_rate": 0.0008002249120244635, "loss": 0.0208, "num_input_tokens_seen": 96799632, "step": 44810 }, { "epoch": 7.310766721044046, "grad_norm": 0.011474508792161942, "learning_rate": 0.0008001679891521642, "loss": 0.1318, "num_input_tokens_seen": 96810288, "step": 44815 }, { "epoch": 7.311582381729201, "grad_norm": 0.013832269236445427, "learning_rate": 0.00080011106019658, "loss": 0.0292, "num_input_tokens_seen": 96821360, "step": 44820 }, { "epoch": 7.3123980424143555, "grad_norm": 0.3839665949344635, "learning_rate": 0.0008000541251588644, "loss": 0.1622, "num_input_tokens_seen": 96832944, "step": 44825 }, { "epoch": 7.31321370309951, "grad_norm": 0.1246161013841629, "learning_rate": 0.0007999971840401714, "loss": 0.0526, "num_input_tokens_seen": 96843824, "step": 44830 }, { "epoch": 7.314029363784665, "grad_norm": 0.0059314328245818615, "learning_rate": 0.0007999402368416548, "loss": 0.0425, "num_input_tokens_seen": 96854416, "step": 44835 }, { "epoch": 7.314845024469821, "grad_norm": 0.31664329767227173, "learning_rate": 0.0007998832835644687, "loss": 0.0656, "num_input_tokens_seen": 96864976, "step": 44840 }, { "epoch": 7.315660685154976, "grad_norm": 0.28005892038345337, "learning_rate": 0.0007998263242097675, "loss": 0.1451, "num_input_tokens_seen": 96876400, "step": 44845 }, { "epoch": 7.3164763458401305, "grad_norm": 0.04676670581102371, "learning_rate": 0.0007997693587787056, "loss": 0.1371, "num_input_tokens_seen": 96887152, "step": 44850 }, { "epoch": 7.317292006525285, "grad_norm": 0.01653682440519333, "learning_rate": 0.0007997123872724373, "loss": 0.0475, "num_input_tokens_seen": 96896976, "step": 44855 }, { "epoch": 7.31810766721044, "grad_norm": 0.011989987455308437, "learning_rate": 0.0007996554096921172, "loss": 0.0299, "num_input_tokens_seen": 96908048, "step": 44860 }, { "epoch": 7.318923327895595, "grad_norm": 0.030787119641900063, "learning_rate": 0.0007995984260389001, "loss": 0.0846, "num_input_tokens_seen": 96918960, "step": 44865 }, { "epoch": 7.319738988580751, "grad_norm": 0.16728070378303528, "learning_rate": 0.0007995414363139408, "loss": 0.1015, "num_input_tokens_seen": 96929520, "step": 44870 }, { "epoch": 7.3205546492659055, "grad_norm": 0.009303702041506767, "learning_rate": 0.0007994844405183944, "loss": 0.0311, "num_input_tokens_seen": 96940560, "step": 44875 }, { "epoch": 7.32137030995106, "grad_norm": 0.10620930045843124, "learning_rate": 0.0007994274386534158, "loss": 0.0973, "num_input_tokens_seen": 96952496, "step": 44880 }, { "epoch": 7.322185970636215, "grad_norm": 0.251691609621048, "learning_rate": 0.0007993704307201604, "loss": 0.0335, "num_input_tokens_seen": 96963120, "step": 44885 }, { "epoch": 7.32300163132137, "grad_norm": 0.12604647874832153, "learning_rate": 0.0007993134167197833, "loss": 0.1191, "num_input_tokens_seen": 96973168, "step": 44890 }, { "epoch": 7.323817292006526, "grad_norm": 0.040935318917036057, "learning_rate": 0.0007992563966534403, "loss": 0.1098, "num_input_tokens_seen": 96984240, "step": 44895 }, { "epoch": 7.3246329526916805, "grad_norm": 0.020672090351581573, "learning_rate": 0.0007991993705222867, "loss": 0.0637, "num_input_tokens_seen": 96995504, "step": 44900 }, { "epoch": 7.325448613376835, "grad_norm": 0.3290887176990509, "learning_rate": 0.0007991423383274782, "loss": 0.1589, "num_input_tokens_seen": 97007312, "step": 44905 }, { "epoch": 7.32626427406199, "grad_norm": 0.05711958557367325, "learning_rate": 0.0007990853000701708, "loss": 0.067, "num_input_tokens_seen": 97019408, "step": 44910 }, { "epoch": 7.327079934747145, "grad_norm": 0.05303024500608444, "learning_rate": 0.0007990282557515204, "loss": 0.1594, "num_input_tokens_seen": 97031344, "step": 44915 }, { "epoch": 7.327895595432301, "grad_norm": 0.005337063688784838, "learning_rate": 0.0007989712053726829, "loss": 0.0444, "num_input_tokens_seen": 97041648, "step": 44920 }, { "epoch": 7.328711256117455, "grad_norm": 0.032504886388778687, "learning_rate": 0.0007989141489348149, "loss": 0.0186, "num_input_tokens_seen": 97052432, "step": 44925 }, { "epoch": 7.32952691680261, "grad_norm": 0.07863004505634308, "learning_rate": 0.0007988570864390723, "loss": 0.1965, "num_input_tokens_seen": 97062928, "step": 44930 }, { "epoch": 7.330342577487765, "grad_norm": 0.07831018418073654, "learning_rate": 0.0007988000178866117, "loss": 0.1547, "num_input_tokens_seen": 97073968, "step": 44935 }, { "epoch": 7.33115823817292, "grad_norm": 0.17209608852863312, "learning_rate": 0.0007987429432785897, "loss": 0.0334, "num_input_tokens_seen": 97085520, "step": 44940 }, { "epoch": 7.331973898858075, "grad_norm": 0.037289805710315704, "learning_rate": 0.000798685862616163, "loss": 0.0173, "num_input_tokens_seen": 97096080, "step": 44945 }, { "epoch": 7.33278955954323, "grad_norm": 0.05421232804656029, "learning_rate": 0.0007986287759004884, "loss": 0.0347, "num_input_tokens_seen": 97107152, "step": 44950 }, { "epoch": 7.333605220228385, "grad_norm": 0.01405141968280077, "learning_rate": 0.000798571683132723, "loss": 0.0802, "num_input_tokens_seen": 97117296, "step": 44955 }, { "epoch": 7.33442088091354, "grad_norm": 0.015522617846727371, "learning_rate": 0.0007985145843140233, "loss": 0.0175, "num_input_tokens_seen": 97127440, "step": 44960 }, { "epoch": 7.335236541598695, "grad_norm": 0.09611742943525314, "learning_rate": 0.0007984574794455472, "loss": 0.0393, "num_input_tokens_seen": 97138288, "step": 44965 }, { "epoch": 7.33605220228385, "grad_norm": 0.2853439450263977, "learning_rate": 0.0007984003685284516, "loss": 0.0372, "num_input_tokens_seen": 97150864, "step": 44970 }, { "epoch": 7.3368678629690045, "grad_norm": 0.14586283266544342, "learning_rate": 0.0007983432515638937, "loss": 0.023, "num_input_tokens_seen": 97161776, "step": 44975 }, { "epoch": 7.33768352365416, "grad_norm": 0.03794190287590027, "learning_rate": 0.0007982861285530317, "loss": 0.0311, "num_input_tokens_seen": 97175184, "step": 44980 }, { "epoch": 7.338499184339315, "grad_norm": 0.07810980826616287, "learning_rate": 0.0007982289994970227, "loss": 0.0555, "num_input_tokens_seen": 97185488, "step": 44985 }, { "epoch": 7.33931484502447, "grad_norm": 0.07301853597164154, "learning_rate": 0.0007981718643970246, "loss": 0.1488, "num_input_tokens_seen": 97196368, "step": 44990 }, { "epoch": 7.340130505709625, "grad_norm": 0.00231625372543931, "learning_rate": 0.0007981147232541956, "loss": 0.0321, "num_input_tokens_seen": 97205872, "step": 44995 }, { "epoch": 7.3409461663947795, "grad_norm": 0.007339373230934143, "learning_rate": 0.0007980575760696935, "loss": 0.0172, "num_input_tokens_seen": 97217072, "step": 45000 }, { "epoch": 7.341761827079935, "grad_norm": 0.002107437001541257, "learning_rate": 0.0007980004228446765, "loss": 0.0124, "num_input_tokens_seen": 97227216, "step": 45005 }, { "epoch": 7.34257748776509, "grad_norm": 0.4507990777492523, "learning_rate": 0.0007979432635803029, "loss": 0.2407, "num_input_tokens_seen": 97238064, "step": 45010 }, { "epoch": 7.343393148450245, "grad_norm": 0.054382532835006714, "learning_rate": 0.000797886098277731, "loss": 0.0231, "num_input_tokens_seen": 97249328, "step": 45015 }, { "epoch": 7.3442088091354, "grad_norm": 0.04891600459814072, "learning_rate": 0.0007978289269381196, "loss": 0.0437, "num_input_tokens_seen": 97260144, "step": 45020 }, { "epoch": 7.3450244698205545, "grad_norm": 0.4334297180175781, "learning_rate": 0.0007977717495626271, "loss": 0.1371, "num_input_tokens_seen": 97271024, "step": 45025 }, { "epoch": 7.345840130505709, "grad_norm": 0.024992918595671654, "learning_rate": 0.0007977145661524123, "loss": 0.1881, "num_input_tokens_seen": 97281776, "step": 45030 }, { "epoch": 7.346655791190865, "grad_norm": 0.06149057671427727, "learning_rate": 0.000797657376708634, "loss": 0.1039, "num_input_tokens_seen": 97292368, "step": 45035 }, { "epoch": 7.34747145187602, "grad_norm": 0.3460540175437927, "learning_rate": 0.0007976001812324516, "loss": 0.0483, "num_input_tokens_seen": 97303088, "step": 45040 }, { "epoch": 7.348287112561175, "grad_norm": 0.05683022737503052, "learning_rate": 0.0007975429797250239, "loss": 0.0936, "num_input_tokens_seen": 97313776, "step": 45045 }, { "epoch": 7.349102773246329, "grad_norm": 0.1997668594121933, "learning_rate": 0.0007974857721875102, "loss": 0.0713, "num_input_tokens_seen": 97323952, "step": 45050 }, { "epoch": 7.349918433931484, "grad_norm": 0.06268063187599182, "learning_rate": 0.0007974285586210701, "loss": 0.0096, "num_input_tokens_seen": 97334448, "step": 45055 }, { "epoch": 7.350734094616639, "grad_norm": 0.010480429045855999, "learning_rate": 0.0007973713390268629, "loss": 0.0375, "num_input_tokens_seen": 97345360, "step": 45060 }, { "epoch": 7.351549755301795, "grad_norm": 0.006281863432377577, "learning_rate": 0.0007973141134060483, "loss": 0.0453, "num_input_tokens_seen": 97356080, "step": 45065 }, { "epoch": 7.35236541598695, "grad_norm": 0.10553544759750366, "learning_rate": 0.0007972568817597857, "loss": 0.0659, "num_input_tokens_seen": 97367248, "step": 45070 }, { "epoch": 7.353181076672104, "grad_norm": 0.1673373132944107, "learning_rate": 0.0007971996440892356, "loss": 0.0627, "num_input_tokens_seen": 97378000, "step": 45075 }, { "epoch": 7.353996737357259, "grad_norm": 0.13530519604682922, "learning_rate": 0.0007971424003955577, "loss": 0.0529, "num_input_tokens_seen": 97388432, "step": 45080 }, { "epoch": 7.354812398042414, "grad_norm": 0.02766413986682892, "learning_rate": 0.0007970851506799119, "loss": 0.0407, "num_input_tokens_seen": 97397520, "step": 45085 }, { "epoch": 7.35562805872757, "grad_norm": 0.0584256574511528, "learning_rate": 0.0007970278949434588, "loss": 0.0141, "num_input_tokens_seen": 97407952, "step": 45090 }, { "epoch": 7.356443719412725, "grad_norm": 0.28726014494895935, "learning_rate": 0.0007969706331873586, "loss": 0.2492, "num_input_tokens_seen": 97419824, "step": 45095 }, { "epoch": 7.357259380097879, "grad_norm": 0.2784290313720703, "learning_rate": 0.0007969133654127718, "loss": 0.1095, "num_input_tokens_seen": 97431056, "step": 45100 }, { "epoch": 7.358075040783034, "grad_norm": 0.07640790939331055, "learning_rate": 0.0007968560916208589, "loss": 0.016, "num_input_tokens_seen": 97440976, "step": 45105 }, { "epoch": 7.358890701468189, "grad_norm": 0.06213083863258362, "learning_rate": 0.0007967988118127808, "loss": 0.1767, "num_input_tokens_seen": 97452720, "step": 45110 }, { "epoch": 7.359706362153344, "grad_norm": 0.0668218657374382, "learning_rate": 0.0007967415259896982, "loss": 0.0154, "num_input_tokens_seen": 97464240, "step": 45115 }, { "epoch": 7.3605220228384995, "grad_norm": 0.038856931030750275, "learning_rate": 0.0007966842341527722, "loss": 0.021, "num_input_tokens_seen": 97474032, "step": 45120 }, { "epoch": 7.361337683523654, "grad_norm": 0.03655475750565529, "learning_rate": 0.0007966269363031637, "loss": 0.0358, "num_input_tokens_seen": 97483952, "step": 45125 }, { "epoch": 7.362153344208809, "grad_norm": 0.005070575047284365, "learning_rate": 0.0007965696324420342, "loss": 0.045, "num_input_tokens_seen": 97495728, "step": 45130 }, { "epoch": 7.362969004893964, "grad_norm": 0.007292062509804964, "learning_rate": 0.0007965123225705447, "loss": 0.1607, "num_input_tokens_seen": 97507216, "step": 45135 }, { "epoch": 7.363784665579119, "grad_norm": 0.028081052005290985, "learning_rate": 0.000796455006689857, "loss": 0.0573, "num_input_tokens_seen": 97516880, "step": 45140 }, { "epoch": 7.364600326264274, "grad_norm": 0.19714754819869995, "learning_rate": 0.0007963976848011324, "loss": 0.0737, "num_input_tokens_seen": 97526928, "step": 45145 }, { "epoch": 7.365415986949429, "grad_norm": 0.006840975489467382, "learning_rate": 0.0007963403569055328, "loss": 0.0077, "num_input_tokens_seen": 97538192, "step": 45150 }, { "epoch": 7.366231647634584, "grad_norm": 0.016417210921645164, "learning_rate": 0.0007962830230042197, "loss": 0.009, "num_input_tokens_seen": 97547024, "step": 45155 }, { "epoch": 7.367047308319739, "grad_norm": 0.15059006214141846, "learning_rate": 0.0007962256830983556, "loss": 0.0252, "num_input_tokens_seen": 97557840, "step": 45160 }, { "epoch": 7.367862969004894, "grad_norm": 0.01066130492836237, "learning_rate": 0.0007961683371891019, "loss": 0.0116, "num_input_tokens_seen": 97569616, "step": 45165 }, { "epoch": 7.368678629690049, "grad_norm": 0.017685212194919586, "learning_rate": 0.0007961109852776214, "loss": 0.1703, "num_input_tokens_seen": 97580944, "step": 45170 }, { "epoch": 7.369494290375204, "grad_norm": 0.046484678983688354, "learning_rate": 0.0007960536273650761, "loss": 0.0465, "num_input_tokens_seen": 97590512, "step": 45175 }, { "epoch": 7.370309951060359, "grad_norm": 0.07047548145055771, "learning_rate": 0.0007959962634526285, "loss": 0.0777, "num_input_tokens_seen": 97600944, "step": 45180 }, { "epoch": 7.371125611745514, "grad_norm": 0.06450872123241425, "learning_rate": 0.0007959388935414411, "loss": 0.0801, "num_input_tokens_seen": 97611952, "step": 45185 }, { "epoch": 7.371941272430669, "grad_norm": 0.24986594915390015, "learning_rate": 0.0007958815176326764, "loss": 0.0463, "num_input_tokens_seen": 97624208, "step": 45190 }, { "epoch": 7.372756933115824, "grad_norm": 0.19626054167747498, "learning_rate": 0.0007958241357274976, "loss": 0.1218, "num_input_tokens_seen": 97635152, "step": 45195 }, { "epoch": 7.373572593800978, "grad_norm": 0.01864909753203392, "learning_rate": 0.0007957667478270674, "loss": 0.0121, "num_input_tokens_seen": 97646064, "step": 45200 }, { "epoch": 7.374388254486134, "grad_norm": 0.025611115619540215, "learning_rate": 0.0007957093539325489, "loss": 0.0788, "num_input_tokens_seen": 97657232, "step": 45205 }, { "epoch": 7.375203915171289, "grad_norm": 0.17264457046985626, "learning_rate": 0.000795651954045105, "loss": 0.0596, "num_input_tokens_seen": 97668176, "step": 45210 }, { "epoch": 7.376019575856444, "grad_norm": 0.021459899842739105, "learning_rate": 0.0007955945481658992, "loss": 0.069, "num_input_tokens_seen": 97678480, "step": 45215 }, { "epoch": 7.376835236541599, "grad_norm": 0.008294719271361828, "learning_rate": 0.0007955371362960951, "loss": 0.0507, "num_input_tokens_seen": 97688944, "step": 45220 }, { "epoch": 7.377650897226753, "grad_norm": 0.11790649592876434, "learning_rate": 0.000795479718436856, "loss": 0.1997, "num_input_tokens_seen": 97700592, "step": 45225 }, { "epoch": 7.378466557911908, "grad_norm": 0.0033008514437824488, "learning_rate": 0.0007954222945893455, "loss": 0.0278, "num_input_tokens_seen": 97710224, "step": 45230 }, { "epoch": 7.379282218597064, "grad_norm": 0.1821359246969223, "learning_rate": 0.0007953648647547274, "loss": 0.1602, "num_input_tokens_seen": 97720976, "step": 45235 }, { "epoch": 7.380097879282219, "grad_norm": 0.03195223957300186, "learning_rate": 0.0007953074289341655, "loss": 0.1326, "num_input_tokens_seen": 97732752, "step": 45240 }, { "epoch": 7.3809135399673735, "grad_norm": 0.008191731758415699, "learning_rate": 0.0007952499871288241, "loss": 0.0657, "num_input_tokens_seen": 97741648, "step": 45245 }, { "epoch": 7.381729200652528, "grad_norm": 0.28001055121421814, "learning_rate": 0.0007951925393398672, "loss": 0.1205, "num_input_tokens_seen": 97752816, "step": 45250 }, { "epoch": 7.382544861337683, "grad_norm": 0.13458704948425293, "learning_rate": 0.0007951350855684588, "loss": 0.0565, "num_input_tokens_seen": 97762864, "step": 45255 }, { "epoch": 7.383360522022839, "grad_norm": 0.14959220588207245, "learning_rate": 0.0007950776258157637, "loss": 0.0523, "num_input_tokens_seen": 97773424, "step": 45260 }, { "epoch": 7.384176182707994, "grad_norm": 0.20786850154399872, "learning_rate": 0.000795020160082946, "loss": 0.1556, "num_input_tokens_seen": 97783088, "step": 45265 }, { "epoch": 7.3849918433931485, "grad_norm": 0.012430977076292038, "learning_rate": 0.0007949626883711707, "loss": 0.0616, "num_input_tokens_seen": 97794032, "step": 45270 }, { "epoch": 7.385807504078303, "grad_norm": 0.01813976839184761, "learning_rate": 0.0007949052106816022, "loss": 0.0188, "num_input_tokens_seen": 97804336, "step": 45275 }, { "epoch": 7.386623164763458, "grad_norm": 0.2485961616039276, "learning_rate": 0.0007948477270154056, "loss": 0.1332, "num_input_tokens_seen": 97815312, "step": 45280 }, { "epoch": 7.387438825448613, "grad_norm": 0.021446917206048965, "learning_rate": 0.0007947902373737456, "loss": 0.024, "num_input_tokens_seen": 97825872, "step": 45285 }, { "epoch": 7.388254486133769, "grad_norm": 0.002523197792470455, "learning_rate": 0.0007947327417577875, "loss": 0.0537, "num_input_tokens_seen": 97836656, "step": 45290 }, { "epoch": 7.3890701468189235, "grad_norm": 0.006098889745771885, "learning_rate": 0.0007946752401686966, "loss": 0.0246, "num_input_tokens_seen": 97847824, "step": 45295 }, { "epoch": 7.389885807504078, "grad_norm": 0.04989955946803093, "learning_rate": 0.000794617732607638, "loss": 0.0457, "num_input_tokens_seen": 97857904, "step": 45300 }, { "epoch": 7.390701468189233, "grad_norm": 0.24254950881004333, "learning_rate": 0.0007945602190757775, "loss": 0.0901, "num_input_tokens_seen": 97867824, "step": 45305 }, { "epoch": 7.391517128874388, "grad_norm": 0.007170901633799076, "learning_rate": 0.0007945026995742803, "loss": 0.0133, "num_input_tokens_seen": 97876976, "step": 45310 }, { "epoch": 7.392332789559543, "grad_norm": 0.033077552914619446, "learning_rate": 0.0007944451741043124, "loss": 0.0667, "num_input_tokens_seen": 97887568, "step": 45315 }, { "epoch": 7.3931484502446985, "grad_norm": 0.03981386125087738, "learning_rate": 0.0007943876426670395, "loss": 0.0257, "num_input_tokens_seen": 97898576, "step": 45320 }, { "epoch": 7.393964110929853, "grad_norm": 0.027024945244193077, "learning_rate": 0.0007943301052636276, "loss": 0.0196, "num_input_tokens_seen": 97909072, "step": 45325 }, { "epoch": 7.394779771615008, "grad_norm": 0.02054162509739399, "learning_rate": 0.0007942725618952426, "loss": 0.0272, "num_input_tokens_seen": 97920016, "step": 45330 }, { "epoch": 7.395595432300163, "grad_norm": 0.0020551327615976334, "learning_rate": 0.000794215012563051, "loss": 0.0239, "num_input_tokens_seen": 97931888, "step": 45335 }, { "epoch": 7.396411092985318, "grad_norm": 0.1443719118833542, "learning_rate": 0.0007941574572682187, "loss": 0.0298, "num_input_tokens_seen": 97942896, "step": 45340 }, { "epoch": 7.397226753670473, "grad_norm": 0.0606955960392952, "learning_rate": 0.0007940998960119126, "loss": 0.0194, "num_input_tokens_seen": 97955152, "step": 45345 }, { "epoch": 7.398042414355628, "grad_norm": 0.016795361414551735, "learning_rate": 0.0007940423287952989, "loss": 0.0377, "num_input_tokens_seen": 97965776, "step": 45350 }, { "epoch": 7.398858075040783, "grad_norm": 0.004982766695320606, "learning_rate": 0.0007939847556195443, "loss": 0.014, "num_input_tokens_seen": 97976240, "step": 45355 }, { "epoch": 7.399673735725938, "grad_norm": 0.004065237939357758, "learning_rate": 0.0007939271764858158, "loss": 0.1471, "num_input_tokens_seen": 97986800, "step": 45360 }, { "epoch": 7.400489396411093, "grad_norm": 0.3481471836566925, "learning_rate": 0.0007938695913952802, "loss": 0.1904, "num_input_tokens_seen": 97997840, "step": 45365 }, { "epoch": 7.401305057096248, "grad_norm": 0.006440913304686546, "learning_rate": 0.0007938120003491045, "loss": 0.0164, "num_input_tokens_seen": 98008688, "step": 45370 }, { "epoch": 7.402120717781403, "grad_norm": 0.0016812963876873255, "learning_rate": 0.0007937544033484558, "loss": 0.0401, "num_input_tokens_seen": 98019920, "step": 45375 }, { "epoch": 7.402936378466558, "grad_norm": 0.004809595178812742, "learning_rate": 0.0007936968003945015, "loss": 0.0346, "num_input_tokens_seen": 98030576, "step": 45380 }, { "epoch": 7.403752039151713, "grad_norm": 0.057477615773677826, "learning_rate": 0.0007936391914884092, "loss": 0.1052, "num_input_tokens_seen": 98041104, "step": 45385 }, { "epoch": 7.404567699836868, "grad_norm": 0.009014656767249107, "learning_rate": 0.0007935815766313459, "loss": 0.0873, "num_input_tokens_seen": 98052048, "step": 45390 }, { "epoch": 7.4053833605220225, "grad_norm": 0.1354934573173523, "learning_rate": 0.0007935239558244795, "loss": 0.0482, "num_input_tokens_seen": 98062384, "step": 45395 }, { "epoch": 7.406199021207178, "grad_norm": 0.17773400247097015, "learning_rate": 0.000793466329068978, "loss": 0.1396, "num_input_tokens_seen": 98073232, "step": 45400 }, { "epoch": 7.407014681892333, "grad_norm": 0.07813189923763275, "learning_rate": 0.000793408696366009, "loss": 0.0567, "num_input_tokens_seen": 98085232, "step": 45405 }, { "epoch": 7.407830342577488, "grad_norm": 0.035286080092191696, "learning_rate": 0.0007933510577167404, "loss": 0.0474, "num_input_tokens_seen": 98095824, "step": 45410 }, { "epoch": 7.408646003262643, "grad_norm": 0.1701454520225525, "learning_rate": 0.0007932934131223406, "loss": 0.3416, "num_input_tokens_seen": 98106640, "step": 45415 }, { "epoch": 7.4094616639477975, "grad_norm": 0.011776736006140709, "learning_rate": 0.0007932357625839776, "loss": 0.0757, "num_input_tokens_seen": 98118384, "step": 45420 }, { "epoch": 7.410277324632952, "grad_norm": 0.2138156294822693, "learning_rate": 0.0007931781061028201, "loss": 0.1299, "num_input_tokens_seen": 98129168, "step": 45425 }, { "epoch": 7.411092985318108, "grad_norm": 0.01285554189234972, "learning_rate": 0.0007931204436800361, "loss": 0.0365, "num_input_tokens_seen": 98139408, "step": 45430 }, { "epoch": 7.411908646003263, "grad_norm": 0.2702222764492035, "learning_rate": 0.0007930627753167945, "loss": 0.1871, "num_input_tokens_seen": 98149744, "step": 45435 }, { "epoch": 7.412724306688418, "grad_norm": 0.1068132221698761, "learning_rate": 0.0007930051010142641, "loss": 0.0408, "num_input_tokens_seen": 98160528, "step": 45440 }, { "epoch": 7.4135399673735725, "grad_norm": 0.13657432794570923, "learning_rate": 0.0007929474207736136, "loss": 0.0438, "num_input_tokens_seen": 98170768, "step": 45445 }, { "epoch": 7.414355628058727, "grad_norm": 0.0018624071963131428, "learning_rate": 0.000792889734596012, "loss": 0.0343, "num_input_tokens_seen": 98180816, "step": 45450 }, { "epoch": 7.415171288743883, "grad_norm": 0.00952328834682703, "learning_rate": 0.0007928320424826284, "loss": 0.1863, "num_input_tokens_seen": 98191376, "step": 45455 }, { "epoch": 7.415986949429038, "grad_norm": 0.0030028768815100193, "learning_rate": 0.0007927743444346317, "loss": 0.0213, "num_input_tokens_seen": 98202320, "step": 45460 }, { "epoch": 7.416802610114193, "grad_norm": 0.555898904800415, "learning_rate": 0.0007927166404531916, "loss": 0.0916, "num_input_tokens_seen": 98212912, "step": 45465 }, { "epoch": 7.417618270799347, "grad_norm": 0.010686096735298634, "learning_rate": 0.0007926589305394776, "loss": 0.1291, "num_input_tokens_seen": 98224688, "step": 45470 }, { "epoch": 7.418433931484502, "grad_norm": 0.1383063644170761, "learning_rate": 0.0007926012146946591, "loss": 0.0652, "num_input_tokens_seen": 98235216, "step": 45475 }, { "epoch": 7.419249592169657, "grad_norm": 0.14411990344524384, "learning_rate": 0.0007925434929199058, "loss": 0.0662, "num_input_tokens_seen": 98245104, "step": 45480 }, { "epoch": 7.420065252854813, "grad_norm": 0.15971876680850983, "learning_rate": 0.0007924857652163873, "loss": 0.1169, "num_input_tokens_seen": 98256912, "step": 45485 }, { "epoch": 7.420880913539968, "grad_norm": 0.009572381153702736, "learning_rate": 0.0007924280315852739, "loss": 0.0096, "num_input_tokens_seen": 98266992, "step": 45490 }, { "epoch": 7.421696574225122, "grad_norm": 0.01877368800342083, "learning_rate": 0.0007923702920277355, "loss": 0.0309, "num_input_tokens_seen": 98277488, "step": 45495 }, { "epoch": 7.422512234910277, "grad_norm": 0.5380867123603821, "learning_rate": 0.0007923125465449421, "loss": 0.1168, "num_input_tokens_seen": 98287408, "step": 45500 }, { "epoch": 7.423327895595432, "grad_norm": 0.017852267250418663, "learning_rate": 0.0007922547951380643, "loss": 0.0567, "num_input_tokens_seen": 98299568, "step": 45505 }, { "epoch": 7.424143556280587, "grad_norm": 0.0074815042316913605, "learning_rate": 0.0007921970378082722, "loss": 0.0244, "num_input_tokens_seen": 98310800, "step": 45510 }, { "epoch": 7.424959216965743, "grad_norm": 0.4314858317375183, "learning_rate": 0.0007921392745567364, "loss": 0.0492, "num_input_tokens_seen": 98321104, "step": 45515 }, { "epoch": 7.425774877650897, "grad_norm": 0.012821875512599945, "learning_rate": 0.0007920815053846277, "loss": 0.1269, "num_input_tokens_seen": 98331312, "step": 45520 }, { "epoch": 7.426590538336052, "grad_norm": 0.10156050324440002, "learning_rate": 0.0007920237302931167, "loss": 0.049, "num_input_tokens_seen": 98341584, "step": 45525 }, { "epoch": 7.427406199021207, "grad_norm": 0.08236122131347656, "learning_rate": 0.0007919659492833744, "loss": 0.0163, "num_input_tokens_seen": 98351024, "step": 45530 }, { "epoch": 7.428221859706362, "grad_norm": 0.549354076385498, "learning_rate": 0.0007919081623565717, "loss": 0.0509, "num_input_tokens_seen": 98362224, "step": 45535 }, { "epoch": 7.4290375203915175, "grad_norm": 0.21111354231834412, "learning_rate": 0.0007918503695138799, "loss": 0.0355, "num_input_tokens_seen": 98373616, "step": 45540 }, { "epoch": 7.429853181076672, "grad_norm": 0.015019206330180168, "learning_rate": 0.0007917925707564699, "loss": 0.0085, "num_input_tokens_seen": 98384464, "step": 45545 }, { "epoch": 7.430668841761827, "grad_norm": 0.2213941514492035, "learning_rate": 0.0007917347660855134, "loss": 0.0996, "num_input_tokens_seen": 98394544, "step": 45550 }, { "epoch": 7.431484502446982, "grad_norm": 0.2764107286930084, "learning_rate": 0.0007916769555021819, "loss": 0.258, "num_input_tokens_seen": 98405904, "step": 45555 }, { "epoch": 7.432300163132137, "grad_norm": 0.005178771913051605, "learning_rate": 0.0007916191390076468, "loss": 0.0095, "num_input_tokens_seen": 98417296, "step": 45560 }, { "epoch": 7.433115823817292, "grad_norm": 0.006140346638858318, "learning_rate": 0.0007915613166030799, "loss": 0.1488, "num_input_tokens_seen": 98428496, "step": 45565 }, { "epoch": 7.433931484502447, "grad_norm": 0.05736982077360153, "learning_rate": 0.0007915034882896528, "loss": 0.0199, "num_input_tokens_seen": 98438480, "step": 45570 }, { "epoch": 7.434747145187602, "grad_norm": 0.018076226115226746, "learning_rate": 0.0007914456540685379, "loss": 0.1387, "num_input_tokens_seen": 98448784, "step": 45575 }, { "epoch": 7.435562805872757, "grad_norm": 0.5139619708061218, "learning_rate": 0.0007913878139409072, "loss": 0.1692, "num_input_tokens_seen": 98459952, "step": 45580 }, { "epoch": 7.436378466557912, "grad_norm": 0.09341350197792053, "learning_rate": 0.0007913299679079326, "loss": 0.1271, "num_input_tokens_seen": 98470256, "step": 45585 }, { "epoch": 7.437194127243067, "grad_norm": 0.025008317083120346, "learning_rate": 0.000791272115970787, "loss": 0.1429, "num_input_tokens_seen": 98481552, "step": 45590 }, { "epoch": 7.438009787928221, "grad_norm": 0.031664974987506866, "learning_rate": 0.0007912142581306421, "loss": 0.0941, "num_input_tokens_seen": 98492208, "step": 45595 }, { "epoch": 7.438825448613377, "grad_norm": 0.28572022914886475, "learning_rate": 0.0007911563943886709, "loss": 0.1129, "num_input_tokens_seen": 98502672, "step": 45600 }, { "epoch": 7.439641109298532, "grad_norm": 0.051957886666059494, "learning_rate": 0.000791098524746046, "loss": 0.074, "num_input_tokens_seen": 98514000, "step": 45605 }, { "epoch": 7.440456769983687, "grad_norm": 0.10540501028299332, "learning_rate": 0.0007910406492039404, "loss": 0.0328, "num_input_tokens_seen": 98525712, "step": 45610 }, { "epoch": 7.441272430668842, "grad_norm": 0.00548113789409399, "learning_rate": 0.0007909827677635267, "loss": 0.1057, "num_input_tokens_seen": 98535984, "step": 45615 }, { "epoch": 7.442088091353996, "grad_norm": 0.07310794293880463, "learning_rate": 0.000790924880425978, "loss": 0.049, "num_input_tokens_seen": 98548016, "step": 45620 }, { "epoch": 7.442903752039152, "grad_norm": 0.05371404439210892, "learning_rate": 0.0007908669871924676, "loss": 0.102, "num_input_tokens_seen": 98558736, "step": 45625 }, { "epoch": 7.443719412724307, "grad_norm": 0.033556073904037476, "learning_rate": 0.0007908090880641688, "loss": 0.148, "num_input_tokens_seen": 98570192, "step": 45630 }, { "epoch": 7.444535073409462, "grad_norm": 0.011591989547014236, "learning_rate": 0.0007907511830422547, "loss": 0.1166, "num_input_tokens_seen": 98582320, "step": 45635 }, { "epoch": 7.445350734094617, "grad_norm": 0.30247440934181213, "learning_rate": 0.0007906932721278992, "loss": 0.181, "num_input_tokens_seen": 98592656, "step": 45640 }, { "epoch": 7.446166394779771, "grad_norm": 0.004149232525378466, "learning_rate": 0.0007906353553222757, "loss": 0.0424, "num_input_tokens_seen": 98601936, "step": 45645 }, { "epoch": 7.446982055464926, "grad_norm": 0.2527449131011963, "learning_rate": 0.000790577432626558, "loss": 0.1473, "num_input_tokens_seen": 98613424, "step": 45650 }, { "epoch": 7.447797716150082, "grad_norm": 0.012989042326807976, "learning_rate": 0.0007905195040419202, "loss": 0.0551, "num_input_tokens_seen": 98623056, "step": 45655 }, { "epoch": 7.448613376835237, "grad_norm": 0.014558068476617336, "learning_rate": 0.0007904615695695359, "loss": 0.0796, "num_input_tokens_seen": 98633904, "step": 45660 }, { "epoch": 7.4494290375203915, "grad_norm": 0.037499744445085526, "learning_rate": 0.0007904036292105794, "loss": 0.0371, "num_input_tokens_seen": 98644560, "step": 45665 }, { "epoch": 7.450244698205546, "grad_norm": 0.06032086908817291, "learning_rate": 0.000790345682966225, "loss": 0.0921, "num_input_tokens_seen": 98655216, "step": 45670 }, { "epoch": 7.451060358890701, "grad_norm": 0.035724010318517685, "learning_rate": 0.000790287730837647, "loss": 0.0217, "num_input_tokens_seen": 98666288, "step": 45675 }, { "epoch": 7.451876019575856, "grad_norm": 0.19834664463996887, "learning_rate": 0.0007902297728260199, "loss": 0.1434, "num_input_tokens_seen": 98677584, "step": 45680 }, { "epoch": 7.452691680261012, "grad_norm": 0.11141058057546616, "learning_rate": 0.0007901718089325183, "loss": 0.0282, "num_input_tokens_seen": 98688016, "step": 45685 }, { "epoch": 7.4535073409461665, "grad_norm": 0.04874371364712715, "learning_rate": 0.0007901138391583169, "loss": 0.0702, "num_input_tokens_seen": 98698960, "step": 45690 }, { "epoch": 7.454323001631321, "grad_norm": 0.09582873433828354, "learning_rate": 0.0007900558635045904, "loss": 0.1012, "num_input_tokens_seen": 98709456, "step": 45695 }, { "epoch": 7.455138662316476, "grad_norm": 0.10680019855499268, "learning_rate": 0.000789997881972514, "loss": 0.13, "num_input_tokens_seen": 98720752, "step": 45700 }, { "epoch": 7.455954323001631, "grad_norm": 0.007165440358221531, "learning_rate": 0.0007899398945632626, "loss": 0.0142, "num_input_tokens_seen": 98731856, "step": 45705 }, { "epoch": 7.456769983686787, "grad_norm": 0.03316681459546089, "learning_rate": 0.0007898819012780114, "loss": 0.245, "num_input_tokens_seen": 98742160, "step": 45710 }, { "epoch": 7.4575856443719415, "grad_norm": 0.22914770245552063, "learning_rate": 0.0007898239021179356, "loss": 0.1253, "num_input_tokens_seen": 98751568, "step": 45715 }, { "epoch": 7.458401305057096, "grad_norm": 0.251247763633728, "learning_rate": 0.000789765897084211, "loss": 0.1455, "num_input_tokens_seen": 98763664, "step": 45720 }, { "epoch": 7.459216965742251, "grad_norm": 0.023140182718634605, "learning_rate": 0.0007897078861780127, "loss": 0.0735, "num_input_tokens_seen": 98774640, "step": 45725 }, { "epoch": 7.460032626427406, "grad_norm": 0.15593160688877106, "learning_rate": 0.0007896498694005168, "loss": 0.0445, "num_input_tokens_seen": 98784656, "step": 45730 }, { "epoch": 7.460848287112561, "grad_norm": 0.060400452464818954, "learning_rate": 0.0007895918467528987, "loss": 0.087, "num_input_tokens_seen": 98795792, "step": 45735 }, { "epoch": 7.4616639477977165, "grad_norm": 0.008460725657641888, "learning_rate": 0.0007895338182363343, "loss": 0.0236, "num_input_tokens_seen": 98806416, "step": 45740 }, { "epoch": 7.462479608482871, "grad_norm": 0.11589276045560837, "learning_rate": 0.0007894757838519999, "loss": 0.0469, "num_input_tokens_seen": 98817616, "step": 45745 }, { "epoch": 7.463295269168026, "grad_norm": 0.20811273157596588, "learning_rate": 0.0007894177436010716, "loss": 0.134, "num_input_tokens_seen": 98828304, "step": 45750 }, { "epoch": 7.464110929853181, "grad_norm": 0.02944091148674488, "learning_rate": 0.0007893596974847255, "loss": 0.0251, "num_input_tokens_seen": 98837648, "step": 45755 }, { "epoch": 7.464926590538336, "grad_norm": 0.2792534828186035, "learning_rate": 0.000789301645504138, "loss": 0.0759, "num_input_tokens_seen": 98849168, "step": 45760 }, { "epoch": 7.465742251223491, "grad_norm": 0.011199427768588066, "learning_rate": 0.0007892435876604857, "loss": 0.1193, "num_input_tokens_seen": 98859984, "step": 45765 }, { "epoch": 7.466557911908646, "grad_norm": 0.17388412356376648, "learning_rate": 0.0007891855239549453, "loss": 0.0971, "num_input_tokens_seen": 98870992, "step": 45770 }, { "epoch": 7.467373572593801, "grad_norm": 0.004538760520517826, "learning_rate": 0.0007891274543886933, "loss": 0.053, "num_input_tokens_seen": 98882160, "step": 45775 }, { "epoch": 7.468189233278956, "grad_norm": 0.22801204025745392, "learning_rate": 0.0007890693789629064, "loss": 0.0405, "num_input_tokens_seen": 98892880, "step": 45780 }, { "epoch": 7.469004893964111, "grad_norm": 0.049073074012994766, "learning_rate": 0.0007890112976787621, "loss": 0.1171, "num_input_tokens_seen": 98903984, "step": 45785 }, { "epoch": 7.4698205546492655, "grad_norm": 0.023845715448260307, "learning_rate": 0.0007889532105374373, "loss": 0.0861, "num_input_tokens_seen": 98914992, "step": 45790 }, { "epoch": 7.470636215334421, "grad_norm": 0.0964706763625145, "learning_rate": 0.0007888951175401089, "loss": 0.0648, "num_input_tokens_seen": 98924752, "step": 45795 }, { "epoch": 7.471451876019576, "grad_norm": 0.040027473121881485, "learning_rate": 0.0007888370186879545, "loss": 0.0357, "num_input_tokens_seen": 98935536, "step": 45800 }, { "epoch": 7.472267536704731, "grad_norm": 0.019557340070605278, "learning_rate": 0.0007887789139821516, "loss": 0.0342, "num_input_tokens_seen": 98946672, "step": 45805 }, { "epoch": 7.473083197389886, "grad_norm": 0.06337641179561615, "learning_rate": 0.0007887208034238777, "loss": 0.0753, "num_input_tokens_seen": 98956880, "step": 45810 }, { "epoch": 7.4738988580750405, "grad_norm": 0.019905684515833855, "learning_rate": 0.0007886626870143103, "loss": 0.0212, "num_input_tokens_seen": 98966192, "step": 45815 }, { "epoch": 7.474714518760196, "grad_norm": 0.18070393800735474, "learning_rate": 0.0007886045647546274, "loss": 0.2547, "num_input_tokens_seen": 98976528, "step": 45820 }, { "epoch": 7.475530179445351, "grad_norm": 0.16771891713142395, "learning_rate": 0.0007885464366460069, "loss": 0.0834, "num_input_tokens_seen": 98987152, "step": 45825 }, { "epoch": 7.476345840130506, "grad_norm": 0.04264623671770096, "learning_rate": 0.0007884883026896268, "loss": 0.0263, "num_input_tokens_seen": 98998992, "step": 45830 }, { "epoch": 7.477161500815661, "grad_norm": 0.006334866862744093, "learning_rate": 0.0007884301628866652, "loss": 0.0243, "num_input_tokens_seen": 99009328, "step": 45835 }, { "epoch": 7.4779771615008155, "grad_norm": 0.005012247245758772, "learning_rate": 0.0007883720172383007, "loss": 0.0347, "num_input_tokens_seen": 99019728, "step": 45840 }, { "epoch": 7.47879282218597, "grad_norm": 0.18069060146808624, "learning_rate": 0.0007883138657457111, "loss": 0.2113, "num_input_tokens_seen": 99031440, "step": 45845 }, { "epoch": 7.479608482871126, "grad_norm": 0.18505054712295532, "learning_rate": 0.0007882557084100755, "loss": 0.1176, "num_input_tokens_seen": 99042192, "step": 45850 }, { "epoch": 7.480424143556281, "grad_norm": 0.26251259446144104, "learning_rate": 0.0007881975452325722, "loss": 0.0476, "num_input_tokens_seen": 99053552, "step": 45855 }, { "epoch": 7.481239804241436, "grad_norm": 0.04688497632741928, "learning_rate": 0.00078813937621438, "loss": 0.1765, "num_input_tokens_seen": 99064208, "step": 45860 }, { "epoch": 7.4820554649265905, "grad_norm": 0.20419980585575104, "learning_rate": 0.000788081201356678, "loss": 0.1365, "num_input_tokens_seen": 99073680, "step": 45865 }, { "epoch": 7.482871125611745, "grad_norm": 0.026826782152056694, "learning_rate": 0.0007880230206606449, "loss": 0.0253, "num_input_tokens_seen": 99085360, "step": 45870 }, { "epoch": 7.4836867862969, "grad_norm": 0.09688873589038849, "learning_rate": 0.0007879648341274599, "loss": 0.065, "num_input_tokens_seen": 99096112, "step": 45875 }, { "epoch": 7.484502446982056, "grad_norm": 0.13799415528774261, "learning_rate": 0.0007879066417583021, "loss": 0.0537, "num_input_tokens_seen": 99106832, "step": 45880 }, { "epoch": 7.485318107667211, "grad_norm": 0.05497647821903229, "learning_rate": 0.0007878484435543511, "loss": 0.0885, "num_input_tokens_seen": 99118832, "step": 45885 }, { "epoch": 7.486133768352365, "grad_norm": 0.3244342505931854, "learning_rate": 0.0007877902395167862, "loss": 0.226, "num_input_tokens_seen": 99129776, "step": 45890 }, { "epoch": 7.48694942903752, "grad_norm": 0.03948109596967697, "learning_rate": 0.000787732029646787, "loss": 0.1059, "num_input_tokens_seen": 99141424, "step": 45895 }, { "epoch": 7.487765089722675, "grad_norm": 0.02498193085193634, "learning_rate": 0.0007876738139455332, "loss": 0.019, "num_input_tokens_seen": 99151888, "step": 45900 }, { "epoch": 7.488580750407831, "grad_norm": 0.1953345686197281, "learning_rate": 0.0007876155924142046, "loss": 0.1031, "num_input_tokens_seen": 99162128, "step": 45905 }, { "epoch": 7.489396411092986, "grad_norm": 0.0279481690376997, "learning_rate": 0.0007875573650539811, "loss": 0.222, "num_input_tokens_seen": 99172592, "step": 45910 }, { "epoch": 7.49021207177814, "grad_norm": 0.03121795877814293, "learning_rate": 0.0007874991318660429, "loss": 0.0298, "num_input_tokens_seen": 99183856, "step": 45915 }, { "epoch": 7.491027732463295, "grad_norm": 0.016351960599422455, "learning_rate": 0.0007874408928515702, "loss": 0.0499, "num_input_tokens_seen": 99195216, "step": 45920 }, { "epoch": 7.49184339314845, "grad_norm": 0.006542586255818605, "learning_rate": 0.000787382648011743, "loss": 0.0124, "num_input_tokens_seen": 99203984, "step": 45925 }, { "epoch": 7.492659053833605, "grad_norm": 0.16917765140533447, "learning_rate": 0.0007873243973477419, "loss": 0.0249, "num_input_tokens_seen": 99214448, "step": 45930 }, { "epoch": 7.493474714518761, "grad_norm": 0.05392063409090042, "learning_rate": 0.0007872661408607473, "loss": 0.1237, "num_input_tokens_seen": 99224976, "step": 45935 }, { "epoch": 7.494290375203915, "grad_norm": 0.10604605078697205, "learning_rate": 0.0007872078785519401, "loss": 0.0346, "num_input_tokens_seen": 99236944, "step": 45940 }, { "epoch": 7.49510603588907, "grad_norm": 0.13532358407974243, "learning_rate": 0.0007871496104225007, "loss": 0.0768, "num_input_tokens_seen": 99247984, "step": 45945 }, { "epoch": 7.495921696574225, "grad_norm": 0.05853382125496864, "learning_rate": 0.0007870913364736103, "loss": 0.1162, "num_input_tokens_seen": 99259344, "step": 45950 }, { "epoch": 7.49673735725938, "grad_norm": 0.03759206831455231, "learning_rate": 0.0007870330567064499, "loss": 0.0555, "num_input_tokens_seen": 99270576, "step": 45955 }, { "epoch": 7.497553017944535, "grad_norm": 0.12168195843696594, "learning_rate": 0.0007869747711222001, "loss": 0.0328, "num_input_tokens_seen": 99280848, "step": 45960 }, { "epoch": 7.49836867862969, "grad_norm": 0.22778406739234924, "learning_rate": 0.0007869164797220429, "loss": 0.2144, "num_input_tokens_seen": 99291536, "step": 45965 }, { "epoch": 7.499184339314845, "grad_norm": 0.17280970513820648, "learning_rate": 0.000786858182507159, "loss": 0.0656, "num_input_tokens_seen": 99302640, "step": 45970 }, { "epoch": 7.5, "grad_norm": 0.03377150744199753, "learning_rate": 0.0007867998794787303, "loss": 0.0341, "num_input_tokens_seen": 99314160, "step": 45975 }, { "epoch": 7.500815660685155, "grad_norm": 0.06262348592281342, "learning_rate": 0.0007867415706379381, "loss": 0.0689, "num_input_tokens_seen": 99325456, "step": 45980 }, { "epoch": 7.50163132137031, "grad_norm": 0.005712342448532581, "learning_rate": 0.0007866832559859642, "loss": 0.1304, "num_input_tokens_seen": 99336976, "step": 45985 }, { "epoch": 7.502446982055465, "grad_norm": 0.3855852782726288, "learning_rate": 0.0007866249355239905, "loss": 0.1711, "num_input_tokens_seen": 99347536, "step": 45990 }, { "epoch": 7.50326264274062, "grad_norm": 0.005695751868188381, "learning_rate": 0.0007865666092531989, "loss": 0.0842, "num_input_tokens_seen": 99359504, "step": 45995 }, { "epoch": 7.504078303425775, "grad_norm": 0.0534052737057209, "learning_rate": 0.0007865082771747713, "loss": 0.0217, "num_input_tokens_seen": 99370032, "step": 46000 }, { "epoch": 7.50489396411093, "grad_norm": 0.038309045135974884, "learning_rate": 0.00078644993928989, "loss": 0.0391, "num_input_tokens_seen": 99380816, "step": 46005 }, { "epoch": 7.505709624796085, "grad_norm": 0.010843995958566666, "learning_rate": 0.0007863915955997374, "loss": 0.0524, "num_input_tokens_seen": 99392592, "step": 46010 }, { "epoch": 7.506525285481239, "grad_norm": 0.017481878399848938, "learning_rate": 0.0007863332461054957, "loss": 0.0117, "num_input_tokens_seen": 99403760, "step": 46015 }, { "epoch": 7.507340946166395, "grad_norm": 0.03930390253663063, "learning_rate": 0.0007862748908083477, "loss": 0.0803, "num_input_tokens_seen": 99414768, "step": 46020 }, { "epoch": 7.50815660685155, "grad_norm": 0.07113105058670044, "learning_rate": 0.0007862165297094758, "loss": 0.0497, "num_input_tokens_seen": 99425040, "step": 46025 }, { "epoch": 7.508972267536705, "grad_norm": 0.015223911963403225, "learning_rate": 0.0007861581628100628, "loss": 0.0549, "num_input_tokens_seen": 99436016, "step": 46030 }, { "epoch": 7.50978792822186, "grad_norm": 0.03532985597848892, "learning_rate": 0.0007860997901112917, "loss": 0.091, "num_input_tokens_seen": 99448624, "step": 46035 }, { "epoch": 7.510603588907014, "grad_norm": 0.013420658186078072, "learning_rate": 0.0007860414116143453, "loss": 0.0429, "num_input_tokens_seen": 99458608, "step": 46040 }, { "epoch": 7.511419249592169, "grad_norm": 0.06856658309698105, "learning_rate": 0.0007859830273204069, "loss": 0.1099, "num_input_tokens_seen": 99469040, "step": 46045 }, { "epoch": 7.512234910277325, "grad_norm": 0.05944625288248062, "learning_rate": 0.0007859246372306595, "loss": 0.0799, "num_input_tokens_seen": 99479888, "step": 46050 }, { "epoch": 7.51305057096248, "grad_norm": 0.3955201208591461, "learning_rate": 0.0007858662413462867, "loss": 0.1141, "num_input_tokens_seen": 99489520, "step": 46055 }, { "epoch": 7.513866231647635, "grad_norm": 0.2453528493642807, "learning_rate": 0.000785807839668472, "loss": 0.1028, "num_input_tokens_seen": 99501136, "step": 46060 }, { "epoch": 7.514681892332789, "grad_norm": 0.3181774616241455, "learning_rate": 0.0007857494321983987, "loss": 0.0933, "num_input_tokens_seen": 99513040, "step": 46065 }, { "epoch": 7.515497553017944, "grad_norm": 0.05358253791928291, "learning_rate": 0.0007856910189372506, "loss": 0.0786, "num_input_tokens_seen": 99523760, "step": 46070 }, { "epoch": 7.5163132137031, "grad_norm": 0.005161238834261894, "learning_rate": 0.0007856325998862118, "loss": 0.051, "num_input_tokens_seen": 99533456, "step": 46075 }, { "epoch": 7.517128874388255, "grad_norm": 0.02480381354689598, "learning_rate": 0.0007855741750464658, "loss": 0.1233, "num_input_tokens_seen": 99544944, "step": 46080 }, { "epoch": 7.5179445350734095, "grad_norm": 0.02038867212831974, "learning_rate": 0.0007855157444191969, "loss": 0.0735, "num_input_tokens_seen": 99555152, "step": 46085 }, { "epoch": 7.518760195758564, "grad_norm": 0.006652286276221275, "learning_rate": 0.0007854573080055894, "loss": 0.0325, "num_input_tokens_seen": 99566256, "step": 46090 }, { "epoch": 7.519575856443719, "grad_norm": 0.31422188878059387, "learning_rate": 0.0007853988658068274, "loss": 0.0718, "num_input_tokens_seen": 99575952, "step": 46095 }, { "epoch": 7.520391517128875, "grad_norm": 0.26823487877845764, "learning_rate": 0.000785340417824095, "loss": 0.1506, "num_input_tokens_seen": 99588688, "step": 46100 }, { "epoch": 7.52120717781403, "grad_norm": 0.04891170188784599, "learning_rate": 0.0007852819640585773, "loss": 0.0632, "num_input_tokens_seen": 99599280, "step": 46105 }, { "epoch": 7.5220228384991845, "grad_norm": 0.11184188723564148, "learning_rate": 0.0007852235045114588, "loss": 0.0441, "num_input_tokens_seen": 99609776, "step": 46110 }, { "epoch": 7.522838499184339, "grad_norm": 0.07456564158201218, "learning_rate": 0.000785165039183924, "loss": 0.2539, "num_input_tokens_seen": 99620208, "step": 46115 }, { "epoch": 7.523654159869494, "grad_norm": 0.2406882345676422, "learning_rate": 0.0007851065680771581, "loss": 0.1051, "num_input_tokens_seen": 99631088, "step": 46120 }, { "epoch": 7.524469820554649, "grad_norm": 0.058970607817173004, "learning_rate": 0.0007850480911923457, "loss": 0.1523, "num_input_tokens_seen": 99643664, "step": 46125 }, { "epoch": 7.525285481239804, "grad_norm": 0.3528447151184082, "learning_rate": 0.0007849896085306723, "loss": 0.0559, "num_input_tokens_seen": 99652304, "step": 46130 }, { "epoch": 7.5261011419249595, "grad_norm": 0.08873096853494644, "learning_rate": 0.0007849311200933228, "loss": 0.1321, "num_input_tokens_seen": 99662416, "step": 46135 }, { "epoch": 7.526916802610114, "grad_norm": 0.041165851056575775, "learning_rate": 0.0007848726258814826, "loss": 0.045, "num_input_tokens_seen": 99672400, "step": 46140 }, { "epoch": 7.527732463295269, "grad_norm": 0.005180297419428825, "learning_rate": 0.0007848141258963375, "loss": 0.0275, "num_input_tokens_seen": 99683216, "step": 46145 }, { "epoch": 7.528548123980424, "grad_norm": 0.18040695786476135, "learning_rate": 0.0007847556201390727, "loss": 0.0728, "num_input_tokens_seen": 99695056, "step": 46150 }, { "epoch": 7.529363784665579, "grad_norm": 0.09594681859016418, "learning_rate": 0.0007846971086108741, "loss": 0.0779, "num_input_tokens_seen": 99704816, "step": 46155 }, { "epoch": 7.5301794453507345, "grad_norm": 0.0594286173582077, "learning_rate": 0.0007846385913129273, "loss": 0.0263, "num_input_tokens_seen": 99714704, "step": 46160 }, { "epoch": 7.530995106035889, "grad_norm": 0.32255420088768005, "learning_rate": 0.0007845800682464185, "loss": 0.0777, "num_input_tokens_seen": 99726000, "step": 46165 }, { "epoch": 7.531810766721044, "grad_norm": 0.3754183351993561, "learning_rate": 0.0007845215394125336, "loss": 0.1829, "num_input_tokens_seen": 99736688, "step": 46170 }, { "epoch": 7.532626427406199, "grad_norm": 0.25886958837509155, "learning_rate": 0.0007844630048124586, "loss": 0.1646, "num_input_tokens_seen": 99748400, "step": 46175 }, { "epoch": 7.533442088091354, "grad_norm": 0.38430964946746826, "learning_rate": 0.00078440446444738, "loss": 0.1293, "num_input_tokens_seen": 99759344, "step": 46180 }, { "epoch": 7.5342577487765094, "grad_norm": 0.02618015743792057, "learning_rate": 0.0007843459183184843, "loss": 0.0407, "num_input_tokens_seen": 99769616, "step": 46185 }, { "epoch": 7.535073409461664, "grad_norm": 0.23281855881214142, "learning_rate": 0.0007842873664269576, "loss": 0.0932, "num_input_tokens_seen": 99780656, "step": 46190 }, { "epoch": 7.535889070146819, "grad_norm": 0.265331894159317, "learning_rate": 0.0007842288087739868, "loss": 0.0768, "num_input_tokens_seen": 99790544, "step": 46195 }, { "epoch": 7.536704730831974, "grad_norm": 0.2676144242286682, "learning_rate": 0.0007841702453607589, "loss": 0.1303, "num_input_tokens_seen": 99801616, "step": 46200 }, { "epoch": 7.537520391517129, "grad_norm": 0.2378363013267517, "learning_rate": 0.0007841116761884601, "loss": 0.1382, "num_input_tokens_seen": 99812848, "step": 46205 }, { "epoch": 7.5383360522022835, "grad_norm": 0.02555975876748562, "learning_rate": 0.000784053101258278, "loss": 0.0295, "num_input_tokens_seen": 99823152, "step": 46210 }, { "epoch": 7.539151712887438, "grad_norm": 0.01895485259592533, "learning_rate": 0.0007839945205713995, "loss": 0.0686, "num_input_tokens_seen": 99834160, "step": 46215 }, { "epoch": 7.539967373572594, "grad_norm": 0.021273165941238403, "learning_rate": 0.0007839359341290116, "loss": 0.0391, "num_input_tokens_seen": 99844784, "step": 46220 }, { "epoch": 7.540783034257749, "grad_norm": 0.1254192292690277, "learning_rate": 0.0007838773419323019, "loss": 0.0602, "num_input_tokens_seen": 99855536, "step": 46225 }, { "epoch": 7.541598694942904, "grad_norm": 0.17011789977550507, "learning_rate": 0.0007838187439824577, "loss": 0.0475, "num_input_tokens_seen": 99866800, "step": 46230 }, { "epoch": 7.5424143556280585, "grad_norm": 0.272366464138031, "learning_rate": 0.0007837601402806666, "loss": 0.257, "num_input_tokens_seen": 99877520, "step": 46235 }, { "epoch": 7.543230016313213, "grad_norm": 0.4665481448173523, "learning_rate": 0.0007837015308281163, "loss": 0.1388, "num_input_tokens_seen": 99887632, "step": 46240 }, { "epoch": 7.544045676998369, "grad_norm": 0.007717117201536894, "learning_rate": 0.0007836429156259946, "loss": 0.0402, "num_input_tokens_seen": 99899664, "step": 46245 }, { "epoch": 7.544861337683524, "grad_norm": 0.3290517330169678, "learning_rate": 0.0007835842946754893, "loss": 0.0653, "num_input_tokens_seen": 99910928, "step": 46250 }, { "epoch": 7.545676998368679, "grad_norm": 0.18104241788387299, "learning_rate": 0.0007835256679777887, "loss": 0.0232, "num_input_tokens_seen": 99922128, "step": 46255 }, { "epoch": 7.5464926590538335, "grad_norm": 0.2232947051525116, "learning_rate": 0.0007834670355340805, "loss": 0.0346, "num_input_tokens_seen": 99932112, "step": 46260 }, { "epoch": 7.547308319738988, "grad_norm": 0.04595812410116196, "learning_rate": 0.0007834083973455535, "loss": 0.0139, "num_input_tokens_seen": 99942096, "step": 46265 }, { "epoch": 7.548123980424144, "grad_norm": 0.04887615144252777, "learning_rate": 0.0007833497534133955, "loss": 0.0205, "num_input_tokens_seen": 99953712, "step": 46270 }, { "epoch": 7.548939641109299, "grad_norm": 0.03450680151581764, "learning_rate": 0.0007832911037387955, "loss": 0.1711, "num_input_tokens_seen": 99965904, "step": 46275 }, { "epoch": 7.549755301794454, "grad_norm": 0.1929694563150406, "learning_rate": 0.000783232448322942, "loss": 0.044, "num_input_tokens_seen": 99977424, "step": 46280 }, { "epoch": 7.5505709624796085, "grad_norm": 0.12964440882205963, "learning_rate": 0.0007831737871670235, "loss": 0.0528, "num_input_tokens_seen": 99989072, "step": 46285 }, { "epoch": 7.551386623164763, "grad_norm": 0.009152422659099102, "learning_rate": 0.0007831151202722288, "loss": 0.0278, "num_input_tokens_seen": 99998992, "step": 46290 }, { "epoch": 7.552202283849918, "grad_norm": 0.01057891920208931, "learning_rate": 0.0007830564476397473, "loss": 0.2981, "num_input_tokens_seen": 100008976, "step": 46295 }, { "epoch": 7.553017944535073, "grad_norm": 0.09518894553184509, "learning_rate": 0.0007829977692707676, "loss": 0.0199, "num_input_tokens_seen": 100018672, "step": 46300 }, { "epoch": 7.553833605220229, "grad_norm": 0.24947939813137054, "learning_rate": 0.0007829390851664793, "loss": 0.2036, "num_input_tokens_seen": 100029904, "step": 46305 }, { "epoch": 7.554649265905383, "grad_norm": 0.14291471242904663, "learning_rate": 0.0007828803953280713, "loss": 0.0521, "num_input_tokens_seen": 100040304, "step": 46310 }, { "epoch": 7.555464926590538, "grad_norm": 0.04056019335985184, "learning_rate": 0.0007828216997567333, "loss": 0.0403, "num_input_tokens_seen": 100050544, "step": 46315 }, { "epoch": 7.556280587275693, "grad_norm": 0.027434013783931732, "learning_rate": 0.0007827629984536548, "loss": 0.0211, "num_input_tokens_seen": 100060496, "step": 46320 }, { "epoch": 7.557096247960848, "grad_norm": 0.014924759976565838, "learning_rate": 0.0007827042914200254, "loss": 0.057, "num_input_tokens_seen": 100071568, "step": 46325 }, { "epoch": 7.557911908646004, "grad_norm": 0.24778462946414948, "learning_rate": 0.000782645578657035, "loss": 0.2794, "num_input_tokens_seen": 100081840, "step": 46330 }, { "epoch": 7.558727569331158, "grad_norm": 0.1591310203075409, "learning_rate": 0.0007825868601658733, "loss": 0.0877, "num_input_tokens_seen": 100091472, "step": 46335 }, { "epoch": 7.559543230016313, "grad_norm": 0.13435420393943787, "learning_rate": 0.0007825281359477303, "loss": 0.0849, "num_input_tokens_seen": 100102448, "step": 46340 }, { "epoch": 7.560358890701468, "grad_norm": 0.029477981850504875, "learning_rate": 0.0007824694060037964, "loss": 0.0293, "num_input_tokens_seen": 100113360, "step": 46345 }, { "epoch": 7.561174551386623, "grad_norm": 0.10839947313070297, "learning_rate": 0.0007824106703352616, "loss": 0.0268, "num_input_tokens_seen": 100124912, "step": 46350 }, { "epoch": 7.561990212071779, "grad_norm": 0.2539820075035095, "learning_rate": 0.0007823519289433162, "loss": 0.0822, "num_input_tokens_seen": 100134512, "step": 46355 }, { "epoch": 7.562805872756933, "grad_norm": 0.29448628425598145, "learning_rate": 0.0007822931818291508, "loss": 0.0657, "num_input_tokens_seen": 100146160, "step": 46360 }, { "epoch": 7.563621533442088, "grad_norm": 0.007594697643071413, "learning_rate": 0.0007822344289939561, "loss": 0.0502, "num_input_tokens_seen": 100157264, "step": 46365 }, { "epoch": 7.564437194127243, "grad_norm": 0.045168206095695496, "learning_rate": 0.0007821756704389224, "loss": 0.1212, "num_input_tokens_seen": 100168400, "step": 46370 }, { "epoch": 7.565252854812398, "grad_norm": 0.4243335723876953, "learning_rate": 0.000782116906165241, "loss": 0.1991, "num_input_tokens_seen": 100179632, "step": 46375 }, { "epoch": 7.566068515497553, "grad_norm": 0.08852332830429077, "learning_rate": 0.0007820581361741025, "loss": 0.0195, "num_input_tokens_seen": 100191600, "step": 46380 }, { "epoch": 7.566884176182708, "grad_norm": 0.07176997512578964, "learning_rate": 0.0007819993604666982, "loss": 0.2055, "num_input_tokens_seen": 100202736, "step": 46385 }, { "epoch": 7.567699836867863, "grad_norm": 0.09689157456159592, "learning_rate": 0.0007819405790442189, "loss": 0.0999, "num_input_tokens_seen": 100213104, "step": 46390 }, { "epoch": 7.568515497553018, "grad_norm": 0.002046206733211875, "learning_rate": 0.0007818817919078562, "loss": 0.1328, "num_input_tokens_seen": 100223408, "step": 46395 }, { "epoch": 7.569331158238173, "grad_norm": 0.005216080229729414, "learning_rate": 0.0007818229990588013, "loss": 0.2068, "num_input_tokens_seen": 100233872, "step": 46400 }, { "epoch": 7.570146818923328, "grad_norm": 0.0509600006043911, "learning_rate": 0.000781764200498246, "loss": 0.1134, "num_input_tokens_seen": 100244528, "step": 46405 }, { "epoch": 7.5709624796084825, "grad_norm": 0.061129264533519745, "learning_rate": 0.0007817053962273817, "loss": 0.0379, "num_input_tokens_seen": 100255344, "step": 46410 }, { "epoch": 7.571778140293638, "grad_norm": 0.08482226729393005, "learning_rate": 0.0007816465862474, "loss": 0.1293, "num_input_tokens_seen": 100266448, "step": 46415 }, { "epoch": 7.572593800978793, "grad_norm": 0.007290207780897617, "learning_rate": 0.000781587770559493, "loss": 0.0334, "num_input_tokens_seen": 100277584, "step": 46420 }, { "epoch": 7.573409461663948, "grad_norm": 0.00973653793334961, "learning_rate": 0.0007815289491648527, "loss": 0.039, "num_input_tokens_seen": 100290160, "step": 46425 }, { "epoch": 7.574225122349103, "grad_norm": 0.0023570421617478132, "learning_rate": 0.000781470122064671, "loss": 0.0381, "num_input_tokens_seen": 100300784, "step": 46430 }, { "epoch": 7.575040783034257, "grad_norm": 0.001977626234292984, "learning_rate": 0.0007814112892601403, "loss": 0.1092, "num_input_tokens_seen": 100311344, "step": 46435 }, { "epoch": 7.575856443719413, "grad_norm": 0.0060659232549369335, "learning_rate": 0.0007813524507524527, "loss": 0.0216, "num_input_tokens_seen": 100322608, "step": 46440 }, { "epoch": 7.576672104404568, "grad_norm": 0.060652635991573334, "learning_rate": 0.0007812936065428009, "loss": 0.0623, "num_input_tokens_seen": 100332912, "step": 46445 }, { "epoch": 7.577487765089723, "grad_norm": 0.0259055495262146, "learning_rate": 0.0007812347566323774, "loss": 0.1047, "num_input_tokens_seen": 100344144, "step": 46450 }, { "epoch": 7.578303425774878, "grad_norm": 0.02046637050807476, "learning_rate": 0.0007811759010223747, "loss": 0.0468, "num_input_tokens_seen": 100355856, "step": 46455 }, { "epoch": 7.579119086460032, "grad_norm": 0.18777872622013092, "learning_rate": 0.0007811170397139855, "loss": 0.056, "num_input_tokens_seen": 100366960, "step": 46460 }, { "epoch": 7.579934747145187, "grad_norm": 0.10581226646900177, "learning_rate": 0.000781058172708403, "loss": 0.0651, "num_input_tokens_seen": 100377584, "step": 46465 }, { "epoch": 7.580750407830343, "grad_norm": 0.22157377004623413, "learning_rate": 0.00078099930000682, "loss": 0.0466, "num_input_tokens_seen": 100389424, "step": 46470 }, { "epoch": 7.581566068515498, "grad_norm": 0.39631515741348267, "learning_rate": 0.0007809404216104299, "loss": 0.1734, "num_input_tokens_seen": 100400688, "step": 46475 }, { "epoch": 7.582381729200653, "grad_norm": 0.09392768889665604, "learning_rate": 0.0007808815375204257, "loss": 0.1044, "num_input_tokens_seen": 100410448, "step": 46480 }, { "epoch": 7.583197389885807, "grad_norm": 0.044808097183704376, "learning_rate": 0.0007808226477380007, "loss": 0.0971, "num_input_tokens_seen": 100421232, "step": 46485 }, { "epoch": 7.584013050570962, "grad_norm": 0.009121015667915344, "learning_rate": 0.0007807637522643484, "loss": 0.0537, "num_input_tokens_seen": 100431312, "step": 46490 }, { "epoch": 7.584828711256117, "grad_norm": 0.08114711195230484, "learning_rate": 0.0007807048511006628, "loss": 0.1994, "num_input_tokens_seen": 100441936, "step": 46495 }, { "epoch": 7.585644371941273, "grad_norm": 0.20501329004764557, "learning_rate": 0.0007806459442481372, "loss": 0.1497, "num_input_tokens_seen": 100453488, "step": 46500 }, { "epoch": 7.5864600326264275, "grad_norm": 0.2496049851179123, "learning_rate": 0.0007805870317079654, "loss": 0.1331, "num_input_tokens_seen": 100466224, "step": 46505 }, { "epoch": 7.587275693311582, "grad_norm": 0.04062649607658386, "learning_rate": 0.0007805281134813416, "loss": 0.0313, "num_input_tokens_seen": 100476560, "step": 46510 }, { "epoch": 7.588091353996737, "grad_norm": 0.266244500875473, "learning_rate": 0.0007804691895694595, "loss": 0.0772, "num_input_tokens_seen": 100487184, "step": 46515 }, { "epoch": 7.588907014681892, "grad_norm": 0.010720369406044483, "learning_rate": 0.0007804102599735137, "loss": 0.0226, "num_input_tokens_seen": 100498608, "step": 46520 }, { "epoch": 7.589722675367048, "grad_norm": 0.009115898050367832, "learning_rate": 0.0007803513246946981, "loss": 0.0456, "num_input_tokens_seen": 100509104, "step": 46525 }, { "epoch": 7.5905383360522025, "grad_norm": 0.019223831593990326, "learning_rate": 0.0007802923837342072, "loss": 0.1405, "num_input_tokens_seen": 100520368, "step": 46530 }, { "epoch": 7.591353996737357, "grad_norm": 0.04066868871450424, "learning_rate": 0.0007802334370932357, "loss": 0.0156, "num_input_tokens_seen": 100530928, "step": 46535 }, { "epoch": 7.592169657422512, "grad_norm": 0.10982602834701538, "learning_rate": 0.0007801744847729781, "loss": 0.0714, "num_input_tokens_seen": 100542960, "step": 46540 }, { "epoch": 7.592985318107667, "grad_norm": 0.006750395521521568, "learning_rate": 0.0007801155267746291, "loss": 0.0703, "num_input_tokens_seen": 100553232, "step": 46545 }, { "epoch": 7.593800978792823, "grad_norm": 0.008056842721998692, "learning_rate": 0.0007800565630993834, "loss": 0.0737, "num_input_tokens_seen": 100565552, "step": 46550 }, { "epoch": 7.5946166394779775, "grad_norm": 0.07503590732812881, "learning_rate": 0.0007799975937484365, "loss": 0.0222, "num_input_tokens_seen": 100576592, "step": 46555 }, { "epoch": 7.595432300163132, "grad_norm": 0.011532962322235107, "learning_rate": 0.000779938618722983, "loss": 0.0278, "num_input_tokens_seen": 100586512, "step": 46560 }, { "epoch": 7.596247960848287, "grad_norm": 0.3396737575531006, "learning_rate": 0.0007798796380242183, "loss": 0.2314, "num_input_tokens_seen": 100597584, "step": 46565 }, { "epoch": 7.597063621533442, "grad_norm": 0.06101497262716293, "learning_rate": 0.0007798206516533377, "loss": 0.0452, "num_input_tokens_seen": 100609168, "step": 46570 }, { "epoch": 7.597879282218597, "grad_norm": 0.2853509187698364, "learning_rate": 0.0007797616596115365, "loss": 0.0593, "num_input_tokens_seen": 100620368, "step": 46575 }, { "epoch": 7.598694942903752, "grad_norm": 0.08300846070051193, "learning_rate": 0.0007797026619000105, "loss": 0.0298, "num_input_tokens_seen": 100630160, "step": 46580 }, { "epoch": 7.599510603588907, "grad_norm": 0.12231241166591644, "learning_rate": 0.0007796436585199553, "loss": 0.0236, "num_input_tokens_seen": 100641872, "step": 46585 }, { "epoch": 7.600326264274062, "grad_norm": 0.46338924765586853, "learning_rate": 0.0007795846494725665, "loss": 0.2078, "num_input_tokens_seen": 100653168, "step": 46590 }, { "epoch": 7.601141924959217, "grad_norm": 0.11617031693458557, "learning_rate": 0.00077952563475904, "loss": 0.056, "num_input_tokens_seen": 100664144, "step": 46595 }, { "epoch": 7.601957585644372, "grad_norm": 0.1972285658121109, "learning_rate": 0.000779466614380572, "loss": 0.1623, "num_input_tokens_seen": 100674640, "step": 46600 }, { "epoch": 7.602773246329527, "grad_norm": 0.00785736832767725, "learning_rate": 0.0007794075883383586, "loss": 0.0509, "num_input_tokens_seen": 100685040, "step": 46605 }, { "epoch": 7.603588907014682, "grad_norm": 0.09408126026391983, "learning_rate": 0.0007793485566335958, "loss": 0.1419, "num_input_tokens_seen": 100695408, "step": 46610 }, { "epoch": 7.604404567699837, "grad_norm": 0.08680996298789978, "learning_rate": 0.0007792895192674802, "loss": 0.0433, "num_input_tokens_seen": 100705840, "step": 46615 }, { "epoch": 7.605220228384992, "grad_norm": 0.37143993377685547, "learning_rate": 0.0007792304762412084, "loss": 0.1415, "num_input_tokens_seen": 100716944, "step": 46620 }, { "epoch": 7.606035889070147, "grad_norm": 0.0823369175195694, "learning_rate": 0.0007791714275559765, "loss": 0.032, "num_input_tokens_seen": 100729072, "step": 46625 }, { "epoch": 7.6068515497553015, "grad_norm": 0.01576918736100197, "learning_rate": 0.0007791123732129815, "loss": 0.1349, "num_input_tokens_seen": 100739952, "step": 46630 }, { "epoch": 7.607667210440457, "grad_norm": 1.0202510356903076, "learning_rate": 0.0007790533132134201, "loss": 0.0627, "num_input_tokens_seen": 100751344, "step": 46635 }, { "epoch": 7.608482871125612, "grad_norm": 0.010519228875637054, "learning_rate": 0.0007789942475584894, "loss": 0.0128, "num_input_tokens_seen": 100761712, "step": 46640 }, { "epoch": 7.609298531810767, "grad_norm": 0.07782994210720062, "learning_rate": 0.0007789351762493865, "loss": 0.086, "num_input_tokens_seen": 100773040, "step": 46645 }, { "epoch": 7.610114192495922, "grad_norm": 0.09352076053619385, "learning_rate": 0.0007788760992873083, "loss": 0.0475, "num_input_tokens_seen": 100782448, "step": 46650 }, { "epoch": 7.6109298531810765, "grad_norm": 0.16804049909114838, "learning_rate": 0.000778817016673452, "loss": 0.0503, "num_input_tokens_seen": 100792720, "step": 46655 }, { "epoch": 7.611745513866231, "grad_norm": 0.05742299184203148, "learning_rate": 0.0007787579284090154, "loss": 0.1065, "num_input_tokens_seen": 100804400, "step": 46660 }, { "epoch": 7.612561174551386, "grad_norm": 0.002605182584375143, "learning_rate": 0.0007786988344951956, "loss": 0.0223, "num_input_tokens_seen": 100814704, "step": 46665 }, { "epoch": 7.613376835236542, "grad_norm": 0.2340633124113083, "learning_rate": 0.0007786397349331904, "loss": 0.0414, "num_input_tokens_seen": 100823632, "step": 46670 }, { "epoch": 7.614192495921697, "grad_norm": 0.011929473839700222, "learning_rate": 0.0007785806297241976, "loss": 0.0075, "num_input_tokens_seen": 100834544, "step": 46675 }, { "epoch": 7.6150081566068515, "grad_norm": 0.1309245228767395, "learning_rate": 0.0007785215188694148, "loss": 0.0354, "num_input_tokens_seen": 100846864, "step": 46680 }, { "epoch": 7.615823817292006, "grad_norm": 0.0732983872294426, "learning_rate": 0.0007784624023700402, "loss": 0.0269, "num_input_tokens_seen": 100858480, "step": 46685 }, { "epoch": 7.616639477977161, "grad_norm": 0.2625514566898346, "learning_rate": 0.0007784032802272716, "loss": 0.2307, "num_input_tokens_seen": 100869584, "step": 46690 }, { "epoch": 7.617455138662317, "grad_norm": 0.1511547863483429, "learning_rate": 0.0007783441524423074, "loss": 0.1718, "num_input_tokens_seen": 100879248, "step": 46695 }, { "epoch": 7.618270799347472, "grad_norm": 0.05024776607751846, "learning_rate": 0.0007782850190163459, "loss": 0.0707, "num_input_tokens_seen": 100890288, "step": 46700 }, { "epoch": 7.6190864600326265, "grad_norm": 0.2799839377403259, "learning_rate": 0.0007782258799505855, "loss": 0.0376, "num_input_tokens_seen": 100901616, "step": 46705 }, { "epoch": 7.619902120717781, "grad_norm": 0.08367808163166046, "learning_rate": 0.0007781667352462245, "loss": 0.2048, "num_input_tokens_seen": 100912496, "step": 46710 }, { "epoch": 7.620717781402936, "grad_norm": 0.07492048293352127, "learning_rate": 0.0007781075849044619, "loss": 0.0655, "num_input_tokens_seen": 100924656, "step": 46715 }, { "epoch": 7.621533442088092, "grad_norm": 0.36795780062675476, "learning_rate": 0.0007780484289264961, "loss": 0.0932, "num_input_tokens_seen": 100936176, "step": 46720 }, { "epoch": 7.622349102773247, "grad_norm": 0.14871415495872498, "learning_rate": 0.0007779892673135264, "loss": 0.1147, "num_input_tokens_seen": 100947312, "step": 46725 }, { "epoch": 7.623164763458401, "grad_norm": 0.03786802291870117, "learning_rate": 0.0007779301000667516, "loss": 0.0893, "num_input_tokens_seen": 100957232, "step": 46730 }, { "epoch": 7.623980424143556, "grad_norm": 0.20446109771728516, "learning_rate": 0.0007778709271873706, "loss": 0.1756, "num_input_tokens_seen": 100968592, "step": 46735 }, { "epoch": 7.624796084828711, "grad_norm": 0.030702682211995125, "learning_rate": 0.0007778117486765825, "loss": 0.1157, "num_input_tokens_seen": 100979024, "step": 46740 }, { "epoch": 7.625611745513866, "grad_norm": 0.02638734132051468, "learning_rate": 0.0007777525645355872, "loss": 0.0571, "num_input_tokens_seen": 100989712, "step": 46745 }, { "epoch": 7.626427406199021, "grad_norm": 0.004499876406043768, "learning_rate": 0.0007776933747655838, "loss": 0.1112, "num_input_tokens_seen": 101000784, "step": 46750 }, { "epoch": 7.627243066884176, "grad_norm": 0.056049101054668427, "learning_rate": 0.0007776341793677719, "loss": 0.0751, "num_input_tokens_seen": 101011600, "step": 46755 }, { "epoch": 7.628058727569331, "grad_norm": 0.14655882120132446, "learning_rate": 0.000777574978343351, "loss": 0.0671, "num_input_tokens_seen": 101022160, "step": 46760 }, { "epoch": 7.628874388254486, "grad_norm": 0.04394836723804474, "learning_rate": 0.000777515771693521, "loss": 0.0459, "num_input_tokens_seen": 101034192, "step": 46765 }, { "epoch": 7.629690048939641, "grad_norm": 0.01639639027416706, "learning_rate": 0.0007774565594194821, "loss": 0.0374, "num_input_tokens_seen": 101044592, "step": 46770 }, { "epoch": 7.630505709624796, "grad_norm": 0.14619885385036469, "learning_rate": 0.0007773973415224339, "loss": 0.0513, "num_input_tokens_seen": 101054640, "step": 46775 }, { "epoch": 7.631321370309951, "grad_norm": 0.017432374879717827, "learning_rate": 0.0007773381180035766, "loss": 0.0684, "num_input_tokens_seen": 101066160, "step": 46780 }, { "epoch": 7.632137030995106, "grad_norm": 0.15743707120418549, "learning_rate": 0.0007772788888641107, "loss": 0.1699, "num_input_tokens_seen": 101078000, "step": 46785 }, { "epoch": 7.632952691680261, "grad_norm": 0.1914764940738678, "learning_rate": 0.0007772196541052361, "loss": 0.1399, "num_input_tokens_seen": 101088272, "step": 46790 }, { "epoch": 7.633768352365416, "grad_norm": 0.05888279527425766, "learning_rate": 0.0007771604137281538, "loss": 0.08, "num_input_tokens_seen": 101098384, "step": 46795 }, { "epoch": 7.634584013050571, "grad_norm": 0.005790786352008581, "learning_rate": 0.0007771011677340639, "loss": 0.0977, "num_input_tokens_seen": 101110064, "step": 46800 }, { "epoch": 7.635399673735726, "grad_norm": 0.13105180859565735, "learning_rate": 0.0007770419161241675, "loss": 0.0936, "num_input_tokens_seen": 101121200, "step": 46805 }, { "epoch": 7.636215334420881, "grad_norm": 0.06090496852993965, "learning_rate": 0.0007769826588996651, "loss": 0.0136, "num_input_tokens_seen": 101132208, "step": 46810 }, { "epoch": 7.637030995106036, "grad_norm": 0.023098204284906387, "learning_rate": 0.0007769233960617576, "loss": 0.0248, "num_input_tokens_seen": 101141744, "step": 46815 }, { "epoch": 7.637846655791191, "grad_norm": 0.03087800368666649, "learning_rate": 0.0007768641276116465, "loss": 0.0233, "num_input_tokens_seen": 101151728, "step": 46820 }, { "epoch": 7.638662316476346, "grad_norm": 0.31353387236595154, "learning_rate": 0.0007768048535505324, "loss": 0.116, "num_input_tokens_seen": 101162160, "step": 46825 }, { "epoch": 7.6394779771615005, "grad_norm": 0.27113598585128784, "learning_rate": 0.0007767455738796169, "loss": 0.0587, "num_input_tokens_seen": 101172816, "step": 46830 }, { "epoch": 7.640293637846656, "grad_norm": 0.14102505147457123, "learning_rate": 0.0007766862886001011, "loss": 0.0897, "num_input_tokens_seen": 101183344, "step": 46835 }, { "epoch": 7.641109298531811, "grad_norm": 0.028963766992092133, "learning_rate": 0.0007766269977131868, "loss": 0.0281, "num_input_tokens_seen": 101193680, "step": 46840 }, { "epoch": 7.641924959216966, "grad_norm": 0.007863939739763737, "learning_rate": 0.0007765677012200753, "loss": 0.0303, "num_input_tokens_seen": 101205808, "step": 46845 }, { "epoch": 7.642740619902121, "grad_norm": 0.11236032843589783, "learning_rate": 0.0007765083991219688, "loss": 0.0624, "num_input_tokens_seen": 101216400, "step": 46850 }, { "epoch": 7.643556280587275, "grad_norm": 0.20675627887248993, "learning_rate": 0.0007764490914200686, "loss": 0.0503, "num_input_tokens_seen": 101226480, "step": 46855 }, { "epoch": 7.64437194127243, "grad_norm": 0.03227461874485016, "learning_rate": 0.0007763897781155769, "loss": 0.0144, "num_input_tokens_seen": 101235952, "step": 46860 }, { "epoch": 7.645187601957586, "grad_norm": 0.07756864279508591, "learning_rate": 0.0007763304592096956, "loss": 0.0173, "num_input_tokens_seen": 101247280, "step": 46865 }, { "epoch": 7.646003262642741, "grad_norm": 0.23163361847400665, "learning_rate": 0.0007762711347036273, "loss": 0.1045, "num_input_tokens_seen": 101258224, "step": 46870 }, { "epoch": 7.646818923327896, "grad_norm": 0.01285717636346817, "learning_rate": 0.0007762118045985738, "loss": 0.0289, "num_input_tokens_seen": 101268688, "step": 46875 }, { "epoch": 7.64763458401305, "grad_norm": 0.019179528579115868, "learning_rate": 0.0007761524688957377, "loss": 0.0728, "num_input_tokens_seen": 101279568, "step": 46880 }, { "epoch": 7.648450244698205, "grad_norm": 0.03536270931363106, "learning_rate": 0.0007760931275963215, "loss": 0.0187, "num_input_tokens_seen": 101291568, "step": 46885 }, { "epoch": 7.649265905383361, "grad_norm": 0.021652111783623695, "learning_rate": 0.0007760337807015276, "loss": 0.0341, "num_input_tokens_seen": 101302320, "step": 46890 }, { "epoch": 7.650081566068516, "grad_norm": 0.007089455612003803, "learning_rate": 0.0007759744282125593, "loss": 0.0512, "num_input_tokens_seen": 101313328, "step": 46895 }, { "epoch": 7.650897226753671, "grad_norm": 0.06724183261394501, "learning_rate": 0.000775915070130619, "loss": 0.1505, "num_input_tokens_seen": 101323888, "step": 46900 }, { "epoch": 7.651712887438825, "grad_norm": 0.02761230431497097, "learning_rate": 0.0007758557064569096, "loss": 0.0983, "num_input_tokens_seen": 101334416, "step": 46905 }, { "epoch": 7.65252854812398, "grad_norm": 0.049123216420412064, "learning_rate": 0.0007757963371926346, "loss": 0.0224, "num_input_tokens_seen": 101345552, "step": 46910 }, { "epoch": 7.653344208809135, "grad_norm": 0.03470964357256889, "learning_rate": 0.000775736962338997, "loss": 0.1807, "num_input_tokens_seen": 101357168, "step": 46915 }, { "epoch": 7.654159869494291, "grad_norm": 0.25555697083473206, "learning_rate": 0.0007756775818971998, "loss": 0.0535, "num_input_tokens_seen": 101368336, "step": 46920 }, { "epoch": 7.6549755301794455, "grad_norm": 0.2521311640739441, "learning_rate": 0.0007756181958684467, "loss": 0.0587, "num_input_tokens_seen": 101379088, "step": 46925 }, { "epoch": 7.6557911908646, "grad_norm": 0.03777456283569336, "learning_rate": 0.0007755588042539414, "loss": 0.0218, "num_input_tokens_seen": 101389360, "step": 46930 }, { "epoch": 7.656606851549755, "grad_norm": 0.25904545187950134, "learning_rate": 0.0007754994070548873, "loss": 0.0492, "num_input_tokens_seen": 101401264, "step": 46935 }, { "epoch": 7.65742251223491, "grad_norm": 0.008663099259138107, "learning_rate": 0.0007754400042724881, "loss": 0.0318, "num_input_tokens_seen": 101411248, "step": 46940 }, { "epoch": 7.658238172920065, "grad_norm": 0.15806740522384644, "learning_rate": 0.0007753805959079481, "loss": 0.0542, "num_input_tokens_seen": 101420752, "step": 46945 }, { "epoch": 7.6590538336052205, "grad_norm": 0.008633045479655266, "learning_rate": 0.0007753211819624706, "loss": 0.1152, "num_input_tokens_seen": 101432272, "step": 46950 }, { "epoch": 7.659869494290375, "grad_norm": 0.47052812576293945, "learning_rate": 0.0007752617624372602, "loss": 0.047, "num_input_tokens_seen": 101443824, "step": 46955 }, { "epoch": 7.66068515497553, "grad_norm": 0.22263245284557343, "learning_rate": 0.000775202337333521, "loss": 0.0303, "num_input_tokens_seen": 101454512, "step": 46960 }, { "epoch": 7.661500815660685, "grad_norm": 0.0009532614494673908, "learning_rate": 0.0007751429066524575, "loss": 0.1237, "num_input_tokens_seen": 101465968, "step": 46965 }, { "epoch": 7.66231647634584, "grad_norm": 0.0020556438248604536, "learning_rate": 0.0007750834703952738, "loss": 0.0419, "num_input_tokens_seen": 101476720, "step": 46970 }, { "epoch": 7.6631321370309955, "grad_norm": 0.0666525810956955, "learning_rate": 0.0007750240285631745, "loss": 0.0806, "num_input_tokens_seen": 101487600, "step": 46975 }, { "epoch": 7.66394779771615, "grad_norm": 0.09244846552610397, "learning_rate": 0.0007749645811573646, "loss": 0.065, "num_input_tokens_seen": 101498832, "step": 46980 }, { "epoch": 7.664763458401305, "grad_norm": 0.2173173427581787, "learning_rate": 0.0007749051281790484, "loss": 0.0477, "num_input_tokens_seen": 101510000, "step": 46985 }, { "epoch": 7.66557911908646, "grad_norm": 0.006644314154982567, "learning_rate": 0.0007748456696294312, "loss": 0.0871, "num_input_tokens_seen": 101520528, "step": 46990 }, { "epoch": 7.666394779771615, "grad_norm": 0.006989603862166405, "learning_rate": 0.0007747862055097179, "loss": 0.0196, "num_input_tokens_seen": 101531504, "step": 46995 }, { "epoch": 7.6672104404567705, "grad_norm": 0.36260828375816345, "learning_rate": 0.0007747267358211135, "loss": 0.1973, "num_input_tokens_seen": 101542640, "step": 47000 }, { "epoch": 7.668026101141925, "grad_norm": 0.11980457603931427, "learning_rate": 0.0007746672605648231, "loss": 0.0752, "num_input_tokens_seen": 101553040, "step": 47005 }, { "epoch": 7.66884176182708, "grad_norm": 0.2689124643802643, "learning_rate": 0.0007746077797420524, "loss": 0.1872, "num_input_tokens_seen": 101564272, "step": 47010 }, { "epoch": 7.669657422512235, "grad_norm": 0.04494722560048103, "learning_rate": 0.0007745482933540067, "loss": 0.0134, "num_input_tokens_seen": 101574896, "step": 47015 }, { "epoch": 7.67047308319739, "grad_norm": 0.005968465004116297, "learning_rate": 0.0007744888014018914, "loss": 0.0078, "num_input_tokens_seen": 101586032, "step": 47020 }, { "epoch": 7.671288743882545, "grad_norm": 0.004824151284992695, "learning_rate": 0.0007744293038869125, "loss": 0.018, "num_input_tokens_seen": 101596784, "step": 47025 }, { "epoch": 7.672104404567699, "grad_norm": 0.01978217624127865, "learning_rate": 0.0007743698008102755, "loss": 0.0438, "num_input_tokens_seen": 101607632, "step": 47030 }, { "epoch": 7.672920065252855, "grad_norm": 0.11269936710596085, "learning_rate": 0.0007743102921731864, "loss": 0.0749, "num_input_tokens_seen": 101618064, "step": 47035 }, { "epoch": 7.67373572593801, "grad_norm": 0.4015054702758789, "learning_rate": 0.0007742507779768513, "loss": 0.1683, "num_input_tokens_seen": 101629712, "step": 47040 }, { "epoch": 7.674551386623165, "grad_norm": 0.014630908146500587, "learning_rate": 0.0007741912582224764, "loss": 0.0424, "num_input_tokens_seen": 101639920, "step": 47045 }, { "epoch": 7.6753670473083195, "grad_norm": 0.004438962321728468, "learning_rate": 0.0007741317329112675, "loss": 0.0132, "num_input_tokens_seen": 101650448, "step": 47050 }, { "epoch": 7.676182707993474, "grad_norm": 0.06496120244264603, "learning_rate": 0.0007740722020444315, "loss": 0.0644, "num_input_tokens_seen": 101661136, "step": 47055 }, { "epoch": 7.67699836867863, "grad_norm": 0.25175753235816956, "learning_rate": 0.0007740126656231746, "loss": 0.1169, "num_input_tokens_seen": 101671600, "step": 47060 }, { "epoch": 7.677814029363785, "grad_norm": 0.23091796040534973, "learning_rate": 0.0007739531236487034, "loss": 0.1631, "num_input_tokens_seen": 101681200, "step": 47065 }, { "epoch": 7.67862969004894, "grad_norm": 0.012666295282542706, "learning_rate": 0.0007738935761222247, "loss": 0.114, "num_input_tokens_seen": 101691824, "step": 47070 }, { "epoch": 7.6794453507340945, "grad_norm": 0.08096782118082047, "learning_rate": 0.0007738340230449451, "loss": 0.1061, "num_input_tokens_seen": 101701552, "step": 47075 }, { "epoch": 7.680261011419249, "grad_norm": 0.21125862002372742, "learning_rate": 0.0007737744644180718, "loss": 0.0638, "num_input_tokens_seen": 101713136, "step": 47080 }, { "epoch": 7.681076672104405, "grad_norm": 0.11040032655000687, "learning_rate": 0.0007737149002428114, "loss": 0.0173, "num_input_tokens_seen": 101723792, "step": 47085 }, { "epoch": 7.68189233278956, "grad_norm": 0.0034124937374144793, "learning_rate": 0.0007736553305203715, "loss": 0.079, "num_input_tokens_seen": 101733136, "step": 47090 }, { "epoch": 7.682707993474715, "grad_norm": 0.01145437452942133, "learning_rate": 0.0007735957552519592, "loss": 0.081, "num_input_tokens_seen": 101744496, "step": 47095 }, { "epoch": 7.6835236541598695, "grad_norm": 0.02029622718691826, "learning_rate": 0.0007735361744387818, "loss": 0.0309, "num_input_tokens_seen": 101755088, "step": 47100 }, { "epoch": 7.684339314845024, "grad_norm": 0.029110131785273552, "learning_rate": 0.0007734765880820468, "loss": 0.0534, "num_input_tokens_seen": 101764144, "step": 47105 }, { "epoch": 7.685154975530179, "grad_norm": 0.0040335459634661674, "learning_rate": 0.0007734169961829618, "loss": 0.0099, "num_input_tokens_seen": 101774960, "step": 47110 }, { "epoch": 7.685970636215334, "grad_norm": 0.1551961600780487, "learning_rate": 0.0007733573987427346, "loss": 0.0268, "num_input_tokens_seen": 101785488, "step": 47115 }, { "epoch": 7.68678629690049, "grad_norm": 0.0011410359293222427, "learning_rate": 0.0007732977957625729, "loss": 0.0282, "num_input_tokens_seen": 101795792, "step": 47120 }, { "epoch": 7.6876019575856445, "grad_norm": 0.009819770231842995, "learning_rate": 0.0007732381872436846, "loss": 0.1608, "num_input_tokens_seen": 101805168, "step": 47125 }, { "epoch": 7.688417618270799, "grad_norm": 0.36888980865478516, "learning_rate": 0.0007731785731872778, "loss": 0.1409, "num_input_tokens_seen": 101817104, "step": 47130 }, { "epoch": 7.689233278955954, "grad_norm": 0.009072404354810715, "learning_rate": 0.0007731189535945609, "loss": 0.084, "num_input_tokens_seen": 101826768, "step": 47135 }, { "epoch": 7.690048939641109, "grad_norm": 0.14182324707508087, "learning_rate": 0.0007730593284667416, "loss": 0.1707, "num_input_tokens_seen": 101835696, "step": 47140 }, { "epoch": 7.690864600326265, "grad_norm": 0.2949478328227997, "learning_rate": 0.0007729996978050287, "loss": 0.0506, "num_input_tokens_seen": 101847280, "step": 47145 }, { "epoch": 7.691680261011419, "grad_norm": 0.010478787124156952, "learning_rate": 0.0007729400616106308, "loss": 0.0218, "num_input_tokens_seen": 101859056, "step": 47150 }, { "epoch": 7.692495921696574, "grad_norm": 0.1508476883172989, "learning_rate": 0.0007728804198847561, "loss": 0.2044, "num_input_tokens_seen": 101871184, "step": 47155 }, { "epoch": 7.693311582381729, "grad_norm": 0.12415754795074463, "learning_rate": 0.0007728207726286136, "loss": 0.0443, "num_input_tokens_seen": 101881616, "step": 47160 }, { "epoch": 7.694127243066884, "grad_norm": 0.01569686271250248, "learning_rate": 0.000772761119843412, "loss": 0.0265, "num_input_tokens_seen": 101892688, "step": 47165 }, { "epoch": 7.69494290375204, "grad_norm": 0.07221835851669312, "learning_rate": 0.0007727014615303602, "loss": 0.0413, "num_input_tokens_seen": 101903696, "step": 47170 }, { "epoch": 7.695758564437194, "grad_norm": 0.03987114503979683, "learning_rate": 0.0007726417976906674, "loss": 0.0323, "num_input_tokens_seen": 101913808, "step": 47175 }, { "epoch": 7.696574225122349, "grad_norm": 0.0109365563839674, "learning_rate": 0.0007725821283255427, "loss": 0.0375, "num_input_tokens_seen": 101924304, "step": 47180 }, { "epoch": 7.697389885807504, "grad_norm": 0.020825443789362907, "learning_rate": 0.0007725224534361955, "loss": 0.0574, "num_input_tokens_seen": 101934320, "step": 47185 }, { "epoch": 7.698205546492659, "grad_norm": 0.386552095413208, "learning_rate": 0.000772462773023835, "loss": 0.0995, "num_input_tokens_seen": 101944912, "step": 47190 }, { "epoch": 7.699021207177814, "grad_norm": 0.055130232125520706, "learning_rate": 0.0007724030870896707, "loss": 0.0217, "num_input_tokens_seen": 101954992, "step": 47195 }, { "epoch": 7.699836867862969, "grad_norm": 0.38049188256263733, "learning_rate": 0.0007723433956349123, "loss": 0.0683, "num_input_tokens_seen": 101965552, "step": 47200 }, { "epoch": 7.700652528548124, "grad_norm": 0.025845926254987717, "learning_rate": 0.0007722836986607696, "loss": 0.0644, "num_input_tokens_seen": 101976304, "step": 47205 }, { "epoch": 7.701468189233279, "grad_norm": 0.009440034627914429, "learning_rate": 0.000772223996168452, "loss": 0.0158, "num_input_tokens_seen": 101987568, "step": 47210 }, { "epoch": 7.702283849918434, "grad_norm": 0.2705138027667999, "learning_rate": 0.0007721642881591701, "loss": 0.0852, "num_input_tokens_seen": 101998992, "step": 47215 }, { "epoch": 7.703099510603589, "grad_norm": 0.01270539965480566, "learning_rate": 0.0007721045746341335, "loss": 0.0462, "num_input_tokens_seen": 102009744, "step": 47220 }, { "epoch": 7.7039151712887435, "grad_norm": 0.009581586346030235, "learning_rate": 0.0007720448555945527, "loss": 0.0652, "num_input_tokens_seen": 102020528, "step": 47225 }, { "epoch": 7.704730831973899, "grad_norm": 0.00414057495072484, "learning_rate": 0.0007719851310416376, "loss": 0.0147, "num_input_tokens_seen": 102031056, "step": 47230 }, { "epoch": 7.705546492659054, "grad_norm": 0.14105384051799774, "learning_rate": 0.0007719254009765988, "loss": 0.0172, "num_input_tokens_seen": 102042480, "step": 47235 }, { "epoch": 7.706362153344209, "grad_norm": 0.09803734719753265, "learning_rate": 0.0007718656654006469, "loss": 0.0487, "num_input_tokens_seen": 102053904, "step": 47240 }, { "epoch": 7.707177814029364, "grad_norm": 0.0025712084025144577, "learning_rate": 0.0007718059243149921, "loss": 0.0106, "num_input_tokens_seen": 102064464, "step": 47245 }, { "epoch": 7.7079934747145185, "grad_norm": 0.017207970842719078, "learning_rate": 0.0007717461777208458, "loss": 0.0131, "num_input_tokens_seen": 102074832, "step": 47250 }, { "epoch": 7.708809135399674, "grad_norm": 0.006477975752204657, "learning_rate": 0.0007716864256194182, "loss": 0.16, "num_input_tokens_seen": 102087088, "step": 47255 }, { "epoch": 7.709624796084829, "grad_norm": 0.0029095339123159647, "learning_rate": 0.0007716266680119207, "loss": 0.0223, "num_input_tokens_seen": 102098576, "step": 47260 }, { "epoch": 7.710440456769984, "grad_norm": 0.042995352298021317, "learning_rate": 0.0007715669048995641, "loss": 0.1378, "num_input_tokens_seen": 102109360, "step": 47265 }, { "epoch": 7.711256117455139, "grad_norm": 0.013274877332150936, "learning_rate": 0.0007715071362835597, "loss": 0.0187, "num_input_tokens_seen": 102119344, "step": 47270 }, { "epoch": 7.712071778140293, "grad_norm": 0.28431835770606995, "learning_rate": 0.0007714473621651188, "loss": 0.0345, "num_input_tokens_seen": 102130192, "step": 47275 }, { "epoch": 7.712887438825448, "grad_norm": 0.06582538783550262, "learning_rate": 0.0007713875825454526, "loss": 0.1525, "num_input_tokens_seen": 102140496, "step": 47280 }, { "epoch": 7.713703099510604, "grad_norm": 0.02666584588587284, "learning_rate": 0.0007713277974257729, "loss": 0.0184, "num_input_tokens_seen": 102150448, "step": 47285 }, { "epoch": 7.714518760195759, "grad_norm": 0.045763175934553146, "learning_rate": 0.0007712680068072911, "loss": 0.0853, "num_input_tokens_seen": 102160848, "step": 47290 }, { "epoch": 7.715334420880914, "grad_norm": 0.005454830825328827, "learning_rate": 0.000771208210691219, "loss": 0.0161, "num_input_tokens_seen": 102171152, "step": 47295 }, { "epoch": 7.716150081566068, "grad_norm": 0.0028063564095646143, "learning_rate": 0.0007711484090787686, "loss": 0.0423, "num_input_tokens_seen": 102181840, "step": 47300 }, { "epoch": 7.716965742251223, "grad_norm": 0.026000995188951492, "learning_rate": 0.0007710886019711516, "loss": 0.0331, "num_input_tokens_seen": 102193328, "step": 47305 }, { "epoch": 7.717781402936378, "grad_norm": 0.21542233228683472, "learning_rate": 0.0007710287893695803, "loss": 0.0964, "num_input_tokens_seen": 102203632, "step": 47310 }, { "epoch": 7.718597063621534, "grad_norm": 0.004267824813723564, "learning_rate": 0.0007709689712752666, "loss": 0.0594, "num_input_tokens_seen": 102214864, "step": 47315 }, { "epoch": 7.719412724306689, "grad_norm": 0.05040392652153969, "learning_rate": 0.000770909147689423, "loss": 0.049, "num_input_tokens_seen": 102225392, "step": 47320 }, { "epoch": 7.720228384991843, "grad_norm": 0.003110036253929138, "learning_rate": 0.000770849318613262, "loss": 0.0652, "num_input_tokens_seen": 102235408, "step": 47325 }, { "epoch": 7.721044045676998, "grad_norm": 0.007758749648928642, "learning_rate": 0.0007707894840479957, "loss": 0.0663, "num_input_tokens_seen": 102246672, "step": 47330 }, { "epoch": 7.721859706362153, "grad_norm": 0.12128084897994995, "learning_rate": 0.0007707296439948372, "loss": 0.0693, "num_input_tokens_seen": 102256752, "step": 47335 }, { "epoch": 7.722675367047309, "grad_norm": 0.08663403242826462, "learning_rate": 0.0007706697984549988, "loss": 0.0679, "num_input_tokens_seen": 102267280, "step": 47340 }, { "epoch": 7.7234910277324635, "grad_norm": 0.021859407424926758, "learning_rate": 0.0007706099474296938, "loss": 0.0234, "num_input_tokens_seen": 102278928, "step": 47345 }, { "epoch": 7.724306688417618, "grad_norm": 0.013470686972141266, "learning_rate": 0.0007705500909201349, "loss": 0.0075, "num_input_tokens_seen": 102290608, "step": 47350 }, { "epoch": 7.725122349102773, "grad_norm": 0.31886106729507446, "learning_rate": 0.0007704902289275351, "loss": 0.1433, "num_input_tokens_seen": 102301456, "step": 47355 }, { "epoch": 7.725938009787928, "grad_norm": 0.016273748129606247, "learning_rate": 0.0007704303614531076, "loss": 0.1529, "num_input_tokens_seen": 102311760, "step": 47360 }, { "epoch": 7.726753670473083, "grad_norm": 0.0390392541885376, "learning_rate": 0.0007703704884980659, "loss": 0.01, "num_input_tokens_seen": 102322928, "step": 47365 }, { "epoch": 7.7275693311582385, "grad_norm": 0.022559884935617447, "learning_rate": 0.0007703106100636233, "loss": 0.1171, "num_input_tokens_seen": 102333488, "step": 47370 }, { "epoch": 7.728384991843393, "grad_norm": 0.1979581117630005, "learning_rate": 0.0007702507261509932, "loss": 0.0402, "num_input_tokens_seen": 102344240, "step": 47375 }, { "epoch": 7.729200652528548, "grad_norm": 0.03317411243915558, "learning_rate": 0.000770190836761389, "loss": 0.0135, "num_input_tokens_seen": 102354544, "step": 47380 }, { "epoch": 7.730016313213703, "grad_norm": 0.13358907401561737, "learning_rate": 0.0007701309418960252, "loss": 0.0188, "num_input_tokens_seen": 102365456, "step": 47385 }, { "epoch": 7.730831973898858, "grad_norm": 0.010089668445289135, "learning_rate": 0.000770071041556115, "loss": 0.0294, "num_input_tokens_seen": 102376560, "step": 47390 }, { "epoch": 7.731647634584013, "grad_norm": 0.22938187420368195, "learning_rate": 0.0007700111357428724, "loss": 0.2544, "num_input_tokens_seen": 102387184, "step": 47395 }, { "epoch": 7.732463295269168, "grad_norm": 0.005095354747027159, "learning_rate": 0.0007699512244575118, "loss": 0.0726, "num_input_tokens_seen": 102398128, "step": 47400 }, { "epoch": 7.733278955954323, "grad_norm": 0.3448812961578369, "learning_rate": 0.0007698913077012471, "loss": 0.1301, "num_input_tokens_seen": 102409840, "step": 47405 }, { "epoch": 7.734094616639478, "grad_norm": 0.013782687485218048, "learning_rate": 0.0007698313854752925, "loss": 0.1804, "num_input_tokens_seen": 102418544, "step": 47410 }, { "epoch": 7.734910277324633, "grad_norm": 0.036323726177215576, "learning_rate": 0.0007697714577808627, "loss": 0.0443, "num_input_tokens_seen": 102429872, "step": 47415 }, { "epoch": 7.735725938009788, "grad_norm": 0.35927650332450867, "learning_rate": 0.0007697115246191723, "loss": 0.0777, "num_input_tokens_seen": 102441424, "step": 47420 }, { "epoch": 7.736541598694943, "grad_norm": 0.12467711418867111, "learning_rate": 0.0007696515859914355, "loss": 0.1231, "num_input_tokens_seen": 102451568, "step": 47425 }, { "epoch": 7.737357259380098, "grad_norm": 0.13213302195072174, "learning_rate": 0.0007695916418988672, "loss": 0.0862, "num_input_tokens_seen": 102463312, "step": 47430 }, { "epoch": 7.738172920065253, "grad_norm": 0.060174569487571716, "learning_rate": 0.0007695316923426823, "loss": 0.1374, "num_input_tokens_seen": 102473584, "step": 47435 }, { "epoch": 7.738988580750408, "grad_norm": 0.01953984424471855, "learning_rate": 0.0007694717373240957, "loss": 0.0782, "num_input_tokens_seen": 102484592, "step": 47440 }, { "epoch": 7.739804241435563, "grad_norm": 0.02944285422563553, "learning_rate": 0.0007694117768443225, "loss": 0.0167, "num_input_tokens_seen": 102494960, "step": 47445 }, { "epoch": 7.740619902120718, "grad_norm": 0.06595656275749207, "learning_rate": 0.0007693518109045779, "loss": 0.0576, "num_input_tokens_seen": 102505872, "step": 47450 }, { "epoch": 7.741435562805873, "grad_norm": 0.0057144612073898315, "learning_rate": 0.0007692918395060772, "loss": 0.0318, "num_input_tokens_seen": 102518320, "step": 47455 }, { "epoch": 7.742251223491028, "grad_norm": 0.22104227542877197, "learning_rate": 0.0007692318626500357, "loss": 0.0608, "num_input_tokens_seen": 102529488, "step": 47460 }, { "epoch": 7.743066884176183, "grad_norm": 0.09873582422733307, "learning_rate": 0.000769171880337669, "loss": 0.0279, "num_input_tokens_seen": 102539920, "step": 47465 }, { "epoch": 7.7438825448613375, "grad_norm": 0.011355056427419186, "learning_rate": 0.0007691118925701927, "loss": 0.1026, "num_input_tokens_seen": 102550832, "step": 47470 }, { "epoch": 7.744698205546492, "grad_norm": 0.0603439062833786, "learning_rate": 0.0007690518993488225, "loss": 0.1269, "num_input_tokens_seen": 102560784, "step": 47475 }, { "epoch": 7.745513866231647, "grad_norm": 0.004944812506437302, "learning_rate": 0.0007689919006747741, "loss": 0.0378, "num_input_tokens_seen": 102571920, "step": 47480 }, { "epoch": 7.746329526916803, "grad_norm": 0.04303191974759102, "learning_rate": 0.0007689318965492637, "loss": 0.0342, "num_input_tokens_seen": 102582288, "step": 47485 }, { "epoch": 7.747145187601958, "grad_norm": 0.1496291607618332, "learning_rate": 0.0007688718869735072, "loss": 0.2258, "num_input_tokens_seen": 102592400, "step": 47490 }, { "epoch": 7.7479608482871125, "grad_norm": 0.13335032761096954, "learning_rate": 0.0007688118719487209, "loss": 0.0704, "num_input_tokens_seen": 102604112, "step": 47495 }, { "epoch": 7.748776508972267, "grad_norm": 0.02721407637000084, "learning_rate": 0.000768751851476121, "loss": 0.0234, "num_input_tokens_seen": 102615760, "step": 47500 }, { "epoch": 7.749592169657422, "grad_norm": 0.009211353026330471, "learning_rate": 0.0007686918255569238, "loss": 0.0356, "num_input_tokens_seen": 102626640, "step": 47505 }, { "epoch": 7.750407830342578, "grad_norm": 0.00823135394603014, "learning_rate": 0.000768631794192346, "loss": 0.0338, "num_input_tokens_seen": 102635280, "step": 47510 }, { "epoch": 7.751223491027733, "grad_norm": 0.21994829177856445, "learning_rate": 0.0007685717573836041, "loss": 0.2531, "num_input_tokens_seen": 102644592, "step": 47515 }, { "epoch": 7.7520391517128875, "grad_norm": 0.1314292550086975, "learning_rate": 0.0007685117151319148, "loss": 0.0189, "num_input_tokens_seen": 102655792, "step": 47520 }, { "epoch": 7.752854812398042, "grad_norm": 0.07114052772521973, "learning_rate": 0.000768451667438495, "loss": 0.0133, "num_input_tokens_seen": 102664912, "step": 47525 }, { "epoch": 7.753670473083197, "grad_norm": 0.26361361145973206, "learning_rate": 0.0007683916143045615, "loss": 0.1825, "num_input_tokens_seen": 102675952, "step": 47530 }, { "epoch": 7.754486133768353, "grad_norm": 0.0912046879529953, "learning_rate": 0.0007683315557313315, "loss": 0.0991, "num_input_tokens_seen": 102686352, "step": 47535 }, { "epoch": 7.755301794453508, "grad_norm": 0.13648521900177002, "learning_rate": 0.0007682714917200222, "loss": 0.272, "num_input_tokens_seen": 102697136, "step": 47540 }, { "epoch": 7.7561174551386625, "grad_norm": 0.11068026721477509, "learning_rate": 0.0007682114222718507, "loss": 0.1335, "num_input_tokens_seen": 102708496, "step": 47545 }, { "epoch": 7.756933115823817, "grad_norm": 0.07234393805265427, "learning_rate": 0.0007681513473880345, "loss": 0.047, "num_input_tokens_seen": 102718736, "step": 47550 }, { "epoch": 7.757748776508972, "grad_norm": 0.16802458465099335, "learning_rate": 0.000768091267069791, "loss": 0.0532, "num_input_tokens_seen": 102729904, "step": 47555 }, { "epoch": 7.758564437194127, "grad_norm": 0.03538018837571144, "learning_rate": 0.000768031181318338, "loss": 0.0343, "num_input_tokens_seen": 102739664, "step": 47560 }, { "epoch": 7.759380097879282, "grad_norm": 0.08238279074430466, "learning_rate": 0.000767971090134893, "loss": 0.0939, "num_input_tokens_seen": 102750480, "step": 47565 }, { "epoch": 7.760195758564437, "grad_norm": 0.23739773035049438, "learning_rate": 0.0007679109935206741, "loss": 0.3616, "num_input_tokens_seen": 102762256, "step": 47570 }, { "epoch": 7.761011419249592, "grad_norm": 0.002562036272138357, "learning_rate": 0.0007678508914768989, "loss": 0.0531, "num_input_tokens_seen": 102771856, "step": 47575 }, { "epoch": 7.761827079934747, "grad_norm": 0.5261669158935547, "learning_rate": 0.0007677907840047855, "loss": 0.0518, "num_input_tokens_seen": 102781744, "step": 47580 }, { "epoch": 7.762642740619902, "grad_norm": 0.013511805795133114, "learning_rate": 0.0007677306711055523, "loss": 0.1552, "num_input_tokens_seen": 102792272, "step": 47585 }, { "epoch": 7.763458401305057, "grad_norm": 0.050765104591846466, "learning_rate": 0.0007676705527804173, "loss": 0.0508, "num_input_tokens_seen": 102802608, "step": 47590 }, { "epoch": 7.764274061990212, "grad_norm": 0.010733344592154026, "learning_rate": 0.000767610429030599, "loss": 0.0236, "num_input_tokens_seen": 102812784, "step": 47595 }, { "epoch": 7.765089722675367, "grad_norm": 0.16465511918067932, "learning_rate": 0.0007675502998573159, "loss": 0.0481, "num_input_tokens_seen": 102823792, "step": 47600 }, { "epoch": 7.765905383360522, "grad_norm": 0.005473458673804998, "learning_rate": 0.0007674901652617865, "loss": 0.0576, "num_input_tokens_seen": 102833712, "step": 47605 }, { "epoch": 7.766721044045677, "grad_norm": 0.2899492681026459, "learning_rate": 0.0007674300252452297, "loss": 0.0938, "num_input_tokens_seen": 102845232, "step": 47610 }, { "epoch": 7.767536704730832, "grad_norm": 0.012640978209674358, "learning_rate": 0.000767369879808864, "loss": 0.0254, "num_input_tokens_seen": 102856944, "step": 47615 }, { "epoch": 7.768352365415987, "grad_norm": 0.0032530981115996838, "learning_rate": 0.0007673097289539086, "loss": 0.0265, "num_input_tokens_seen": 102867856, "step": 47620 }, { "epoch": 7.769168026101142, "grad_norm": 0.050850965082645416, "learning_rate": 0.0007672495726815825, "loss": 0.0641, "num_input_tokens_seen": 102878064, "step": 47625 }, { "epoch": 7.769983686786297, "grad_norm": 0.004038092214614153, "learning_rate": 0.0007671894109931048, "loss": 0.0452, "num_input_tokens_seen": 102890320, "step": 47630 }, { "epoch": 7.770799347471452, "grad_norm": 0.007436644751578569, "learning_rate": 0.0007671292438896946, "loss": 0.0517, "num_input_tokens_seen": 102901296, "step": 47635 }, { "epoch": 7.771615008156607, "grad_norm": 0.12992477416992188, "learning_rate": 0.0007670690713725715, "loss": 0.0603, "num_input_tokens_seen": 102911856, "step": 47640 }, { "epoch": 7.7724306688417615, "grad_norm": 0.08481542021036148, "learning_rate": 0.0007670088934429548, "loss": 0.0352, "num_input_tokens_seen": 102921488, "step": 47645 }, { "epoch": 7.773246329526917, "grad_norm": 0.012516772374510765, "learning_rate": 0.0007669487101020642, "loss": 0.1126, "num_input_tokens_seen": 102931984, "step": 47650 }, { "epoch": 7.774061990212072, "grad_norm": 0.08736187219619751, "learning_rate": 0.0007668885213511193, "loss": 0.0573, "num_input_tokens_seen": 102943568, "step": 47655 }, { "epoch": 7.774877650897227, "grad_norm": 0.012583942152559757, "learning_rate": 0.0007668283271913399, "loss": 0.1129, "num_input_tokens_seen": 102954288, "step": 47660 }, { "epoch": 7.775693311582382, "grad_norm": 0.09388376772403717, "learning_rate": 0.000766768127623946, "loss": 0.0823, "num_input_tokens_seen": 102964304, "step": 47665 }, { "epoch": 7.7765089722675365, "grad_norm": 0.0643705278635025, "learning_rate": 0.0007667079226501576, "loss": 0.0343, "num_input_tokens_seen": 102975248, "step": 47670 }, { "epoch": 7.777324632952691, "grad_norm": 0.008023286238312721, "learning_rate": 0.0007666477122711948, "loss": 0.0354, "num_input_tokens_seen": 102986576, "step": 47675 }, { "epoch": 7.778140293637847, "grad_norm": 0.30081382393836975, "learning_rate": 0.000766587496488278, "loss": 0.1358, "num_input_tokens_seen": 102997584, "step": 47680 }, { "epoch": 7.778955954323002, "grad_norm": 0.03647547587752342, "learning_rate": 0.0007665272753026271, "loss": 0.0171, "num_input_tokens_seen": 103008368, "step": 47685 }, { "epoch": 7.779771615008157, "grad_norm": 0.004456724505871534, "learning_rate": 0.000766467048715463, "loss": 0.0149, "num_input_tokens_seen": 103019568, "step": 47690 }, { "epoch": 7.780587275693311, "grad_norm": 0.055073726922273636, "learning_rate": 0.000766406816728006, "loss": 0.0918, "num_input_tokens_seen": 103030256, "step": 47695 }, { "epoch": 7.781402936378466, "grad_norm": 0.3227660357952118, "learning_rate": 0.000766346579341477, "loss": 0.0326, "num_input_tokens_seen": 103041168, "step": 47700 }, { "epoch": 7.782218597063622, "grad_norm": 0.06537395715713501, "learning_rate": 0.0007662863365570967, "loss": 0.1407, "num_input_tokens_seen": 103052464, "step": 47705 }, { "epoch": 7.783034257748777, "grad_norm": 0.3882252871990204, "learning_rate": 0.000766226088376086, "loss": 0.0276, "num_input_tokens_seen": 103063376, "step": 47710 }, { "epoch": 7.783849918433932, "grad_norm": 0.04566549137234688, "learning_rate": 0.0007661658347996659, "loss": 0.0638, "num_input_tokens_seen": 103074448, "step": 47715 }, { "epoch": 7.784665579119086, "grad_norm": 0.039481550455093384, "learning_rate": 0.0007661055758290574, "loss": 0.204, "num_input_tokens_seen": 103084912, "step": 47720 }, { "epoch": 7.785481239804241, "grad_norm": 0.24234062433242798, "learning_rate": 0.0007660453114654819, "loss": 0.1593, "num_input_tokens_seen": 103095408, "step": 47725 }, { "epoch": 7.786296900489396, "grad_norm": 0.24106614291667938, "learning_rate": 0.0007659850417101606, "loss": 0.0674, "num_input_tokens_seen": 103105776, "step": 47730 }, { "epoch": 7.787112561174552, "grad_norm": 0.013635087758302689, "learning_rate": 0.0007659247665643151, "loss": 0.1237, "num_input_tokens_seen": 103117392, "step": 47735 }, { "epoch": 7.787928221859707, "grad_norm": 0.05942991003394127, "learning_rate": 0.0007658644860291668, "loss": 0.0434, "num_input_tokens_seen": 103127344, "step": 47740 }, { "epoch": 7.788743882544861, "grad_norm": 0.008099487982690334, "learning_rate": 0.0007658042001059373, "loss": 0.1783, "num_input_tokens_seen": 103136848, "step": 47745 }, { "epoch": 7.789559543230016, "grad_norm": 0.01683001220226288, "learning_rate": 0.0007657439087958486, "loss": 0.0663, "num_input_tokens_seen": 103148208, "step": 47750 }, { "epoch": 7.790375203915171, "grad_norm": 0.05494864284992218, "learning_rate": 0.0007656836121001225, "loss": 0.0163, "num_input_tokens_seen": 103158832, "step": 47755 }, { "epoch": 7.791190864600326, "grad_norm": 0.1892729550600052, "learning_rate": 0.0007656233100199809, "loss": 0.1048, "num_input_tokens_seen": 103168688, "step": 47760 }, { "epoch": 7.7920065252854815, "grad_norm": 0.05981948971748352, "learning_rate": 0.000765563002556646, "loss": 0.0575, "num_input_tokens_seen": 103178736, "step": 47765 }, { "epoch": 7.792822185970636, "grad_norm": 0.216399148106575, "learning_rate": 0.00076550268971134, "loss": 0.1477, "num_input_tokens_seen": 103188528, "step": 47770 }, { "epoch": 7.793637846655791, "grad_norm": 0.09204624593257904, "learning_rate": 0.0007654423714852852, "loss": 0.0542, "num_input_tokens_seen": 103198736, "step": 47775 }, { "epoch": 7.794453507340946, "grad_norm": 0.11351175606250763, "learning_rate": 0.0007653820478797038, "loss": 0.0609, "num_input_tokens_seen": 103209296, "step": 47780 }, { "epoch": 7.795269168026101, "grad_norm": 0.2176835536956787, "learning_rate": 0.0007653217188958188, "loss": 0.188, "num_input_tokens_seen": 103219632, "step": 47785 }, { "epoch": 7.7960848287112565, "grad_norm": 0.025288639590144157, "learning_rate": 0.0007652613845348524, "loss": 0.0448, "num_input_tokens_seen": 103230960, "step": 47790 }, { "epoch": 7.796900489396411, "grad_norm": 0.03485998511314392, "learning_rate": 0.0007652010447980276, "loss": 0.047, "num_input_tokens_seen": 103240912, "step": 47795 }, { "epoch": 7.797716150081566, "grad_norm": 0.004971094895154238, "learning_rate": 0.0007651406996865672, "loss": 0.0318, "num_input_tokens_seen": 103252752, "step": 47800 }, { "epoch": 7.798531810766721, "grad_norm": 0.2158200442790985, "learning_rate": 0.000765080349201694, "loss": 0.0414, "num_input_tokens_seen": 103262800, "step": 47805 }, { "epoch": 7.799347471451876, "grad_norm": 0.0057363430969417095, "learning_rate": 0.0007650199933446314, "loss": 0.1655, "num_input_tokens_seen": 103273680, "step": 47810 }, { "epoch": 7.800163132137031, "grad_norm": 0.13042238354682922, "learning_rate": 0.0007649596321166025, "loss": 0.0229, "num_input_tokens_seen": 103285200, "step": 47815 }, { "epoch": 7.800978792822186, "grad_norm": 0.010898235253989697, "learning_rate": 0.0007648992655188305, "loss": 0.0878, "num_input_tokens_seen": 103295568, "step": 47820 }, { "epoch": 7.801794453507341, "grad_norm": 0.08140433579683304, "learning_rate": 0.0007648388935525388, "loss": 0.0804, "num_input_tokens_seen": 103305648, "step": 47825 }, { "epoch": 7.802610114192496, "grad_norm": 0.036025699228048325, "learning_rate": 0.0007647785162189509, "loss": 0.0935, "num_input_tokens_seen": 103317264, "step": 47830 }, { "epoch": 7.803425774877651, "grad_norm": 0.026536036282777786, "learning_rate": 0.0007647181335192905, "loss": 0.0664, "num_input_tokens_seen": 103328720, "step": 47835 }, { "epoch": 7.804241435562806, "grad_norm": 0.1489490419626236, "learning_rate": 0.0007646577454547814, "loss": 0.038, "num_input_tokens_seen": 103340464, "step": 47840 }, { "epoch": 7.80505709624796, "grad_norm": 0.016447249799966812, "learning_rate": 0.0007645973520266472, "loss": 0.0407, "num_input_tokens_seen": 103350992, "step": 47845 }, { "epoch": 7.805872756933116, "grad_norm": 0.23225589096546173, "learning_rate": 0.000764536953236112, "loss": 0.1121, "num_input_tokens_seen": 103362736, "step": 47850 }, { "epoch": 7.806688417618271, "grad_norm": 0.024745440110564232, "learning_rate": 0.0007644765490844, "loss": 0.0941, "num_input_tokens_seen": 103373136, "step": 47855 }, { "epoch": 7.807504078303426, "grad_norm": 0.18811935186386108, "learning_rate": 0.0007644161395727352, "loss": 0.0494, "num_input_tokens_seen": 103383504, "step": 47860 }, { "epoch": 7.808319738988581, "grad_norm": 0.2695559561252594, "learning_rate": 0.0007643557247023418, "loss": 0.0746, "num_input_tokens_seen": 103393808, "step": 47865 }, { "epoch": 7.809135399673735, "grad_norm": 0.03419940173625946, "learning_rate": 0.0007642953044744443, "loss": 0.0669, "num_input_tokens_seen": 103405936, "step": 47870 }, { "epoch": 7.809951060358891, "grad_norm": 0.018129676580429077, "learning_rate": 0.0007642348788902672, "loss": 0.0814, "num_input_tokens_seen": 103416464, "step": 47875 }, { "epoch": 7.810766721044046, "grad_norm": 0.2084517925977707, "learning_rate": 0.000764174447951035, "loss": 0.0363, "num_input_tokens_seen": 103426768, "step": 47880 }, { "epoch": 7.811582381729201, "grad_norm": 0.08284687250852585, "learning_rate": 0.0007641140116579725, "loss": 0.0782, "num_input_tokens_seen": 103436656, "step": 47885 }, { "epoch": 7.8123980424143555, "grad_norm": 0.07356946915388107, "learning_rate": 0.0007640535700123047, "loss": 0.0255, "num_input_tokens_seen": 103447504, "step": 47890 }, { "epoch": 7.81321370309951, "grad_norm": 0.018549971282482147, "learning_rate": 0.000763993123015256, "loss": 0.0239, "num_input_tokens_seen": 103458800, "step": 47895 }, { "epoch": 7.814029363784666, "grad_norm": 0.001164857647381723, "learning_rate": 0.0007639326706680521, "loss": 0.0284, "num_input_tokens_seen": 103468944, "step": 47900 }, { "epoch": 7.814845024469821, "grad_norm": 0.0029101655818521976, "learning_rate": 0.0007638722129719175, "loss": 0.0222, "num_input_tokens_seen": 103478928, "step": 47905 }, { "epoch": 7.815660685154976, "grad_norm": 0.005133399274200201, "learning_rate": 0.0007638117499280778, "loss": 0.014, "num_input_tokens_seen": 103491024, "step": 47910 }, { "epoch": 7.8164763458401305, "grad_norm": 0.10789318382740021, "learning_rate": 0.0007637512815377585, "loss": 0.0456, "num_input_tokens_seen": 103503600, "step": 47915 }, { "epoch": 7.817292006525285, "grad_norm": 0.417122483253479, "learning_rate": 0.0007636908078021848, "loss": 0.0817, "num_input_tokens_seen": 103513136, "step": 47920 }, { "epoch": 7.81810766721044, "grad_norm": 0.1550966501235962, "learning_rate": 0.0007636303287225823, "loss": 0.1408, "num_input_tokens_seen": 103523696, "step": 47925 }, { "epoch": 7.818923327895595, "grad_norm": 0.24719803035259247, "learning_rate": 0.0007635698443001768, "loss": 0.1584, "num_input_tokens_seen": 103535088, "step": 47930 }, { "epoch": 7.819738988580751, "grad_norm": 0.1656491756439209, "learning_rate": 0.0007635093545361942, "loss": 0.0848, "num_input_tokens_seen": 103544976, "step": 47935 }, { "epoch": 7.8205546492659055, "grad_norm": 0.011646476574242115, "learning_rate": 0.00076344885943186, "loss": 0.158, "num_input_tokens_seen": 103555824, "step": 47940 }, { "epoch": 7.82137030995106, "grad_norm": 0.2617485821247101, "learning_rate": 0.0007633883589884007, "loss": 0.1808, "num_input_tokens_seen": 103566544, "step": 47945 }, { "epoch": 7.822185970636215, "grad_norm": 0.0567353293299675, "learning_rate": 0.000763327853207042, "loss": 0.0203, "num_input_tokens_seen": 103577680, "step": 47950 }, { "epoch": 7.82300163132137, "grad_norm": 0.07070982456207275, "learning_rate": 0.0007632673420890104, "loss": 0.0291, "num_input_tokens_seen": 103587280, "step": 47955 }, { "epoch": 7.823817292006526, "grad_norm": 0.08610428869724274, "learning_rate": 0.000763206825635532, "loss": 0.0578, "num_input_tokens_seen": 103598896, "step": 47960 }, { "epoch": 7.8246329526916805, "grad_norm": 0.28003424406051636, "learning_rate": 0.0007631463038478334, "loss": 0.1182, "num_input_tokens_seen": 103608976, "step": 47965 }, { "epoch": 7.825448613376835, "grad_norm": 0.015281864441931248, "learning_rate": 0.0007630857767271413, "loss": 0.0134, "num_input_tokens_seen": 103620080, "step": 47970 }, { "epoch": 7.82626427406199, "grad_norm": 0.08863025903701782, "learning_rate": 0.000763025244274682, "loss": 0.0249, "num_input_tokens_seen": 103631312, "step": 47975 }, { "epoch": 7.827079934747145, "grad_norm": 0.5021570324897766, "learning_rate": 0.0007629647064916825, "loss": 0.0948, "num_input_tokens_seen": 103641360, "step": 47980 }, { "epoch": 7.827895595432301, "grad_norm": 0.07482955604791641, "learning_rate": 0.0007629041633793696, "loss": 0.0867, "num_input_tokens_seen": 103652944, "step": 47985 }, { "epoch": 7.828711256117455, "grad_norm": 0.1137724220752716, "learning_rate": 0.0007628436149389703, "loss": 0.1346, "num_input_tokens_seen": 103662544, "step": 47990 }, { "epoch": 7.82952691680261, "grad_norm": 0.07171276956796646, "learning_rate": 0.000762783061171712, "loss": 0.0905, "num_input_tokens_seen": 103672496, "step": 47995 }, { "epoch": 7.830342577487765, "grad_norm": 0.01498460490256548, "learning_rate": 0.0007627225020788213, "loss": 0.0511, "num_input_tokens_seen": 103683312, "step": 48000 }, { "epoch": 7.83115823817292, "grad_norm": 0.08065731823444366, "learning_rate": 0.0007626619376615258, "loss": 0.0343, "num_input_tokens_seen": 103694320, "step": 48005 }, { "epoch": 7.831973898858075, "grad_norm": 0.22067171335220337, "learning_rate": 0.000762601367921053, "loss": 0.0691, "num_input_tokens_seen": 103705712, "step": 48010 }, { "epoch": 7.8327895595432295, "grad_norm": 0.2833503484725952, "learning_rate": 0.0007625407928586303, "loss": 0.0494, "num_input_tokens_seen": 103717424, "step": 48015 }, { "epoch": 7.833605220228385, "grad_norm": 0.053285736590623856, "learning_rate": 0.0007624802124754855, "loss": 0.0152, "num_input_tokens_seen": 103729008, "step": 48020 }, { "epoch": 7.83442088091354, "grad_norm": 0.3245934844017029, "learning_rate": 0.000762419626772846, "loss": 0.0347, "num_input_tokens_seen": 103740464, "step": 48025 }, { "epoch": 7.835236541598695, "grad_norm": 0.5312497019767761, "learning_rate": 0.0007623590357519401, "loss": 0.1356, "num_input_tokens_seen": 103751088, "step": 48030 }, { "epoch": 7.83605220228385, "grad_norm": 0.0032611230853945017, "learning_rate": 0.0007622984394139953, "loss": 0.1581, "num_input_tokens_seen": 103760432, "step": 48035 }, { "epoch": 7.8368678629690045, "grad_norm": 0.0028083904180675745, "learning_rate": 0.00076223783776024, "loss": 0.015, "num_input_tokens_seen": 103771120, "step": 48040 }, { "epoch": 7.83768352365416, "grad_norm": 0.09268619865179062, "learning_rate": 0.0007621772307919022, "loss": 0.0176, "num_input_tokens_seen": 103780144, "step": 48045 }, { "epoch": 7.838499184339315, "grad_norm": 0.012299345806241035, "learning_rate": 0.0007621166185102104, "loss": 0.0105, "num_input_tokens_seen": 103790928, "step": 48050 }, { "epoch": 7.83931484502447, "grad_norm": 0.32112395763397217, "learning_rate": 0.0007620560009163926, "loss": 0.1263, "num_input_tokens_seen": 103802064, "step": 48055 }, { "epoch": 7.840130505709625, "grad_norm": 0.01307889074087143, "learning_rate": 0.0007619953780116775, "loss": 0.1268, "num_input_tokens_seen": 103813872, "step": 48060 }, { "epoch": 7.8409461663947795, "grad_norm": 0.045404642820358276, "learning_rate": 0.0007619347497972937, "loss": 0.0942, "num_input_tokens_seen": 103825232, "step": 48065 }, { "epoch": 7.841761827079935, "grad_norm": 0.2753269672393799, "learning_rate": 0.00076187411627447, "loss": 0.1738, "num_input_tokens_seen": 103834992, "step": 48070 }, { "epoch": 7.84257748776509, "grad_norm": 0.008529874496161938, "learning_rate": 0.0007618134774444351, "loss": 0.0346, "num_input_tokens_seen": 103846384, "step": 48075 }, { "epoch": 7.843393148450245, "grad_norm": 0.22419241070747375, "learning_rate": 0.0007617528333084178, "loss": 0.0849, "num_input_tokens_seen": 103855888, "step": 48080 }, { "epoch": 7.8442088091354, "grad_norm": 0.15129628777503967, "learning_rate": 0.0007616921838676475, "loss": 0.102, "num_input_tokens_seen": 103867568, "step": 48085 }, { "epoch": 7.8450244698205545, "grad_norm": 0.23662333190441132, "learning_rate": 0.0007616315291233531, "loss": 0.0673, "num_input_tokens_seen": 103878960, "step": 48090 }, { "epoch": 7.845840130505709, "grad_norm": 0.0883672758936882, "learning_rate": 0.0007615708690767637, "loss": 0.0409, "num_input_tokens_seen": 103888432, "step": 48095 }, { "epoch": 7.846655791190865, "grad_norm": 0.057228852063417435, "learning_rate": 0.0007615102037291089, "loss": 0.1298, "num_input_tokens_seen": 103899632, "step": 48100 }, { "epoch": 7.84747145187602, "grad_norm": 0.013155256398022175, "learning_rate": 0.000761449533081618, "loss": 0.0247, "num_input_tokens_seen": 103910128, "step": 48105 }, { "epoch": 7.848287112561175, "grad_norm": 0.02147751860320568, "learning_rate": 0.0007613888571355208, "loss": 0.1243, "num_input_tokens_seen": 103922160, "step": 48110 }, { "epoch": 7.849102773246329, "grad_norm": 0.003036431735381484, "learning_rate": 0.0007613281758920467, "loss": 0.0094, "num_input_tokens_seen": 103933232, "step": 48115 }, { "epoch": 7.849918433931484, "grad_norm": 0.03485213965177536, "learning_rate": 0.0007612674893524256, "loss": 0.0377, "num_input_tokens_seen": 103946160, "step": 48120 }, { "epoch": 7.850734094616639, "grad_norm": 0.04104197025299072, "learning_rate": 0.0007612067975178874, "loss": 0.0486, "num_input_tokens_seen": 103958000, "step": 48125 }, { "epoch": 7.851549755301795, "grad_norm": 0.2663140892982483, "learning_rate": 0.0007611461003896621, "loss": 0.1522, "num_input_tokens_seen": 103968912, "step": 48130 }, { "epoch": 7.85236541598695, "grad_norm": 0.8700830340385437, "learning_rate": 0.0007610853979689797, "loss": 0.096, "num_input_tokens_seen": 103978960, "step": 48135 }, { "epoch": 7.853181076672104, "grad_norm": 0.28547972440719604, "learning_rate": 0.0007610246902570706, "loss": 0.1672, "num_input_tokens_seen": 103989008, "step": 48140 }, { "epoch": 7.853996737357259, "grad_norm": 0.20221035182476044, "learning_rate": 0.000760963977255165, "loss": 0.0699, "num_input_tokens_seen": 104001712, "step": 48145 }, { "epoch": 7.854812398042414, "grad_norm": 0.01691204123198986, "learning_rate": 0.0007609032589644934, "loss": 0.0162, "num_input_tokens_seen": 104013648, "step": 48150 }, { "epoch": 7.85562805872757, "grad_norm": 0.22434313595294952, "learning_rate": 0.0007608425353862863, "loss": 0.1619, "num_input_tokens_seen": 104024496, "step": 48155 }, { "epoch": 7.856443719412725, "grad_norm": 0.520187497138977, "learning_rate": 0.000760781806521774, "loss": 0.1211, "num_input_tokens_seen": 104034256, "step": 48160 }, { "epoch": 7.857259380097879, "grad_norm": 0.2626437842845917, "learning_rate": 0.0007607210723721879, "loss": 0.1844, "num_input_tokens_seen": 104045456, "step": 48165 }, { "epoch": 7.858075040783034, "grad_norm": 0.2683877646923065, "learning_rate": 0.0007606603329387585, "loss": 0.1541, "num_input_tokens_seen": 104056368, "step": 48170 }, { "epoch": 7.858890701468189, "grad_norm": 0.05445249378681183, "learning_rate": 0.0007605995882227166, "loss": 0.0701, "num_input_tokens_seen": 104067280, "step": 48175 }, { "epoch": 7.859706362153344, "grad_norm": 0.18587210774421692, "learning_rate": 0.0007605388382252936, "loss": 0.1222, "num_input_tokens_seen": 104079120, "step": 48180 }, { "epoch": 7.8605220228384995, "grad_norm": 0.1920372098684311, "learning_rate": 0.0007604780829477205, "loss": 0.1243, "num_input_tokens_seen": 104090288, "step": 48185 }, { "epoch": 7.861337683523654, "grad_norm": 0.007299771066755056, "learning_rate": 0.0007604173223912285, "loss": 0.0234, "num_input_tokens_seen": 104099952, "step": 48190 }, { "epoch": 7.862153344208809, "grad_norm": 0.0069532874040305614, "learning_rate": 0.0007603565565570493, "loss": 0.0189, "num_input_tokens_seen": 104111344, "step": 48195 }, { "epoch": 7.862969004893964, "grad_norm": 0.023784659802913666, "learning_rate": 0.0007602957854464141, "loss": 0.0314, "num_input_tokens_seen": 104123056, "step": 48200 }, { "epoch": 7.863784665579119, "grad_norm": 0.18003705143928528, "learning_rate": 0.0007602350090605546, "loss": 0.1692, "num_input_tokens_seen": 104132208, "step": 48205 }, { "epoch": 7.864600326264274, "grad_norm": 0.03283815085887909, "learning_rate": 0.0007601742274007023, "loss": 0.0305, "num_input_tokens_seen": 104143376, "step": 48210 }, { "epoch": 7.865415986949429, "grad_norm": 0.04933464154601097, "learning_rate": 0.0007601134404680894, "loss": 0.0597, "num_input_tokens_seen": 104153584, "step": 48215 }, { "epoch": 7.866231647634584, "grad_norm": 0.0056173368357121944, "learning_rate": 0.0007600526482639477, "loss": 0.0334, "num_input_tokens_seen": 104164880, "step": 48220 }, { "epoch": 7.867047308319739, "grad_norm": 0.5657948851585388, "learning_rate": 0.0007599918507895092, "loss": 0.0872, "num_input_tokens_seen": 104175920, "step": 48225 }, { "epoch": 7.867862969004894, "grad_norm": 0.06797628104686737, "learning_rate": 0.000759931048046006, "loss": 0.0218, "num_input_tokens_seen": 104186640, "step": 48230 }, { "epoch": 7.868678629690049, "grad_norm": 0.15060186386108398, "learning_rate": 0.0007598702400346703, "loss": 0.0289, "num_input_tokens_seen": 104197872, "step": 48235 }, { "epoch": 7.869494290375204, "grad_norm": 0.16141721606254578, "learning_rate": 0.0007598094267567345, "loss": 0.1611, "num_input_tokens_seen": 104208816, "step": 48240 }, { "epoch": 7.870309951060359, "grad_norm": 0.14451834559440613, "learning_rate": 0.0007597486082134311, "loss": 0.0256, "num_input_tokens_seen": 104219952, "step": 48245 }, { "epoch": 7.871125611745514, "grad_norm": 0.010201388970017433, "learning_rate": 0.0007596877844059926, "loss": 0.11, "num_input_tokens_seen": 104230640, "step": 48250 }, { "epoch": 7.871941272430669, "grad_norm": 0.22415506839752197, "learning_rate": 0.0007596269553356518, "loss": 0.1094, "num_input_tokens_seen": 104242000, "step": 48255 }, { "epoch": 7.872756933115824, "grad_norm": 0.02670017071068287, "learning_rate": 0.0007595661210036414, "loss": 0.0597, "num_input_tokens_seen": 104252144, "step": 48260 }, { "epoch": 7.873572593800979, "grad_norm": 0.20463339984416962, "learning_rate": 0.0007595052814111942, "loss": 0.031, "num_input_tokens_seen": 104263984, "step": 48265 }, { "epoch": 7.874388254486134, "grad_norm": 0.13383440673351288, "learning_rate": 0.0007594444365595435, "loss": 0.0718, "num_input_tokens_seen": 104274320, "step": 48270 }, { "epoch": 7.875203915171289, "grad_norm": 0.6233965754508972, "learning_rate": 0.0007593835864499219, "loss": 0.1259, "num_input_tokens_seen": 104285776, "step": 48275 }, { "epoch": 7.876019575856444, "grad_norm": 0.0012839973205700517, "learning_rate": 0.0007593227310835629, "loss": 0.0229, "num_input_tokens_seen": 104297104, "step": 48280 }, { "epoch": 7.876835236541599, "grad_norm": 0.5068719983100891, "learning_rate": 0.0007592618704616998, "loss": 0.1091, "num_input_tokens_seen": 104307600, "step": 48285 }, { "epoch": 7.877650897226753, "grad_norm": 0.1758025586605072, "learning_rate": 0.0007592010045855662, "loss": 0.14, "num_input_tokens_seen": 104319664, "step": 48290 }, { "epoch": 7.878466557911908, "grad_norm": 0.02141629531979561, "learning_rate": 0.0007591401334563952, "loss": 0.1144, "num_input_tokens_seen": 104331184, "step": 48295 }, { "epoch": 7.879282218597064, "grad_norm": 0.14108148217201233, "learning_rate": 0.0007590792570754207, "loss": 0.0349, "num_input_tokens_seen": 104342352, "step": 48300 }, { "epoch": 7.880097879282219, "grad_norm": 0.027795322239398956, "learning_rate": 0.0007590183754438764, "loss": 0.078, "num_input_tokens_seen": 104354704, "step": 48305 }, { "epoch": 7.8809135399673735, "grad_norm": 0.16794262826442719, "learning_rate": 0.0007589574885629961, "loss": 0.093, "num_input_tokens_seen": 104365872, "step": 48310 }, { "epoch": 7.881729200652528, "grad_norm": 0.30435100197792053, "learning_rate": 0.0007588965964340137, "loss": 0.0865, "num_input_tokens_seen": 104377008, "step": 48315 }, { "epoch": 7.882544861337683, "grad_norm": 0.007787167094647884, "learning_rate": 0.0007588356990581635, "loss": 0.0255, "num_input_tokens_seen": 104388016, "step": 48320 }, { "epoch": 7.883360522022839, "grad_norm": 0.19416892528533936, "learning_rate": 0.0007587747964366796, "loss": 0.0961, "num_input_tokens_seen": 104397968, "step": 48325 }, { "epoch": 7.884176182707994, "grad_norm": 0.012199878692626953, "learning_rate": 0.0007587138885707959, "loss": 0.0248, "num_input_tokens_seen": 104408944, "step": 48330 }, { "epoch": 7.8849918433931485, "grad_norm": 0.6253767013549805, "learning_rate": 0.000758652975461747, "loss": 0.168, "num_input_tokens_seen": 104419440, "step": 48335 }, { "epoch": 7.885807504078303, "grad_norm": 0.003849747823551297, "learning_rate": 0.0007585920571107677, "loss": 0.0135, "num_input_tokens_seen": 104428560, "step": 48340 }, { "epoch": 7.886623164763458, "grad_norm": 0.08811540901660919, "learning_rate": 0.0007585311335190923, "loss": 0.1504, "num_input_tokens_seen": 104438256, "step": 48345 }, { "epoch": 7.887438825448614, "grad_norm": 0.020213250070810318, "learning_rate": 0.0007584702046879554, "loss": 0.0259, "num_input_tokens_seen": 104448624, "step": 48350 }, { "epoch": 7.888254486133769, "grad_norm": 0.41347524523735046, "learning_rate": 0.0007584092706185919, "loss": 0.228, "num_input_tokens_seen": 104460528, "step": 48355 }, { "epoch": 7.8890701468189235, "grad_norm": 0.0025836778804659843, "learning_rate": 0.0007583483313122368, "loss": 0.0134, "num_input_tokens_seen": 104472016, "step": 48360 }, { "epoch": 7.889885807504078, "grad_norm": 0.012997115030884743, "learning_rate": 0.000758287386770125, "loss": 0.0448, "num_input_tokens_seen": 104482288, "step": 48365 }, { "epoch": 7.890701468189233, "grad_norm": 0.041280992329120636, "learning_rate": 0.0007582264369934915, "loss": 0.0502, "num_input_tokens_seen": 104492304, "step": 48370 }, { "epoch": 7.891517128874388, "grad_norm": 0.0031451070681214333, "learning_rate": 0.0007581654819835717, "loss": 0.1548, "num_input_tokens_seen": 104503472, "step": 48375 }, { "epoch": 7.892332789559543, "grad_norm": 0.017510058358311653, "learning_rate": 0.0007581045217416011, "loss": 0.1972, "num_input_tokens_seen": 104513872, "step": 48380 }, { "epoch": 7.8931484502446985, "grad_norm": 0.00507473386824131, "learning_rate": 0.0007580435562688148, "loss": 0.1909, "num_input_tokens_seen": 104524336, "step": 48385 }, { "epoch": 7.893964110929853, "grad_norm": 0.0073803444392979145, "learning_rate": 0.0007579825855664486, "loss": 0.1623, "num_input_tokens_seen": 104535600, "step": 48390 }, { "epoch": 7.894779771615008, "grad_norm": 0.06939529627561569, "learning_rate": 0.0007579216096357378, "loss": 0.0451, "num_input_tokens_seen": 104544688, "step": 48395 }, { "epoch": 7.895595432300163, "grad_norm": 0.2730625569820404, "learning_rate": 0.0007578606284779185, "loss": 0.2875, "num_input_tokens_seen": 104555952, "step": 48400 }, { "epoch": 7.896411092985318, "grad_norm": 0.054563023149967194, "learning_rate": 0.0007577996420942266, "loss": 0.0795, "num_input_tokens_seen": 104566928, "step": 48405 }, { "epoch": 7.897226753670473, "grad_norm": 0.2631271183490753, "learning_rate": 0.0007577386504858978, "loss": 0.116, "num_input_tokens_seen": 104578544, "step": 48410 }, { "epoch": 7.898042414355628, "grad_norm": 0.014852375723421574, "learning_rate": 0.0007576776536541682, "loss": 0.0832, "num_input_tokens_seen": 104588496, "step": 48415 }, { "epoch": 7.898858075040783, "grad_norm": 0.06987016648054123, "learning_rate": 0.0007576166516002741, "loss": 0.0754, "num_input_tokens_seen": 104599760, "step": 48420 }, { "epoch": 7.899673735725938, "grad_norm": 0.03241053223609924, "learning_rate": 0.0007575556443254518, "loss": 0.0856, "num_input_tokens_seen": 104610192, "step": 48425 }, { "epoch": 7.900489396411093, "grad_norm": 0.8125465512275696, "learning_rate": 0.0007574946318309376, "loss": 0.1169, "num_input_tokens_seen": 104621072, "step": 48430 }, { "epoch": 7.901305057096248, "grad_norm": 0.015340335667133331, "learning_rate": 0.000757433614117968, "loss": 0.0825, "num_input_tokens_seen": 104632080, "step": 48435 }, { "epoch": 7.902120717781403, "grad_norm": 0.04143735393881798, "learning_rate": 0.0007573725911877797, "loss": 0.1525, "num_input_tokens_seen": 104643792, "step": 48440 }, { "epoch": 7.902936378466558, "grad_norm": 0.01662031188607216, "learning_rate": 0.0007573115630416092, "loss": 0.0695, "num_input_tokens_seen": 104654960, "step": 48445 }, { "epoch": 7.903752039151713, "grad_norm": 0.23152291774749756, "learning_rate": 0.0007572505296806935, "loss": 0.0432, "num_input_tokens_seen": 104666320, "step": 48450 }, { "epoch": 7.904567699836868, "grad_norm": 0.20455844700336456, "learning_rate": 0.0007571894911062696, "loss": 0.0884, "num_input_tokens_seen": 104675600, "step": 48455 }, { "epoch": 7.9053833605220225, "grad_norm": 0.2332301288843155, "learning_rate": 0.0007571284473195743, "loss": 0.1064, "num_input_tokens_seen": 104686928, "step": 48460 }, { "epoch": 7.906199021207177, "grad_norm": 0.15824279189109802, "learning_rate": 0.0007570673983218448, "loss": 0.053, "num_input_tokens_seen": 104698512, "step": 48465 }, { "epoch": 7.907014681892333, "grad_norm": 0.031252142041921616, "learning_rate": 0.0007570063441143185, "loss": 0.0574, "num_input_tokens_seen": 104709552, "step": 48470 }, { "epoch": 7.907830342577488, "grad_norm": 0.01947942189872265, "learning_rate": 0.0007569452846982325, "loss": 0.0595, "num_input_tokens_seen": 104719184, "step": 48475 }, { "epoch": 7.908646003262643, "grad_norm": 0.296113520860672, "learning_rate": 0.0007568842200748243, "loss": 0.0816, "num_input_tokens_seen": 104730160, "step": 48480 }, { "epoch": 7.9094616639477975, "grad_norm": 0.636048436164856, "learning_rate": 0.0007568231502453317, "loss": 0.1006, "num_input_tokens_seen": 104740816, "step": 48485 }, { "epoch": 7.910277324632952, "grad_norm": 0.07422137260437012, "learning_rate": 0.000756762075210992, "loss": 0.144, "num_input_tokens_seen": 104750672, "step": 48490 }, { "epoch": 7.911092985318108, "grad_norm": 0.016525914892554283, "learning_rate": 0.0007567009949730431, "loss": 0.0165, "num_input_tokens_seen": 104761584, "step": 48495 }, { "epoch": 7.911908646003263, "grad_norm": 0.011553775519132614, "learning_rate": 0.000756639909532723, "loss": 0.0264, "num_input_tokens_seen": 104771376, "step": 48500 }, { "epoch": 7.912724306688418, "grad_norm": 0.047472815960645676, "learning_rate": 0.0007565788188912694, "loss": 0.0956, "num_input_tokens_seen": 104783440, "step": 48505 }, { "epoch": 7.9135399673735725, "grad_norm": 0.006028347183018923, "learning_rate": 0.0007565177230499206, "loss": 0.0177, "num_input_tokens_seen": 104794256, "step": 48510 }, { "epoch": 7.914355628058727, "grad_norm": 0.009613900445401669, "learning_rate": 0.0007564566220099147, "loss": 0.0571, "num_input_tokens_seen": 104804944, "step": 48515 }, { "epoch": 7.915171288743883, "grad_norm": 0.35099858045578003, "learning_rate": 0.00075639551577249, "loss": 0.0724, "num_input_tokens_seen": 104816656, "step": 48520 }, { "epoch": 7.915986949429038, "grad_norm": 0.02365029975771904, "learning_rate": 0.0007563344043388851, "loss": 0.0796, "num_input_tokens_seen": 104828016, "step": 48525 }, { "epoch": 7.916802610114193, "grad_norm": 0.013142816722393036, "learning_rate": 0.0007562732877103382, "loss": 0.1559, "num_input_tokens_seen": 104839696, "step": 48530 }, { "epoch": 7.917618270799347, "grad_norm": 0.00743667408823967, "learning_rate": 0.000756212165888088, "loss": 0.05, "num_input_tokens_seen": 104850576, "step": 48535 }, { "epoch": 7.918433931484502, "grad_norm": 0.012738938443362713, "learning_rate": 0.0007561510388733732, "loss": 0.1381, "num_input_tokens_seen": 104861008, "step": 48540 }, { "epoch": 7.919249592169657, "grad_norm": 0.027310442179441452, "learning_rate": 0.0007560899066674327, "loss": 0.1044, "num_input_tokens_seen": 104872144, "step": 48545 }, { "epoch": 7.920065252854813, "grad_norm": 0.2208627462387085, "learning_rate": 0.0007560287692715053, "loss": 0.1653, "num_input_tokens_seen": 104882608, "step": 48550 }, { "epoch": 7.920880913539968, "grad_norm": 0.11449895054101944, "learning_rate": 0.0007559676266868302, "loss": 0.066, "num_input_tokens_seen": 104892688, "step": 48555 }, { "epoch": 7.921696574225122, "grad_norm": 0.006951616611331701, "learning_rate": 0.0007559064789146464, "loss": 0.0307, "num_input_tokens_seen": 104902640, "step": 48560 }, { "epoch": 7.922512234910277, "grad_norm": 0.36576905846595764, "learning_rate": 0.000755845325956193, "loss": 0.2786, "num_input_tokens_seen": 104913168, "step": 48565 }, { "epoch": 7.923327895595432, "grad_norm": 0.19045992195606232, "learning_rate": 0.0007557841678127097, "loss": 0.2578, "num_input_tokens_seen": 104922896, "step": 48570 }, { "epoch": 7.924143556280587, "grad_norm": 0.009913114830851555, "learning_rate": 0.0007557230044854357, "loss": 0.0764, "num_input_tokens_seen": 104934672, "step": 48575 }, { "epoch": 7.924959216965743, "grad_norm": 0.17396552860736847, "learning_rate": 0.0007556618359756107, "loss": 0.0408, "num_input_tokens_seen": 104947120, "step": 48580 }, { "epoch": 7.925774877650897, "grad_norm": 0.07783018052577972, "learning_rate": 0.0007556006622844742, "loss": 0.102, "num_input_tokens_seen": 104957616, "step": 48585 }, { "epoch": 7.926590538336052, "grad_norm": 0.04610704258084297, "learning_rate": 0.000755539483413266, "loss": 0.0192, "num_input_tokens_seen": 104968112, "step": 48590 }, { "epoch": 7.927406199021207, "grad_norm": 0.18525907397270203, "learning_rate": 0.0007554782993632259, "loss": 0.0888, "num_input_tokens_seen": 104978672, "step": 48595 }, { "epoch": 7.928221859706362, "grad_norm": 0.21388539671897888, "learning_rate": 0.0007554171101355941, "loss": 0.1034, "num_input_tokens_seen": 104989360, "step": 48600 }, { "epoch": 7.9290375203915175, "grad_norm": 0.026249831542372704, "learning_rate": 0.0007553559157316105, "loss": 0.1253, "num_input_tokens_seen": 105000080, "step": 48605 }, { "epoch": 7.929853181076672, "grad_norm": 0.013661502860486507, "learning_rate": 0.0007552947161525153, "loss": 0.0453, "num_input_tokens_seen": 105010768, "step": 48610 }, { "epoch": 7.930668841761827, "grad_norm": 0.7273470163345337, "learning_rate": 0.0007552335113995489, "loss": 0.0613, "num_input_tokens_seen": 105022128, "step": 48615 }, { "epoch": 7.931484502446982, "grad_norm": 0.1308993399143219, "learning_rate": 0.0007551723014739515, "loss": 0.0295, "num_input_tokens_seen": 105033168, "step": 48620 }, { "epoch": 7.932300163132137, "grad_norm": 0.0197969488799572, "learning_rate": 0.0007551110863769638, "loss": 0.0514, "num_input_tokens_seen": 105043728, "step": 48625 }, { "epoch": 7.933115823817292, "grad_norm": 0.1168442964553833, "learning_rate": 0.0007550498661098263, "loss": 0.0743, "num_input_tokens_seen": 105053008, "step": 48630 }, { "epoch": 7.933931484502447, "grad_norm": 0.063286192715168, "learning_rate": 0.0007549886406737796, "loss": 0.0991, "num_input_tokens_seen": 105063312, "step": 48635 }, { "epoch": 7.934747145187602, "grad_norm": 0.01165692787617445, "learning_rate": 0.0007549274100700647, "loss": 0.2042, "num_input_tokens_seen": 105073712, "step": 48640 }, { "epoch": 7.935562805872757, "grad_norm": 0.2161632776260376, "learning_rate": 0.0007548661742999225, "loss": 0.1167, "num_input_tokens_seen": 105084560, "step": 48645 }, { "epoch": 7.936378466557912, "grad_norm": 0.03843540698289871, "learning_rate": 0.0007548049333645939, "loss": 0.0233, "num_input_tokens_seen": 105095984, "step": 48650 }, { "epoch": 7.937194127243067, "grad_norm": 0.010381447151303291, "learning_rate": 0.00075474368726532, "loss": 0.0206, "num_input_tokens_seen": 105107024, "step": 48655 }, { "epoch": 7.938009787928221, "grad_norm": 0.16924302279949188, "learning_rate": 0.0007546824360033421, "loss": 0.1278, "num_input_tokens_seen": 105118448, "step": 48660 }, { "epoch": 7.938825448613377, "grad_norm": 0.15478937327861786, "learning_rate": 0.0007546211795799016, "loss": 0.1476, "num_input_tokens_seen": 105128592, "step": 48665 }, { "epoch": 7.939641109298532, "grad_norm": 0.015761559829115868, "learning_rate": 0.0007545599179962399, "loss": 0.0412, "num_input_tokens_seen": 105139888, "step": 48670 }, { "epoch": 7.940456769983687, "grad_norm": 0.06137595698237419, "learning_rate": 0.0007544986512535985, "loss": 0.2636, "num_input_tokens_seen": 105150192, "step": 48675 }, { "epoch": 7.941272430668842, "grad_norm": 0.15942196547985077, "learning_rate": 0.0007544373793532191, "loss": 0.187, "num_input_tokens_seen": 105160912, "step": 48680 }, { "epoch": 7.942088091353996, "grad_norm": 0.036675989627838135, "learning_rate": 0.0007543761022963436, "loss": 0.0901, "num_input_tokens_seen": 105171792, "step": 48685 }, { "epoch": 7.942903752039152, "grad_norm": 0.020440716296434402, "learning_rate": 0.0007543148200842134, "loss": 0.0202, "num_input_tokens_seen": 105183408, "step": 48690 }, { "epoch": 7.943719412724307, "grad_norm": 0.09116707742214203, "learning_rate": 0.0007542535327180708, "loss": 0.0878, "num_input_tokens_seen": 105194736, "step": 48695 }, { "epoch": 7.944535073409462, "grad_norm": 0.0569356270134449, "learning_rate": 0.0007541922401991579, "loss": 0.1086, "num_input_tokens_seen": 105205008, "step": 48700 }, { "epoch": 7.945350734094617, "grad_norm": 0.008786008693277836, "learning_rate": 0.0007541309425287168, "loss": 0.0645, "num_input_tokens_seen": 105216592, "step": 48705 }, { "epoch": 7.946166394779771, "grad_norm": 0.1885114312171936, "learning_rate": 0.0007540696397079898, "loss": 0.1041, "num_input_tokens_seen": 105227504, "step": 48710 }, { "epoch": 7.946982055464927, "grad_norm": 0.00608594436198473, "learning_rate": 0.0007540083317382192, "loss": 0.0876, "num_input_tokens_seen": 105237904, "step": 48715 }, { "epoch": 7.947797716150082, "grad_norm": 0.08031502366065979, "learning_rate": 0.0007539470186206474, "loss": 0.0259, "num_input_tokens_seen": 105247504, "step": 48720 }, { "epoch": 7.948613376835237, "grad_norm": 0.026645051315426826, "learning_rate": 0.0007538857003565174, "loss": 0.0697, "num_input_tokens_seen": 105256400, "step": 48725 }, { "epoch": 7.9494290375203915, "grad_norm": 0.015550825744867325, "learning_rate": 0.0007538243769470714, "loss": 0.0345, "num_input_tokens_seen": 105267440, "step": 48730 }, { "epoch": 7.950244698205546, "grad_norm": 0.03166070580482483, "learning_rate": 0.0007537630483935524, "loss": 0.0417, "num_input_tokens_seen": 105278928, "step": 48735 }, { "epoch": 7.951060358890701, "grad_norm": 0.2161301076412201, "learning_rate": 0.0007537017146972033, "loss": 0.1683, "num_input_tokens_seen": 105290320, "step": 48740 }, { "epoch": 7.951876019575856, "grad_norm": 0.04890443757176399, "learning_rate": 0.0007536403758592672, "loss": 0.0214, "num_input_tokens_seen": 105302320, "step": 48745 }, { "epoch": 7.952691680261012, "grad_norm": 0.0032995252404361963, "learning_rate": 0.000753579031880987, "loss": 0.0257, "num_input_tokens_seen": 105312336, "step": 48750 }, { "epoch": 7.9535073409461665, "grad_norm": 0.04470792040228844, "learning_rate": 0.0007535176827636061, "loss": 0.0997, "num_input_tokens_seen": 105323216, "step": 48755 }, { "epoch": 7.954323001631321, "grad_norm": 0.03811972588300705, "learning_rate": 0.0007534563285083678, "loss": 0.0638, "num_input_tokens_seen": 105333680, "step": 48760 }, { "epoch": 7.955138662316476, "grad_norm": 0.004735193680971861, "learning_rate": 0.0007533949691165152, "loss": 0.1082, "num_input_tokens_seen": 105344624, "step": 48765 }, { "epoch": 7.955954323001631, "grad_norm": 0.02903125435113907, "learning_rate": 0.0007533336045892925, "loss": 0.1382, "num_input_tokens_seen": 105355568, "step": 48770 }, { "epoch": 7.956769983686787, "grad_norm": 0.06740237772464752, "learning_rate": 0.0007532722349279426, "loss": 0.1862, "num_input_tokens_seen": 105365328, "step": 48775 }, { "epoch": 7.9575856443719415, "grad_norm": 0.3388029932975769, "learning_rate": 0.0007532108601337097, "loss": 0.1106, "num_input_tokens_seen": 105376144, "step": 48780 }, { "epoch": 7.958401305057096, "grad_norm": 0.09516182541847229, "learning_rate": 0.0007531494802078376, "loss": 0.093, "num_input_tokens_seen": 105386928, "step": 48785 }, { "epoch": 7.959216965742251, "grad_norm": 0.023860549554228783, "learning_rate": 0.00075308809515157, "loss": 0.062, "num_input_tokens_seen": 105398352, "step": 48790 }, { "epoch": 7.960032626427406, "grad_norm": 0.016818996518850327, "learning_rate": 0.0007530267049661511, "loss": 0.0407, "num_input_tokens_seen": 105408528, "step": 48795 }, { "epoch": 7.960848287112562, "grad_norm": 0.01870405301451683, "learning_rate": 0.000752965309652825, "loss": 0.0232, "num_input_tokens_seen": 105418224, "step": 48800 }, { "epoch": 7.9616639477977165, "grad_norm": 0.02412063628435135, "learning_rate": 0.0007529039092128361, "loss": 0.04, "num_input_tokens_seen": 105429072, "step": 48805 }, { "epoch": 7.962479608482871, "grad_norm": 0.24175409972667694, "learning_rate": 0.0007528425036474287, "loss": 0.1169, "num_input_tokens_seen": 105440240, "step": 48810 }, { "epoch": 7.963295269168026, "grad_norm": 0.006044411100447178, "learning_rate": 0.000752781092957847, "loss": 0.0621, "num_input_tokens_seen": 105450352, "step": 48815 }, { "epoch": 7.964110929853181, "grad_norm": 0.010947907343506813, "learning_rate": 0.000752719677145336, "loss": 0.167, "num_input_tokens_seen": 105461072, "step": 48820 }, { "epoch": 7.964926590538336, "grad_norm": 0.04575859382748604, "learning_rate": 0.0007526582562111399, "loss": 0.0721, "num_input_tokens_seen": 105471440, "step": 48825 }, { "epoch": 7.9657422512234906, "grad_norm": 0.005048078019171953, "learning_rate": 0.0007525968301565038, "loss": 0.0306, "num_input_tokens_seen": 105480816, "step": 48830 }, { "epoch": 7.966557911908646, "grad_norm": 0.31248384714126587, "learning_rate": 0.0007525353989826726, "loss": 0.2006, "num_input_tokens_seen": 105490800, "step": 48835 }, { "epoch": 7.967373572593801, "grad_norm": 0.012551373802125454, "learning_rate": 0.000752473962690891, "loss": 0.0381, "num_input_tokens_seen": 105501104, "step": 48840 }, { "epoch": 7.968189233278956, "grad_norm": 0.11429378390312195, "learning_rate": 0.0007524125212824044, "loss": 0.0702, "num_input_tokens_seen": 105510128, "step": 48845 }, { "epoch": 7.969004893964111, "grad_norm": 0.17006027698516846, "learning_rate": 0.0007523510747584578, "loss": 0.1937, "num_input_tokens_seen": 105520976, "step": 48850 }, { "epoch": 7.9698205546492655, "grad_norm": 0.07592868059873581, "learning_rate": 0.0007522896231202967, "loss": 0.0343, "num_input_tokens_seen": 105532400, "step": 48855 }, { "epoch": 7.970636215334421, "grad_norm": 0.061747901141643524, "learning_rate": 0.0007522281663691661, "loss": 0.0891, "num_input_tokens_seen": 105543184, "step": 48860 }, { "epoch": 7.971451876019576, "grad_norm": 0.061240021139383316, "learning_rate": 0.0007521667045063119, "loss": 0.0517, "num_input_tokens_seen": 105553680, "step": 48865 }, { "epoch": 7.972267536704731, "grad_norm": 0.15075771510601044, "learning_rate": 0.0007521052375329793, "loss": 0.0457, "num_input_tokens_seen": 105563472, "step": 48870 }, { "epoch": 7.973083197389886, "grad_norm": 0.2989932596683502, "learning_rate": 0.0007520437654504144, "loss": 0.1824, "num_input_tokens_seen": 105574224, "step": 48875 }, { "epoch": 7.9738988580750405, "grad_norm": 0.05400446057319641, "learning_rate": 0.0007519822882598629, "loss": 0.0226, "num_input_tokens_seen": 105585712, "step": 48880 }, { "epoch": 7.974714518760196, "grad_norm": 0.08582861721515656, "learning_rate": 0.0007519208059625707, "loss": 0.1254, "num_input_tokens_seen": 105596304, "step": 48885 }, { "epoch": 7.975530179445351, "grad_norm": 0.1772875189781189, "learning_rate": 0.0007518593185597837, "loss": 0.1296, "num_input_tokens_seen": 105607600, "step": 48890 }, { "epoch": 7.976345840130506, "grad_norm": 0.039883363991975784, "learning_rate": 0.000751797826052748, "loss": 0.046, "num_input_tokens_seen": 105618192, "step": 48895 }, { "epoch": 7.977161500815661, "grad_norm": 0.03105618990957737, "learning_rate": 0.0007517363284427101, "loss": 0.0825, "num_input_tokens_seen": 105629392, "step": 48900 }, { "epoch": 7.9779771615008155, "grad_norm": 0.16668418049812317, "learning_rate": 0.0007516748257309162, "loss": 0.0844, "num_input_tokens_seen": 105639056, "step": 48905 }, { "epoch": 7.97879282218597, "grad_norm": 0.039331886917352676, "learning_rate": 0.0007516133179186125, "loss": 0.129, "num_input_tokens_seen": 105651024, "step": 48910 }, { "epoch": 7.979608482871125, "grad_norm": 0.34375035762786865, "learning_rate": 0.0007515518050070458, "loss": 0.1145, "num_input_tokens_seen": 105662128, "step": 48915 }, { "epoch": 7.980424143556281, "grad_norm": 0.014578046277165413, "learning_rate": 0.0007514902869974627, "loss": 0.0093, "num_input_tokens_seen": 105673552, "step": 48920 }, { "epoch": 7.981239804241436, "grad_norm": 0.005937238223850727, "learning_rate": 0.0007514287638911099, "loss": 0.0427, "num_input_tokens_seen": 105684464, "step": 48925 }, { "epoch": 7.9820554649265905, "grad_norm": 0.024509524926543236, "learning_rate": 0.0007513672356892342, "loss": 0.0338, "num_input_tokens_seen": 105696272, "step": 48930 }, { "epoch": 7.982871125611745, "grad_norm": 0.174819678068161, "learning_rate": 0.0007513057023930825, "loss": 0.1251, "num_input_tokens_seen": 105708208, "step": 48935 }, { "epoch": 7.9836867862969, "grad_norm": 0.18815754354000092, "learning_rate": 0.000751244164003902, "loss": 0.1564, "num_input_tokens_seen": 105718928, "step": 48940 }, { "epoch": 7.984502446982056, "grad_norm": 0.17951254546642303, "learning_rate": 0.00075118262052294, "loss": 0.1253, "num_input_tokens_seen": 105729296, "step": 48945 }, { "epoch": 7.985318107667211, "grad_norm": 0.012976273894309998, "learning_rate": 0.0007511210719514432, "loss": 0.0199, "num_input_tokens_seen": 105739664, "step": 48950 }, { "epoch": 7.986133768352365, "grad_norm": 0.009064053185284138, "learning_rate": 0.0007510595182906595, "loss": 0.025, "num_input_tokens_seen": 105749872, "step": 48955 }, { "epoch": 7.98694942903752, "grad_norm": 0.16349948942661285, "learning_rate": 0.0007509979595418362, "loss": 0.0693, "num_input_tokens_seen": 105761008, "step": 48960 }, { "epoch": 7.987765089722675, "grad_norm": 0.149709090590477, "learning_rate": 0.0007509363957062207, "loss": 0.076, "num_input_tokens_seen": 105771824, "step": 48965 }, { "epoch": 7.988580750407831, "grad_norm": 0.3649735152721405, "learning_rate": 0.0007508748267850609, "loss": 0.4246, "num_input_tokens_seen": 105783056, "step": 48970 }, { "epoch": 7.989396411092986, "grad_norm": 0.031553879380226135, "learning_rate": 0.0007508132527796043, "loss": 0.1724, "num_input_tokens_seen": 105794192, "step": 48975 }, { "epoch": 7.99021207177814, "grad_norm": 0.10740819573402405, "learning_rate": 0.0007507516736910992, "loss": 0.2098, "num_input_tokens_seen": 105804528, "step": 48980 }, { "epoch": 7.991027732463295, "grad_norm": 0.011156396940350533, "learning_rate": 0.0007506900895207932, "loss": 0.0543, "num_input_tokens_seen": 105815824, "step": 48985 }, { "epoch": 7.99184339314845, "grad_norm": 0.008487922139465809, "learning_rate": 0.0007506285002699346, "loss": 0.009, "num_input_tokens_seen": 105827440, "step": 48990 }, { "epoch": 7.992659053833605, "grad_norm": 0.12116503715515137, "learning_rate": 0.0007505669059397715, "loss": 0.0424, "num_input_tokens_seen": 105838288, "step": 48995 }, { "epoch": 7.993474714518761, "grad_norm": 0.015823280438780785, "learning_rate": 0.0007505053065315521, "loss": 0.1182, "num_input_tokens_seen": 105849584, "step": 49000 }, { "epoch": 7.994290375203915, "grad_norm": 0.0266315545886755, "learning_rate": 0.0007504437020465248, "loss": 0.0419, "num_input_tokens_seen": 105861232, "step": 49005 }, { "epoch": 7.99510603588907, "grad_norm": 0.005523244384676218, "learning_rate": 0.0007503820924859382, "loss": 0.0389, "num_input_tokens_seen": 105871600, "step": 49010 }, { "epoch": 7.995921696574225, "grad_norm": 0.43169787526130676, "learning_rate": 0.000750320477851041, "loss": 0.1195, "num_input_tokens_seen": 105882992, "step": 49015 }, { "epoch": 7.99673735725938, "grad_norm": 0.015888521447777748, "learning_rate": 0.0007502588581430817, "loss": 0.0675, "num_input_tokens_seen": 105893360, "step": 49020 }, { "epoch": 7.997553017944535, "grad_norm": 0.12559035420417786, "learning_rate": 0.0007501972333633091, "loss": 0.0527, "num_input_tokens_seen": 105903728, "step": 49025 }, { "epoch": 7.99836867862969, "grad_norm": 0.19304737448692322, "learning_rate": 0.0007501356035129723, "loss": 0.1411, "num_input_tokens_seen": 105915600, "step": 49030 }, { "epoch": 7.999184339314845, "grad_norm": 0.105661541223526, "learning_rate": 0.0007500739685933201, "loss": 0.0734, "num_input_tokens_seen": 105925808, "step": 49035 }, { "epoch": 8.0, "grad_norm": 0.05994661524891853, "learning_rate": 0.0007500123286056018, "loss": 0.0358, "num_input_tokens_seen": 105934480, "step": 49040 }, { "epoch": 8.0, "eval_loss": 0.12728162109851837, "eval_runtime": 103.6319, "eval_samples_per_second": 26.295, "eval_steps_per_second": 6.581, "num_input_tokens_seen": 105934480, "step": 49040 }, { "epoch": 8.000815660685156, "grad_norm": 0.014739147387444973, "learning_rate": 0.0007499506835510663, "loss": 0.032, "num_input_tokens_seen": 105946192, "step": 49045 }, { "epoch": 8.00163132137031, "grad_norm": 0.07827870547771454, "learning_rate": 0.0007498890334309633, "loss": 0.0517, "num_input_tokens_seen": 105956304, "step": 49050 }, { "epoch": 8.002446982055465, "grad_norm": 0.0049188388511538506, "learning_rate": 0.000749827378246542, "loss": 0.0262, "num_input_tokens_seen": 105968240, "step": 49055 }, { "epoch": 8.00326264274062, "grad_norm": 0.1784546822309494, "learning_rate": 0.0007497657179990518, "loss": 0.0955, "num_input_tokens_seen": 105980080, "step": 49060 }, { "epoch": 8.004078303425775, "grad_norm": 0.037165481597185135, "learning_rate": 0.0007497040526897426, "loss": 0.1378, "num_input_tokens_seen": 105990224, "step": 49065 }, { "epoch": 8.00489396411093, "grad_norm": 0.3894607424736023, "learning_rate": 0.0007496423823198639, "loss": 0.0841, "num_input_tokens_seen": 106000944, "step": 49070 }, { "epoch": 8.005709624796085, "grad_norm": 0.008682585321366787, "learning_rate": 0.0007495807068906657, "loss": 0.0254, "num_input_tokens_seen": 106012368, "step": 49075 }, { "epoch": 8.00652528548124, "grad_norm": 0.033180102705955505, "learning_rate": 0.0007495190264033978, "loss": 0.1062, "num_input_tokens_seen": 106023696, "step": 49080 }, { "epoch": 8.007340946166394, "grad_norm": 0.00399895990267396, "learning_rate": 0.0007494573408593103, "loss": 0.1288, "num_input_tokens_seen": 106033840, "step": 49085 }, { "epoch": 8.00815660685155, "grad_norm": 0.07352360337972641, "learning_rate": 0.0007493956502596533, "loss": 0.034, "num_input_tokens_seen": 106044144, "step": 49090 }, { "epoch": 8.008972267536704, "grad_norm": 0.01003354787826538, "learning_rate": 0.0007493339546056772, "loss": 0.0388, "num_input_tokens_seen": 106054800, "step": 49095 }, { "epoch": 8.00978792822186, "grad_norm": 0.03879820555448532, "learning_rate": 0.0007492722538986321, "loss": 0.0408, "num_input_tokens_seen": 106065392, "step": 49100 }, { "epoch": 8.010603588907015, "grad_norm": 0.053220439702272415, "learning_rate": 0.0007492105481397686, "loss": 0.0953, "num_input_tokens_seen": 106076528, "step": 49105 }, { "epoch": 8.01141924959217, "grad_norm": 0.059041813015937805, "learning_rate": 0.0007491488373303373, "loss": 0.0277, "num_input_tokens_seen": 106087664, "step": 49110 }, { "epoch": 8.012234910277325, "grad_norm": 0.011349274776875973, "learning_rate": 0.0007490871214715885, "loss": 0.0122, "num_input_tokens_seen": 106099184, "step": 49115 }, { "epoch": 8.013050570962479, "grad_norm": 0.057813651859760284, "learning_rate": 0.0007490254005647735, "loss": 0.0269, "num_input_tokens_seen": 106109776, "step": 49120 }, { "epoch": 8.013866231647635, "grad_norm": 0.017140861600637436, "learning_rate": 0.0007489636746111426, "loss": 0.0241, "num_input_tokens_seen": 106120464, "step": 49125 }, { "epoch": 8.01468189233279, "grad_norm": 0.015432114712893963, "learning_rate": 0.0007489019436119471, "loss": 0.0379, "num_input_tokens_seen": 106131568, "step": 49130 }, { "epoch": 8.015497553017944, "grad_norm": 0.0168614462018013, "learning_rate": 0.0007488402075684379, "loss": 0.017, "num_input_tokens_seen": 106142416, "step": 49135 }, { "epoch": 8.0163132137031, "grad_norm": 0.004921023268252611, "learning_rate": 0.0007487784664818662, "loss": 0.0197, "num_input_tokens_seen": 106152592, "step": 49140 }, { "epoch": 8.017128874388254, "grad_norm": 0.25885462760925293, "learning_rate": 0.0007487167203534834, "loss": 0.1524, "num_input_tokens_seen": 106162992, "step": 49145 }, { "epoch": 8.01794453507341, "grad_norm": 0.09149748086929321, "learning_rate": 0.0007486549691845405, "loss": 0.0246, "num_input_tokens_seen": 106174736, "step": 49150 }, { "epoch": 8.018760195758565, "grad_norm": 0.004045677836984396, "learning_rate": 0.0007485932129762895, "loss": 0.1091, "num_input_tokens_seen": 106185936, "step": 49155 }, { "epoch": 8.01957585644372, "grad_norm": 0.013637539930641651, "learning_rate": 0.0007485314517299815, "loss": 0.0353, "num_input_tokens_seen": 106196688, "step": 49160 }, { "epoch": 8.020391517128875, "grad_norm": 0.02192877233028412, "learning_rate": 0.0007484696854468684, "loss": 0.0796, "num_input_tokens_seen": 106206768, "step": 49165 }, { "epoch": 8.021207177814029, "grad_norm": 0.0217854306101799, "learning_rate": 0.0007484079141282018, "loss": 0.0482, "num_input_tokens_seen": 106216208, "step": 49170 }, { "epoch": 8.022022838499185, "grad_norm": 0.1591954380273819, "learning_rate": 0.0007483461377752339, "loss": 0.0885, "num_input_tokens_seen": 106226704, "step": 49175 }, { "epoch": 8.022838499184338, "grad_norm": 0.2401871681213379, "learning_rate": 0.0007482843563892164, "loss": 0.1582, "num_input_tokens_seen": 106238640, "step": 49180 }, { "epoch": 8.023654159869494, "grad_norm": 0.12886598706245422, "learning_rate": 0.0007482225699714014, "loss": 0.0273, "num_input_tokens_seen": 106249200, "step": 49185 }, { "epoch": 8.02446982055465, "grad_norm": 0.0026838481426239014, "learning_rate": 0.0007481607785230411, "loss": 0.0509, "num_input_tokens_seen": 106259920, "step": 49190 }, { "epoch": 8.025285481239804, "grad_norm": 0.07842859625816345, "learning_rate": 0.0007480989820453878, "loss": 0.1143, "num_input_tokens_seen": 106271088, "step": 49195 }, { "epoch": 8.02610114192496, "grad_norm": 0.1295957714319229, "learning_rate": 0.0007480371805396941, "loss": 0.1251, "num_input_tokens_seen": 106281488, "step": 49200 }, { "epoch": 8.026916802610113, "grad_norm": 0.005239915568381548, "learning_rate": 0.0007479753740072121, "loss": 0.0062, "num_input_tokens_seen": 106291920, "step": 49205 }, { "epoch": 8.02773246329527, "grad_norm": 0.02723226509988308, "learning_rate": 0.0007479135624491946, "loss": 0.0365, "num_input_tokens_seen": 106302896, "step": 49210 }, { "epoch": 8.028548123980425, "grad_norm": 0.19069083034992218, "learning_rate": 0.0007478517458668943, "loss": 0.0748, "num_input_tokens_seen": 106313968, "step": 49215 }, { "epoch": 8.029363784665579, "grad_norm": 0.07725605368614197, "learning_rate": 0.0007477899242615639, "loss": 0.0185, "num_input_tokens_seen": 106325328, "step": 49220 }, { "epoch": 8.030179445350734, "grad_norm": 0.34236836433410645, "learning_rate": 0.0007477280976344563, "loss": 0.0838, "num_input_tokens_seen": 106336720, "step": 49225 }, { "epoch": 8.030995106035888, "grad_norm": 0.01022479310631752, "learning_rate": 0.0007476662659868246, "loss": 0.1021, "num_input_tokens_seen": 106347888, "step": 49230 }, { "epoch": 8.031810766721044, "grad_norm": 0.0006199624622240663, "learning_rate": 0.0007476044293199218, "loss": 0.009, "num_input_tokens_seen": 106359440, "step": 49235 }, { "epoch": 8.0326264274062, "grad_norm": 0.025253375992178917, "learning_rate": 0.0007475425876350011, "loss": 0.0687, "num_input_tokens_seen": 106371152, "step": 49240 }, { "epoch": 8.033442088091354, "grad_norm": 0.00817878171801567, "learning_rate": 0.000747480740933316, "loss": 0.0207, "num_input_tokens_seen": 106382640, "step": 49245 }, { "epoch": 8.03425774877651, "grad_norm": 0.029934866353869438, "learning_rate": 0.0007474188892161196, "loss": 0.0692, "num_input_tokens_seen": 106393712, "step": 49250 }, { "epoch": 8.035073409461663, "grad_norm": 0.3806656301021576, "learning_rate": 0.0007473570324846656, "loss": 0.0402, "num_input_tokens_seen": 106403664, "step": 49255 }, { "epoch": 8.035889070146819, "grad_norm": 0.039969995617866516, "learning_rate": 0.0007472951707402074, "loss": 0.0082, "num_input_tokens_seen": 106415664, "step": 49260 }, { "epoch": 8.036704730831975, "grad_norm": 0.1323152780532837, "learning_rate": 0.0007472333039839989, "loss": 0.0612, "num_input_tokens_seen": 106426672, "step": 49265 }, { "epoch": 8.037520391517129, "grad_norm": 0.035253897309303284, "learning_rate": 0.000747171432217294, "loss": 0.0171, "num_input_tokens_seen": 106437296, "step": 49270 }, { "epoch": 8.038336052202284, "grad_norm": 0.08336462080478668, "learning_rate": 0.0007471095554413463, "loss": 0.0331, "num_input_tokens_seen": 106447024, "step": 49275 }, { "epoch": 8.039151712887438, "grad_norm": 0.03029344044625759, "learning_rate": 0.0007470476736574102, "loss": 0.0719, "num_input_tokens_seen": 106456976, "step": 49280 }, { "epoch": 8.039967373572594, "grad_norm": 0.3142164647579193, "learning_rate": 0.0007469857868667393, "loss": 0.2818, "num_input_tokens_seen": 106467888, "step": 49285 }, { "epoch": 8.040783034257748, "grad_norm": 0.02353101409971714, "learning_rate": 0.0007469238950705883, "loss": 0.0155, "num_input_tokens_seen": 106479024, "step": 49290 }, { "epoch": 8.041598694942904, "grad_norm": 0.003246279200538993, "learning_rate": 0.0007468619982702112, "loss": 0.005, "num_input_tokens_seen": 106488912, "step": 49295 }, { "epoch": 8.04241435562806, "grad_norm": 0.33486485481262207, "learning_rate": 0.0007468000964668625, "loss": 0.0731, "num_input_tokens_seen": 106500336, "step": 49300 }, { "epoch": 8.043230016313213, "grad_norm": 0.044109445065259933, "learning_rate": 0.0007467381896617968, "loss": 0.1144, "num_input_tokens_seen": 106511280, "step": 49305 }, { "epoch": 8.044045676998369, "grad_norm": 0.2685130536556244, "learning_rate": 0.0007466762778562687, "loss": 0.1703, "num_input_tokens_seen": 106521776, "step": 49310 }, { "epoch": 8.044861337683523, "grad_norm": 0.07512059807777405, "learning_rate": 0.000746614361051533, "loss": 0.1613, "num_input_tokens_seen": 106532976, "step": 49315 }, { "epoch": 8.045676998368679, "grad_norm": 0.1503468155860901, "learning_rate": 0.0007465524392488443, "loss": 0.1, "num_input_tokens_seen": 106543792, "step": 49320 }, { "epoch": 8.046492659053834, "grad_norm": 0.14477421343326569, "learning_rate": 0.0007464905124494578, "loss": 0.23, "num_input_tokens_seen": 106555632, "step": 49325 }, { "epoch": 8.047308319738988, "grad_norm": 0.20425103604793549, "learning_rate": 0.0007464285806546283, "loss": 0.07, "num_input_tokens_seen": 106566608, "step": 49330 }, { "epoch": 8.048123980424144, "grad_norm": 0.007286466658115387, "learning_rate": 0.0007463666438656109, "loss": 0.0461, "num_input_tokens_seen": 106576976, "step": 49335 }, { "epoch": 8.048939641109298, "grad_norm": 0.007894366048276424, "learning_rate": 0.000746304702083661, "loss": 0.0204, "num_input_tokens_seen": 106586896, "step": 49340 }, { "epoch": 8.049755301794454, "grad_norm": 0.044826071709394455, "learning_rate": 0.0007462427553100339, "loss": 0.0876, "num_input_tokens_seen": 106595760, "step": 49345 }, { "epoch": 8.05057096247961, "grad_norm": 0.09864915162324905, "learning_rate": 0.0007461808035459848, "loss": 0.0103, "num_input_tokens_seen": 106606480, "step": 49350 }, { "epoch": 8.051386623164763, "grad_norm": 0.08167116343975067, "learning_rate": 0.0007461188467927695, "loss": 0.023, "num_input_tokens_seen": 106616720, "step": 49355 }, { "epoch": 8.052202283849919, "grad_norm": 0.0640343576669693, "learning_rate": 0.0007460568850516436, "loss": 0.0279, "num_input_tokens_seen": 106626512, "step": 49360 }, { "epoch": 8.053017944535073, "grad_norm": 0.39939889311790466, "learning_rate": 0.0007459949183238627, "loss": 0.0461, "num_input_tokens_seen": 106637296, "step": 49365 }, { "epoch": 8.053833605220229, "grad_norm": 0.036775026470422745, "learning_rate": 0.0007459329466106829, "loss": 0.0943, "num_input_tokens_seen": 106647888, "step": 49370 }, { "epoch": 8.054649265905383, "grad_norm": 0.20606455206871033, "learning_rate": 0.0007458709699133597, "loss": 0.1257, "num_input_tokens_seen": 106659408, "step": 49375 }, { "epoch": 8.055464926590538, "grad_norm": 0.007144047878682613, "learning_rate": 0.0007458089882331495, "loss": 0.079, "num_input_tokens_seen": 106670576, "step": 49380 }, { "epoch": 8.056280587275694, "grad_norm": 0.4637611210346222, "learning_rate": 0.0007457470015713085, "loss": 0.0459, "num_input_tokens_seen": 106681040, "step": 49385 }, { "epoch": 8.057096247960848, "grad_norm": 0.00918448157608509, "learning_rate": 0.0007456850099290927, "loss": 0.0184, "num_input_tokens_seen": 106691376, "step": 49390 }, { "epoch": 8.057911908646004, "grad_norm": 0.09710443019866943, "learning_rate": 0.0007456230133077583, "loss": 0.012, "num_input_tokens_seen": 106703312, "step": 49395 }, { "epoch": 8.058727569331158, "grad_norm": 0.004849865101277828, "learning_rate": 0.0007455610117085618, "loss": 0.0331, "num_input_tokens_seen": 106713328, "step": 49400 }, { "epoch": 8.059543230016313, "grad_norm": 0.22771984338760376, "learning_rate": 0.0007454990051327602, "loss": 0.0268, "num_input_tokens_seen": 106723984, "step": 49405 }, { "epoch": 8.060358890701469, "grad_norm": 0.018330955877900124, "learning_rate": 0.0007454369935816098, "loss": 0.0197, "num_input_tokens_seen": 106735216, "step": 49410 }, { "epoch": 8.061174551386623, "grad_norm": 0.02104993537068367, "learning_rate": 0.0007453749770563673, "loss": 0.1355, "num_input_tokens_seen": 106746160, "step": 49415 }, { "epoch": 8.061990212071779, "grad_norm": 0.0019923443906009197, "learning_rate": 0.0007453129555582896, "loss": 0.0202, "num_input_tokens_seen": 106756912, "step": 49420 }, { "epoch": 8.062805872756933, "grad_norm": 0.019411850720643997, "learning_rate": 0.0007452509290886336, "loss": 0.0215, "num_input_tokens_seen": 106768560, "step": 49425 }, { "epoch": 8.063621533442088, "grad_norm": 0.09938501566648483, "learning_rate": 0.0007451888976486565, "loss": 0.0326, "num_input_tokens_seen": 106780656, "step": 49430 }, { "epoch": 8.064437194127244, "grad_norm": 0.30560827255249023, "learning_rate": 0.0007451268612396154, "loss": 0.0346, "num_input_tokens_seen": 106789968, "step": 49435 }, { "epoch": 8.065252854812398, "grad_norm": 0.002016686834394932, "learning_rate": 0.0007450648198627673, "loss": 0.0084, "num_input_tokens_seen": 106800144, "step": 49440 }, { "epoch": 8.066068515497554, "grad_norm": 0.04060761258006096, "learning_rate": 0.0007450027735193699, "loss": 0.0354, "num_input_tokens_seen": 106811248, "step": 49445 }, { "epoch": 8.066884176182707, "grad_norm": 0.004460466559976339, "learning_rate": 0.0007449407222106804, "loss": 0.0678, "num_input_tokens_seen": 106821136, "step": 49450 }, { "epoch": 8.067699836867863, "grad_norm": 0.006339784245938063, "learning_rate": 0.0007448786659379565, "loss": 0.1027, "num_input_tokens_seen": 106832368, "step": 49455 }, { "epoch": 8.068515497553017, "grad_norm": 0.012937436811625957, "learning_rate": 0.0007448166047024556, "loss": 0.1329, "num_input_tokens_seen": 106842096, "step": 49460 }, { "epoch": 8.069331158238173, "grad_norm": 0.0679636299610138, "learning_rate": 0.0007447545385054358, "loss": 0.0163, "num_input_tokens_seen": 106853200, "step": 49465 }, { "epoch": 8.070146818923329, "grad_norm": 0.0034860328305512667, "learning_rate": 0.0007446924673481548, "loss": 0.0071, "num_input_tokens_seen": 106865104, "step": 49470 }, { "epoch": 8.070962479608482, "grad_norm": 0.011176558211445808, "learning_rate": 0.0007446303912318705, "loss": 0.0941, "num_input_tokens_seen": 106876560, "step": 49475 }, { "epoch": 8.071778140293638, "grad_norm": 0.38480356335639954, "learning_rate": 0.000744568310157841, "loss": 0.0225, "num_input_tokens_seen": 106888176, "step": 49480 }, { "epoch": 8.072593800978792, "grad_norm": 0.00534833362326026, "learning_rate": 0.0007445062241273244, "loss": 0.1502, "num_input_tokens_seen": 106899696, "step": 49485 }, { "epoch": 8.073409461663948, "grad_norm": 0.05332694947719574, "learning_rate": 0.000744444133141579, "loss": 0.0184, "num_input_tokens_seen": 106910480, "step": 49490 }, { "epoch": 8.074225122349104, "grad_norm": 0.007500792853534222, "learning_rate": 0.0007443820372018631, "loss": 0.007, "num_input_tokens_seen": 106921072, "step": 49495 }, { "epoch": 8.075040783034257, "grad_norm": 0.03552490100264549, "learning_rate": 0.0007443199363094353, "loss": 0.0176, "num_input_tokens_seen": 106933392, "step": 49500 }, { "epoch": 8.075856443719413, "grad_norm": 0.3780343234539032, "learning_rate": 0.0007442578304655541, "loss": 0.0829, "num_input_tokens_seen": 106942928, "step": 49505 }, { "epoch": 8.076672104404567, "grad_norm": 0.041732337325811386, "learning_rate": 0.0007441957196714778, "loss": 0.0128, "num_input_tokens_seen": 106952912, "step": 49510 }, { "epoch": 8.077487765089723, "grad_norm": 0.0023167598992586136, "learning_rate": 0.0007441336039284656, "loss": 0.0128, "num_input_tokens_seen": 106962704, "step": 49515 }, { "epoch": 8.078303425774878, "grad_norm": 0.2755075693130493, "learning_rate": 0.0007440714832377764, "loss": 0.1491, "num_input_tokens_seen": 106973008, "step": 49520 }, { "epoch": 8.079119086460032, "grad_norm": 0.05139836296439171, "learning_rate": 0.0007440093576006688, "loss": 0.0638, "num_input_tokens_seen": 106983632, "step": 49525 }, { "epoch": 8.079934747145188, "grad_norm": 0.3967004120349884, "learning_rate": 0.000743947227018402, "loss": 0.2202, "num_input_tokens_seen": 106995152, "step": 49530 }, { "epoch": 8.080750407830342, "grad_norm": 0.1344999372959137, "learning_rate": 0.0007438850914922352, "loss": 0.0648, "num_input_tokens_seen": 107005936, "step": 49535 }, { "epoch": 8.081566068515498, "grad_norm": 0.05749267339706421, "learning_rate": 0.0007438229510234278, "loss": 0.015, "num_input_tokens_seen": 107014960, "step": 49540 }, { "epoch": 8.082381729200652, "grad_norm": 0.001101077999919653, "learning_rate": 0.0007437608056132388, "loss": 0.025, "num_input_tokens_seen": 107026448, "step": 49545 }, { "epoch": 8.083197389885807, "grad_norm": 0.003104017348960042, "learning_rate": 0.0007436986552629279, "loss": 0.0218, "num_input_tokens_seen": 107038768, "step": 49550 }, { "epoch": 8.084013050570963, "grad_norm": 0.012065582908689976, "learning_rate": 0.0007436364999737546, "loss": 0.0271, "num_input_tokens_seen": 107049936, "step": 49555 }, { "epoch": 8.084828711256117, "grad_norm": 0.1463005393743515, "learning_rate": 0.0007435743397469785, "loss": 0.0241, "num_input_tokens_seen": 107061808, "step": 49560 }, { "epoch": 8.085644371941273, "grad_norm": 0.024028457701206207, "learning_rate": 0.0007435121745838595, "loss": 0.0256, "num_input_tokens_seen": 107072464, "step": 49565 }, { "epoch": 8.086460032626427, "grad_norm": 0.24131129682064056, "learning_rate": 0.0007434500044856574, "loss": 0.1333, "num_input_tokens_seen": 107082704, "step": 49570 }, { "epoch": 8.087275693311582, "grad_norm": 0.2152007669210434, "learning_rate": 0.000743387829453632, "loss": 0.0369, "num_input_tokens_seen": 107093680, "step": 49575 }, { "epoch": 8.088091353996738, "grad_norm": 0.2218417227268219, "learning_rate": 0.0007433256494890435, "loss": 0.0972, "num_input_tokens_seen": 107103824, "step": 49580 }, { "epoch": 8.088907014681892, "grad_norm": 0.01789860427379608, "learning_rate": 0.000743263464593152, "loss": 0.062, "num_input_tokens_seen": 107114512, "step": 49585 }, { "epoch": 8.089722675367048, "grad_norm": 0.4298427104949951, "learning_rate": 0.0007432012747672179, "loss": 0.0929, "num_input_tokens_seen": 107125904, "step": 49590 }, { "epoch": 8.090538336052202, "grad_norm": 0.0065206796862185, "learning_rate": 0.0007431390800125013, "loss": 0.0142, "num_input_tokens_seen": 107137136, "step": 49595 }, { "epoch": 8.091353996737357, "grad_norm": 0.020476138219237328, "learning_rate": 0.0007430768803302629, "loss": 0.0653, "num_input_tokens_seen": 107148912, "step": 49600 }, { "epoch": 8.092169657422513, "grad_norm": 0.017311519011855125, "learning_rate": 0.0007430146757217631, "loss": 0.2111, "num_input_tokens_seen": 107159792, "step": 49605 }, { "epoch": 8.092985318107667, "grad_norm": 0.022033190354704857, "learning_rate": 0.0007429524661882626, "loss": 0.1223, "num_input_tokens_seen": 107170640, "step": 49610 }, { "epoch": 8.093800978792823, "grad_norm": 0.05615110695362091, "learning_rate": 0.0007428902517310222, "loss": 0.0604, "num_input_tokens_seen": 107180816, "step": 49615 }, { "epoch": 8.094616639477977, "grad_norm": 0.03126392140984535, "learning_rate": 0.0007428280323513028, "loss": 0.049, "num_input_tokens_seen": 107192016, "step": 49620 }, { "epoch": 8.095432300163132, "grad_norm": 0.622178316116333, "learning_rate": 0.0007427658080503652, "loss": 0.2196, "num_input_tokens_seen": 107202160, "step": 49625 }, { "epoch": 8.096247960848286, "grad_norm": 0.1008746325969696, "learning_rate": 0.0007427035788294704, "loss": 0.0663, "num_input_tokens_seen": 107213776, "step": 49630 }, { "epoch": 8.097063621533442, "grad_norm": 0.04118318483233452, "learning_rate": 0.0007426413446898799, "loss": 0.0315, "num_input_tokens_seen": 107224048, "step": 49635 }, { "epoch": 8.097879282218598, "grad_norm": 0.3178521990776062, "learning_rate": 0.0007425791056328546, "loss": 0.1177, "num_input_tokens_seen": 107234800, "step": 49640 }, { "epoch": 8.098694942903752, "grad_norm": 0.24075457453727722, "learning_rate": 0.0007425168616596561, "loss": 0.0396, "num_input_tokens_seen": 107245872, "step": 49645 }, { "epoch": 8.099510603588907, "grad_norm": 0.22038891911506653, "learning_rate": 0.0007424546127715456, "loss": 0.057, "num_input_tokens_seen": 107255760, "step": 49650 }, { "epoch": 8.100326264274061, "grad_norm": 0.13066186010837555, "learning_rate": 0.0007423923589697849, "loss": 0.0265, "num_input_tokens_seen": 107266992, "step": 49655 }, { "epoch": 8.101141924959217, "grad_norm": 0.009751847945153713, "learning_rate": 0.0007423301002556355, "loss": 0.0186, "num_input_tokens_seen": 107275536, "step": 49660 }, { "epoch": 8.101957585644373, "grad_norm": 0.11319594085216522, "learning_rate": 0.0007422678366303592, "loss": 0.0525, "num_input_tokens_seen": 107286960, "step": 49665 }, { "epoch": 8.102773246329527, "grad_norm": 0.051054831594228745, "learning_rate": 0.000742205568095218, "loss": 0.0205, "num_input_tokens_seen": 107297360, "step": 49670 }, { "epoch": 8.103588907014682, "grad_norm": 0.059771161526441574, "learning_rate": 0.0007421432946514736, "loss": 0.0394, "num_input_tokens_seen": 107308272, "step": 49675 }, { "epoch": 8.104404567699836, "grad_norm": 0.042254697531461716, "learning_rate": 0.0007420810163003881, "loss": 0.0656, "num_input_tokens_seen": 107318640, "step": 49680 }, { "epoch": 8.105220228384992, "grad_norm": 0.004530859179794788, "learning_rate": 0.0007420187330432238, "loss": 0.1321, "num_input_tokens_seen": 107328656, "step": 49685 }, { "epoch": 8.106035889070148, "grad_norm": 0.48277902603149414, "learning_rate": 0.0007419564448812428, "loss": 0.1329, "num_input_tokens_seen": 107338832, "step": 49690 }, { "epoch": 8.106851549755302, "grad_norm": 0.053494442254304886, "learning_rate": 0.0007418941518157075, "loss": 0.0152, "num_input_tokens_seen": 107349456, "step": 49695 }, { "epoch": 8.107667210440457, "grad_norm": 0.3123549520969391, "learning_rate": 0.0007418318538478803, "loss": 0.1035, "num_input_tokens_seen": 107361360, "step": 49700 }, { "epoch": 8.108482871125611, "grad_norm": 0.3388768136501312, "learning_rate": 0.0007417695509790239, "loss": 0.11, "num_input_tokens_seen": 107372496, "step": 49705 }, { "epoch": 8.109298531810767, "grad_norm": 0.07602295279502869, "learning_rate": 0.0007417072432104007, "loss": 0.1473, "num_input_tokens_seen": 107383376, "step": 49710 }, { "epoch": 8.11011419249592, "grad_norm": 0.08169770240783691, "learning_rate": 0.0007416449305432738, "loss": 0.0188, "num_input_tokens_seen": 107394448, "step": 49715 }, { "epoch": 8.110929853181077, "grad_norm": 0.02449674718081951, "learning_rate": 0.0007415826129789057, "loss": 0.0245, "num_input_tokens_seen": 107404944, "step": 49720 }, { "epoch": 8.111745513866232, "grad_norm": 0.16801486909389496, "learning_rate": 0.0007415202905185594, "loss": 0.1158, "num_input_tokens_seen": 107416080, "step": 49725 }, { "epoch": 8.112561174551386, "grad_norm": 0.03335743770003319, "learning_rate": 0.0007414579631634981, "loss": 0.0197, "num_input_tokens_seen": 107426224, "step": 49730 }, { "epoch": 8.113376835236542, "grad_norm": 0.053683776408433914, "learning_rate": 0.0007413956309149848, "loss": 0.1636, "num_input_tokens_seen": 107437328, "step": 49735 }, { "epoch": 8.114192495921696, "grad_norm": 0.15747734904289246, "learning_rate": 0.000741333293774283, "loss": 0.0375, "num_input_tokens_seen": 107446736, "step": 49740 }, { "epoch": 8.115008156606851, "grad_norm": 0.039649732410907745, "learning_rate": 0.0007412709517426556, "loss": 0.011, "num_input_tokens_seen": 107456784, "step": 49745 }, { "epoch": 8.115823817292007, "grad_norm": 0.015954799950122833, "learning_rate": 0.0007412086048213665, "loss": 0.0142, "num_input_tokens_seen": 107466640, "step": 49750 }, { "epoch": 8.116639477977161, "grad_norm": 0.005059359595179558, "learning_rate": 0.000741146253011679, "loss": 0.0644, "num_input_tokens_seen": 107477168, "step": 49755 }, { "epoch": 8.117455138662317, "grad_norm": 0.010598313063383102, "learning_rate": 0.0007410838963148568, "loss": 0.0103, "num_input_tokens_seen": 107487920, "step": 49760 }, { "epoch": 8.11827079934747, "grad_norm": 0.006887549534440041, "learning_rate": 0.0007410215347321634, "loss": 0.0252, "num_input_tokens_seen": 107499632, "step": 49765 }, { "epoch": 8.119086460032626, "grad_norm": 0.026491384953260422, "learning_rate": 0.000740959168264863, "loss": 0.0803, "num_input_tokens_seen": 107510192, "step": 49770 }, { "epoch": 8.119902120717782, "grad_norm": 0.02549799717962742, "learning_rate": 0.0007408967969142193, "loss": 0.0923, "num_input_tokens_seen": 107521040, "step": 49775 }, { "epoch": 8.120717781402936, "grad_norm": 0.6434139013290405, "learning_rate": 0.0007408344206814965, "loss": 0.0789, "num_input_tokens_seen": 107532816, "step": 49780 }, { "epoch": 8.121533442088092, "grad_norm": 0.4618445038795471, "learning_rate": 0.0007407720395679585, "loss": 0.1924, "num_input_tokens_seen": 107543984, "step": 49785 }, { "epoch": 8.122349102773246, "grad_norm": 0.024171195924282074, "learning_rate": 0.0007407096535748698, "loss": 0.0338, "num_input_tokens_seen": 107555824, "step": 49790 }, { "epoch": 8.123164763458401, "grad_norm": 0.052864234894514084, "learning_rate": 0.0007406472627034946, "loss": 0.1377, "num_input_tokens_seen": 107566800, "step": 49795 }, { "epoch": 8.123980424143557, "grad_norm": 0.0018474479438737035, "learning_rate": 0.0007405848669550973, "loss": 0.0285, "num_input_tokens_seen": 107577456, "step": 49800 }, { "epoch": 8.124796084828711, "grad_norm": 0.17964208126068115, "learning_rate": 0.0007405224663309425, "loss": 0.2171, "num_input_tokens_seen": 107589328, "step": 49805 }, { "epoch": 8.125611745513867, "grad_norm": 0.1299470216035843, "learning_rate": 0.0007404600608322948, "loss": 0.146, "num_input_tokens_seen": 107600784, "step": 49810 }, { "epoch": 8.12642740619902, "grad_norm": 0.005026193335652351, "learning_rate": 0.0007403976504604189, "loss": 0.0586, "num_input_tokens_seen": 107612592, "step": 49815 }, { "epoch": 8.127243066884176, "grad_norm": 0.06259380280971527, "learning_rate": 0.0007403352352165797, "loss": 0.0488, "num_input_tokens_seen": 107623184, "step": 49820 }, { "epoch": 8.12805872756933, "grad_norm": 0.016340158879756927, "learning_rate": 0.0007402728151020419, "loss": 0.0232, "num_input_tokens_seen": 107633456, "step": 49825 }, { "epoch": 8.128874388254486, "grad_norm": 0.19695636630058289, "learning_rate": 0.0007402103901180708, "loss": 0.0811, "num_input_tokens_seen": 107644688, "step": 49830 }, { "epoch": 8.129690048939642, "grad_norm": 0.01583622582256794, "learning_rate": 0.0007401479602659315, "loss": 0.1204, "num_input_tokens_seen": 107654960, "step": 49835 }, { "epoch": 8.130505709624796, "grad_norm": 0.13566166162490845, "learning_rate": 0.000740085525546889, "loss": 0.0135, "num_input_tokens_seen": 107665104, "step": 49840 }, { "epoch": 8.131321370309951, "grad_norm": 0.0025712582282721996, "learning_rate": 0.0007400230859622088, "loss": 0.024, "num_input_tokens_seen": 107675920, "step": 49845 }, { "epoch": 8.132137030995105, "grad_norm": 0.07446157187223434, "learning_rate": 0.0007399606415131563, "loss": 0.0806, "num_input_tokens_seen": 107686544, "step": 49850 }, { "epoch": 8.132952691680261, "grad_norm": 0.08578246086835861, "learning_rate": 0.0007398981922009971, "loss": 0.0433, "num_input_tokens_seen": 107698320, "step": 49855 }, { "epoch": 8.133768352365417, "grad_norm": 0.06800012290477753, "learning_rate": 0.0007398357380269966, "loss": 0.0156, "num_input_tokens_seen": 107709360, "step": 49860 }, { "epoch": 8.13458401305057, "grad_norm": 0.007951917126774788, "learning_rate": 0.0007397732789924205, "loss": 0.0075, "num_input_tokens_seen": 107720560, "step": 49865 }, { "epoch": 8.135399673735726, "grad_norm": 0.010952308773994446, "learning_rate": 0.0007397108150985349, "loss": 0.0187, "num_input_tokens_seen": 107730864, "step": 49870 }, { "epoch": 8.13621533442088, "grad_norm": 0.1421835869550705, "learning_rate": 0.0007396483463466055, "loss": 0.0472, "num_input_tokens_seen": 107740720, "step": 49875 }, { "epoch": 8.137030995106036, "grad_norm": 0.00989621039479971, "learning_rate": 0.0007395858727378982, "loss": 0.0458, "num_input_tokens_seen": 107750096, "step": 49880 }, { "epoch": 8.137846655791192, "grad_norm": 0.0596698634326458, "learning_rate": 0.0007395233942736794, "loss": 0.1653, "num_input_tokens_seen": 107761584, "step": 49885 }, { "epoch": 8.138662316476346, "grad_norm": 0.028497813269495964, "learning_rate": 0.0007394609109552152, "loss": 0.0863, "num_input_tokens_seen": 107772464, "step": 49890 }, { "epoch": 8.139477977161501, "grad_norm": 0.08234116435050964, "learning_rate": 0.0007393984227837718, "loss": 0.1797, "num_input_tokens_seen": 107783248, "step": 49895 }, { "epoch": 8.140293637846655, "grad_norm": 0.02983970381319523, "learning_rate": 0.0007393359297606155, "loss": 0.0247, "num_input_tokens_seen": 107793584, "step": 49900 }, { "epoch": 8.141109298531811, "grad_norm": 0.23107391595840454, "learning_rate": 0.0007392734318870133, "loss": 0.0643, "num_input_tokens_seen": 107803440, "step": 49905 }, { "epoch": 8.141924959216965, "grad_norm": 0.06202094629406929, "learning_rate": 0.0007392109291642311, "loss": 0.0638, "num_input_tokens_seen": 107813424, "step": 49910 }, { "epoch": 8.14274061990212, "grad_norm": 0.31579720973968506, "learning_rate": 0.0007391484215935363, "loss": 0.1542, "num_input_tokens_seen": 107823152, "step": 49915 }, { "epoch": 8.143556280587276, "grad_norm": 0.14060421288013458, "learning_rate": 0.000739085909176195, "loss": 0.1019, "num_input_tokens_seen": 107834736, "step": 49920 }, { "epoch": 8.14437194127243, "grad_norm": 0.2537612020969391, "learning_rate": 0.0007390233919134747, "loss": 0.0287, "num_input_tokens_seen": 107847120, "step": 49925 }, { "epoch": 8.145187601957586, "grad_norm": 0.1196332573890686, "learning_rate": 0.0007389608698066422, "loss": 0.0142, "num_input_tokens_seen": 107858448, "step": 49930 }, { "epoch": 8.14600326264274, "grad_norm": 0.0030300780199468136, "learning_rate": 0.0007388983428569643, "loss": 0.0214, "num_input_tokens_seen": 107870288, "step": 49935 }, { "epoch": 8.146818923327896, "grad_norm": 0.005464842543005943, "learning_rate": 0.0007388358110657085, "loss": 0.0359, "num_input_tokens_seen": 107881232, "step": 49940 }, { "epoch": 8.147634584013051, "grad_norm": 0.12564074993133545, "learning_rate": 0.000738773274434142, "loss": 0.0288, "num_input_tokens_seen": 107891024, "step": 49945 }, { "epoch": 8.148450244698205, "grad_norm": 0.021209627389907837, "learning_rate": 0.0007387107329635322, "loss": 0.0732, "num_input_tokens_seen": 107902224, "step": 49950 }, { "epoch": 8.149265905383361, "grad_norm": 0.0424620546400547, "learning_rate": 0.0007386481866551466, "loss": 0.0218, "num_input_tokens_seen": 107913776, "step": 49955 }, { "epoch": 8.150081566068515, "grad_norm": 0.13068750500679016, "learning_rate": 0.0007385856355102528, "loss": 0.1071, "num_input_tokens_seen": 107925488, "step": 49960 }, { "epoch": 8.15089722675367, "grad_norm": 0.04212689772248268, "learning_rate": 0.0007385230795301183, "loss": 0.1603, "num_input_tokens_seen": 107937040, "step": 49965 }, { "epoch": 8.151712887438826, "grad_norm": 0.41780608892440796, "learning_rate": 0.000738460518716011, "loss": 0.0536, "num_input_tokens_seen": 107947824, "step": 49970 }, { "epoch": 8.15252854812398, "grad_norm": 0.2264157235622406, "learning_rate": 0.0007383979530691989, "loss": 0.1038, "num_input_tokens_seen": 107957904, "step": 49975 }, { "epoch": 8.153344208809136, "grad_norm": 0.10788124054670334, "learning_rate": 0.0007383353825909498, "loss": 0.1653, "num_input_tokens_seen": 107968336, "step": 49980 }, { "epoch": 8.15415986949429, "grad_norm": 0.16582083702087402, "learning_rate": 0.0007382728072825318, "loss": 0.1608, "num_input_tokens_seen": 107978768, "step": 49985 }, { "epoch": 8.154975530179446, "grad_norm": 0.23172712326049805, "learning_rate": 0.0007382102271452132, "loss": 0.0412, "num_input_tokens_seen": 107989168, "step": 49990 }, { "epoch": 8.1557911908646, "grad_norm": 0.11851934343576431, "learning_rate": 0.0007381476421802621, "loss": 0.0618, "num_input_tokens_seen": 107999664, "step": 49995 }, { "epoch": 8.156606851549755, "grad_norm": 0.022425547242164612, "learning_rate": 0.0007380850523889469, "loss": 0.0268, "num_input_tokens_seen": 108008560, "step": 50000 }, { "epoch": 8.15742251223491, "grad_norm": 0.17704157531261444, "learning_rate": 0.0007380224577725361, "loss": 0.0822, "num_input_tokens_seen": 108019504, "step": 50005 }, { "epoch": 8.158238172920065, "grad_norm": 0.22146500647068024, "learning_rate": 0.0007379598583322982, "loss": 0.2091, "num_input_tokens_seen": 108030224, "step": 50010 }, { "epoch": 8.15905383360522, "grad_norm": 0.0075067877769470215, "learning_rate": 0.0007378972540695019, "loss": 0.0308, "num_input_tokens_seen": 108041936, "step": 50015 }, { "epoch": 8.159869494290374, "grad_norm": 0.007990765385329723, "learning_rate": 0.0007378346449854159, "loss": 0.0663, "num_input_tokens_seen": 108052688, "step": 50020 }, { "epoch": 8.16068515497553, "grad_norm": 0.003241181606426835, "learning_rate": 0.0007377720310813092, "loss": 0.1593, "num_input_tokens_seen": 108063440, "step": 50025 }, { "epoch": 8.161500815660686, "grad_norm": 0.17617450654506683, "learning_rate": 0.0007377094123584507, "loss": 0.0786, "num_input_tokens_seen": 108074448, "step": 50030 }, { "epoch": 8.16231647634584, "grad_norm": 0.04974460229277611, "learning_rate": 0.0007376467888181094, "loss": 0.0424, "num_input_tokens_seen": 108085840, "step": 50035 }, { "epoch": 8.163132137030995, "grad_norm": 0.028729038313031197, "learning_rate": 0.0007375841604615542, "loss": 0.0164, "num_input_tokens_seen": 108096816, "step": 50040 }, { "epoch": 8.16394779771615, "grad_norm": 0.22961454093456268, "learning_rate": 0.0007375215272900548, "loss": 0.1319, "num_input_tokens_seen": 108106448, "step": 50045 }, { "epoch": 8.164763458401305, "grad_norm": 0.010329188778996468, "learning_rate": 0.0007374588893048803, "loss": 0.0371, "num_input_tokens_seen": 108116816, "step": 50050 }, { "epoch": 8.16557911908646, "grad_norm": 0.010221997275948524, "learning_rate": 0.0007373962465073002, "loss": 0.0539, "num_input_tokens_seen": 108127440, "step": 50055 }, { "epoch": 8.166394779771615, "grad_norm": 0.23573219776153564, "learning_rate": 0.0007373335988985839, "loss": 0.0611, "num_input_tokens_seen": 108138128, "step": 50060 }, { "epoch": 8.16721044045677, "grad_norm": 0.007467462215572596, "learning_rate": 0.0007372709464800013, "loss": 0.0263, "num_input_tokens_seen": 108148912, "step": 50065 }, { "epoch": 8.168026101141924, "grad_norm": 0.23514242470264435, "learning_rate": 0.0007372082892528218, "loss": 0.0509, "num_input_tokens_seen": 108160240, "step": 50070 }, { "epoch": 8.16884176182708, "grad_norm": 0.24603112041950226, "learning_rate": 0.0007371456272183156, "loss": 0.1425, "num_input_tokens_seen": 108171280, "step": 50075 }, { "epoch": 8.169657422512234, "grad_norm": 0.013973649591207504, "learning_rate": 0.0007370829603777523, "loss": 0.0193, "num_input_tokens_seen": 108183088, "step": 50080 }, { "epoch": 8.17047308319739, "grad_norm": 0.05473875626921654, "learning_rate": 0.000737020288732402, "loss": 0.1722, "num_input_tokens_seen": 108194320, "step": 50085 }, { "epoch": 8.171288743882545, "grad_norm": 0.075847327709198, "learning_rate": 0.0007369576122835349, "loss": 0.0737, "num_input_tokens_seen": 108205296, "step": 50090 }, { "epoch": 8.1721044045677, "grad_norm": 0.5191269516944885, "learning_rate": 0.0007368949310324211, "loss": 0.2405, "num_input_tokens_seen": 108215696, "step": 50095 }, { "epoch": 8.172920065252855, "grad_norm": 0.15466056764125824, "learning_rate": 0.0007368322449803311, "loss": 0.1787, "num_input_tokens_seen": 108225456, "step": 50100 }, { "epoch": 8.173735725938009, "grad_norm": 0.26574286818504333, "learning_rate": 0.0007367695541285353, "loss": 0.0349, "num_input_tokens_seen": 108236560, "step": 50105 }, { "epoch": 8.174551386623165, "grad_norm": 0.04187563434243202, "learning_rate": 0.0007367068584783041, "loss": 0.0278, "num_input_tokens_seen": 108247568, "step": 50110 }, { "epoch": 8.17536704730832, "grad_norm": 0.1134595200419426, "learning_rate": 0.000736644158030908, "loss": 0.0225, "num_input_tokens_seen": 108258192, "step": 50115 }, { "epoch": 8.176182707993474, "grad_norm": 0.004802480805665255, "learning_rate": 0.0007365814527876179, "loss": 0.0665, "num_input_tokens_seen": 108268944, "step": 50120 }, { "epoch": 8.17699836867863, "grad_norm": 0.3093656599521637, "learning_rate": 0.0007365187427497045, "loss": 0.1294, "num_input_tokens_seen": 108279344, "step": 50125 }, { "epoch": 8.177814029363784, "grad_norm": 0.020289601758122444, "learning_rate": 0.0007364560279184387, "loss": 0.0414, "num_input_tokens_seen": 108290768, "step": 50130 }, { "epoch": 8.17862969004894, "grad_norm": 0.019983666017651558, "learning_rate": 0.0007363933082950917, "loss": 0.1073, "num_input_tokens_seen": 108302192, "step": 50135 }, { "epoch": 8.179445350734095, "grad_norm": 0.009791013784706593, "learning_rate": 0.0007363305838809344, "loss": 0.0139, "num_input_tokens_seen": 108312720, "step": 50140 }, { "epoch": 8.18026101141925, "grad_norm": 0.007855056785047054, "learning_rate": 0.0007362678546772379, "loss": 0.2655, "num_input_tokens_seen": 108322480, "step": 50145 }, { "epoch": 8.181076672104405, "grad_norm": 0.023270519450306892, "learning_rate": 0.0007362051206852736, "loss": 0.0336, "num_input_tokens_seen": 108332976, "step": 50150 }, { "epoch": 8.181892332789559, "grad_norm": 0.15803292393684387, "learning_rate": 0.0007361423819063128, "loss": 0.0329, "num_input_tokens_seen": 108344112, "step": 50155 }, { "epoch": 8.182707993474715, "grad_norm": 0.2568044364452362, "learning_rate": 0.0007360796383416273, "loss": 0.1826, "num_input_tokens_seen": 108354960, "step": 50160 }, { "epoch": 8.18352365415987, "grad_norm": 0.011066235601902008, "learning_rate": 0.0007360168899924883, "loss": 0.055, "num_input_tokens_seen": 108365008, "step": 50165 }, { "epoch": 8.184339314845024, "grad_norm": 0.08335398137569427, "learning_rate": 0.0007359541368601675, "loss": 0.1744, "num_input_tokens_seen": 108374544, "step": 50170 }, { "epoch": 8.18515497553018, "grad_norm": 0.23367607593536377, "learning_rate": 0.0007358913789459369, "loss": 0.161, "num_input_tokens_seen": 108384304, "step": 50175 }, { "epoch": 8.185970636215334, "grad_norm": 0.04029659181833267, "learning_rate": 0.0007358286162510683, "loss": 0.1058, "num_input_tokens_seen": 108394992, "step": 50180 }, { "epoch": 8.18678629690049, "grad_norm": 0.04990570619702339, "learning_rate": 0.0007357658487768337, "loss": 0.0677, "num_input_tokens_seen": 108407568, "step": 50185 }, { "epoch": 8.187601957585644, "grad_norm": 0.009469253942370415, "learning_rate": 0.0007357030765245049, "loss": 0.022, "num_input_tokens_seen": 108417904, "step": 50190 }, { "epoch": 8.1884176182708, "grad_norm": 0.026664957404136658, "learning_rate": 0.0007356402994953544, "loss": 0.0822, "num_input_tokens_seen": 108428400, "step": 50195 }, { "epoch": 8.189233278955955, "grad_norm": 0.015516448765993118, "learning_rate": 0.0007355775176906543, "loss": 0.0596, "num_input_tokens_seen": 108438864, "step": 50200 }, { "epoch": 8.190048939641109, "grad_norm": 0.014006822369992733, "learning_rate": 0.0007355147311116768, "loss": 0.1504, "num_input_tokens_seen": 108448720, "step": 50205 }, { "epoch": 8.190864600326265, "grad_norm": 0.1970442533493042, "learning_rate": 0.0007354519397596946, "loss": 0.049, "num_input_tokens_seen": 108459344, "step": 50210 }, { "epoch": 8.191680261011419, "grad_norm": 0.015166080556809902, "learning_rate": 0.0007353891436359801, "loss": 0.0223, "num_input_tokens_seen": 108470000, "step": 50215 }, { "epoch": 8.192495921696574, "grad_norm": 0.0828956738114357, "learning_rate": 0.000735326342741806, "loss": 0.0787, "num_input_tokens_seen": 108479856, "step": 50220 }, { "epoch": 8.19331158238173, "grad_norm": 0.1197994202375412, "learning_rate": 0.0007352635370784451, "loss": 0.0738, "num_input_tokens_seen": 108490576, "step": 50225 }, { "epoch": 8.194127243066884, "grad_norm": 0.047717440873384476, "learning_rate": 0.00073520072664717, "loss": 0.0749, "num_input_tokens_seen": 108502960, "step": 50230 }, { "epoch": 8.19494290375204, "grad_norm": 0.08582471311092377, "learning_rate": 0.000735137911449254, "loss": 0.0627, "num_input_tokens_seen": 108512944, "step": 50235 }, { "epoch": 8.195758564437194, "grad_norm": 0.1752943992614746, "learning_rate": 0.0007350750914859698, "loss": 0.0526, "num_input_tokens_seen": 108523664, "step": 50240 }, { "epoch": 8.19657422512235, "grad_norm": 0.008756861090660095, "learning_rate": 0.0007350122667585908, "loss": 0.0229, "num_input_tokens_seen": 108534704, "step": 50245 }, { "epoch": 8.197389885807505, "grad_norm": 0.01668260246515274, "learning_rate": 0.0007349494372683899, "loss": 0.0281, "num_input_tokens_seen": 108545936, "step": 50250 }, { "epoch": 8.198205546492659, "grad_norm": 0.7298435568809509, "learning_rate": 0.0007348866030166407, "loss": 0.0642, "num_input_tokens_seen": 108555536, "step": 50255 }, { "epoch": 8.199021207177815, "grad_norm": 0.015028917230665684, "learning_rate": 0.0007348237640046165, "loss": 0.0155, "num_input_tokens_seen": 108566896, "step": 50260 }, { "epoch": 8.199836867862969, "grad_norm": 0.013336896896362305, "learning_rate": 0.0007347609202335907, "loss": 0.0383, "num_input_tokens_seen": 108577520, "step": 50265 }, { "epoch": 8.200652528548124, "grad_norm": 0.0023462544195353985, "learning_rate": 0.0007346980717048373, "loss": 0.0093, "num_input_tokens_seen": 108589104, "step": 50270 }, { "epoch": 8.201468189233278, "grad_norm": 0.02456527017056942, "learning_rate": 0.0007346352184196296, "loss": 0.0188, "num_input_tokens_seen": 108599440, "step": 50275 }, { "epoch": 8.202283849918434, "grad_norm": 0.23470385372638702, "learning_rate": 0.0007345723603792415, "loss": 0.0888, "num_input_tokens_seen": 108610608, "step": 50280 }, { "epoch": 8.20309951060359, "grad_norm": 0.03273777663707733, "learning_rate": 0.000734509497584947, "loss": 0.0784, "num_input_tokens_seen": 108621680, "step": 50285 }, { "epoch": 8.203915171288743, "grad_norm": 0.20122350752353668, "learning_rate": 0.0007344466300380201, "loss": 0.0249, "num_input_tokens_seen": 108633712, "step": 50290 }, { "epoch": 8.2047308319739, "grad_norm": 0.03938678279519081, "learning_rate": 0.0007343837577397347, "loss": 0.0923, "num_input_tokens_seen": 108643888, "step": 50295 }, { "epoch": 8.205546492659053, "grad_norm": 0.023820001631975174, "learning_rate": 0.0007343208806913651, "loss": 0.0581, "num_input_tokens_seen": 108654160, "step": 50300 }, { "epoch": 8.206362153344209, "grad_norm": 0.0029546052683144808, "learning_rate": 0.0007342579988941858, "loss": 0.1322, "num_input_tokens_seen": 108665072, "step": 50305 }, { "epoch": 8.207177814029365, "grad_norm": 0.09167854487895966, "learning_rate": 0.0007341951123494708, "loss": 0.0173, "num_input_tokens_seen": 108676432, "step": 50310 }, { "epoch": 8.207993474714518, "grad_norm": 0.13153241574764252, "learning_rate": 0.0007341322210584947, "loss": 0.0306, "num_input_tokens_seen": 108687088, "step": 50315 }, { "epoch": 8.208809135399674, "grad_norm": 0.27671709656715393, "learning_rate": 0.0007340693250225322, "loss": 0.0658, "num_input_tokens_seen": 108698736, "step": 50320 }, { "epoch": 8.209624796084828, "grad_norm": 0.0030362617690116167, "learning_rate": 0.0007340064242428579, "loss": 0.0678, "num_input_tokens_seen": 108709616, "step": 50325 }, { "epoch": 8.210440456769984, "grad_norm": 0.02015618234872818, "learning_rate": 0.0007339435187207466, "loss": 0.006, "num_input_tokens_seen": 108720688, "step": 50330 }, { "epoch": 8.21125611745514, "grad_norm": 0.017543811351060867, "learning_rate": 0.0007338806084574731, "loss": 0.0171, "num_input_tokens_seen": 108730928, "step": 50335 }, { "epoch": 8.212071778140293, "grad_norm": 0.003998770844191313, "learning_rate": 0.0007338176934543124, "loss": 0.0067, "num_input_tokens_seen": 108742832, "step": 50340 }, { "epoch": 8.21288743882545, "grad_norm": 0.0968787744641304, "learning_rate": 0.0007337547737125394, "loss": 0.1262, "num_input_tokens_seen": 108753616, "step": 50345 }, { "epoch": 8.213703099510603, "grad_norm": 0.3040590286254883, "learning_rate": 0.0007336918492334294, "loss": 0.1554, "num_input_tokens_seen": 108764592, "step": 50350 }, { "epoch": 8.214518760195759, "grad_norm": 0.09109804034233093, "learning_rate": 0.0007336289200182576, "loss": 0.0133, "num_input_tokens_seen": 108776688, "step": 50355 }, { "epoch": 8.215334420880913, "grad_norm": 0.04389248788356781, "learning_rate": 0.0007335659860682994, "loss": 0.0288, "num_input_tokens_seen": 108787792, "step": 50360 }, { "epoch": 8.216150081566068, "grad_norm": 0.12057659029960632, "learning_rate": 0.0007335030473848302, "loss": 0.0637, "num_input_tokens_seen": 108798032, "step": 50365 }, { "epoch": 8.216965742251224, "grad_norm": 0.02155953273177147, "learning_rate": 0.0007334401039691255, "loss": 0.0127, "num_input_tokens_seen": 108808976, "step": 50370 }, { "epoch": 8.217781402936378, "grad_norm": 0.011279478669166565, "learning_rate": 0.000733377155822461, "loss": 0.0086, "num_input_tokens_seen": 108818928, "step": 50375 }, { "epoch": 8.218597063621534, "grad_norm": 0.042144011706113815, "learning_rate": 0.0007333142029461124, "loss": 0.2277, "num_input_tokens_seen": 108829744, "step": 50380 }, { "epoch": 8.219412724306688, "grad_norm": 0.3779882788658142, "learning_rate": 0.0007332512453413555, "loss": 0.0623, "num_input_tokens_seen": 108841008, "step": 50385 }, { "epoch": 8.220228384991843, "grad_norm": 0.029292693361639977, "learning_rate": 0.0007331882830094661, "loss": 0.1683, "num_input_tokens_seen": 108852208, "step": 50390 }, { "epoch": 8.221044045676999, "grad_norm": 0.058450907468795776, "learning_rate": 0.0007331253159517204, "loss": 0.0171, "num_input_tokens_seen": 108863440, "step": 50395 }, { "epoch": 8.221859706362153, "grad_norm": 0.016852879896759987, "learning_rate": 0.0007330623441693944, "loss": 0.0701, "num_input_tokens_seen": 108874256, "step": 50400 }, { "epoch": 8.222675367047309, "grad_norm": 0.030780978500843048, "learning_rate": 0.0007329993676637643, "loss": 0.0464, "num_input_tokens_seen": 108885808, "step": 50405 }, { "epoch": 8.223491027732463, "grad_norm": 0.1959686279296875, "learning_rate": 0.0007329363864361065, "loss": 0.1331, "num_input_tokens_seen": 108896880, "step": 50410 }, { "epoch": 8.224306688417618, "grad_norm": 0.3598119616508484, "learning_rate": 0.0007328734004876974, "loss": 0.0714, "num_input_tokens_seen": 108908336, "step": 50415 }, { "epoch": 8.225122349102774, "grad_norm": 0.004757583606988192, "learning_rate": 0.0007328104098198131, "loss": 0.0879, "num_input_tokens_seen": 108919312, "step": 50420 }, { "epoch": 8.225938009787928, "grad_norm": 0.40510547161102295, "learning_rate": 0.000732747414433731, "loss": 0.0601, "num_input_tokens_seen": 108930992, "step": 50425 }, { "epoch": 8.226753670473084, "grad_norm": 0.3972322344779968, "learning_rate": 0.000732684414330727, "loss": 0.062, "num_input_tokens_seen": 108942864, "step": 50430 }, { "epoch": 8.227569331158238, "grad_norm": 0.008180802688002586, "learning_rate": 0.0007326214095120781, "loss": 0.0334, "num_input_tokens_seen": 108952880, "step": 50435 }, { "epoch": 8.228384991843393, "grad_norm": 0.007211578544229269, "learning_rate": 0.0007325583999790613, "loss": 0.0264, "num_input_tokens_seen": 108964560, "step": 50440 }, { "epoch": 8.229200652528547, "grad_norm": 0.017740648239850998, "learning_rate": 0.0007324953857329535, "loss": 0.0261, "num_input_tokens_seen": 108976336, "step": 50445 }, { "epoch": 8.230016313213703, "grad_norm": 0.009635083377361298, "learning_rate": 0.0007324323667750319, "loss": 0.0208, "num_input_tokens_seen": 108987216, "step": 50450 }, { "epoch": 8.230831973898859, "grad_norm": 0.007957677356898785, "learning_rate": 0.0007323693431065734, "loss": 0.1397, "num_input_tokens_seen": 108997328, "step": 50455 }, { "epoch": 8.231647634584013, "grad_norm": 0.013457262888550758, "learning_rate": 0.0007323063147288553, "loss": 0.1046, "num_input_tokens_seen": 109007088, "step": 50460 }, { "epoch": 8.232463295269168, "grad_norm": 0.22362826764583588, "learning_rate": 0.0007322432816431551, "loss": 0.0372, "num_input_tokens_seen": 109017488, "step": 50465 }, { "epoch": 8.233278955954322, "grad_norm": 0.17243242263793945, "learning_rate": 0.0007321802438507502, "loss": 0.0236, "num_input_tokens_seen": 109028240, "step": 50470 }, { "epoch": 8.234094616639478, "grad_norm": 0.003037064801901579, "learning_rate": 0.0007321172013529182, "loss": 0.129, "num_input_tokens_seen": 109038224, "step": 50475 }, { "epoch": 8.234910277324634, "grad_norm": 0.007365551311522722, "learning_rate": 0.0007320541541509366, "loss": 0.0422, "num_input_tokens_seen": 109048592, "step": 50480 }, { "epoch": 8.235725938009788, "grad_norm": 0.020375186577439308, "learning_rate": 0.0007319911022460831, "loss": 0.0671, "num_input_tokens_seen": 109058512, "step": 50485 }, { "epoch": 8.236541598694943, "grad_norm": 0.22685709595680237, "learning_rate": 0.0007319280456396357, "loss": 0.0269, "num_input_tokens_seen": 109069392, "step": 50490 }, { "epoch": 8.237357259380097, "grad_norm": 0.27163517475128174, "learning_rate": 0.0007318649843328722, "loss": 0.0468, "num_input_tokens_seen": 109080176, "step": 50495 }, { "epoch": 8.238172920065253, "grad_norm": 0.008194385096430779, "learning_rate": 0.0007318019183270707, "loss": 0.0809, "num_input_tokens_seen": 109091216, "step": 50500 }, { "epoch": 8.238988580750409, "grad_norm": 0.03463249281048775, "learning_rate": 0.0007317388476235091, "loss": 0.0119, "num_input_tokens_seen": 109101872, "step": 50505 }, { "epoch": 8.239804241435563, "grad_norm": 0.02676951140165329, "learning_rate": 0.0007316757722234659, "loss": 0.0978, "num_input_tokens_seen": 109111600, "step": 50510 }, { "epoch": 8.240619902120718, "grad_norm": 0.012483866885304451, "learning_rate": 0.0007316126921282193, "loss": 0.0099, "num_input_tokens_seen": 109121872, "step": 50515 }, { "epoch": 8.241435562805872, "grad_norm": 0.2171943187713623, "learning_rate": 0.0007315496073390477, "loss": 0.029, "num_input_tokens_seen": 109133136, "step": 50520 }, { "epoch": 8.242251223491028, "grad_norm": 0.0367184579372406, "learning_rate": 0.0007314865178572295, "loss": 0.2034, "num_input_tokens_seen": 109143216, "step": 50525 }, { "epoch": 8.243066884176184, "grad_norm": 0.04937918111681938, "learning_rate": 0.0007314234236840434, "loss": 0.0461, "num_input_tokens_seen": 109153104, "step": 50530 }, { "epoch": 8.243882544861338, "grad_norm": 0.0748000219464302, "learning_rate": 0.000731360324820768, "loss": 0.035, "num_input_tokens_seen": 109163568, "step": 50535 }, { "epoch": 8.244698205546493, "grad_norm": 0.008928696624934673, "learning_rate": 0.000731297221268682, "loss": 0.0819, "num_input_tokens_seen": 109174096, "step": 50540 }, { "epoch": 8.245513866231647, "grad_norm": 0.003620315110310912, "learning_rate": 0.0007312341130290645, "loss": 0.1859, "num_input_tokens_seen": 109185328, "step": 50545 }, { "epoch": 8.246329526916803, "grad_norm": 0.03834008425474167, "learning_rate": 0.0007311710001031943, "loss": 0.0182, "num_input_tokens_seen": 109195568, "step": 50550 }, { "epoch": 8.247145187601957, "grad_norm": 0.2103102058172226, "learning_rate": 0.0007311078824923506, "loss": 0.0414, "num_input_tokens_seen": 109205712, "step": 50555 }, { "epoch": 8.247960848287113, "grad_norm": 0.0034484812058508396, "learning_rate": 0.0007310447601978125, "loss": 0.0053, "num_input_tokens_seen": 109217200, "step": 50560 }, { "epoch": 8.248776508972268, "grad_norm": 0.05513901263475418, "learning_rate": 0.0007309816332208592, "loss": 0.0062, "num_input_tokens_seen": 109227312, "step": 50565 }, { "epoch": 8.249592169657422, "grad_norm": 0.3575925827026367, "learning_rate": 0.00073091850156277, "loss": 0.0751, "num_input_tokens_seen": 109239664, "step": 50570 }, { "epoch": 8.250407830342578, "grad_norm": 0.02116283029317856, "learning_rate": 0.0007308553652248244, "loss": 0.1445, "num_input_tokens_seen": 109250320, "step": 50575 }, { "epoch": 8.251223491027732, "grad_norm": 0.006838952656835318, "learning_rate": 0.0007307922242083022, "loss": 0.0543, "num_input_tokens_seen": 109260048, "step": 50580 }, { "epoch": 8.252039151712887, "grad_norm": 0.04055549204349518, "learning_rate": 0.0007307290785144826, "loss": 0.0177, "num_input_tokens_seen": 109270896, "step": 50585 }, { "epoch": 8.252854812398043, "grad_norm": 0.17573504149913788, "learning_rate": 0.0007306659281446456, "loss": 0.0878, "num_input_tokens_seen": 109279440, "step": 50590 }, { "epoch": 8.253670473083197, "grad_norm": 0.03469372168183327, "learning_rate": 0.000730602773100071, "loss": 0.0988, "num_input_tokens_seen": 109291824, "step": 50595 }, { "epoch": 8.254486133768353, "grad_norm": 0.021459020674228668, "learning_rate": 0.0007305396133820385, "loss": 0.0152, "num_input_tokens_seen": 109302256, "step": 50600 }, { "epoch": 8.255301794453507, "grad_norm": 0.029705122113227844, "learning_rate": 0.0007304764489918284, "loss": 0.1134, "num_input_tokens_seen": 109312624, "step": 50605 }, { "epoch": 8.256117455138662, "grad_norm": 0.050352420657873154, "learning_rate": 0.0007304132799307206, "loss": 0.0955, "num_input_tokens_seen": 109324304, "step": 50610 }, { "epoch": 8.256933115823816, "grad_norm": 0.017651639878749847, "learning_rate": 0.0007303501061999956, "loss": 0.125, "num_input_tokens_seen": 109336208, "step": 50615 }, { "epoch": 8.257748776508972, "grad_norm": 0.008997799828648567, "learning_rate": 0.0007302869278009332, "loss": 0.0646, "num_input_tokens_seen": 109346672, "step": 50620 }, { "epoch": 8.258564437194128, "grad_norm": 0.25991567969322205, "learning_rate": 0.0007302237447348141, "loss": 0.1863, "num_input_tokens_seen": 109356656, "step": 50625 }, { "epoch": 8.259380097879282, "grad_norm": 0.008971529081463814, "learning_rate": 0.0007301605570029189, "loss": 0.1918, "num_input_tokens_seen": 109365168, "step": 50630 }, { "epoch": 8.260195758564437, "grad_norm": 0.014313746243715286, "learning_rate": 0.000730097364606528, "loss": 0.0394, "num_input_tokens_seen": 109375376, "step": 50635 }, { "epoch": 8.261011419249591, "grad_norm": 0.2412402629852295, "learning_rate": 0.000730034167546922, "loss": 0.1086, "num_input_tokens_seen": 109386576, "step": 50640 }, { "epoch": 8.261827079934747, "grad_norm": 0.008197726681828499, "learning_rate": 0.0007299709658253819, "loss": 0.0813, "num_input_tokens_seen": 109398352, "step": 50645 }, { "epoch": 8.262642740619903, "grad_norm": 0.20047369599342346, "learning_rate": 0.0007299077594431885, "loss": 0.0372, "num_input_tokens_seen": 109407056, "step": 50650 }, { "epoch": 8.263458401305057, "grad_norm": 0.2139945924282074, "learning_rate": 0.0007298445484016225, "loss": 0.0519, "num_input_tokens_seen": 109416912, "step": 50655 }, { "epoch": 8.264274061990212, "grad_norm": 0.05942140519618988, "learning_rate": 0.0007297813327019652, "loss": 0.0264, "num_input_tokens_seen": 109428144, "step": 50660 }, { "epoch": 8.265089722675366, "grad_norm": 0.2181319147348404, "learning_rate": 0.0007297181123454977, "loss": 0.0128, "num_input_tokens_seen": 109436976, "step": 50665 }, { "epoch": 8.265905383360522, "grad_norm": 0.10995151102542877, "learning_rate": 0.0007296548873335013, "loss": 0.1215, "num_input_tokens_seen": 109448336, "step": 50670 }, { "epoch": 8.266721044045678, "grad_norm": 0.3578730523586273, "learning_rate": 0.0007295916576672572, "loss": 0.0989, "num_input_tokens_seen": 109458992, "step": 50675 }, { "epoch": 8.267536704730832, "grad_norm": 0.22147579491138458, "learning_rate": 0.0007295284233480468, "loss": 0.1213, "num_input_tokens_seen": 109469392, "step": 50680 }, { "epoch": 8.268352365415987, "grad_norm": 0.028804771602153778, "learning_rate": 0.0007294651843771519, "loss": 0.113, "num_input_tokens_seen": 109480112, "step": 50685 }, { "epoch": 8.269168026101141, "grad_norm": 0.09923944622278214, "learning_rate": 0.0007294019407558538, "loss": 0.0278, "num_input_tokens_seen": 109491344, "step": 50690 }, { "epoch": 8.269983686786297, "grad_norm": 0.006381940096616745, "learning_rate": 0.0007293386924854346, "loss": 0.0117, "num_input_tokens_seen": 109502640, "step": 50695 }, { "epoch": 8.270799347471453, "grad_norm": 0.03557540103793144, "learning_rate": 0.0007292754395671757, "loss": 0.1572, "num_input_tokens_seen": 109512464, "step": 50700 }, { "epoch": 8.271615008156607, "grad_norm": 0.13613596558570862, "learning_rate": 0.0007292121820023592, "loss": 0.0724, "num_input_tokens_seen": 109524816, "step": 50705 }, { "epoch": 8.272430668841762, "grad_norm": 0.48386117815971375, "learning_rate": 0.000729148919792267, "loss": 0.0783, "num_input_tokens_seen": 109535568, "step": 50710 }, { "epoch": 8.273246329526916, "grad_norm": 0.005705375224351883, "learning_rate": 0.000729085652938181, "loss": 0.0368, "num_input_tokens_seen": 109546704, "step": 50715 }, { "epoch": 8.274061990212072, "grad_norm": 0.023817330598831177, "learning_rate": 0.0007290223814413841, "loss": 0.0169, "num_input_tokens_seen": 109557648, "step": 50720 }, { "epoch": 8.274877650897226, "grad_norm": 0.06411412358283997, "learning_rate": 0.0007289591053031578, "loss": 0.0384, "num_input_tokens_seen": 109569136, "step": 50725 }, { "epoch": 8.275693311582382, "grad_norm": 0.11758533865213394, "learning_rate": 0.000728895824524785, "loss": 0.0779, "num_input_tokens_seen": 109580144, "step": 50730 }, { "epoch": 8.276508972267537, "grad_norm": 0.16610552370548248, "learning_rate": 0.0007288325391075478, "loss": 0.0837, "num_input_tokens_seen": 109590928, "step": 50735 }, { "epoch": 8.277324632952691, "grad_norm": 0.021982286125421524, "learning_rate": 0.000728769249052729, "loss": 0.1219, "num_input_tokens_seen": 109601584, "step": 50740 }, { "epoch": 8.278140293637847, "grad_norm": 0.1610334813594818, "learning_rate": 0.000728705954361611, "loss": 0.0725, "num_input_tokens_seen": 109611728, "step": 50745 }, { "epoch": 8.278955954323001, "grad_norm": 0.003985857591032982, "learning_rate": 0.0007286426550354768, "loss": 0.1684, "num_input_tokens_seen": 109622896, "step": 50750 }, { "epoch": 8.279771615008157, "grad_norm": 0.0630965307354927, "learning_rate": 0.000728579351075609, "loss": 0.0341, "num_input_tokens_seen": 109633296, "step": 50755 }, { "epoch": 8.280587275693312, "grad_norm": 0.13424259424209595, "learning_rate": 0.0007285160424832909, "loss": 0.0536, "num_input_tokens_seen": 109644336, "step": 50760 }, { "epoch": 8.281402936378466, "grad_norm": 0.014872642233967781, "learning_rate": 0.0007284527292598051, "loss": 0.0983, "num_input_tokens_seen": 109653776, "step": 50765 }, { "epoch": 8.282218597063622, "grad_norm": 0.009358805604279041, "learning_rate": 0.0007283894114064351, "loss": 0.1608, "num_input_tokens_seen": 109665328, "step": 50770 }, { "epoch": 8.283034257748776, "grad_norm": 0.12680892646312714, "learning_rate": 0.0007283260889244639, "loss": 0.145, "num_input_tokens_seen": 109676176, "step": 50775 }, { "epoch": 8.283849918433932, "grad_norm": 0.13732105493545532, "learning_rate": 0.0007282627618151747, "loss": 0.0679, "num_input_tokens_seen": 109687120, "step": 50780 }, { "epoch": 8.284665579119087, "grad_norm": 0.18857133388519287, "learning_rate": 0.0007281994300798511, "loss": 0.1352, "num_input_tokens_seen": 109697552, "step": 50785 }, { "epoch": 8.285481239804241, "grad_norm": 0.16515469551086426, "learning_rate": 0.0007281360937197767, "loss": 0.0405, "num_input_tokens_seen": 109708368, "step": 50790 }, { "epoch": 8.286296900489397, "grad_norm": 0.008306358940899372, "learning_rate": 0.0007280727527362349, "loss": 0.0822, "num_input_tokens_seen": 109719536, "step": 50795 }, { "epoch": 8.28711256117455, "grad_norm": 0.01918146014213562, "learning_rate": 0.0007280094071305095, "loss": 0.0259, "num_input_tokens_seen": 109730032, "step": 50800 }, { "epoch": 8.287928221859707, "grad_norm": 0.09741424024105072, "learning_rate": 0.0007279460569038841, "loss": 0.0503, "num_input_tokens_seen": 109738992, "step": 50805 }, { "epoch": 8.28874388254486, "grad_norm": 0.016544492915272713, "learning_rate": 0.0007278827020576427, "loss": 0.0188, "num_input_tokens_seen": 109749776, "step": 50810 }, { "epoch": 8.289559543230016, "grad_norm": 0.014726830646395683, "learning_rate": 0.0007278193425930692, "loss": 0.0578, "num_input_tokens_seen": 109761264, "step": 50815 }, { "epoch": 8.290375203915172, "grad_norm": 0.0035396378953009844, "learning_rate": 0.0007277559785114478, "loss": 0.0045, "num_input_tokens_seen": 109772304, "step": 50820 }, { "epoch": 8.291190864600326, "grad_norm": 0.06589116901159286, "learning_rate": 0.0007276926098140626, "loss": 0.0646, "num_input_tokens_seen": 109783600, "step": 50825 }, { "epoch": 8.292006525285482, "grad_norm": 0.010147838853299618, "learning_rate": 0.0007276292365021979, "loss": 0.0145, "num_input_tokens_seen": 109794096, "step": 50830 }, { "epoch": 8.292822185970635, "grad_norm": 0.002056955127045512, "learning_rate": 0.0007275658585771378, "loss": 0.021, "num_input_tokens_seen": 109804400, "step": 50835 }, { "epoch": 8.293637846655791, "grad_norm": 0.005161386914551258, "learning_rate": 0.0007275024760401668, "loss": 0.013, "num_input_tokens_seen": 109814704, "step": 50840 }, { "epoch": 8.294453507340947, "grad_norm": 0.19297684729099274, "learning_rate": 0.0007274390888925697, "loss": 0.1108, "num_input_tokens_seen": 109825264, "step": 50845 }, { "epoch": 8.2952691680261, "grad_norm": 0.3012802004814148, "learning_rate": 0.0007273756971356308, "loss": 0.1523, "num_input_tokens_seen": 109837072, "step": 50850 }, { "epoch": 8.296084828711257, "grad_norm": 0.17966091632843018, "learning_rate": 0.000727312300770635, "loss": 0.04, "num_input_tokens_seen": 109847920, "step": 50855 }, { "epoch": 8.29690048939641, "grad_norm": 0.019909987226128578, "learning_rate": 0.0007272488997988671, "loss": 0.0852, "num_input_tokens_seen": 109858768, "step": 50860 }, { "epoch": 8.297716150081566, "grad_norm": 0.02569238841533661, "learning_rate": 0.000727185494221612, "loss": 0.1047, "num_input_tokens_seen": 109869072, "step": 50865 }, { "epoch": 8.298531810766722, "grad_norm": 0.002372046699747443, "learning_rate": 0.0007271220840401546, "loss": 0.0585, "num_input_tokens_seen": 109881232, "step": 50870 }, { "epoch": 8.299347471451876, "grad_norm": 0.005322215147316456, "learning_rate": 0.0007270586692557799, "loss": 0.0141, "num_input_tokens_seen": 109892368, "step": 50875 }, { "epoch": 8.300163132137031, "grad_norm": 0.007450290489941835, "learning_rate": 0.0007269952498697733, "loss": 0.0219, "num_input_tokens_seen": 109903888, "step": 50880 }, { "epoch": 8.300978792822185, "grad_norm": 0.09495791792869568, "learning_rate": 0.0007269318258834202, "loss": 0.0182, "num_input_tokens_seen": 109915120, "step": 50885 }, { "epoch": 8.301794453507341, "grad_norm": 0.008407175540924072, "learning_rate": 0.0007268683972980056, "loss": 0.0262, "num_input_tokens_seen": 109925840, "step": 50890 }, { "epoch": 8.302610114192497, "grad_norm": 0.024781066924333572, "learning_rate": 0.0007268049641148152, "loss": 0.1302, "num_input_tokens_seen": 109936304, "step": 50895 }, { "epoch": 8.30342577487765, "grad_norm": 0.01070436555892229, "learning_rate": 0.0007267415263351343, "loss": 0.0203, "num_input_tokens_seen": 109946448, "step": 50900 }, { "epoch": 8.304241435562806, "grad_norm": 0.19071489572525024, "learning_rate": 0.0007266780839602488, "loss": 0.1507, "num_input_tokens_seen": 109956432, "step": 50905 }, { "epoch": 8.30505709624796, "grad_norm": 0.3626735806465149, "learning_rate": 0.0007266146369914445, "loss": 0.1265, "num_input_tokens_seen": 109967248, "step": 50910 }, { "epoch": 8.305872756933116, "grad_norm": 0.38729625940322876, "learning_rate": 0.0007265511854300069, "loss": 0.0512, "num_input_tokens_seen": 109979792, "step": 50915 }, { "epoch": 8.30668841761827, "grad_norm": 0.3133726716041565, "learning_rate": 0.0007264877292772223, "loss": 0.1247, "num_input_tokens_seen": 109990480, "step": 50920 }, { "epoch": 8.307504078303426, "grad_norm": 0.24243846535682678, "learning_rate": 0.0007264242685343765, "loss": 0.1239, "num_input_tokens_seen": 109999760, "step": 50925 }, { "epoch": 8.308319738988581, "grad_norm": 0.19307786226272583, "learning_rate": 0.0007263608032027557, "loss": 0.0408, "num_input_tokens_seen": 110010192, "step": 50930 }, { "epoch": 8.309135399673735, "grad_norm": 0.017839960753917694, "learning_rate": 0.000726297333283646, "loss": 0.0239, "num_input_tokens_seen": 110022192, "step": 50935 }, { "epoch": 8.309951060358891, "grad_norm": 0.00453083124011755, "learning_rate": 0.0007262338587783338, "loss": 0.017, "num_input_tokens_seen": 110032944, "step": 50940 }, { "epoch": 8.310766721044045, "grad_norm": 0.1239013820886612, "learning_rate": 0.0007261703796881054, "loss": 0.0101, "num_input_tokens_seen": 110043408, "step": 50945 }, { "epoch": 8.3115823817292, "grad_norm": 0.18398962914943695, "learning_rate": 0.0007261068960142474, "loss": 0.035, "num_input_tokens_seen": 110054448, "step": 50950 }, { "epoch": 8.312398042414356, "grad_norm": 0.07020730525255203, "learning_rate": 0.0007260434077580463, "loss": 0.018, "num_input_tokens_seen": 110065072, "step": 50955 }, { "epoch": 8.31321370309951, "grad_norm": 0.02656623162329197, "learning_rate": 0.0007259799149207887, "loss": 0.0148, "num_input_tokens_seen": 110075664, "step": 50960 }, { "epoch": 8.314029363784666, "grad_norm": 0.0020199622958898544, "learning_rate": 0.0007259164175037616, "loss": 0.0139, "num_input_tokens_seen": 110086672, "step": 50965 }, { "epoch": 8.31484502446982, "grad_norm": 0.12150160223245621, "learning_rate": 0.0007258529155082516, "loss": 0.0214, "num_input_tokens_seen": 110097840, "step": 50970 }, { "epoch": 8.315660685154976, "grad_norm": 0.031684860587120056, "learning_rate": 0.0007257894089355458, "loss": 0.2559, "num_input_tokens_seen": 110109552, "step": 50975 }, { "epoch": 8.31647634584013, "grad_norm": 0.23399962484836578, "learning_rate": 0.0007257258977869313, "loss": 0.0723, "num_input_tokens_seen": 110121072, "step": 50980 }, { "epoch": 8.317292006525285, "grad_norm": 0.14578203856945038, "learning_rate": 0.000725662382063695, "loss": 0.0523, "num_input_tokens_seen": 110132432, "step": 50985 }, { "epoch": 8.318107667210441, "grad_norm": 0.10246943682432175, "learning_rate": 0.0007255988617671241, "loss": 0.0761, "num_input_tokens_seen": 110144464, "step": 50990 }, { "epoch": 8.318923327895595, "grad_norm": 0.0060472646728158, "learning_rate": 0.0007255353368985063, "loss": 0.0798, "num_input_tokens_seen": 110155280, "step": 50995 }, { "epoch": 8.31973898858075, "grad_norm": 0.22746527194976807, "learning_rate": 0.0007254718074591285, "loss": 0.027, "num_input_tokens_seen": 110165360, "step": 51000 }, { "epoch": 8.320554649265905, "grad_norm": 0.3178712725639343, "learning_rate": 0.0007254082734502788, "loss": 0.116, "num_input_tokens_seen": 110176336, "step": 51005 }, { "epoch": 8.32137030995106, "grad_norm": 0.3559790849685669, "learning_rate": 0.0007253447348732443, "loss": 0.0362, "num_input_tokens_seen": 110188624, "step": 51010 }, { "epoch": 8.322185970636216, "grad_norm": 0.22412791848182678, "learning_rate": 0.000725281191729313, "loss": 0.0574, "num_input_tokens_seen": 110198768, "step": 51015 }, { "epoch": 8.32300163132137, "grad_norm": 0.01915472373366356, "learning_rate": 0.0007252176440197726, "loss": 0.013, "num_input_tokens_seen": 110209456, "step": 51020 }, { "epoch": 8.323817292006526, "grad_norm": 0.018906638026237488, "learning_rate": 0.0007251540917459109, "loss": 0.0337, "num_input_tokens_seen": 110220656, "step": 51025 }, { "epoch": 8.32463295269168, "grad_norm": 0.09125878661870956, "learning_rate": 0.0007250905349090158, "loss": 0.0241, "num_input_tokens_seen": 110231504, "step": 51030 }, { "epoch": 8.325448613376835, "grad_norm": 0.28258514404296875, "learning_rate": 0.0007250269735103754, "loss": 0.0414, "num_input_tokens_seen": 110242704, "step": 51035 }, { "epoch": 8.326264274061991, "grad_norm": 0.026500288397073746, "learning_rate": 0.0007249634075512781, "loss": 0.0064, "num_input_tokens_seen": 110253104, "step": 51040 }, { "epoch": 8.327079934747145, "grad_norm": 0.002784762065857649, "learning_rate": 0.0007248998370330119, "loss": 0.1243, "num_input_tokens_seen": 110263408, "step": 51045 }, { "epoch": 8.3278955954323, "grad_norm": 0.02501026540994644, "learning_rate": 0.0007248362619568651, "loss": 0.0126, "num_input_tokens_seen": 110275536, "step": 51050 }, { "epoch": 8.328711256117455, "grad_norm": 0.005268088076263666, "learning_rate": 0.0007247726823241264, "loss": 0.1713, "num_input_tokens_seen": 110285616, "step": 51055 }, { "epoch": 8.32952691680261, "grad_norm": 0.004592955578118563, "learning_rate": 0.0007247090981360841, "loss": 0.0813, "num_input_tokens_seen": 110297680, "step": 51060 }, { "epoch": 8.330342577487766, "grad_norm": 0.2294696718454361, "learning_rate": 0.0007246455093940268, "loss": 0.1092, "num_input_tokens_seen": 110307120, "step": 51065 }, { "epoch": 8.33115823817292, "grad_norm": 0.01248850580304861, "learning_rate": 0.0007245819160992434, "loss": 0.0297, "num_input_tokens_seen": 110317424, "step": 51070 }, { "epoch": 8.331973898858076, "grad_norm": 0.15484943985939026, "learning_rate": 0.0007245183182530224, "loss": 0.083, "num_input_tokens_seen": 110328240, "step": 51075 }, { "epoch": 8.33278955954323, "grad_norm": 0.01719985157251358, "learning_rate": 0.0007244547158566531, "loss": 0.0061, "num_input_tokens_seen": 110338448, "step": 51080 }, { "epoch": 8.333605220228385, "grad_norm": 0.09124539792537689, "learning_rate": 0.0007243911089114239, "loss": 0.032, "num_input_tokens_seen": 110348496, "step": 51085 }, { "epoch": 8.33442088091354, "grad_norm": 0.05580779165029526, "learning_rate": 0.0007243274974186245, "loss": 0.0204, "num_input_tokens_seen": 110360400, "step": 51090 }, { "epoch": 8.335236541598695, "grad_norm": 0.09825216233730316, "learning_rate": 0.0007242638813795437, "loss": 0.0344, "num_input_tokens_seen": 110371664, "step": 51095 }, { "epoch": 8.33605220228385, "grad_norm": 0.009273175150156021, "learning_rate": 0.0007242002607954708, "loss": 0.0771, "num_input_tokens_seen": 110383024, "step": 51100 }, { "epoch": 8.336867862969005, "grad_norm": 0.11133985221385956, "learning_rate": 0.000724136635667695, "loss": 0.0664, "num_input_tokens_seen": 110393808, "step": 51105 }, { "epoch": 8.33768352365416, "grad_norm": 0.5067927241325378, "learning_rate": 0.0007240730059975063, "loss": 0.123, "num_input_tokens_seen": 110404912, "step": 51110 }, { "epoch": 8.338499184339314, "grad_norm": 0.008333737030625343, "learning_rate": 0.0007240093717861937, "loss": 0.0251, "num_input_tokens_seen": 110415920, "step": 51115 }, { "epoch": 8.33931484502447, "grad_norm": 0.007308666128665209, "learning_rate": 0.000723945733035047, "loss": 0.0115, "num_input_tokens_seen": 110426416, "step": 51120 }, { "epoch": 8.340130505709626, "grad_norm": 0.05521797761321068, "learning_rate": 0.0007238820897453559, "loss": 0.2082, "num_input_tokens_seen": 110438480, "step": 51125 }, { "epoch": 8.34094616639478, "grad_norm": 0.02290504239499569, "learning_rate": 0.0007238184419184104, "loss": 0.0267, "num_input_tokens_seen": 110449168, "step": 51130 }, { "epoch": 8.341761827079935, "grad_norm": 0.018945492804050446, "learning_rate": 0.0007237547895555001, "loss": 0.0522, "num_input_tokens_seen": 110459472, "step": 51135 }, { "epoch": 8.34257748776509, "grad_norm": 0.04555573686957359, "learning_rate": 0.0007236911326579152, "loss": 0.0231, "num_input_tokens_seen": 110469584, "step": 51140 }, { "epoch": 8.343393148450245, "grad_norm": 0.012672092765569687, "learning_rate": 0.0007236274712269457, "loss": 0.0262, "num_input_tokens_seen": 110479600, "step": 51145 }, { "epoch": 8.3442088091354, "grad_norm": 0.01710429973900318, "learning_rate": 0.0007235638052638819, "loss": 0.0671, "num_input_tokens_seen": 110491600, "step": 51150 }, { "epoch": 8.345024469820554, "grad_norm": 0.0054277884773910046, "learning_rate": 0.0007235001347700139, "loss": 0.0549, "num_input_tokens_seen": 110501872, "step": 51155 }, { "epoch": 8.34584013050571, "grad_norm": 0.018640436232089996, "learning_rate": 0.0007234364597466321, "loss": 0.0379, "num_input_tokens_seen": 110513328, "step": 51160 }, { "epoch": 8.346655791190864, "grad_norm": 0.05840952321887016, "learning_rate": 0.000723372780195027, "loss": 0.0388, "num_input_tokens_seen": 110524496, "step": 51165 }, { "epoch": 8.34747145187602, "grad_norm": 0.004298006650060415, "learning_rate": 0.0007233090961164892, "loss": 0.0765, "num_input_tokens_seen": 110535120, "step": 51170 }, { "epoch": 8.348287112561174, "grad_norm": 0.12360477447509766, "learning_rate": 0.000723245407512309, "loss": 0.0333, "num_input_tokens_seen": 110545712, "step": 51175 }, { "epoch": 8.34910277324633, "grad_norm": 0.19511815905570984, "learning_rate": 0.0007231817143837778, "loss": 0.051, "num_input_tokens_seen": 110555952, "step": 51180 }, { "epoch": 8.349918433931485, "grad_norm": 0.002610130002722144, "learning_rate": 0.0007231180167321858, "loss": 0.0388, "num_input_tokens_seen": 110566320, "step": 51185 }, { "epoch": 8.350734094616639, "grad_norm": 0.17444966733455658, "learning_rate": 0.0007230543145588242, "loss": 0.1123, "num_input_tokens_seen": 110577744, "step": 51190 }, { "epoch": 8.351549755301795, "grad_norm": 0.22752845287322998, "learning_rate": 0.000722990607864984, "loss": 0.1407, "num_input_tokens_seen": 110589392, "step": 51195 }, { "epoch": 8.352365415986949, "grad_norm": 0.19195815920829773, "learning_rate": 0.0007229268966519562, "loss": 0.0575, "num_input_tokens_seen": 110600880, "step": 51200 }, { "epoch": 8.353181076672104, "grad_norm": 0.08895209431648254, "learning_rate": 0.0007228631809210321, "loss": 0.0465, "num_input_tokens_seen": 110611120, "step": 51205 }, { "epoch": 8.35399673735726, "grad_norm": 0.048343852162361145, "learning_rate": 0.0007227994606735029, "loss": 0.091, "num_input_tokens_seen": 110621584, "step": 51210 }, { "epoch": 8.354812398042414, "grad_norm": 0.30052274465560913, "learning_rate": 0.0007227357359106598, "loss": 0.1321, "num_input_tokens_seen": 110632656, "step": 51215 }, { "epoch": 8.35562805872757, "grad_norm": 0.00870759878307581, "learning_rate": 0.0007226720066337946, "loss": 0.0447, "num_input_tokens_seen": 110644208, "step": 51220 }, { "epoch": 8.356443719412724, "grad_norm": 0.051032643765211105, "learning_rate": 0.0007226082728441989, "loss": 0.0856, "num_input_tokens_seen": 110654832, "step": 51225 }, { "epoch": 8.35725938009788, "grad_norm": 0.015562736429274082, "learning_rate": 0.0007225445345431638, "loss": 0.0081, "num_input_tokens_seen": 110665648, "step": 51230 }, { "epoch": 8.358075040783035, "grad_norm": 0.008317803032696247, "learning_rate": 0.0007224807917319817, "loss": 0.0205, "num_input_tokens_seen": 110675280, "step": 51235 }, { "epoch": 8.358890701468189, "grad_norm": 0.01899358443915844, "learning_rate": 0.000722417044411944, "loss": 0.0458, "num_input_tokens_seen": 110685296, "step": 51240 }, { "epoch": 8.359706362153345, "grad_norm": 0.009460066445171833, "learning_rate": 0.0007223532925843427, "loss": 0.0061, "num_input_tokens_seen": 110695408, "step": 51245 }, { "epoch": 8.360522022838499, "grad_norm": 0.02281215600669384, "learning_rate": 0.0007222895362504698, "loss": 0.0356, "num_input_tokens_seen": 110707312, "step": 51250 }, { "epoch": 8.361337683523654, "grad_norm": 0.1802525818347931, "learning_rate": 0.0007222257754116176, "loss": 0.1316, "num_input_tokens_seen": 110718288, "step": 51255 }, { "epoch": 8.362153344208808, "grad_norm": 0.011667085811495781, "learning_rate": 0.000722162010069078, "loss": 0.0183, "num_input_tokens_seen": 110728880, "step": 51260 }, { "epoch": 8.362969004893964, "grad_norm": 0.005664953961968422, "learning_rate": 0.0007220982402241436, "loss": 0.1559, "num_input_tokens_seen": 110741104, "step": 51265 }, { "epoch": 8.36378466557912, "grad_norm": 0.005667020566761494, "learning_rate": 0.0007220344658781065, "loss": 0.0779, "num_input_tokens_seen": 110751888, "step": 51270 }, { "epoch": 8.364600326264274, "grad_norm": 0.025366060435771942, "learning_rate": 0.0007219706870322594, "loss": 0.0326, "num_input_tokens_seen": 110762640, "step": 51275 }, { "epoch": 8.36541598694943, "grad_norm": 0.19890545308589935, "learning_rate": 0.0007219069036878945, "loss": 0.161, "num_input_tokens_seen": 110772912, "step": 51280 }, { "epoch": 8.366231647634583, "grad_norm": 0.07803814113140106, "learning_rate": 0.0007218431158463048, "loss": 0.0557, "num_input_tokens_seen": 110784592, "step": 51285 }, { "epoch": 8.367047308319739, "grad_norm": 0.19067999720573425, "learning_rate": 0.000721779323508783, "loss": 0.0339, "num_input_tokens_seen": 110795216, "step": 51290 }, { "epoch": 8.367862969004895, "grad_norm": 0.43747133016586304, "learning_rate": 0.0007217155266766217, "loss": 0.166, "num_input_tokens_seen": 110805968, "step": 51295 }, { "epoch": 8.368678629690049, "grad_norm": 0.020700732246041298, "learning_rate": 0.0007216517253511143, "loss": 0.0165, "num_input_tokens_seen": 110816112, "step": 51300 }, { "epoch": 8.369494290375204, "grad_norm": 0.2714422941207886, "learning_rate": 0.0007215879195335531, "loss": 0.0797, "num_input_tokens_seen": 110826384, "step": 51305 }, { "epoch": 8.370309951060358, "grad_norm": 0.02906269021332264, "learning_rate": 0.0007215241092252319, "loss": 0.0776, "num_input_tokens_seen": 110835728, "step": 51310 }, { "epoch": 8.371125611745514, "grad_norm": 0.006266695912927389, "learning_rate": 0.0007214602944274435, "loss": 0.0088, "num_input_tokens_seen": 110846768, "step": 51315 }, { "epoch": 8.37194127243067, "grad_norm": 0.019487502053380013, "learning_rate": 0.0007213964751414812, "loss": 0.0152, "num_input_tokens_seen": 110856720, "step": 51320 }, { "epoch": 8.372756933115824, "grad_norm": 0.005166689399629831, "learning_rate": 0.0007213326513686386, "loss": 0.0128, "num_input_tokens_seen": 110867536, "step": 51325 }, { "epoch": 8.37357259380098, "grad_norm": 0.0032772270496934652, "learning_rate": 0.0007212688231102091, "loss": 0.0934, "num_input_tokens_seen": 110878320, "step": 51330 }, { "epoch": 8.374388254486133, "grad_norm": 0.022973116487264633, "learning_rate": 0.000721204990367486, "loss": 0.1023, "num_input_tokens_seen": 110888816, "step": 51335 }, { "epoch": 8.375203915171289, "grad_norm": 0.2152455896139145, "learning_rate": 0.0007211411531417633, "loss": 0.223, "num_input_tokens_seen": 110899792, "step": 51340 }, { "epoch": 8.376019575856443, "grad_norm": 0.2188149094581604, "learning_rate": 0.0007210773114343345, "loss": 0.1133, "num_input_tokens_seen": 110909584, "step": 51345 }, { "epoch": 8.376835236541599, "grad_norm": 0.020212259143590927, "learning_rate": 0.0007210134652464935, "loss": 0.0428, "num_input_tokens_seen": 110920752, "step": 51350 }, { "epoch": 8.377650897226754, "grad_norm": 0.004299887455999851, "learning_rate": 0.0007209496145795343, "loss": 0.0371, "num_input_tokens_seen": 110931248, "step": 51355 }, { "epoch": 8.378466557911908, "grad_norm": 0.011292368173599243, "learning_rate": 0.000720885759434751, "loss": 0.0168, "num_input_tokens_seen": 110940432, "step": 51360 }, { "epoch": 8.379282218597064, "grad_norm": 0.017861951142549515, "learning_rate": 0.0007208218998134375, "loss": 0.0674, "num_input_tokens_seen": 110950832, "step": 51365 }, { "epoch": 8.380097879282218, "grad_norm": 0.02082081325352192, "learning_rate": 0.000720758035716888, "loss": 0.0143, "num_input_tokens_seen": 110960624, "step": 51370 }, { "epoch": 8.380913539967374, "grad_norm": 0.13963234424591064, "learning_rate": 0.0007206941671463969, "loss": 0.1249, "num_input_tokens_seen": 110969840, "step": 51375 }, { "epoch": 8.38172920065253, "grad_norm": 0.005206751171499491, "learning_rate": 0.0007206302941032586, "loss": 0.1266, "num_input_tokens_seen": 110981680, "step": 51380 }, { "epoch": 8.382544861337683, "grad_norm": 0.04112826660275459, "learning_rate": 0.0007205664165887673, "loss": 0.0323, "num_input_tokens_seen": 110992112, "step": 51385 }, { "epoch": 8.383360522022839, "grad_norm": 0.05072109028697014, "learning_rate": 0.000720502534604218, "loss": 0.0369, "num_input_tokens_seen": 111001616, "step": 51390 }, { "epoch": 8.384176182707993, "grad_norm": 0.026560001075267792, "learning_rate": 0.0007204386481509049, "loss": 0.0208, "num_input_tokens_seen": 111011568, "step": 51395 }, { "epoch": 8.384991843393149, "grad_norm": 0.32577085494995117, "learning_rate": 0.0007203747572301231, "loss": 0.1754, "num_input_tokens_seen": 111022128, "step": 51400 }, { "epoch": 8.385807504078304, "grad_norm": 0.09001737087965012, "learning_rate": 0.0007203108618431672, "loss": 0.0991, "num_input_tokens_seen": 111032496, "step": 51405 }, { "epoch": 8.386623164763458, "grad_norm": 0.04394825920462608, "learning_rate": 0.0007202469619913322, "loss": 0.0641, "num_input_tokens_seen": 111043216, "step": 51410 }, { "epoch": 8.387438825448614, "grad_norm": 0.16462813317775726, "learning_rate": 0.0007201830576759132, "loss": 0.0273, "num_input_tokens_seen": 111054032, "step": 51415 }, { "epoch": 8.388254486133768, "grad_norm": 0.34273892641067505, "learning_rate": 0.0007201191488982051, "loss": 0.1403, "num_input_tokens_seen": 111065072, "step": 51420 }, { "epoch": 8.389070146818923, "grad_norm": 0.018835965543985367, "learning_rate": 0.0007200552356595031, "loss": 0.1901, "num_input_tokens_seen": 111076208, "step": 51425 }, { "epoch": 8.38988580750408, "grad_norm": 0.004108819179236889, "learning_rate": 0.0007199913179611029, "loss": 0.0192, "num_input_tokens_seen": 111087728, "step": 51430 }, { "epoch": 8.390701468189233, "grad_norm": 0.03467337414622307, "learning_rate": 0.0007199273958042994, "loss": 0.1125, "num_input_tokens_seen": 111098928, "step": 51435 }, { "epoch": 8.391517128874389, "grad_norm": 0.04100308567285538, "learning_rate": 0.0007198634691903882, "loss": 0.1222, "num_input_tokens_seen": 111110320, "step": 51440 }, { "epoch": 8.392332789559543, "grad_norm": 0.27813297510147095, "learning_rate": 0.0007197995381206649, "loss": 0.1024, "num_input_tokens_seen": 111120112, "step": 51445 }, { "epoch": 8.393148450244698, "grad_norm": 0.2542005777359009, "learning_rate": 0.0007197356025964252, "loss": 0.1429, "num_input_tokens_seen": 111131568, "step": 51450 }, { "epoch": 8.393964110929852, "grad_norm": 0.016471102833747864, "learning_rate": 0.0007196716626189646, "loss": 0.0473, "num_input_tokens_seen": 111142992, "step": 51455 }, { "epoch": 8.394779771615008, "grad_norm": 0.06641436368227005, "learning_rate": 0.0007196077181895792, "loss": 0.1069, "num_input_tokens_seen": 111154512, "step": 51460 }, { "epoch": 8.395595432300164, "grad_norm": 0.1935798078775406, "learning_rate": 0.0007195437693095647, "loss": 0.1171, "num_input_tokens_seen": 111164304, "step": 51465 }, { "epoch": 8.396411092985318, "grad_norm": 0.019165636971592903, "learning_rate": 0.0007194798159802174, "loss": 0.0137, "num_input_tokens_seen": 111174832, "step": 51470 }, { "epoch": 8.397226753670473, "grad_norm": 0.2816579043865204, "learning_rate": 0.0007194158582028332, "loss": 0.2435, "num_input_tokens_seen": 111184496, "step": 51475 }, { "epoch": 8.398042414355627, "grad_norm": 0.010307567194104195, "learning_rate": 0.0007193518959787081, "loss": 0.1182, "num_input_tokens_seen": 111194672, "step": 51480 }, { "epoch": 8.398858075040783, "grad_norm": 0.43253281712532043, "learning_rate": 0.0007192879293091386, "loss": 0.0763, "num_input_tokens_seen": 111205328, "step": 51485 }, { "epoch": 8.399673735725939, "grad_norm": 0.026115600019693375, "learning_rate": 0.000719223958195421, "loss": 0.029, "num_input_tokens_seen": 111216944, "step": 51490 }, { "epoch": 8.400489396411093, "grad_norm": 0.004687016364187002, "learning_rate": 0.0007191599826388518, "loss": 0.1128, "num_input_tokens_seen": 111228400, "step": 51495 }, { "epoch": 8.401305057096248, "grad_norm": 0.003850112436339259, "learning_rate": 0.0007190960026407276, "loss": 0.0425, "num_input_tokens_seen": 111239216, "step": 51500 }, { "epoch": 8.402120717781402, "grad_norm": 0.02196848951280117, "learning_rate": 0.0007190320182023449, "loss": 0.0334, "num_input_tokens_seen": 111249904, "step": 51505 }, { "epoch": 8.402936378466558, "grad_norm": 0.10487034171819687, "learning_rate": 0.0007189680293250005, "loss": 0.0302, "num_input_tokens_seen": 111261808, "step": 51510 }, { "epoch": 8.403752039151712, "grad_norm": 0.03270275518298149, "learning_rate": 0.0007189040360099913, "loss": 0.0364, "num_input_tokens_seen": 111273136, "step": 51515 }, { "epoch": 8.404567699836868, "grad_norm": 0.04307975620031357, "learning_rate": 0.000718840038258614, "loss": 0.03, "num_input_tokens_seen": 111285488, "step": 51520 }, { "epoch": 8.405383360522023, "grad_norm": 0.05347808450460434, "learning_rate": 0.0007187760360721658, "loss": 0.0384, "num_input_tokens_seen": 111297296, "step": 51525 }, { "epoch": 8.406199021207177, "grad_norm": 0.046753790229558945, "learning_rate": 0.0007187120294519434, "loss": 0.0718, "num_input_tokens_seen": 111308784, "step": 51530 }, { "epoch": 8.407014681892333, "grad_norm": 0.016464874148368835, "learning_rate": 0.0007186480183992446, "loss": 0.0653, "num_input_tokens_seen": 111318928, "step": 51535 }, { "epoch": 8.407830342577487, "grad_norm": 0.022152036428451538, "learning_rate": 0.0007185840029153663, "loss": 0.1133, "num_input_tokens_seen": 111330320, "step": 51540 }, { "epoch": 8.408646003262643, "grad_norm": 0.06510874629020691, "learning_rate": 0.0007185199830016058, "loss": 0.0687, "num_input_tokens_seen": 111341712, "step": 51545 }, { "epoch": 8.409461663947798, "grad_norm": 0.39695748686790466, "learning_rate": 0.0007184559586592606, "loss": 0.078, "num_input_tokens_seen": 111352688, "step": 51550 }, { "epoch": 8.410277324632952, "grad_norm": 0.06608985364437103, "learning_rate": 0.0007183919298896283, "loss": 0.0637, "num_input_tokens_seen": 111364144, "step": 51555 }, { "epoch": 8.411092985318108, "grad_norm": 0.23177236318588257, "learning_rate": 0.0007183278966940065, "loss": 0.0279, "num_input_tokens_seen": 111374512, "step": 51560 }, { "epoch": 8.411908646003262, "grad_norm": 0.014381797052919865, "learning_rate": 0.000718263859073693, "loss": 0.0146, "num_input_tokens_seen": 111384592, "step": 51565 }, { "epoch": 8.412724306688418, "grad_norm": 0.3006967306137085, "learning_rate": 0.0007181998170299854, "loss": 0.1615, "num_input_tokens_seen": 111396112, "step": 51570 }, { "epoch": 8.413539967373573, "grad_norm": 0.029067158699035645, "learning_rate": 0.0007181357705641818, "loss": 0.0813, "num_input_tokens_seen": 111407920, "step": 51575 }, { "epoch": 8.414355628058727, "grad_norm": 0.4092482924461365, "learning_rate": 0.0007180717196775799, "loss": 0.157, "num_input_tokens_seen": 111418320, "step": 51580 }, { "epoch": 8.415171288743883, "grad_norm": 0.02459162473678589, "learning_rate": 0.0007180076643714781, "loss": 0.1154, "num_input_tokens_seen": 111429200, "step": 51585 }, { "epoch": 8.415986949429037, "grad_norm": 0.03159189224243164, "learning_rate": 0.0007179436046471743, "loss": 0.0434, "num_input_tokens_seen": 111440336, "step": 51590 }, { "epoch": 8.416802610114193, "grad_norm": 0.2111511081457138, "learning_rate": 0.0007178795405059671, "loss": 0.0707, "num_input_tokens_seen": 111451920, "step": 51595 }, { "epoch": 8.417618270799348, "grad_norm": 0.2789364457130432, "learning_rate": 0.0007178154719491545, "loss": 0.0808, "num_input_tokens_seen": 111463344, "step": 51600 }, { "epoch": 8.418433931484502, "grad_norm": 0.06086054444313049, "learning_rate": 0.0007177513989780349, "loss": 0.1239, "num_input_tokens_seen": 111474256, "step": 51605 }, { "epoch": 8.419249592169658, "grad_norm": 0.019520027562975883, "learning_rate": 0.0007176873215939072, "loss": 0.0895, "num_input_tokens_seen": 111486544, "step": 51610 }, { "epoch": 8.420065252854812, "grad_norm": 0.14331720769405365, "learning_rate": 0.0007176232397980696, "loss": 0.1271, "num_input_tokens_seen": 111496752, "step": 51615 }, { "epoch": 8.420880913539968, "grad_norm": 0.0736565813422203, "learning_rate": 0.000717559153591821, "loss": 0.0223, "num_input_tokens_seen": 111507696, "step": 51620 }, { "epoch": 8.421696574225122, "grad_norm": 0.028831366449594498, "learning_rate": 0.0007174950629764602, "loss": 0.0343, "num_input_tokens_seen": 111517040, "step": 51625 }, { "epoch": 8.422512234910277, "grad_norm": 0.020894574001431465, "learning_rate": 0.0007174309679532859, "loss": 0.0226, "num_input_tokens_seen": 111527824, "step": 51630 }, { "epoch": 8.423327895595433, "grad_norm": 0.3167615830898285, "learning_rate": 0.0007173668685235973, "loss": 0.0735, "num_input_tokens_seen": 111538576, "step": 51635 }, { "epoch": 8.424143556280587, "grad_norm": 0.23885099589824677, "learning_rate": 0.0007173027646886934, "loss": 0.0408, "num_input_tokens_seen": 111548976, "step": 51640 }, { "epoch": 8.424959216965743, "grad_norm": 0.008945178240537643, "learning_rate": 0.0007172386564498733, "loss": 0.1267, "num_input_tokens_seen": 111560560, "step": 51645 }, { "epoch": 8.425774877650896, "grad_norm": 0.03208020329475403, "learning_rate": 0.0007171745438084362, "loss": 0.1069, "num_input_tokens_seen": 111571600, "step": 51650 }, { "epoch": 8.426590538336052, "grad_norm": 0.008227204903960228, "learning_rate": 0.0007171104267656814, "loss": 0.0583, "num_input_tokens_seen": 111583920, "step": 51655 }, { "epoch": 8.427406199021208, "grad_norm": 0.08085020631551743, "learning_rate": 0.0007170463053229085, "loss": 0.0314, "num_input_tokens_seen": 111593264, "step": 51660 }, { "epoch": 8.428221859706362, "grad_norm": 0.11244131624698639, "learning_rate": 0.0007169821794814168, "loss": 0.056, "num_input_tokens_seen": 111602416, "step": 51665 }, { "epoch": 8.429037520391518, "grad_norm": 0.02487485483288765, "learning_rate": 0.000716918049242506, "loss": 0.0144, "num_input_tokens_seen": 111614448, "step": 51670 }, { "epoch": 8.429853181076671, "grad_norm": 0.01839791052043438, "learning_rate": 0.0007168539146074757, "loss": 0.015, "num_input_tokens_seen": 111625360, "step": 51675 }, { "epoch": 8.430668841761827, "grad_norm": 0.20842097699642181, "learning_rate": 0.0007167897755776258, "loss": 0.0672, "num_input_tokens_seen": 111636720, "step": 51680 }, { "epoch": 8.431484502446983, "grad_norm": 0.07622958719730377, "learning_rate": 0.0007167256321542561, "loss": 0.1013, "num_input_tokens_seen": 111647312, "step": 51685 }, { "epoch": 8.432300163132137, "grad_norm": 0.07612695544958115, "learning_rate": 0.0007166614843386666, "loss": 0.0439, "num_input_tokens_seen": 111657968, "step": 51690 }, { "epoch": 8.433115823817293, "grad_norm": 0.20850707590579987, "learning_rate": 0.0007165973321321571, "loss": 0.0798, "num_input_tokens_seen": 111669040, "step": 51695 }, { "epoch": 8.433931484502446, "grad_norm": 0.13440640270709991, "learning_rate": 0.0007165331755360281, "loss": 0.0137, "num_input_tokens_seen": 111680560, "step": 51700 }, { "epoch": 8.434747145187602, "grad_norm": 0.004296013154089451, "learning_rate": 0.0007164690145515793, "loss": 0.0736, "num_input_tokens_seen": 111691408, "step": 51705 }, { "epoch": 8.435562805872756, "grad_norm": 0.026044311001896858, "learning_rate": 0.0007164048491801116, "loss": 0.0153, "num_input_tokens_seen": 111701776, "step": 51710 }, { "epoch": 8.436378466557912, "grad_norm": 0.218144953250885, "learning_rate": 0.0007163406794229249, "loss": 0.1054, "num_input_tokens_seen": 111712880, "step": 51715 }, { "epoch": 8.437194127243067, "grad_norm": 0.06459010392427444, "learning_rate": 0.0007162765052813199, "loss": 0.0305, "num_input_tokens_seen": 111725520, "step": 51720 }, { "epoch": 8.438009787928221, "grad_norm": 0.009473503567278385, "learning_rate": 0.0007162123267565972, "loss": 0.0265, "num_input_tokens_seen": 111736240, "step": 51725 }, { "epoch": 8.438825448613377, "grad_norm": 0.1754245012998581, "learning_rate": 0.0007161481438500574, "loss": 0.0571, "num_input_tokens_seen": 111747632, "step": 51730 }, { "epoch": 8.439641109298531, "grad_norm": 0.007052075117826462, "learning_rate": 0.0007160839565630014, "loss": 0.0112, "num_input_tokens_seen": 111758224, "step": 51735 }, { "epoch": 8.440456769983687, "grad_norm": 0.06561672687530518, "learning_rate": 0.0007160197648967298, "loss": 0.0301, "num_input_tokens_seen": 111770096, "step": 51740 }, { "epoch": 8.441272430668842, "grad_norm": 0.4638075530529022, "learning_rate": 0.0007159555688525434, "loss": 0.0888, "num_input_tokens_seen": 111780560, "step": 51745 }, { "epoch": 8.442088091353996, "grad_norm": 0.25027647614479065, "learning_rate": 0.0007158913684317437, "loss": 0.118, "num_input_tokens_seen": 111791408, "step": 51750 }, { "epoch": 8.442903752039152, "grad_norm": 0.010236898437142372, "learning_rate": 0.0007158271636356315, "loss": 0.0693, "num_input_tokens_seen": 111802096, "step": 51755 }, { "epoch": 8.443719412724306, "grad_norm": 0.021024169400334358, "learning_rate": 0.000715762954465508, "loss": 0.089, "num_input_tokens_seen": 111813136, "step": 51760 }, { "epoch": 8.444535073409462, "grad_norm": 0.0077271731570363045, "learning_rate": 0.0007156987409226745, "loss": 0.0293, "num_input_tokens_seen": 111824624, "step": 51765 }, { "epoch": 8.445350734094617, "grad_norm": 0.010105198249220848, "learning_rate": 0.0007156345230084325, "loss": 0.0382, "num_input_tokens_seen": 111834736, "step": 51770 }, { "epoch": 8.446166394779771, "grad_norm": 0.012942255474627018, "learning_rate": 0.0007155703007240832, "loss": 0.0033, "num_input_tokens_seen": 111846192, "step": 51775 }, { "epoch": 8.446982055464927, "grad_norm": 0.0030060415156185627, "learning_rate": 0.0007155060740709284, "loss": 0.0844, "num_input_tokens_seen": 111858096, "step": 51780 }, { "epoch": 8.447797716150081, "grad_norm": 0.04821230471134186, "learning_rate": 0.0007154418430502696, "loss": 0.0281, "num_input_tokens_seen": 111868720, "step": 51785 }, { "epoch": 8.448613376835237, "grad_norm": 0.005300053860992193, "learning_rate": 0.0007153776076634084, "loss": 0.0975, "num_input_tokens_seen": 111879888, "step": 51790 }, { "epoch": 8.449429037520392, "grad_norm": 0.05414601042866707, "learning_rate": 0.0007153133679116469, "loss": 0.0067, "num_input_tokens_seen": 111890672, "step": 51795 }, { "epoch": 8.450244698205546, "grad_norm": 0.09117277711629868, "learning_rate": 0.0007152491237962867, "loss": 0.018, "num_input_tokens_seen": 111901936, "step": 51800 }, { "epoch": 8.451060358890702, "grad_norm": 0.23480220139026642, "learning_rate": 0.0007151848753186301, "loss": 0.0746, "num_input_tokens_seen": 111912656, "step": 51805 }, { "epoch": 8.451876019575856, "grad_norm": 0.2549675405025482, "learning_rate": 0.000715120622479979, "loss": 0.0947, "num_input_tokens_seen": 111922448, "step": 51810 }, { "epoch": 8.452691680261012, "grad_norm": 0.004847115837037563, "learning_rate": 0.0007150563652816355, "loss": 0.1074, "num_input_tokens_seen": 111933200, "step": 51815 }, { "epoch": 8.453507340946166, "grad_norm": 0.2996913194656372, "learning_rate": 0.0007149921037249021, "loss": 0.2818, "num_input_tokens_seen": 111944048, "step": 51820 }, { "epoch": 8.454323001631321, "grad_norm": 0.08265216648578644, "learning_rate": 0.0007149278378110808, "loss": 0.0484, "num_input_tokens_seen": 111955184, "step": 51825 }, { "epoch": 8.455138662316477, "grad_norm": 0.033596016466617584, "learning_rate": 0.0007148635675414743, "loss": 0.0412, "num_input_tokens_seen": 111967632, "step": 51830 }, { "epoch": 8.455954323001631, "grad_norm": 0.014473401010036469, "learning_rate": 0.000714799292917385, "loss": 0.0857, "num_input_tokens_seen": 111978160, "step": 51835 }, { "epoch": 8.456769983686787, "grad_norm": 0.10292612016201019, "learning_rate": 0.0007147350139401156, "loss": 0.024, "num_input_tokens_seen": 111988688, "step": 51840 }, { "epoch": 8.45758564437194, "grad_norm": 0.011349059641361237, "learning_rate": 0.0007146707306109687, "loss": 0.0143, "num_input_tokens_seen": 111999024, "step": 51845 }, { "epoch": 8.458401305057096, "grad_norm": 0.07083631306886673, "learning_rate": 0.000714606442931247, "loss": 0.039, "num_input_tokens_seen": 112010544, "step": 51850 }, { "epoch": 8.459216965742252, "grad_norm": 0.0234207920730114, "learning_rate": 0.0007145421509022536, "loss": 0.0425, "num_input_tokens_seen": 112021456, "step": 51855 }, { "epoch": 8.460032626427406, "grad_norm": 0.10967404395341873, "learning_rate": 0.0007144778545252914, "loss": 0.1617, "num_input_tokens_seen": 112032528, "step": 51860 }, { "epoch": 8.460848287112562, "grad_norm": 0.04319612681865692, "learning_rate": 0.0007144135538016633, "loss": 0.0911, "num_input_tokens_seen": 112043376, "step": 51865 }, { "epoch": 8.461663947797716, "grad_norm": 0.02937161736190319, "learning_rate": 0.0007143492487326726, "loss": 0.0352, "num_input_tokens_seen": 112054928, "step": 51870 }, { "epoch": 8.462479608482871, "grad_norm": 0.04233347997069359, "learning_rate": 0.0007142849393196223, "loss": 0.1164, "num_input_tokens_seen": 112066352, "step": 51875 }, { "epoch": 8.463295269168025, "grad_norm": 0.003394125262275338, "learning_rate": 0.000714220625563816, "loss": 0.0085, "num_input_tokens_seen": 112077584, "step": 51880 }, { "epoch": 8.464110929853181, "grad_norm": 0.20848369598388672, "learning_rate": 0.0007141563074665571, "loss": 0.0893, "num_input_tokens_seen": 112088560, "step": 51885 }, { "epoch": 8.464926590538337, "grad_norm": 0.2927113175392151, "learning_rate": 0.0007140919850291488, "loss": 0.0801, "num_input_tokens_seen": 112099696, "step": 51890 }, { "epoch": 8.46574225122349, "grad_norm": 0.18456770479679108, "learning_rate": 0.0007140276582528947, "loss": 0.0724, "num_input_tokens_seen": 112111728, "step": 51895 }, { "epoch": 8.466557911908646, "grad_norm": 0.01184168178588152, "learning_rate": 0.0007139633271390988, "loss": 0.0117, "num_input_tokens_seen": 112121904, "step": 51900 }, { "epoch": 8.4673735725938, "grad_norm": 0.01245115976780653, "learning_rate": 0.0007138989916890644, "loss": 0.0217, "num_input_tokens_seen": 112133168, "step": 51905 }, { "epoch": 8.468189233278956, "grad_norm": 0.003608755301684141, "learning_rate": 0.0007138346519040959, "loss": 0.0067, "num_input_tokens_seen": 112144176, "step": 51910 }, { "epoch": 8.469004893964112, "grad_norm": 0.017230842262506485, "learning_rate": 0.0007137703077854967, "loss": 0.0537, "num_input_tokens_seen": 112155664, "step": 51915 }, { "epoch": 8.469820554649266, "grad_norm": 0.00256637716665864, "learning_rate": 0.0007137059593345711, "loss": 0.0366, "num_input_tokens_seen": 112167184, "step": 51920 }, { "epoch": 8.470636215334421, "grad_norm": 0.00391194224357605, "learning_rate": 0.0007136416065526231, "loss": 0.0715, "num_input_tokens_seen": 112177936, "step": 51925 }, { "epoch": 8.471451876019575, "grad_norm": 0.07863806933164597, "learning_rate": 0.0007135772494409569, "loss": 0.1055, "num_input_tokens_seen": 112189520, "step": 51930 }, { "epoch": 8.47226753670473, "grad_norm": 0.004654384218156338, "learning_rate": 0.0007135128880008768, "loss": 0.0256, "num_input_tokens_seen": 112199984, "step": 51935 }, { "epoch": 8.473083197389887, "grad_norm": 0.0199937354773283, "learning_rate": 0.0007134485222336873, "loss": 0.0288, "num_input_tokens_seen": 112210704, "step": 51940 }, { "epoch": 8.47389885807504, "grad_norm": 0.302824467420578, "learning_rate": 0.0007133841521406925, "loss": 0.0393, "num_input_tokens_seen": 112222224, "step": 51945 }, { "epoch": 8.474714518760196, "grad_norm": 0.017630685120821, "learning_rate": 0.0007133197777231973, "loss": 0.014, "num_input_tokens_seen": 112233456, "step": 51950 }, { "epoch": 8.47553017944535, "grad_norm": 0.11950056254863739, "learning_rate": 0.0007132553989825061, "loss": 0.0268, "num_input_tokens_seen": 112244720, "step": 51955 }, { "epoch": 8.476345840130506, "grad_norm": 0.0019255392253398895, "learning_rate": 0.0007131910159199238, "loss": 0.0532, "num_input_tokens_seen": 112253680, "step": 51960 }, { "epoch": 8.477161500815662, "grad_norm": 0.002990216948091984, "learning_rate": 0.000713126628536755, "loss": 0.0182, "num_input_tokens_seen": 112264144, "step": 51965 }, { "epoch": 8.477977161500815, "grad_norm": 0.009614250622689724, "learning_rate": 0.0007130622368343048, "loss": 0.0283, "num_input_tokens_seen": 112275088, "step": 51970 }, { "epoch": 8.478792822185971, "grad_norm": 0.2912578284740448, "learning_rate": 0.000712997840813878, "loss": 0.1088, "num_input_tokens_seen": 112284944, "step": 51975 }, { "epoch": 8.479608482871125, "grad_norm": 0.021116318181157112, "learning_rate": 0.0007129334404767797, "loss": 0.0256, "num_input_tokens_seen": 112296336, "step": 51980 }, { "epoch": 8.48042414355628, "grad_norm": 0.30992650985717773, "learning_rate": 0.0007128690358243153, "loss": 0.127, "num_input_tokens_seen": 112307248, "step": 51985 }, { "epoch": 8.481239804241435, "grad_norm": 0.2787032425403595, "learning_rate": 0.0007128046268577898, "loss": 0.0337, "num_input_tokens_seen": 112318672, "step": 51990 }, { "epoch": 8.48205546492659, "grad_norm": 0.016766250133514404, "learning_rate": 0.0007127402135785086, "loss": 0.022, "num_input_tokens_seen": 112329648, "step": 51995 }, { "epoch": 8.482871125611746, "grad_norm": 0.006805556360632181, "learning_rate": 0.000712675795987777, "loss": 0.1846, "num_input_tokens_seen": 112340816, "step": 52000 }, { "epoch": 8.4836867862969, "grad_norm": 0.3869755268096924, "learning_rate": 0.0007126113740869006, "loss": 0.0957, "num_input_tokens_seen": 112351344, "step": 52005 }, { "epoch": 8.484502446982056, "grad_norm": 0.026312250643968582, "learning_rate": 0.000712546947877185, "loss": 0.0812, "num_input_tokens_seen": 112361456, "step": 52010 }, { "epoch": 8.48531810766721, "grad_norm": 0.01061819028109312, "learning_rate": 0.0007124825173599359, "loss": 0.0437, "num_input_tokens_seen": 112374352, "step": 52015 }, { "epoch": 8.486133768352365, "grad_norm": 0.004045186098664999, "learning_rate": 0.000712418082536459, "loss": 0.0841, "num_input_tokens_seen": 112385520, "step": 52020 }, { "epoch": 8.486949429037521, "grad_norm": 0.004907793365418911, "learning_rate": 0.0007123536434080602, "loss": 0.156, "num_input_tokens_seen": 112395312, "step": 52025 }, { "epoch": 8.487765089722675, "grad_norm": 0.04804065078496933, "learning_rate": 0.0007122891999760454, "loss": 0.0264, "num_input_tokens_seen": 112406192, "step": 52030 }, { "epoch": 8.48858075040783, "grad_norm": 0.15240783989429474, "learning_rate": 0.0007122247522417206, "loss": 0.0554, "num_input_tokens_seen": 112417840, "step": 52035 }, { "epoch": 8.489396411092985, "grad_norm": 0.14023399353027344, "learning_rate": 0.0007121603002063921, "loss": 0.0316, "num_input_tokens_seen": 112427760, "step": 52040 }, { "epoch": 8.49021207177814, "grad_norm": 0.3805373013019562, "learning_rate": 0.000712095843871366, "loss": 0.119, "num_input_tokens_seen": 112438768, "step": 52045 }, { "epoch": 8.491027732463296, "grad_norm": 0.065114825963974, "learning_rate": 0.0007120313832379483, "loss": 0.1752, "num_input_tokens_seen": 112449840, "step": 52050 }, { "epoch": 8.49184339314845, "grad_norm": 0.42016083002090454, "learning_rate": 0.000711966918307446, "loss": 0.1429, "num_input_tokens_seen": 112460272, "step": 52055 }, { "epoch": 8.492659053833606, "grad_norm": 0.023709211498498917, "learning_rate": 0.000711902449081165, "loss": 0.0529, "num_input_tokens_seen": 112471056, "step": 52060 }, { "epoch": 8.49347471451876, "grad_norm": 0.0041375719010829926, "learning_rate": 0.000711837975560412, "loss": 0.0569, "num_input_tokens_seen": 112481584, "step": 52065 }, { "epoch": 8.494290375203915, "grad_norm": 0.18444637954235077, "learning_rate": 0.0007117734977464937, "loss": 0.0652, "num_input_tokens_seen": 112493040, "step": 52070 }, { "epoch": 8.49510603588907, "grad_norm": 0.20939184725284576, "learning_rate": 0.0007117090156407168, "loss": 0.0256, "num_input_tokens_seen": 112502960, "step": 52075 }, { "epoch": 8.495921696574225, "grad_norm": 0.0033792341127991676, "learning_rate": 0.0007116445292443883, "loss": 0.1428, "num_input_tokens_seen": 112513968, "step": 52080 }, { "epoch": 8.49673735725938, "grad_norm": 0.010220236144959927, "learning_rate": 0.0007115800385588148, "loss": 0.1098, "num_input_tokens_seen": 112525040, "step": 52085 }, { "epoch": 8.497553017944535, "grad_norm": 0.2910071015357971, "learning_rate": 0.0007115155435853034, "loss": 0.1471, "num_input_tokens_seen": 112536944, "step": 52090 }, { "epoch": 8.49836867862969, "grad_norm": 0.009880652651190758, "learning_rate": 0.0007114510443251613, "loss": 0.0204, "num_input_tokens_seen": 112547888, "step": 52095 }, { "epoch": 8.499184339314844, "grad_norm": 0.039387013763189316, "learning_rate": 0.0007113865407796955, "loss": 0.2248, "num_input_tokens_seen": 112557264, "step": 52100 }, { "epoch": 8.5, "grad_norm": 0.007032784633338451, "learning_rate": 0.0007113220329502131, "loss": 0.0199, "num_input_tokens_seen": 112568496, "step": 52105 }, { "epoch": 8.500815660685156, "grad_norm": 0.0679621696472168, "learning_rate": 0.0007112575208380219, "loss": 0.0882, "num_input_tokens_seen": 112578832, "step": 52110 }, { "epoch": 8.50163132137031, "grad_norm": 0.01735229603946209, "learning_rate": 0.0007111930044444288, "loss": 0.0058, "num_input_tokens_seen": 112590224, "step": 52115 }, { "epoch": 8.502446982055465, "grad_norm": 0.014939922839403152, "learning_rate": 0.0007111284837707416, "loss": 0.0157, "num_input_tokens_seen": 112601808, "step": 52120 }, { "epoch": 8.50326264274062, "grad_norm": 0.012377532199025154, "learning_rate": 0.0007110639588182679, "loss": 0.0581, "num_input_tokens_seen": 112611856, "step": 52125 }, { "epoch": 8.504078303425775, "grad_norm": 0.7738260626792908, "learning_rate": 0.0007109994295883154, "loss": 0.135, "num_input_tokens_seen": 112621648, "step": 52130 }, { "epoch": 8.50489396411093, "grad_norm": 0.114794060587883, "learning_rate": 0.0007109348960821916, "loss": 0.0378, "num_input_tokens_seen": 112631472, "step": 52135 }, { "epoch": 8.505709624796085, "grad_norm": 0.12063523381948471, "learning_rate": 0.0007108703583012047, "loss": 0.0188, "num_input_tokens_seen": 112641104, "step": 52140 }, { "epoch": 8.50652528548124, "grad_norm": 0.035221684724092484, "learning_rate": 0.0007108058162466624, "loss": 0.0134, "num_input_tokens_seen": 112651984, "step": 52145 }, { "epoch": 8.507340946166394, "grad_norm": 0.3090044856071472, "learning_rate": 0.0007107412699198729, "loss": 0.1596, "num_input_tokens_seen": 112662288, "step": 52150 }, { "epoch": 8.50815660685155, "grad_norm": 0.21309930086135864, "learning_rate": 0.0007106767193221442, "loss": 0.0886, "num_input_tokens_seen": 112674160, "step": 52155 }, { "epoch": 8.508972267536706, "grad_norm": 0.028279351070523262, "learning_rate": 0.0007106121644547844, "loss": 0.0308, "num_input_tokens_seen": 112685200, "step": 52160 }, { "epoch": 8.50978792822186, "grad_norm": 0.962195634841919, "learning_rate": 0.000710547605319102, "loss": 0.0735, "num_input_tokens_seen": 112695600, "step": 52165 }, { "epoch": 8.510603588907015, "grad_norm": 0.06181253492832184, "learning_rate": 0.0007104830419164052, "loss": 0.1228, "num_input_tokens_seen": 112708304, "step": 52170 }, { "epoch": 8.51141924959217, "grad_norm": 0.13785938918590546, "learning_rate": 0.0007104184742480025, "loss": 0.0694, "num_input_tokens_seen": 112718672, "step": 52175 }, { "epoch": 8.512234910277325, "grad_norm": 0.001226426218636334, "learning_rate": 0.0007103539023152025, "loss": 0.0092, "num_input_tokens_seen": 112728304, "step": 52180 }, { "epoch": 8.513050570962479, "grad_norm": 0.007094322703778744, "learning_rate": 0.0007102893261193141, "loss": 0.0052, "num_input_tokens_seen": 112738288, "step": 52185 }, { "epoch": 8.513866231647635, "grad_norm": 0.08791056275367737, "learning_rate": 0.0007102247456616456, "loss": 0.0206, "num_input_tokens_seen": 112749008, "step": 52190 }, { "epoch": 8.51468189233279, "grad_norm": 0.1945263296365738, "learning_rate": 0.0007101601609435057, "loss": 0.0508, "num_input_tokens_seen": 112759472, "step": 52195 }, { "epoch": 8.515497553017944, "grad_norm": 0.17970463633537292, "learning_rate": 0.0007100955719662038, "loss": 0.0529, "num_input_tokens_seen": 112770928, "step": 52200 }, { "epoch": 8.5163132137031, "grad_norm": 0.13630010187625885, "learning_rate": 0.0007100309787310485, "loss": 0.0463, "num_input_tokens_seen": 112780784, "step": 52205 }, { "epoch": 8.517128874388254, "grad_norm": 0.008806428872048855, "learning_rate": 0.0007099663812393489, "loss": 0.0579, "num_input_tokens_seen": 112792528, "step": 52210 }, { "epoch": 8.51794453507341, "grad_norm": 0.5296904444694519, "learning_rate": 0.0007099017794924144, "loss": 0.0636, "num_input_tokens_seen": 112804496, "step": 52215 }, { "epoch": 8.518760195758565, "grad_norm": 0.014076504856348038, "learning_rate": 0.000709837173491554, "loss": 0.023, "num_input_tokens_seen": 112815472, "step": 52220 }, { "epoch": 8.51957585644372, "grad_norm": 0.010113160125911236, "learning_rate": 0.0007097725632380771, "loss": 0.0869, "num_input_tokens_seen": 112826288, "step": 52225 }, { "epoch": 8.520391517128875, "grad_norm": 0.05956968665122986, "learning_rate": 0.0007097079487332931, "loss": 0.0974, "num_input_tokens_seen": 112837072, "step": 52230 }, { "epoch": 8.521207177814029, "grad_norm": 0.013660747557878494, "learning_rate": 0.0007096433299785113, "loss": 0.023, "num_input_tokens_seen": 112846736, "step": 52235 }, { "epoch": 8.522022838499185, "grad_norm": 0.03401073068380356, "learning_rate": 0.0007095787069750416, "loss": 0.0341, "num_input_tokens_seen": 112858864, "step": 52240 }, { "epoch": 8.522838499184338, "grad_norm": 0.00827324390411377, "learning_rate": 0.0007095140797241936, "loss": 0.041, "num_input_tokens_seen": 112870032, "step": 52245 }, { "epoch": 8.523654159869494, "grad_norm": 0.005122459959238768, "learning_rate": 0.0007094494482272768, "loss": 0.0207, "num_input_tokens_seen": 112880016, "step": 52250 }, { "epoch": 8.52446982055465, "grad_norm": 0.22027291357517242, "learning_rate": 0.0007093848124856014, "loss": 0.0762, "num_input_tokens_seen": 112890576, "step": 52255 }, { "epoch": 8.525285481239804, "grad_norm": 0.08939344435930252, "learning_rate": 0.000709320172500477, "loss": 0.0264, "num_input_tokens_seen": 112899920, "step": 52260 }, { "epoch": 8.52610114192496, "grad_norm": 0.13552896678447723, "learning_rate": 0.0007092555282732139, "loss": 0.0398, "num_input_tokens_seen": 112911696, "step": 52265 }, { "epoch": 8.526916802610113, "grad_norm": 0.034195221960544586, "learning_rate": 0.000709190879805122, "loss": 0.0092, "num_input_tokens_seen": 112922608, "step": 52270 }, { "epoch": 8.52773246329527, "grad_norm": 0.003045213408768177, "learning_rate": 0.0007091262270975116, "loss": 0.0573, "num_input_tokens_seen": 112931952, "step": 52275 }, { "epoch": 8.528548123980425, "grad_norm": 0.009350604377686977, "learning_rate": 0.0007090615701516929, "loss": 0.0563, "num_input_tokens_seen": 112942608, "step": 52280 }, { "epoch": 8.529363784665579, "grad_norm": 0.00833084899932146, "learning_rate": 0.0007089969089689761, "loss": 0.0798, "num_input_tokens_seen": 112953616, "step": 52285 }, { "epoch": 8.530179445350734, "grad_norm": 0.04479145631194115, "learning_rate": 0.0007089322435506719, "loss": 0.0367, "num_input_tokens_seen": 112964624, "step": 52290 }, { "epoch": 8.530995106035888, "grad_norm": 0.34924301505088806, "learning_rate": 0.0007088675738980909, "loss": 0.142, "num_input_tokens_seen": 112973936, "step": 52295 }, { "epoch": 8.531810766721044, "grad_norm": 0.0031820894218981266, "learning_rate": 0.0007088029000125435, "loss": 0.0221, "num_input_tokens_seen": 112983408, "step": 52300 }, { "epoch": 8.5326264274062, "grad_norm": 0.011126089841127396, "learning_rate": 0.0007087382218953403, "loss": 0.069, "num_input_tokens_seen": 112994384, "step": 52305 }, { "epoch": 8.533442088091354, "grad_norm": 0.7672825455665588, "learning_rate": 0.0007086735395477923, "loss": 0.1052, "num_input_tokens_seen": 113003632, "step": 52310 }, { "epoch": 8.53425774877651, "grad_norm": 0.019653482362627983, "learning_rate": 0.0007086088529712103, "loss": 0.0994, "num_input_tokens_seen": 113013584, "step": 52315 }, { "epoch": 8.535073409461663, "grad_norm": 0.068137988448143, "learning_rate": 0.0007085441621669053, "loss": 0.0696, "num_input_tokens_seen": 113024432, "step": 52320 }, { "epoch": 8.535889070146819, "grad_norm": 0.4487306475639343, "learning_rate": 0.0007084794671361883, "loss": 0.0535, "num_input_tokens_seen": 113034640, "step": 52325 }, { "epoch": 8.536704730831975, "grad_norm": 0.006371349096298218, "learning_rate": 0.0007084147678803703, "loss": 0.0067, "num_input_tokens_seen": 113044816, "step": 52330 }, { "epoch": 8.537520391517129, "grad_norm": 0.2185864895582199, "learning_rate": 0.0007083500644007628, "loss": 0.1425, "num_input_tokens_seen": 113055152, "step": 52335 }, { "epoch": 8.538336052202284, "grad_norm": 0.06938138604164124, "learning_rate": 0.0007082853566986769, "loss": 0.0359, "num_input_tokens_seen": 113066608, "step": 52340 }, { "epoch": 8.539151712887438, "grad_norm": 0.3545314371585846, "learning_rate": 0.0007082206447754239, "loss": 0.1569, "num_input_tokens_seen": 113076944, "step": 52345 }, { "epoch": 8.539967373572594, "grad_norm": 0.033213697373867035, "learning_rate": 0.0007081559286323155, "loss": 0.03, "num_input_tokens_seen": 113088240, "step": 52350 }, { "epoch": 8.540783034257748, "grad_norm": 0.0024236650206148624, "learning_rate": 0.0007080912082706631, "loss": 0.0878, "num_input_tokens_seen": 113100688, "step": 52355 }, { "epoch": 8.541598694942904, "grad_norm": 0.01018419861793518, "learning_rate": 0.0007080264836917783, "loss": 0.0711, "num_input_tokens_seen": 113110352, "step": 52360 }, { "epoch": 8.54241435562806, "grad_norm": 0.002987699583172798, "learning_rate": 0.000707961754896973, "loss": 0.2497, "num_input_tokens_seen": 113120048, "step": 52365 }, { "epoch": 8.543230016313213, "grad_norm": 0.011956961825489998, "learning_rate": 0.0007078970218875589, "loss": 0.0799, "num_input_tokens_seen": 113130960, "step": 52370 }, { "epoch": 8.544045676998369, "grad_norm": 0.06541749089956284, "learning_rate": 0.0007078322846648479, "loss": 0.0704, "num_input_tokens_seen": 113142192, "step": 52375 }, { "epoch": 8.544861337683523, "grad_norm": 0.27652621269226074, "learning_rate": 0.0007077675432301521, "loss": 0.1562, "num_input_tokens_seen": 113154256, "step": 52380 }, { "epoch": 8.545676998368679, "grad_norm": 0.27194124460220337, "learning_rate": 0.0007077027975847833, "loss": 0.2258, "num_input_tokens_seen": 113166640, "step": 52385 }, { "epoch": 8.546492659053834, "grad_norm": 0.18163374066352844, "learning_rate": 0.0007076380477300539, "loss": 0.0444, "num_input_tokens_seen": 113177744, "step": 52390 }, { "epoch": 8.547308319738988, "grad_norm": 0.26939302682876587, "learning_rate": 0.0007075732936672761, "loss": 0.0754, "num_input_tokens_seen": 113188688, "step": 52395 }, { "epoch": 8.548123980424144, "grad_norm": 0.007374065462499857, "learning_rate": 0.0007075085353977622, "loss": 0.0201, "num_input_tokens_seen": 113198704, "step": 52400 }, { "epoch": 8.548939641109298, "grad_norm": 0.19838181138038635, "learning_rate": 0.0007074437729228245, "loss": 0.0926, "num_input_tokens_seen": 113209296, "step": 52405 }, { "epoch": 8.549755301794454, "grad_norm": 0.021237578243017197, "learning_rate": 0.0007073790062437755, "loss": 0.0677, "num_input_tokens_seen": 113218896, "step": 52410 }, { "epoch": 8.550570962479608, "grad_norm": 0.0337057039141655, "learning_rate": 0.000707314235361928, "loss": 0.058, "num_input_tokens_seen": 113229520, "step": 52415 }, { "epoch": 8.551386623164763, "grad_norm": 0.07322728633880615, "learning_rate": 0.0007072494602785945, "loss": 0.0298, "num_input_tokens_seen": 113240048, "step": 52420 }, { "epoch": 8.552202283849919, "grad_norm": 0.42627814412117004, "learning_rate": 0.0007071846809950878, "loss": 0.144, "num_input_tokens_seen": 113251984, "step": 52425 }, { "epoch": 8.553017944535073, "grad_norm": 0.02343291975557804, "learning_rate": 0.0007071198975127206, "loss": 0.0746, "num_input_tokens_seen": 113262512, "step": 52430 }, { "epoch": 8.553833605220229, "grad_norm": 0.008681093342602253, "learning_rate": 0.000707055109832806, "loss": 0.0294, "num_input_tokens_seen": 113274768, "step": 52435 }, { "epoch": 8.554649265905383, "grad_norm": 0.03344331681728363, "learning_rate": 0.0007069903179566569, "loss": 0.0243, "num_input_tokens_seen": 113286512, "step": 52440 }, { "epoch": 8.555464926590538, "grad_norm": 0.015263247303664684, "learning_rate": 0.0007069255218855865, "loss": 0.0903, "num_input_tokens_seen": 113296944, "step": 52445 }, { "epoch": 8.556280587275694, "grad_norm": 0.008519859984517097, "learning_rate": 0.0007068607216209078, "loss": 0.0887, "num_input_tokens_seen": 113307760, "step": 52450 }, { "epoch": 8.557096247960848, "grad_norm": 0.1599939614534378, "learning_rate": 0.0007067959171639342, "loss": 0.1706, "num_input_tokens_seen": 113317776, "step": 52455 }, { "epoch": 8.557911908646004, "grad_norm": 0.027809320017695427, "learning_rate": 0.000706731108515979, "loss": 0.0842, "num_input_tokens_seen": 113328912, "step": 52460 }, { "epoch": 8.558727569331158, "grad_norm": 0.10448624938726425, "learning_rate": 0.0007066662956783556, "loss": 0.035, "num_input_tokens_seen": 113339056, "step": 52465 }, { "epoch": 8.559543230016313, "grad_norm": 0.010518464259803295, "learning_rate": 0.0007066014786523776, "loss": 0.0219, "num_input_tokens_seen": 113349488, "step": 52470 }, { "epoch": 8.560358890701469, "grad_norm": 0.23149004578590393, "learning_rate": 0.0007065366574393585, "loss": 0.0528, "num_input_tokens_seen": 113361616, "step": 52475 }, { "epoch": 8.561174551386623, "grad_norm": 0.009577560238540173, "learning_rate": 0.000706471832040612, "loss": 0.0427, "num_input_tokens_seen": 113371728, "step": 52480 }, { "epoch": 8.561990212071779, "grad_norm": 0.03113245777785778, "learning_rate": 0.000706407002457452, "loss": 0.0484, "num_input_tokens_seen": 113383280, "step": 52485 }, { "epoch": 8.562805872756933, "grad_norm": 0.18912045657634735, "learning_rate": 0.0007063421686911921, "loss": 0.1145, "num_input_tokens_seen": 113394160, "step": 52490 }, { "epoch": 8.563621533442088, "grad_norm": 0.2634306848049164, "learning_rate": 0.0007062773307431465, "loss": 0.1875, "num_input_tokens_seen": 113405552, "step": 52495 }, { "epoch": 8.564437194127244, "grad_norm": 0.33455103635787964, "learning_rate": 0.000706212488614629, "loss": 0.1255, "num_input_tokens_seen": 113415920, "step": 52500 }, { "epoch": 8.565252854812398, "grad_norm": 0.00712059810757637, "learning_rate": 0.0007061476423069539, "loss": 0.0049, "num_input_tokens_seen": 113427088, "step": 52505 }, { "epoch": 8.566068515497554, "grad_norm": 0.1418961137533188, "learning_rate": 0.0007060827918214353, "loss": 0.0772, "num_input_tokens_seen": 113438032, "step": 52510 }, { "epoch": 8.566884176182707, "grad_norm": 0.0014664334012195468, "learning_rate": 0.0007060179371593876, "loss": 0.0993, "num_input_tokens_seen": 113449424, "step": 52515 }, { "epoch": 8.567699836867863, "grad_norm": 0.25699958205223083, "learning_rate": 0.0007059530783221249, "loss": 0.0971, "num_input_tokens_seen": 113461232, "step": 52520 }, { "epoch": 8.568515497553017, "grad_norm": 0.006246160715818405, "learning_rate": 0.0007058882153109618, "loss": 0.0439, "num_input_tokens_seen": 113472432, "step": 52525 }, { "epoch": 8.569331158238173, "grad_norm": 0.021962016820907593, "learning_rate": 0.000705823348127213, "loss": 0.0765, "num_input_tokens_seen": 113482640, "step": 52530 }, { "epoch": 8.570146818923329, "grad_norm": 0.004425059538334608, "learning_rate": 0.0007057584767721927, "loss": 0.0733, "num_input_tokens_seen": 113492464, "step": 52535 }, { "epoch": 8.570962479608482, "grad_norm": 0.003864873433485627, "learning_rate": 0.000705693601247216, "loss": 0.1789, "num_input_tokens_seen": 113503152, "step": 52540 }, { "epoch": 8.571778140293638, "grad_norm": 0.006510365754365921, "learning_rate": 0.0007056287215535976, "loss": 0.0384, "num_input_tokens_seen": 113514416, "step": 52545 }, { "epoch": 8.572593800978792, "grad_norm": 0.189361110329628, "learning_rate": 0.0007055638376926522, "loss": 0.0428, "num_input_tokens_seen": 113525808, "step": 52550 }, { "epoch": 8.573409461663948, "grad_norm": 0.03475072607398033, "learning_rate": 0.0007054989496656949, "loss": 0.0801, "num_input_tokens_seen": 113536880, "step": 52555 }, { "epoch": 8.574225122349104, "grad_norm": 0.0047148847952485085, "learning_rate": 0.0007054340574740405, "loss": 0.021, "num_input_tokens_seen": 113549200, "step": 52560 }, { "epoch": 8.575040783034257, "grad_norm": 0.2840512990951538, "learning_rate": 0.0007053691611190045, "loss": 0.1158, "num_input_tokens_seen": 113559824, "step": 52565 }, { "epoch": 8.575856443719413, "grad_norm": 0.14973792433738708, "learning_rate": 0.0007053042606019017, "loss": 0.1616, "num_input_tokens_seen": 113571056, "step": 52570 }, { "epoch": 8.576672104404567, "grad_norm": 0.002990563167259097, "learning_rate": 0.0007052393559240479, "loss": 0.0572, "num_input_tokens_seen": 113581840, "step": 52575 }, { "epoch": 8.577487765089723, "grad_norm": 0.004023312591016293, "learning_rate": 0.0007051744470867581, "loss": 0.1229, "num_input_tokens_seen": 113593392, "step": 52580 }, { "epoch": 8.578303425774878, "grad_norm": 0.23519225418567657, "learning_rate": 0.0007051095340913478, "loss": 0.2013, "num_input_tokens_seen": 113604624, "step": 52585 }, { "epoch": 8.579119086460032, "grad_norm": 0.17504338920116425, "learning_rate": 0.0007050446169391326, "loss": 0.1504, "num_input_tokens_seen": 113615920, "step": 52590 }, { "epoch": 8.579934747145188, "grad_norm": 0.035556066781282425, "learning_rate": 0.0007049796956314281, "loss": 0.046, "num_input_tokens_seen": 113625840, "step": 52595 }, { "epoch": 8.580750407830342, "grad_norm": 0.014154000207781792, "learning_rate": 0.00070491477016955, "loss": 0.0249, "num_input_tokens_seen": 113636784, "step": 52600 }, { "epoch": 8.581566068515498, "grad_norm": 0.03693210706114769, "learning_rate": 0.0007048498405548142, "loss": 0.0116, "num_input_tokens_seen": 113647920, "step": 52605 }, { "epoch": 8.582381729200652, "grad_norm": 0.03940049931406975, "learning_rate": 0.0007047849067885366, "loss": 0.0161, "num_input_tokens_seen": 113658288, "step": 52610 }, { "epoch": 8.583197389885807, "grad_norm": 0.012887493707239628, "learning_rate": 0.000704719968872033, "loss": 0.0988, "num_input_tokens_seen": 113668368, "step": 52615 }, { "epoch": 8.584013050570963, "grad_norm": 0.05204826593399048, "learning_rate": 0.0007046550268066194, "loss": 0.0693, "num_input_tokens_seen": 113678288, "step": 52620 }, { "epoch": 8.584828711256117, "grad_norm": 0.22619600594043732, "learning_rate": 0.0007045900805936122, "loss": 0.1762, "num_input_tokens_seen": 113689392, "step": 52625 }, { "epoch": 8.585644371941273, "grad_norm": 0.160128653049469, "learning_rate": 0.0007045251302343276, "loss": 0.1142, "num_input_tokens_seen": 113700016, "step": 52630 }, { "epoch": 8.586460032626427, "grad_norm": 0.01484632957726717, "learning_rate": 0.0007044601757300815, "loss": 0.0264, "num_input_tokens_seen": 113712048, "step": 52635 }, { "epoch": 8.587275693311582, "grad_norm": 0.0061071184463799, "learning_rate": 0.0007043952170821907, "loss": 0.0273, "num_input_tokens_seen": 113722512, "step": 52640 }, { "epoch": 8.588091353996738, "grad_norm": 0.023335954174399376, "learning_rate": 0.0007043302542919715, "loss": 0.0497, "num_input_tokens_seen": 113732784, "step": 52645 }, { "epoch": 8.588907014681892, "grad_norm": 0.21892686188220978, "learning_rate": 0.0007042652873607405, "loss": 0.1033, "num_input_tokens_seen": 113742960, "step": 52650 }, { "epoch": 8.589722675367048, "grad_norm": 0.008176966570317745, "learning_rate": 0.0007042003162898143, "loss": 0.0097, "num_input_tokens_seen": 113753712, "step": 52655 }, { "epoch": 8.590538336052202, "grad_norm": 0.028112633153796196, "learning_rate": 0.0007041353410805097, "loss": 0.0703, "num_input_tokens_seen": 113765136, "step": 52660 }, { "epoch": 8.591353996737357, "grad_norm": 0.013970048166811466, "learning_rate": 0.0007040703617341434, "loss": 0.0227, "num_input_tokens_seen": 113775056, "step": 52665 }, { "epoch": 8.592169657422513, "grad_norm": 0.0035523215774446726, "learning_rate": 0.0007040053782520324, "loss": 0.0439, "num_input_tokens_seen": 113786512, "step": 52670 }, { "epoch": 8.592985318107667, "grad_norm": 0.24162547290325165, "learning_rate": 0.0007039403906354936, "loss": 0.1064, "num_input_tokens_seen": 113796880, "step": 52675 }, { "epoch": 8.593800978792823, "grad_norm": 0.09258195012807846, "learning_rate": 0.0007038753988858439, "loss": 0.0934, "num_input_tokens_seen": 113809360, "step": 52680 }, { "epoch": 8.594616639477977, "grad_norm": 0.1602240353822708, "learning_rate": 0.0007038104030044008, "loss": 0.0791, "num_input_tokens_seen": 113820592, "step": 52685 }, { "epoch": 8.595432300163132, "grad_norm": 0.0038400774355977774, "learning_rate": 0.0007037454029924814, "loss": 0.0588, "num_input_tokens_seen": 113832176, "step": 52690 }, { "epoch": 8.596247960848288, "grad_norm": 0.02949357032775879, "learning_rate": 0.0007036803988514028, "loss": 0.046, "num_input_tokens_seen": 113844496, "step": 52695 }, { "epoch": 8.597063621533442, "grad_norm": 0.41612306237220764, "learning_rate": 0.0007036153905824825, "loss": 0.0184, "num_input_tokens_seen": 113854544, "step": 52700 }, { "epoch": 8.597879282218598, "grad_norm": 0.14093731343746185, "learning_rate": 0.0007035503781870379, "loss": 0.0312, "num_input_tokens_seen": 113865008, "step": 52705 }, { "epoch": 8.598694942903752, "grad_norm": 0.27731600403785706, "learning_rate": 0.0007034853616663868, "loss": 0.1039, "num_input_tokens_seen": 113875952, "step": 52710 }, { "epoch": 8.599510603588907, "grad_norm": 0.014228510670363903, "learning_rate": 0.0007034203410218467, "loss": 0.1459, "num_input_tokens_seen": 113885552, "step": 52715 }, { "epoch": 8.600326264274061, "grad_norm": 0.07596822828054428, "learning_rate": 0.0007033553162547355, "loss": 0.0467, "num_input_tokens_seen": 113897104, "step": 52720 }, { "epoch": 8.601141924959217, "grad_norm": 0.048214759677648544, "learning_rate": 0.0007032902873663707, "loss": 0.0941, "num_input_tokens_seen": 113908944, "step": 52725 }, { "epoch": 8.601957585644373, "grad_norm": 0.14651963114738464, "learning_rate": 0.0007032252543580702, "loss": 0.0291, "num_input_tokens_seen": 113920624, "step": 52730 }, { "epoch": 8.602773246329527, "grad_norm": 0.012341699562966824, "learning_rate": 0.0007031602172311523, "loss": 0.0289, "num_input_tokens_seen": 113932432, "step": 52735 }, { "epoch": 8.603588907014682, "grad_norm": 0.054182104766368866, "learning_rate": 0.0007030951759869347, "loss": 0.1926, "num_input_tokens_seen": 113943632, "step": 52740 }, { "epoch": 8.604404567699836, "grad_norm": 0.08766784518957138, "learning_rate": 0.0007030301306267358, "loss": 0.1292, "num_input_tokens_seen": 113954928, "step": 52745 }, { "epoch": 8.605220228384992, "grad_norm": 0.018275413662195206, "learning_rate": 0.0007029650811518737, "loss": 0.1261, "num_input_tokens_seen": 113965296, "step": 52750 }, { "epoch": 8.606035889070148, "grad_norm": 0.03040480799973011, "learning_rate": 0.0007029000275636669, "loss": 0.0281, "num_input_tokens_seen": 113976176, "step": 52755 }, { "epoch": 8.606851549755302, "grad_norm": 0.11693539470434189, "learning_rate": 0.0007028349698634335, "loss": 0.0874, "num_input_tokens_seen": 113986288, "step": 52760 }, { "epoch": 8.607667210440457, "grad_norm": 0.19277828931808472, "learning_rate": 0.0007027699080524923, "loss": 0.129, "num_input_tokens_seen": 113997360, "step": 52765 }, { "epoch": 8.608482871125611, "grad_norm": 0.049259208142757416, "learning_rate": 0.0007027048421321616, "loss": 0.0237, "num_input_tokens_seen": 114009360, "step": 52770 }, { "epoch": 8.609298531810767, "grad_norm": 0.16560588777065277, "learning_rate": 0.0007026397721037601, "loss": 0.0378, "num_input_tokens_seen": 114021104, "step": 52775 }, { "epoch": 8.61011419249592, "grad_norm": 0.29161280393600464, "learning_rate": 0.0007025746979686065, "loss": 0.1414, "num_input_tokens_seen": 114032176, "step": 52780 }, { "epoch": 8.610929853181077, "grad_norm": 0.006784006953239441, "learning_rate": 0.0007025096197280196, "loss": 0.0363, "num_input_tokens_seen": 114042352, "step": 52785 }, { "epoch": 8.611745513866232, "grad_norm": 0.3216829299926758, "learning_rate": 0.0007024445373833185, "loss": 0.1175, "num_input_tokens_seen": 114052112, "step": 52790 }, { "epoch": 8.612561174551386, "grad_norm": 0.03260602802038193, "learning_rate": 0.000702379450935822, "loss": 0.0421, "num_input_tokens_seen": 114062928, "step": 52795 }, { "epoch": 8.613376835236542, "grad_norm": 0.1370018720626831, "learning_rate": 0.0007023143603868492, "loss": 0.0691, "num_input_tokens_seen": 114073392, "step": 52800 }, { "epoch": 8.614192495921696, "grad_norm": 0.17849183082580566, "learning_rate": 0.0007022492657377192, "loss": 0.0693, "num_input_tokens_seen": 114084464, "step": 52805 }, { "epoch": 8.615008156606851, "grad_norm": 0.019710781052708626, "learning_rate": 0.0007021841669897511, "loss": 0.0684, "num_input_tokens_seen": 114096240, "step": 52810 }, { "epoch": 8.615823817292007, "grad_norm": 0.03188520297408104, "learning_rate": 0.0007021190641442645, "loss": 0.0797, "num_input_tokens_seen": 114105168, "step": 52815 }, { "epoch": 8.616639477977161, "grad_norm": 0.0067170062102377415, "learning_rate": 0.0007020539572025788, "loss": 0.0086, "num_input_tokens_seen": 114115024, "step": 52820 }, { "epoch": 8.617455138662317, "grad_norm": 0.006065691821277142, "learning_rate": 0.0007019888461660132, "loss": 0.0646, "num_input_tokens_seen": 114126416, "step": 52825 }, { "epoch": 8.61827079934747, "grad_norm": 0.256100058555603, "learning_rate": 0.0007019237310358874, "loss": 0.1274, "num_input_tokens_seen": 114137008, "step": 52830 }, { "epoch": 8.619086460032626, "grad_norm": 0.1438518464565277, "learning_rate": 0.000701858611813521, "loss": 0.155, "num_input_tokens_seen": 114148144, "step": 52835 }, { "epoch": 8.619902120717782, "grad_norm": 0.025290878489613533, "learning_rate": 0.0007017934885002339, "loss": 0.1007, "num_input_tokens_seen": 114157872, "step": 52840 }, { "epoch": 8.620717781402936, "grad_norm": 0.013580790720880032, "learning_rate": 0.0007017283610973456, "loss": 0.0635, "num_input_tokens_seen": 114169168, "step": 52845 }, { "epoch": 8.621533442088092, "grad_norm": 0.08735395967960358, "learning_rate": 0.0007016632296061762, "loss": 0.0327, "num_input_tokens_seen": 114179984, "step": 52850 }, { "epoch": 8.622349102773246, "grad_norm": 0.13977889716625214, "learning_rate": 0.0007015980940280458, "loss": 0.163, "num_input_tokens_seen": 114191120, "step": 52855 }, { "epoch": 8.623164763458401, "grad_norm": 0.06077880784869194, "learning_rate": 0.0007015329543642741, "loss": 0.0578, "num_input_tokens_seen": 114201424, "step": 52860 }, { "epoch": 8.623980424143557, "grad_norm": 0.005568662192672491, "learning_rate": 0.0007014678106161814, "loss": 0.0258, "num_input_tokens_seen": 114212976, "step": 52865 }, { "epoch": 8.624796084828711, "grad_norm": 0.013049778528511524, "learning_rate": 0.000701402662785088, "loss": 0.0454, "num_input_tokens_seen": 114224432, "step": 52870 }, { "epoch": 8.625611745513867, "grad_norm": 0.01467926986515522, "learning_rate": 0.0007013375108723141, "loss": 0.0322, "num_input_tokens_seen": 114234256, "step": 52875 }, { "epoch": 8.62642740619902, "grad_norm": 0.23037031292915344, "learning_rate": 0.0007012723548791802, "loss": 0.039, "num_input_tokens_seen": 114244912, "step": 52880 }, { "epoch": 8.627243066884176, "grad_norm": 0.03196464106440544, "learning_rate": 0.0007012071948070065, "loss": 0.0694, "num_input_tokens_seen": 114257008, "step": 52885 }, { "epoch": 8.62805872756933, "grad_norm": 0.2203933745622635, "learning_rate": 0.0007011420306571139, "loss": 0.1499, "num_input_tokens_seen": 114266800, "step": 52890 }, { "epoch": 8.628874388254486, "grad_norm": 0.35956448316574097, "learning_rate": 0.0007010768624308228, "loss": 0.0696, "num_input_tokens_seen": 114278320, "step": 52895 }, { "epoch": 8.629690048939642, "grad_norm": 0.027677489444613457, "learning_rate": 0.0007010116901294541, "loss": 0.0302, "num_input_tokens_seen": 114288624, "step": 52900 }, { "epoch": 8.630505709624796, "grad_norm": 0.09585878998041153, "learning_rate": 0.0007009465137543285, "loss": 0.0271, "num_input_tokens_seen": 114299184, "step": 52905 }, { "epoch": 8.631321370309951, "grad_norm": 0.012258858419954777, "learning_rate": 0.0007008813333067668, "loss": 0.0776, "num_input_tokens_seen": 114309744, "step": 52910 }, { "epoch": 8.632137030995105, "grad_norm": 0.030114926397800446, "learning_rate": 0.00070081614878809, "loss": 0.1126, "num_input_tokens_seen": 114320496, "step": 52915 }, { "epoch": 8.632952691680261, "grad_norm": 0.06548038870096207, "learning_rate": 0.0007007509601996193, "loss": 0.0716, "num_input_tokens_seen": 114331728, "step": 52920 }, { "epoch": 8.633768352365417, "grad_norm": 0.06908722221851349, "learning_rate": 0.0007006857675426757, "loss": 0.0594, "num_input_tokens_seen": 114341776, "step": 52925 }, { "epoch": 8.63458401305057, "grad_norm": 0.22732791304588318, "learning_rate": 0.0007006205708185804, "loss": 0.1991, "num_input_tokens_seen": 114353264, "step": 52930 }, { "epoch": 8.635399673735726, "grad_norm": 0.007982817478477955, "learning_rate": 0.0007005553700286549, "loss": 0.2081, "num_input_tokens_seen": 114364784, "step": 52935 }, { "epoch": 8.63621533442088, "grad_norm": 0.011525883339345455, "learning_rate": 0.0007004901651742201, "loss": 0.0113, "num_input_tokens_seen": 114375920, "step": 52940 }, { "epoch": 8.637030995106036, "grad_norm": 0.11902187764644623, "learning_rate": 0.000700424956256598, "loss": 0.0312, "num_input_tokens_seen": 114386448, "step": 52945 }, { "epoch": 8.63784665579119, "grad_norm": 0.057894494384527206, "learning_rate": 0.0007003597432771098, "loss": 0.0793, "num_input_tokens_seen": 114397296, "step": 52950 }, { "epoch": 8.638662316476346, "grad_norm": 0.029431601986289024, "learning_rate": 0.0007002945262370773, "loss": 0.1027, "num_input_tokens_seen": 114408336, "step": 52955 }, { "epoch": 8.639477977161501, "grad_norm": 0.02109280601143837, "learning_rate": 0.0007002293051378221, "loss": 0.0341, "num_input_tokens_seen": 114419024, "step": 52960 }, { "epoch": 8.640293637846655, "grad_norm": 0.24475204944610596, "learning_rate": 0.0007001640799806662, "loss": 0.1841, "num_input_tokens_seen": 114430704, "step": 52965 }, { "epoch": 8.641109298531811, "grad_norm": 0.12317442893981934, "learning_rate": 0.000700098850766931, "loss": 0.026, "num_input_tokens_seen": 114441424, "step": 52970 }, { "epoch": 8.641924959216965, "grad_norm": 0.08731578290462494, "learning_rate": 0.0007000336174979389, "loss": 0.2159, "num_input_tokens_seen": 114452720, "step": 52975 }, { "epoch": 8.64274061990212, "grad_norm": 0.1497471034526825, "learning_rate": 0.0006999683801750116, "loss": 0.034, "num_input_tokens_seen": 114463920, "step": 52980 }, { "epoch": 8.643556280587276, "grad_norm": 0.06633875519037247, "learning_rate": 0.0006999031387994717, "loss": 0.106, "num_input_tokens_seen": 114474768, "step": 52985 }, { "epoch": 8.64437194127243, "grad_norm": 0.1102503314614296, "learning_rate": 0.0006998378933726408, "loss": 0.0459, "num_input_tokens_seen": 114483984, "step": 52990 }, { "epoch": 8.645187601957586, "grad_norm": 0.056301407516002655, "learning_rate": 0.0006997726438958417, "loss": 0.1107, "num_input_tokens_seen": 114494032, "step": 52995 }, { "epoch": 8.64600326264274, "grad_norm": 0.2921803593635559, "learning_rate": 0.0006997073903703964, "loss": 0.0404, "num_input_tokens_seen": 114504720, "step": 53000 }, { "epoch": 8.646818923327896, "grad_norm": 0.05801844969391823, "learning_rate": 0.0006996421327976276, "loss": 0.1848, "num_input_tokens_seen": 114515536, "step": 53005 }, { "epoch": 8.647634584013051, "grad_norm": 0.05805162340402603, "learning_rate": 0.0006995768711788577, "loss": 0.0231, "num_input_tokens_seen": 114527312, "step": 53010 }, { "epoch": 8.648450244698205, "grad_norm": 0.38928350806236267, "learning_rate": 0.0006995116055154093, "loss": 0.0959, "num_input_tokens_seen": 114539728, "step": 53015 }, { "epoch": 8.649265905383361, "grad_norm": 0.018125947564840317, "learning_rate": 0.000699446335808605, "loss": 0.0172, "num_input_tokens_seen": 114551184, "step": 53020 }, { "epoch": 8.650081566068515, "grad_norm": 0.02418561838567257, "learning_rate": 0.0006993810620597677, "loss": 0.0251, "num_input_tokens_seen": 114561936, "step": 53025 }, { "epoch": 8.65089722675367, "grad_norm": 0.007253970485180616, "learning_rate": 0.0006993157842702203, "loss": 0.2023, "num_input_tokens_seen": 114571728, "step": 53030 }, { "epoch": 8.651712887438826, "grad_norm": 0.2686326801776886, "learning_rate": 0.0006992505024412858, "loss": 0.0473, "num_input_tokens_seen": 114581264, "step": 53035 }, { "epoch": 8.65252854812398, "grad_norm": 0.5314196944236755, "learning_rate": 0.000699185216574287, "loss": 0.1753, "num_input_tokens_seen": 114591664, "step": 53040 }, { "epoch": 8.653344208809136, "grad_norm": 0.06350405514240265, "learning_rate": 0.0006991199266705472, "loss": 0.0147, "num_input_tokens_seen": 114602864, "step": 53045 }, { "epoch": 8.65415986949429, "grad_norm": 0.014382350258529186, "learning_rate": 0.0006990546327313894, "loss": 0.0233, "num_input_tokens_seen": 114614000, "step": 53050 }, { "epoch": 8.654975530179446, "grad_norm": 0.011114361695945263, "learning_rate": 0.0006989893347581368, "loss": 0.0323, "num_input_tokens_seen": 114625360, "step": 53055 }, { "epoch": 8.655791190864601, "grad_norm": 0.01948225125670433, "learning_rate": 0.000698924032752113, "loss": 0.1014, "num_input_tokens_seen": 114635536, "step": 53060 }, { "epoch": 8.656606851549755, "grad_norm": 0.006442820653319359, "learning_rate": 0.0006988587267146414, "loss": 0.0258, "num_input_tokens_seen": 114647088, "step": 53065 }, { "epoch": 8.65742251223491, "grad_norm": 0.5618607401847839, "learning_rate": 0.0006987934166470454, "loss": 0.1312, "num_input_tokens_seen": 114658864, "step": 53070 }, { "epoch": 8.658238172920065, "grad_norm": 0.05817626416683197, "learning_rate": 0.0006987281025506487, "loss": 0.0412, "num_input_tokens_seen": 114670576, "step": 53075 }, { "epoch": 8.65905383360522, "grad_norm": 0.13771042227745056, "learning_rate": 0.0006986627844267748, "loss": 0.0924, "num_input_tokens_seen": 114682416, "step": 53080 }, { "epoch": 8.659869494290374, "grad_norm": 0.0055312663316726685, "learning_rate": 0.0006985974622767475, "loss": 0.0119, "num_input_tokens_seen": 114692496, "step": 53085 }, { "epoch": 8.66068515497553, "grad_norm": 0.04500410705804825, "learning_rate": 0.0006985321361018908, "loss": 0.1105, "num_input_tokens_seen": 114701872, "step": 53090 }, { "epoch": 8.661500815660686, "grad_norm": 0.0751497745513916, "learning_rate": 0.0006984668059035284, "loss": 0.0143, "num_input_tokens_seen": 114713392, "step": 53095 }, { "epoch": 8.66231647634584, "grad_norm": 0.012522445991635323, "learning_rate": 0.0006984014716829845, "loss": 0.0253, "num_input_tokens_seen": 114723056, "step": 53100 }, { "epoch": 8.663132137030995, "grad_norm": 0.07139261066913605, "learning_rate": 0.0006983361334415831, "loss": 0.03, "num_input_tokens_seen": 114735056, "step": 53105 }, { "epoch": 8.66394779771615, "grad_norm": 0.08267118781805038, "learning_rate": 0.0006982707911806483, "loss": 0.1202, "num_input_tokens_seen": 114746352, "step": 53110 }, { "epoch": 8.664763458401305, "grad_norm": 0.007913434877991676, "learning_rate": 0.0006982054449015044, "loss": 0.0371, "num_input_tokens_seen": 114757840, "step": 53115 }, { "epoch": 8.66557911908646, "grad_norm": 0.01678098551928997, "learning_rate": 0.0006981400946054758, "loss": 0.016, "num_input_tokens_seen": 114768848, "step": 53120 }, { "epoch": 8.666394779771615, "grad_norm": 0.2976462244987488, "learning_rate": 0.0006980747402938868, "loss": 0.0631, "num_input_tokens_seen": 114779952, "step": 53125 }, { "epoch": 8.66721044045677, "grad_norm": 0.09854038804769516, "learning_rate": 0.0006980093819680616, "loss": 0.0107, "num_input_tokens_seen": 114788880, "step": 53130 }, { "epoch": 8.668026101141924, "grad_norm": 0.006026748567819595, "learning_rate": 0.0006979440196293254, "loss": 0.0977, "num_input_tokens_seen": 114800528, "step": 53135 }, { "epoch": 8.66884176182708, "grad_norm": 0.017739087343215942, "learning_rate": 0.0006978786532790025, "loss": 0.04, "num_input_tokens_seen": 114811088, "step": 53140 }, { "epoch": 8.669657422512234, "grad_norm": 0.031023986637592316, "learning_rate": 0.0006978132829184176, "loss": 0.1588, "num_input_tokens_seen": 114822256, "step": 53145 }, { "epoch": 8.67047308319739, "grad_norm": 0.002542394446209073, "learning_rate": 0.0006977479085488956, "loss": 0.1743, "num_input_tokens_seen": 114834352, "step": 53150 }, { "epoch": 8.671288743882545, "grad_norm": 0.13473205268383026, "learning_rate": 0.0006976825301717615, "loss": 0.0847, "num_input_tokens_seen": 114844272, "step": 53155 }, { "epoch": 8.6721044045677, "grad_norm": 0.09745965898036957, "learning_rate": 0.0006976171477883399, "loss": 0.0513, "num_input_tokens_seen": 114855216, "step": 53160 }, { "epoch": 8.672920065252855, "grad_norm": 0.11454571783542633, "learning_rate": 0.0006975517613999562, "loss": 0.0267, "num_input_tokens_seen": 114866992, "step": 53165 }, { "epoch": 8.673735725938009, "grad_norm": 0.011554248631000519, "learning_rate": 0.0006974863710079355, "loss": 0.1094, "num_input_tokens_seen": 114878032, "step": 53170 }, { "epoch": 8.674551386623165, "grad_norm": 0.04244585707783699, "learning_rate": 0.0006974209766136031, "loss": 0.0644, "num_input_tokens_seen": 114889104, "step": 53175 }, { "epoch": 8.67536704730832, "grad_norm": 0.033876482397317886, "learning_rate": 0.0006973555782182839, "loss": 0.0396, "num_input_tokens_seen": 114899792, "step": 53180 }, { "epoch": 8.676182707993474, "grad_norm": 0.01323134358972311, "learning_rate": 0.0006972901758233037, "loss": 0.0818, "num_input_tokens_seen": 114911184, "step": 53185 }, { "epoch": 8.67699836867863, "grad_norm": 0.004896457307040691, "learning_rate": 0.0006972247694299877, "loss": 0.0284, "num_input_tokens_seen": 114921936, "step": 53190 }, { "epoch": 8.677814029363784, "grad_norm": 0.09365899115800858, "learning_rate": 0.0006971593590396616, "loss": 0.0165, "num_input_tokens_seen": 114932752, "step": 53195 }, { "epoch": 8.67862969004894, "grad_norm": 0.315121591091156, "learning_rate": 0.000697093944653651, "loss": 0.0879, "num_input_tokens_seen": 114943632, "step": 53200 }, { "epoch": 8.679445350734095, "grad_norm": 0.008210898377001286, "learning_rate": 0.0006970285262732815, "loss": 0.1004, "num_input_tokens_seen": 114955088, "step": 53205 }, { "epoch": 8.68026101141925, "grad_norm": 0.07787376642227173, "learning_rate": 0.000696963103899879, "loss": 0.0299, "num_input_tokens_seen": 114966416, "step": 53210 }, { "epoch": 8.681076672104405, "grad_norm": 0.04467257484793663, "learning_rate": 0.0006968976775347694, "loss": 0.1604, "num_input_tokens_seen": 114978032, "step": 53215 }, { "epoch": 8.681892332789559, "grad_norm": 0.07246481627225876, "learning_rate": 0.0006968322471792785, "loss": 0.0268, "num_input_tokens_seen": 114990416, "step": 53220 }, { "epoch": 8.682707993474715, "grad_norm": 0.04033525288105011, "learning_rate": 0.0006967668128347324, "loss": 0.0187, "num_input_tokens_seen": 115000336, "step": 53225 }, { "epoch": 8.68352365415987, "grad_norm": 0.0070259906351566315, "learning_rate": 0.0006967013745024573, "loss": 0.0676, "num_input_tokens_seen": 115010192, "step": 53230 }, { "epoch": 8.684339314845024, "grad_norm": 0.019295837730169296, "learning_rate": 0.0006966359321837792, "loss": 0.1062, "num_input_tokens_seen": 115021072, "step": 53235 }, { "epoch": 8.68515497553018, "grad_norm": 0.24232225120067596, "learning_rate": 0.0006965704858800246, "loss": 0.0964, "num_input_tokens_seen": 115031760, "step": 53240 }, { "epoch": 8.685970636215334, "grad_norm": 0.015037334524095058, "learning_rate": 0.0006965050355925197, "loss": 0.1433, "num_input_tokens_seen": 115042480, "step": 53245 }, { "epoch": 8.68678629690049, "grad_norm": 0.02515346184372902, "learning_rate": 0.000696439581322591, "loss": 0.0383, "num_input_tokens_seen": 115054384, "step": 53250 }, { "epoch": 8.687601957585644, "grad_norm": 0.1145104393362999, "learning_rate": 0.000696374123071565, "loss": 0.0218, "num_input_tokens_seen": 115064048, "step": 53255 }, { "epoch": 8.6884176182708, "grad_norm": 0.03403741493821144, "learning_rate": 0.0006963086608407683, "loss": 0.0287, "num_input_tokens_seen": 115075088, "step": 53260 }, { "epoch": 8.689233278955955, "grad_norm": 0.01130970474332571, "learning_rate": 0.0006962431946315274, "loss": 0.079, "num_input_tokens_seen": 115085008, "step": 53265 }, { "epoch": 8.690048939641109, "grad_norm": 0.09057468175888062, "learning_rate": 0.0006961777244451694, "loss": 0.1347, "num_input_tokens_seen": 115096688, "step": 53270 }, { "epoch": 8.690864600326265, "grad_norm": 0.05597497150301933, "learning_rate": 0.0006961122502830208, "loss": 0.0818, "num_input_tokens_seen": 115108080, "step": 53275 }, { "epoch": 8.691680261011419, "grad_norm": 0.24031208455562592, "learning_rate": 0.0006960467721464086, "loss": 0.0754, "num_input_tokens_seen": 115118864, "step": 53280 }, { "epoch": 8.692495921696574, "grad_norm": 0.22548261284828186, "learning_rate": 0.00069598129003666, "loss": 0.052, "num_input_tokens_seen": 115128944, "step": 53285 }, { "epoch": 8.69331158238173, "grad_norm": 0.1766250878572464, "learning_rate": 0.0006959158039551019, "loss": 0.0616, "num_input_tokens_seen": 115139600, "step": 53290 }, { "epoch": 8.694127243066884, "grad_norm": 0.007115835323929787, "learning_rate": 0.0006958503139030616, "loss": 0.0235, "num_input_tokens_seen": 115149552, "step": 53295 }, { "epoch": 8.69494290375204, "grad_norm": 0.05052676051855087, "learning_rate": 0.0006957848198818661, "loss": 0.0436, "num_input_tokens_seen": 115160624, "step": 53300 }, { "epoch": 8.695758564437194, "grad_norm": 0.0866311639547348, "learning_rate": 0.0006957193218928429, "loss": 0.1183, "num_input_tokens_seen": 115172304, "step": 53305 }, { "epoch": 8.69657422512235, "grad_norm": 0.2571766972541809, "learning_rate": 0.0006956538199373194, "loss": 0.0922, "num_input_tokens_seen": 115183824, "step": 53310 }, { "epoch": 8.697389885807503, "grad_norm": 0.02091350592672825, "learning_rate": 0.000695588314016623, "loss": 0.0278, "num_input_tokens_seen": 115194832, "step": 53315 }, { "epoch": 8.698205546492659, "grad_norm": 0.06575371325016022, "learning_rate": 0.0006955228041320811, "loss": 0.0135, "num_input_tokens_seen": 115204528, "step": 53320 }, { "epoch": 8.699021207177815, "grad_norm": 0.03311387449502945, "learning_rate": 0.0006954572902850218, "loss": 0.0177, "num_input_tokens_seen": 115213840, "step": 53325 }, { "epoch": 8.699836867862969, "grad_norm": 0.01601443998515606, "learning_rate": 0.0006953917724767724, "loss": 0.0281, "num_input_tokens_seen": 115223600, "step": 53330 }, { "epoch": 8.700652528548124, "grad_norm": 0.15040957927703857, "learning_rate": 0.0006953262507086611, "loss": 0.0961, "num_input_tokens_seen": 115233168, "step": 53335 }, { "epoch": 8.701468189233278, "grad_norm": 0.03878360241651535, "learning_rate": 0.0006952607249820153, "loss": 0.0101, "num_input_tokens_seen": 115244592, "step": 53340 }, { "epoch": 8.702283849918434, "grad_norm": 0.2561344504356384, "learning_rate": 0.0006951951952981631, "loss": 0.2158, "num_input_tokens_seen": 115254192, "step": 53345 }, { "epoch": 8.70309951060359, "grad_norm": 0.05164014548063278, "learning_rate": 0.0006951296616584329, "loss": 0.0431, "num_input_tokens_seen": 115266384, "step": 53350 }, { "epoch": 8.703915171288743, "grad_norm": 0.021563317626714706, "learning_rate": 0.0006950641240641524, "loss": 0.0155, "num_input_tokens_seen": 115277776, "step": 53355 }, { "epoch": 8.7047308319739, "grad_norm": 0.8428294658660889, "learning_rate": 0.0006949985825166501, "loss": 0.0516, "num_input_tokens_seen": 115289168, "step": 53360 }, { "epoch": 8.705546492659053, "grad_norm": 0.010905325412750244, "learning_rate": 0.0006949330370172541, "loss": 0.1883, "num_input_tokens_seen": 115299216, "step": 53365 }, { "epoch": 8.706362153344209, "grad_norm": 0.00984366238117218, "learning_rate": 0.0006948674875672927, "loss": 0.0573, "num_input_tokens_seen": 115309968, "step": 53370 }, { "epoch": 8.707177814029365, "grad_norm": 0.01519181951880455, "learning_rate": 0.0006948019341680945, "loss": 0.0492, "num_input_tokens_seen": 115321264, "step": 53375 }, { "epoch": 8.707993474714518, "grad_norm": 0.015593461692333221, "learning_rate": 0.0006947363768209882, "loss": 0.1021, "num_input_tokens_seen": 115331472, "step": 53380 }, { "epoch": 8.708809135399674, "grad_norm": 0.15758588910102844, "learning_rate": 0.000694670815527302, "loss": 0.0611, "num_input_tokens_seen": 115342480, "step": 53385 }, { "epoch": 8.709624796084828, "grad_norm": 0.006150087807327509, "learning_rate": 0.0006946052502883648, "loss": 0.0425, "num_input_tokens_seen": 115351184, "step": 53390 }, { "epoch": 8.710440456769984, "grad_norm": 0.12416129559278488, "learning_rate": 0.0006945396811055053, "loss": 0.0198, "num_input_tokens_seen": 115363152, "step": 53395 }, { "epoch": 8.71125611745514, "grad_norm": 0.05409393459558487, "learning_rate": 0.0006944741079800525, "loss": 0.0436, "num_input_tokens_seen": 115373648, "step": 53400 }, { "epoch": 8.712071778140293, "grad_norm": 0.39778071641921997, "learning_rate": 0.000694408530913335, "loss": 0.1524, "num_input_tokens_seen": 115384560, "step": 53405 }, { "epoch": 8.71288743882545, "grad_norm": 0.0031305132433772087, "learning_rate": 0.0006943429499066821, "loss": 0.0084, "num_input_tokens_seen": 115396272, "step": 53410 }, { "epoch": 8.713703099510603, "grad_norm": 0.008841861970722675, "learning_rate": 0.0006942773649614228, "loss": 0.0066, "num_input_tokens_seen": 115407600, "step": 53415 }, { "epoch": 8.714518760195759, "grad_norm": 0.337829053401947, "learning_rate": 0.0006942117760788862, "loss": 0.1059, "num_input_tokens_seen": 115418288, "step": 53420 }, { "epoch": 8.715334420880914, "grad_norm": 0.22501158714294434, "learning_rate": 0.0006941461832604017, "loss": 0.1335, "num_input_tokens_seen": 115428592, "step": 53425 }, { "epoch": 8.716150081566068, "grad_norm": 0.2836399972438812, "learning_rate": 0.0006940805865072984, "loss": 0.3035, "num_input_tokens_seen": 115439824, "step": 53430 }, { "epoch": 8.716965742251224, "grad_norm": 0.30257099866867065, "learning_rate": 0.0006940149858209058, "loss": 0.1856, "num_input_tokens_seen": 115450960, "step": 53435 }, { "epoch": 8.717781402936378, "grad_norm": 0.18512238562107086, "learning_rate": 0.0006939493812025534, "loss": 0.0894, "num_input_tokens_seen": 115461584, "step": 53440 }, { "epoch": 8.718597063621534, "grad_norm": 0.05251099169254303, "learning_rate": 0.0006938837726535707, "loss": 0.0768, "num_input_tokens_seen": 115470992, "step": 53445 }, { "epoch": 8.719412724306688, "grad_norm": 0.28675875067710876, "learning_rate": 0.0006938181601752873, "loss": 0.0591, "num_input_tokens_seen": 115482736, "step": 53450 }, { "epoch": 8.720228384991843, "grad_norm": 0.03428531438112259, "learning_rate": 0.0006937525437690332, "loss": 0.0197, "num_input_tokens_seen": 115493680, "step": 53455 }, { "epoch": 8.721044045676999, "grad_norm": 0.007960710674524307, "learning_rate": 0.0006936869234361379, "loss": 0.064, "num_input_tokens_seen": 115503568, "step": 53460 }, { "epoch": 8.721859706362153, "grad_norm": 0.025951100513339043, "learning_rate": 0.0006936212991779314, "loss": 0.0374, "num_input_tokens_seen": 115514512, "step": 53465 }, { "epoch": 8.722675367047309, "grad_norm": 0.31575798988342285, "learning_rate": 0.0006935556709957437, "loss": 0.0587, "num_input_tokens_seen": 115525840, "step": 53470 }, { "epoch": 8.723491027732463, "grad_norm": 0.0034322626888751984, "learning_rate": 0.0006934900388909048, "loss": 0.0936, "num_input_tokens_seen": 115535856, "step": 53475 }, { "epoch": 8.724306688417618, "grad_norm": 0.011164214462041855, "learning_rate": 0.0006934244028647447, "loss": 0.0251, "num_input_tokens_seen": 115547376, "step": 53480 }, { "epoch": 8.725122349102774, "grad_norm": 0.27567970752716064, "learning_rate": 0.0006933587629185938, "loss": 0.1887, "num_input_tokens_seen": 115557200, "step": 53485 }, { "epoch": 8.725938009787928, "grad_norm": 0.23501868546009064, "learning_rate": 0.0006932931190537822, "loss": 0.1783, "num_input_tokens_seen": 115568048, "step": 53490 }, { "epoch": 8.726753670473084, "grad_norm": 0.017488988116383553, "learning_rate": 0.0006932274712716405, "loss": 0.0237, "num_input_tokens_seen": 115578736, "step": 53495 }, { "epoch": 8.727569331158238, "grad_norm": 0.025671212002635002, "learning_rate": 0.0006931618195734988, "loss": 0.0542, "num_input_tokens_seen": 115588656, "step": 53500 }, { "epoch": 8.728384991843393, "grad_norm": 0.30542680621147156, "learning_rate": 0.0006930961639606878, "loss": 0.0637, "num_input_tokens_seen": 115599280, "step": 53505 }, { "epoch": 8.729200652528547, "grad_norm": 0.04574590176343918, "learning_rate": 0.0006930305044345381, "loss": 0.0612, "num_input_tokens_seen": 115610544, "step": 53510 }, { "epoch": 8.730016313213703, "grad_norm": 0.016665812581777573, "learning_rate": 0.0006929648409963802, "loss": 0.0437, "num_input_tokens_seen": 115621104, "step": 53515 }, { "epoch": 8.730831973898859, "grad_norm": 0.06146889925003052, "learning_rate": 0.0006928991736475452, "loss": 0.0719, "num_input_tokens_seen": 115633104, "step": 53520 }, { "epoch": 8.731647634584013, "grad_norm": 0.6150043606758118, "learning_rate": 0.0006928335023893637, "loss": 0.0762, "num_input_tokens_seen": 115644048, "step": 53525 }, { "epoch": 8.732463295269168, "grad_norm": 0.028153996914625168, "learning_rate": 0.0006927678272231667, "loss": 0.0251, "num_input_tokens_seen": 115656304, "step": 53530 }, { "epoch": 8.733278955954322, "grad_norm": 0.07091391086578369, "learning_rate": 0.0006927021481502851, "loss": 0.0403, "num_input_tokens_seen": 115667120, "step": 53535 }, { "epoch": 8.734094616639478, "grad_norm": 0.049456529319286346, "learning_rate": 0.0006926364651720499, "loss": 0.0772, "num_input_tokens_seen": 115676880, "step": 53540 }, { "epoch": 8.734910277324634, "grad_norm": 0.32859930396080017, "learning_rate": 0.0006925707782897925, "loss": 0.2128, "num_input_tokens_seen": 115687344, "step": 53545 }, { "epoch": 8.735725938009788, "grad_norm": 0.02055169828236103, "learning_rate": 0.000692505087504844, "loss": 0.0476, "num_input_tokens_seen": 115697936, "step": 53550 }, { "epoch": 8.736541598694943, "grad_norm": 0.016683807596564293, "learning_rate": 0.0006924393928185354, "loss": 0.0219, "num_input_tokens_seen": 115707056, "step": 53555 }, { "epoch": 8.737357259380097, "grad_norm": 0.07922355085611343, "learning_rate": 0.0006923736942321987, "loss": 0.0324, "num_input_tokens_seen": 115718384, "step": 53560 }, { "epoch": 8.738172920065253, "grad_norm": 0.04225243628025055, "learning_rate": 0.0006923079917471648, "loss": 0.0664, "num_input_tokens_seen": 115729168, "step": 53565 }, { "epoch": 8.738988580750409, "grad_norm": 0.047049473971128464, "learning_rate": 0.0006922422853647656, "loss": 0.1753, "num_input_tokens_seen": 115739344, "step": 53570 }, { "epoch": 8.739804241435563, "grad_norm": 0.061708804219961166, "learning_rate": 0.0006921765750863327, "loss": 0.0559, "num_input_tokens_seen": 115749360, "step": 53575 }, { "epoch": 8.740619902120718, "grad_norm": 0.3257828652858734, "learning_rate": 0.0006921108609131976, "loss": 0.0648, "num_input_tokens_seen": 115759664, "step": 53580 }, { "epoch": 8.741435562805872, "grad_norm": 0.10550856590270996, "learning_rate": 0.0006920451428466923, "loss": 0.0411, "num_input_tokens_seen": 115770192, "step": 53585 }, { "epoch": 8.742251223491028, "grad_norm": 0.037641484290361404, "learning_rate": 0.0006919794208881486, "loss": 0.0312, "num_input_tokens_seen": 115782000, "step": 53590 }, { "epoch": 8.743066884176184, "grad_norm": 0.07037726789712906, "learning_rate": 0.0006919136950388982, "loss": 0.033, "num_input_tokens_seen": 115792560, "step": 53595 }, { "epoch": 8.743882544861338, "grad_norm": 0.09038639813661575, "learning_rate": 0.0006918479653002734, "loss": 0.0259, "num_input_tokens_seen": 115803248, "step": 53600 }, { "epoch": 8.744698205546493, "grad_norm": 0.26753684878349304, "learning_rate": 0.0006917822316736062, "loss": 0.2313, "num_input_tokens_seen": 115813776, "step": 53605 }, { "epoch": 8.745513866231647, "grad_norm": 0.004256491083651781, "learning_rate": 0.0006917164941602289, "loss": 0.215, "num_input_tokens_seen": 115824144, "step": 53610 }, { "epoch": 8.746329526916803, "grad_norm": 0.49793606996536255, "learning_rate": 0.0006916507527614735, "loss": 0.1397, "num_input_tokens_seen": 115834896, "step": 53615 }, { "epoch": 8.747145187601957, "grad_norm": 0.03849693387746811, "learning_rate": 0.0006915850074786725, "loss": 0.0755, "num_input_tokens_seen": 115844976, "step": 53620 }, { "epoch": 8.747960848287113, "grad_norm": 0.1075640395283699, "learning_rate": 0.0006915192583131582, "loss": 0.0565, "num_input_tokens_seen": 115856752, "step": 53625 }, { "epoch": 8.748776508972268, "grad_norm": 0.014227380976080894, "learning_rate": 0.0006914535052662633, "loss": 0.032, "num_input_tokens_seen": 115866064, "step": 53630 }, { "epoch": 8.749592169657422, "grad_norm": 0.07204482704401016, "learning_rate": 0.0006913877483393202, "loss": 0.1358, "num_input_tokens_seen": 115875856, "step": 53635 }, { "epoch": 8.750407830342578, "grad_norm": 0.03224179521203041, "learning_rate": 0.0006913219875336616, "loss": 0.1238, "num_input_tokens_seen": 115886608, "step": 53640 }, { "epoch": 8.751223491027732, "grad_norm": 0.02999437227845192, "learning_rate": 0.0006912562228506201, "loss": 0.0118, "num_input_tokens_seen": 115898672, "step": 53645 }, { "epoch": 8.752039151712887, "grad_norm": 0.012919296510517597, "learning_rate": 0.0006911904542915288, "loss": 0.1489, "num_input_tokens_seen": 115910448, "step": 53650 }, { "epoch": 8.752854812398043, "grad_norm": 0.3587695360183716, "learning_rate": 0.0006911246818577201, "loss": 0.0957, "num_input_tokens_seen": 115920944, "step": 53655 }, { "epoch": 8.753670473083197, "grad_norm": 0.17896397411823273, "learning_rate": 0.0006910589055505275, "loss": 0.0861, "num_input_tokens_seen": 115930480, "step": 53660 }, { "epoch": 8.754486133768353, "grad_norm": 0.09359142929315567, "learning_rate": 0.0006909931253712838, "loss": 0.0512, "num_input_tokens_seen": 115940144, "step": 53665 }, { "epoch": 8.755301794453507, "grad_norm": 0.3921225965023041, "learning_rate": 0.0006909273413213222, "loss": 0.1149, "num_input_tokens_seen": 115951216, "step": 53670 }, { "epoch": 8.756117455138662, "grad_norm": 0.1849663108587265, "learning_rate": 0.0006908615534019757, "loss": 0.0598, "num_input_tokens_seen": 115961680, "step": 53675 }, { "epoch": 8.756933115823816, "grad_norm": 0.07306811958551407, "learning_rate": 0.0006907957616145777, "loss": 0.0202, "num_input_tokens_seen": 115972720, "step": 53680 }, { "epoch": 8.757748776508972, "grad_norm": 0.13053929805755615, "learning_rate": 0.0006907299659604613, "loss": 0.0811, "num_input_tokens_seen": 115983632, "step": 53685 }, { "epoch": 8.758564437194128, "grad_norm": 0.265766441822052, "learning_rate": 0.0006906641664409605, "loss": 0.1375, "num_input_tokens_seen": 115993552, "step": 53690 }, { "epoch": 8.759380097879282, "grad_norm": 0.018430359661579132, "learning_rate": 0.0006905983630574084, "loss": 0.0442, "num_input_tokens_seen": 116004752, "step": 53695 }, { "epoch": 8.760195758564437, "grad_norm": 0.03611930087208748, "learning_rate": 0.0006905325558111389, "loss": 0.0762, "num_input_tokens_seen": 116015056, "step": 53700 }, { "epoch": 8.761011419249591, "grad_norm": 0.023959677666425705, "learning_rate": 0.0006904667447034851, "loss": 0.0373, "num_input_tokens_seen": 116026512, "step": 53705 }, { "epoch": 8.761827079934747, "grad_norm": 0.005680852569639683, "learning_rate": 0.0006904009297357814, "loss": 0.0173, "num_input_tokens_seen": 116036144, "step": 53710 }, { "epoch": 8.762642740619903, "grad_norm": 0.03747767210006714, "learning_rate": 0.000690335110909361, "loss": 0.0393, "num_input_tokens_seen": 116047056, "step": 53715 }, { "epoch": 8.763458401305057, "grad_norm": 0.01141283754259348, "learning_rate": 0.0006902692882255583, "loss": 0.013, "num_input_tokens_seen": 116057360, "step": 53720 }, { "epoch": 8.764274061990212, "grad_norm": 0.039775773882865906, "learning_rate": 0.0006902034616857073, "loss": 0.0694, "num_input_tokens_seen": 116066992, "step": 53725 }, { "epoch": 8.765089722675366, "grad_norm": 0.12590359151363373, "learning_rate": 0.0006901376312911416, "loss": 0.0809, "num_input_tokens_seen": 116079344, "step": 53730 }, { "epoch": 8.765905383360522, "grad_norm": 0.011169610545039177, "learning_rate": 0.0006900717970431956, "loss": 0.0129, "num_input_tokens_seen": 116089648, "step": 53735 }, { "epoch": 8.766721044045678, "grad_norm": 0.31761443614959717, "learning_rate": 0.0006900059589432036, "loss": 0.1355, "num_input_tokens_seen": 116100656, "step": 53740 }, { "epoch": 8.767536704730832, "grad_norm": 0.004789343569427729, "learning_rate": 0.0006899401169924997, "loss": 0.0708, "num_input_tokens_seen": 116111184, "step": 53745 }, { "epoch": 8.768352365415987, "grad_norm": 0.18085156381130219, "learning_rate": 0.0006898742711924185, "loss": 0.0906, "num_input_tokens_seen": 116121648, "step": 53750 }, { "epoch": 8.769168026101141, "grad_norm": 0.19203798472881317, "learning_rate": 0.0006898084215442942, "loss": 0.3073, "num_input_tokens_seen": 116132144, "step": 53755 }, { "epoch": 8.769983686786297, "grad_norm": 0.005764484871178865, "learning_rate": 0.0006897425680494616, "loss": 0.0732, "num_input_tokens_seen": 116144176, "step": 53760 }, { "epoch": 8.770799347471453, "grad_norm": 0.005528958048671484, "learning_rate": 0.000689676710709255, "loss": 0.125, "num_input_tokens_seen": 116154704, "step": 53765 }, { "epoch": 8.771615008156607, "grad_norm": 0.018419597297906876, "learning_rate": 0.0006896108495250092, "loss": 0.0443, "num_input_tokens_seen": 116165904, "step": 53770 }, { "epoch": 8.772430668841762, "grad_norm": 0.18632349371910095, "learning_rate": 0.0006895449844980592, "loss": 0.0617, "num_input_tokens_seen": 116177040, "step": 53775 }, { "epoch": 8.773246329526916, "grad_norm": 0.012830687686800957, "learning_rate": 0.0006894791156297394, "loss": 0.072, "num_input_tokens_seen": 116187248, "step": 53780 }, { "epoch": 8.774061990212072, "grad_norm": 0.002509468700736761, "learning_rate": 0.0006894132429213851, "loss": 0.0209, "num_input_tokens_seen": 116198640, "step": 53785 }, { "epoch": 8.774877650897226, "grad_norm": 0.037770144641399384, "learning_rate": 0.0006893473663743311, "loss": 0.0382, "num_input_tokens_seen": 116208944, "step": 53790 }, { "epoch": 8.775693311582382, "grad_norm": 0.1476047933101654, "learning_rate": 0.0006892814859899126, "loss": 0.0277, "num_input_tokens_seen": 116220784, "step": 53795 }, { "epoch": 8.776508972267537, "grad_norm": 0.004433739464730024, "learning_rate": 0.0006892156017694646, "loss": 0.0761, "num_input_tokens_seen": 116231600, "step": 53800 }, { "epoch": 8.777324632952691, "grad_norm": 0.027068404480814934, "learning_rate": 0.0006891497137143224, "loss": 0.1049, "num_input_tokens_seen": 116243280, "step": 53805 }, { "epoch": 8.778140293637847, "grad_norm": 0.008436436764895916, "learning_rate": 0.0006890838218258213, "loss": 0.0582, "num_input_tokens_seen": 116253584, "step": 53810 }, { "epoch": 8.778955954323001, "grad_norm": 0.017870329320430756, "learning_rate": 0.0006890179261052967, "loss": 0.0358, "num_input_tokens_seen": 116263824, "step": 53815 }, { "epoch": 8.779771615008157, "grad_norm": 0.023240847513079643, "learning_rate": 0.000688952026554084, "loss": 0.0669, "num_input_tokens_seen": 116274224, "step": 53820 }, { "epoch": 8.780587275693312, "grad_norm": 0.018025638535618782, "learning_rate": 0.0006888861231735186, "loss": 0.0153, "num_input_tokens_seen": 116284944, "step": 53825 }, { "epoch": 8.781402936378466, "grad_norm": 0.017300186678767204, "learning_rate": 0.0006888202159649366, "loss": 0.0943, "num_input_tokens_seen": 116295696, "step": 53830 }, { "epoch": 8.782218597063622, "grad_norm": 0.09028012305498123, "learning_rate": 0.0006887543049296733, "loss": 0.0588, "num_input_tokens_seen": 116306512, "step": 53835 }, { "epoch": 8.783034257748776, "grad_norm": 0.06694075465202332, "learning_rate": 0.0006886883900690645, "loss": 0.1342, "num_input_tokens_seen": 116316944, "step": 53840 }, { "epoch": 8.783849918433932, "grad_norm": 0.02075386978685856, "learning_rate": 0.0006886224713844461, "loss": 0.0448, "num_input_tokens_seen": 116327664, "step": 53845 }, { "epoch": 8.784665579119086, "grad_norm": 0.2526707947254181, "learning_rate": 0.0006885565488771541, "loss": 0.0893, "num_input_tokens_seen": 116339184, "step": 53850 }, { "epoch": 8.785481239804241, "grad_norm": 0.01189572736620903, "learning_rate": 0.0006884906225485245, "loss": 0.0183, "num_input_tokens_seen": 116349104, "step": 53855 }, { "epoch": 8.786296900489397, "grad_norm": 0.13445112109184265, "learning_rate": 0.0006884246923998932, "loss": 0.1051, "num_input_tokens_seen": 116359856, "step": 53860 }, { "epoch": 8.78711256117455, "grad_norm": 0.21758858859539032, "learning_rate": 0.0006883587584325965, "loss": 0.0645, "num_input_tokens_seen": 116370672, "step": 53865 }, { "epoch": 8.787928221859707, "grad_norm": 0.007030788343399763, "learning_rate": 0.0006882928206479707, "loss": 0.042, "num_input_tokens_seen": 116381648, "step": 53870 }, { "epoch": 8.78874388254486, "grad_norm": 0.1571875512599945, "learning_rate": 0.0006882268790473517, "loss": 0.1503, "num_input_tokens_seen": 116392912, "step": 53875 }, { "epoch": 8.789559543230016, "grad_norm": 0.05002790316939354, "learning_rate": 0.0006881609336320764, "loss": 0.1246, "num_input_tokens_seen": 116402448, "step": 53880 }, { "epoch": 8.790375203915172, "grad_norm": 0.25340625643730164, "learning_rate": 0.0006880949844034811, "loss": 0.0462, "num_input_tokens_seen": 116412496, "step": 53885 }, { "epoch": 8.791190864600326, "grad_norm": 0.12071508914232254, "learning_rate": 0.0006880290313629026, "loss": 0.0331, "num_input_tokens_seen": 116422736, "step": 53890 }, { "epoch": 8.792006525285482, "grad_norm": 0.3033401370048523, "learning_rate": 0.0006879630745116769, "loss": 0.0823, "num_input_tokens_seen": 116432464, "step": 53895 }, { "epoch": 8.792822185970635, "grad_norm": 0.2533930838108063, "learning_rate": 0.0006878971138511412, "loss": 0.0436, "num_input_tokens_seen": 116442224, "step": 53900 }, { "epoch": 8.793637846655791, "grad_norm": 0.11267364770174026, "learning_rate": 0.000687831149382632, "loss": 0.1089, "num_input_tokens_seen": 116454352, "step": 53905 }, { "epoch": 8.794453507340947, "grad_norm": 0.021168632432818413, "learning_rate": 0.0006877651811074863, "loss": 0.064, "num_input_tokens_seen": 116465264, "step": 53910 }, { "epoch": 8.7952691680261, "grad_norm": 0.05451773852109909, "learning_rate": 0.0006876992090270411, "loss": 0.0697, "num_input_tokens_seen": 116475760, "step": 53915 }, { "epoch": 8.796084828711257, "grad_norm": 0.060790155082941055, "learning_rate": 0.0006876332331426332, "loss": 0.1321, "num_input_tokens_seen": 116485744, "step": 53920 }, { "epoch": 8.79690048939641, "grad_norm": 0.06983616203069687, "learning_rate": 0.0006875672534556, "loss": 0.0615, "num_input_tokens_seen": 116497488, "step": 53925 }, { "epoch": 8.797716150081566, "grad_norm": 0.20969927310943604, "learning_rate": 0.0006875012699672783, "loss": 0.1099, "num_input_tokens_seen": 116507728, "step": 53930 }, { "epoch": 8.798531810766722, "grad_norm": 0.0036327510606497526, "learning_rate": 0.0006874352826790055, "loss": 0.1238, "num_input_tokens_seen": 116517552, "step": 53935 }, { "epoch": 8.799347471451876, "grad_norm": 0.050834622234106064, "learning_rate": 0.000687369291592119, "loss": 0.0128, "num_input_tokens_seen": 116528144, "step": 53940 }, { "epoch": 8.800163132137031, "grad_norm": 0.22240811586380005, "learning_rate": 0.0006873032967079561, "loss": 0.1287, "num_input_tokens_seen": 116537968, "step": 53945 }, { "epoch": 8.800978792822185, "grad_norm": 0.10322391241788864, "learning_rate": 0.0006872372980278543, "loss": 0.0811, "num_input_tokens_seen": 116548208, "step": 53950 }, { "epoch": 8.801794453507341, "grad_norm": 0.027097368612885475, "learning_rate": 0.0006871712955531511, "loss": 0.0971, "num_input_tokens_seen": 116558480, "step": 53955 }, { "epoch": 8.802610114192497, "grad_norm": 0.23474834859371185, "learning_rate": 0.0006871052892851842, "loss": 0.2162, "num_input_tokens_seen": 116569776, "step": 53960 }, { "epoch": 8.80342577487765, "grad_norm": 0.02830558642745018, "learning_rate": 0.0006870392792252911, "loss": 0.0259, "num_input_tokens_seen": 116580144, "step": 53965 }, { "epoch": 8.804241435562806, "grad_norm": 0.010453589260578156, "learning_rate": 0.0006869732653748096, "loss": 0.0772, "num_input_tokens_seen": 116590064, "step": 53970 }, { "epoch": 8.80505709624796, "grad_norm": 0.17776353657245636, "learning_rate": 0.000686907247735078, "loss": 0.0678, "num_input_tokens_seen": 116600080, "step": 53975 }, { "epoch": 8.805872756933116, "grad_norm": 0.006603384390473366, "learning_rate": 0.0006868412263074337, "loss": 0.0302, "num_input_tokens_seen": 116610160, "step": 53980 }, { "epoch": 8.80668841761827, "grad_norm": 0.020314160734415054, "learning_rate": 0.0006867752010932151, "loss": 0.0417, "num_input_tokens_seen": 116620688, "step": 53985 }, { "epoch": 8.807504078303426, "grad_norm": 0.13206049799919128, "learning_rate": 0.00068670917209376, "loss": 0.1469, "num_input_tokens_seen": 116630064, "step": 53990 }, { "epoch": 8.808319738988581, "grad_norm": 0.08112114667892456, "learning_rate": 0.0006866431393104067, "loss": 0.0818, "num_input_tokens_seen": 116641520, "step": 53995 }, { "epoch": 8.809135399673735, "grad_norm": 0.3721643388271332, "learning_rate": 0.0006865771027444933, "loss": 0.2034, "num_input_tokens_seen": 116653456, "step": 54000 }, { "epoch": 8.809951060358891, "grad_norm": 0.18222694098949432, "learning_rate": 0.0006865110623973585, "loss": 0.1046, "num_input_tokens_seen": 116663408, "step": 54005 }, { "epoch": 8.810766721044045, "grad_norm": 0.5798073410987854, "learning_rate": 0.0006864450182703403, "loss": 0.0734, "num_input_tokens_seen": 116674256, "step": 54010 }, { "epoch": 8.8115823817292, "grad_norm": 0.06881486624479294, "learning_rate": 0.0006863789703647771, "loss": 0.0776, "num_input_tokens_seen": 116685552, "step": 54015 }, { "epoch": 8.812398042414356, "grad_norm": 0.022122984752058983, "learning_rate": 0.0006863129186820079, "loss": 0.1005, "num_input_tokens_seen": 116695824, "step": 54020 }, { "epoch": 8.81321370309951, "grad_norm": 0.03289042413234711, "learning_rate": 0.0006862468632233709, "loss": 0.0524, "num_input_tokens_seen": 116706128, "step": 54025 }, { "epoch": 8.814029363784666, "grad_norm": 0.07637394964694977, "learning_rate": 0.000686180803990205, "loss": 0.1307, "num_input_tokens_seen": 116715728, "step": 54030 }, { "epoch": 8.81484502446982, "grad_norm": 0.016515476629137993, "learning_rate": 0.0006861147409838489, "loss": 0.0353, "num_input_tokens_seen": 116726992, "step": 54035 }, { "epoch": 8.815660685154976, "grad_norm": 0.3054102063179016, "learning_rate": 0.0006860486742056415, "loss": 0.1445, "num_input_tokens_seen": 116737968, "step": 54040 }, { "epoch": 8.81647634584013, "grad_norm": 0.007263507228344679, "learning_rate": 0.0006859826036569216, "loss": 0.0922, "num_input_tokens_seen": 116748240, "step": 54045 }, { "epoch": 8.817292006525285, "grad_norm": 0.1208798810839653, "learning_rate": 0.0006859165293390284, "loss": 0.0555, "num_input_tokens_seen": 116759504, "step": 54050 }, { "epoch": 8.818107667210441, "grad_norm": 0.22497281432151794, "learning_rate": 0.0006858504512533008, "loss": 0.0817, "num_input_tokens_seen": 116770768, "step": 54055 }, { "epoch": 8.818923327895595, "grad_norm": 0.1389174610376358, "learning_rate": 0.000685784369401078, "loss": 0.0579, "num_input_tokens_seen": 116782352, "step": 54060 }, { "epoch": 8.81973898858075, "grad_norm": 0.3582153916358948, "learning_rate": 0.0006857182837836994, "loss": 0.1186, "num_input_tokens_seen": 116793712, "step": 54065 }, { "epoch": 8.820554649265905, "grad_norm": 0.004126532934606075, "learning_rate": 0.0006856521944025041, "loss": 0.0268, "num_input_tokens_seen": 116804816, "step": 54070 }, { "epoch": 8.82137030995106, "grad_norm": 0.0762988030910492, "learning_rate": 0.0006855861012588316, "loss": 0.0666, "num_input_tokens_seen": 116815824, "step": 54075 }, { "epoch": 8.822185970636216, "grad_norm": 0.12311746925115585, "learning_rate": 0.0006855200043540213, "loss": 0.0263, "num_input_tokens_seen": 116826960, "step": 54080 }, { "epoch": 8.82300163132137, "grad_norm": 0.004039400722831488, "learning_rate": 0.0006854539036894128, "loss": 0.0109, "num_input_tokens_seen": 116837680, "step": 54085 }, { "epoch": 8.823817292006526, "grad_norm": 0.060211095958948135, "learning_rate": 0.0006853877992663456, "loss": 0.1247, "num_input_tokens_seen": 116847120, "step": 54090 }, { "epoch": 8.82463295269168, "grad_norm": 0.01676938310265541, "learning_rate": 0.0006853216910861595, "loss": 0.0374, "num_input_tokens_seen": 116857456, "step": 54095 }, { "epoch": 8.825448613376835, "grad_norm": 0.04738698527216911, "learning_rate": 0.0006852555791501942, "loss": 0.0222, "num_input_tokens_seen": 116869264, "step": 54100 }, { "epoch": 8.826264274061991, "grad_norm": 0.26809951663017273, "learning_rate": 0.0006851894634597898, "loss": 0.1946, "num_input_tokens_seen": 116879504, "step": 54105 }, { "epoch": 8.827079934747145, "grad_norm": 0.06636186689138412, "learning_rate": 0.0006851233440162858, "loss": 0.1837, "num_input_tokens_seen": 116889808, "step": 54110 }, { "epoch": 8.8278955954323, "grad_norm": 0.048026785254478455, "learning_rate": 0.0006850572208210223, "loss": 0.0446, "num_input_tokens_seen": 116900816, "step": 54115 }, { "epoch": 8.828711256117455, "grad_norm": 0.010122005827724934, "learning_rate": 0.0006849910938753396, "loss": 0.2698, "num_input_tokens_seen": 116912208, "step": 54120 }, { "epoch": 8.82952691680261, "grad_norm": 0.31122568249702454, "learning_rate": 0.0006849249631805777, "loss": 0.0497, "num_input_tokens_seen": 116922864, "step": 54125 }, { "epoch": 8.830342577487766, "grad_norm": 0.12743504345417023, "learning_rate": 0.0006848588287380769, "loss": 0.0294, "num_input_tokens_seen": 116933552, "step": 54130 }, { "epoch": 8.83115823817292, "grad_norm": 0.01123537216335535, "learning_rate": 0.0006847926905491771, "loss": 0.1134, "num_input_tokens_seen": 116944208, "step": 54135 }, { "epoch": 8.831973898858076, "grad_norm": 0.01716950722038746, "learning_rate": 0.0006847265486152192, "loss": 0.0422, "num_input_tokens_seen": 116955248, "step": 54140 }, { "epoch": 8.83278955954323, "grad_norm": 0.07278816401958466, "learning_rate": 0.0006846604029375435, "loss": 0.0258, "num_input_tokens_seen": 116966768, "step": 54145 }, { "epoch": 8.833605220228385, "grad_norm": 0.005184966139495373, "learning_rate": 0.0006845942535174905, "loss": 0.0346, "num_input_tokens_seen": 116977392, "step": 54150 }, { "epoch": 8.83442088091354, "grad_norm": 0.30484655499458313, "learning_rate": 0.0006845281003564007, "loss": 0.1343, "num_input_tokens_seen": 116988528, "step": 54155 }, { "epoch": 8.835236541598695, "grad_norm": 0.14823204278945923, "learning_rate": 0.0006844619434556149, "loss": 0.0152, "num_input_tokens_seen": 116999280, "step": 54160 }, { "epoch": 8.83605220228385, "grad_norm": 0.024366330355405807, "learning_rate": 0.0006843957828164737, "loss": 0.0304, "num_input_tokens_seen": 117008368, "step": 54165 }, { "epoch": 8.836867862969005, "grad_norm": 0.161958709359169, "learning_rate": 0.0006843296184403182, "loss": 0.0811, "num_input_tokens_seen": 117019472, "step": 54170 }, { "epoch": 8.83768352365416, "grad_norm": 0.031379345804452896, "learning_rate": 0.0006842634503284891, "loss": 0.0155, "num_input_tokens_seen": 117029136, "step": 54175 }, { "epoch": 8.838499184339314, "grad_norm": 0.030218927189707756, "learning_rate": 0.0006841972784823274, "loss": 0.0115, "num_input_tokens_seen": 117039888, "step": 54180 }, { "epoch": 8.83931484502447, "grad_norm": 0.0450090654194355, "learning_rate": 0.0006841311029031742, "loss": 0.2128, "num_input_tokens_seen": 117050224, "step": 54185 }, { "epoch": 8.840130505709626, "grad_norm": 0.20938782393932343, "learning_rate": 0.0006840649235923706, "loss": 0.0908, "num_input_tokens_seen": 117061840, "step": 54190 }, { "epoch": 8.84094616639478, "grad_norm": 0.0133676053956151, "learning_rate": 0.0006839987405512577, "loss": 0.153, "num_input_tokens_seen": 117073040, "step": 54195 }, { "epoch": 8.841761827079935, "grad_norm": 0.22873623669147491, "learning_rate": 0.000683932553781177, "loss": 0.157, "num_input_tokens_seen": 117084240, "step": 54200 }, { "epoch": 8.84257748776509, "grad_norm": 0.02595687285065651, "learning_rate": 0.0006838663632834697, "loss": 0.0781, "num_input_tokens_seen": 117095568, "step": 54205 }, { "epoch": 8.843393148450245, "grad_norm": 0.20884649455547333, "learning_rate": 0.0006838001690594775, "loss": 0.1167, "num_input_tokens_seen": 117106288, "step": 54210 }, { "epoch": 8.844208809135399, "grad_norm": 0.03644806891679764, "learning_rate": 0.0006837339711105414, "loss": 0.0528, "num_input_tokens_seen": 117116912, "step": 54215 }, { "epoch": 8.845024469820554, "grad_norm": 0.17615261673927307, "learning_rate": 0.0006836677694380035, "loss": 0.2627, "num_input_tokens_seen": 117128048, "step": 54220 }, { "epoch": 8.84584013050571, "grad_norm": 0.27721107006073, "learning_rate": 0.0006836015640432054, "loss": 0.0625, "num_input_tokens_seen": 117139376, "step": 54225 }, { "epoch": 8.846655791190864, "grad_norm": 0.003872593864798546, "learning_rate": 0.0006835353549274885, "loss": 0.0266, "num_input_tokens_seen": 117151472, "step": 54230 }, { "epoch": 8.84747145187602, "grad_norm": 0.02096593752503395, "learning_rate": 0.0006834691420921948, "loss": 0.0404, "num_input_tokens_seen": 117160848, "step": 54235 }, { "epoch": 8.848287112561174, "grad_norm": 0.0623176284134388, "learning_rate": 0.0006834029255386663, "loss": 0.0434, "num_input_tokens_seen": 117171664, "step": 54240 }, { "epoch": 8.84910277324633, "grad_norm": 0.01989927887916565, "learning_rate": 0.0006833367052682446, "loss": 0.0354, "num_input_tokens_seen": 117181488, "step": 54245 }, { "epoch": 8.849918433931485, "grad_norm": 0.23223379254341125, "learning_rate": 0.0006832704812822722, "loss": 0.1932, "num_input_tokens_seen": 117191984, "step": 54250 }, { "epoch": 8.850734094616639, "grad_norm": 0.11960668861865997, "learning_rate": 0.0006832042535820911, "loss": 0.046, "num_input_tokens_seen": 117201776, "step": 54255 }, { "epoch": 8.851549755301795, "grad_norm": 0.1948905736207962, "learning_rate": 0.0006831380221690431, "loss": 0.093, "num_input_tokens_seen": 117212944, "step": 54260 }, { "epoch": 8.852365415986949, "grad_norm": 0.16176442801952362, "learning_rate": 0.0006830717870444709, "loss": 0.0523, "num_input_tokens_seen": 117223312, "step": 54265 }, { "epoch": 8.853181076672104, "grad_norm": 0.09057550132274628, "learning_rate": 0.0006830055482097168, "loss": 0.0323, "num_input_tokens_seen": 117233744, "step": 54270 }, { "epoch": 8.85399673735726, "grad_norm": 0.15768930315971375, "learning_rate": 0.000682939305666123, "loss": 0.2155, "num_input_tokens_seen": 117245328, "step": 54275 }, { "epoch": 8.854812398042414, "grad_norm": 0.30745288729667664, "learning_rate": 0.000682873059415032, "loss": 0.1165, "num_input_tokens_seen": 117256176, "step": 54280 }, { "epoch": 8.85562805872757, "grad_norm": 0.04249805584549904, "learning_rate": 0.0006828068094577864, "loss": 0.0557, "num_input_tokens_seen": 117267664, "step": 54285 }, { "epoch": 8.856443719412724, "grad_norm": 0.30030643939971924, "learning_rate": 0.0006827405557957291, "loss": 0.0919, "num_input_tokens_seen": 117277744, "step": 54290 }, { "epoch": 8.85725938009788, "grad_norm": 0.11320184171199799, "learning_rate": 0.0006826742984302026, "loss": 0.1424, "num_input_tokens_seen": 117289168, "step": 54295 }, { "epoch": 8.858075040783035, "grad_norm": 0.07032129168510437, "learning_rate": 0.0006826080373625496, "loss": 0.0498, "num_input_tokens_seen": 117298896, "step": 54300 }, { "epoch": 8.858890701468189, "grad_norm": 0.35693272948265076, "learning_rate": 0.0006825417725941132, "loss": 0.1395, "num_input_tokens_seen": 117310000, "step": 54305 }, { "epoch": 8.859706362153345, "grad_norm": 0.2755097448825836, "learning_rate": 0.0006824755041262361, "loss": 0.0562, "num_input_tokens_seen": 117319088, "step": 54310 }, { "epoch": 8.860522022838499, "grad_norm": 0.024103153496980667, "learning_rate": 0.0006824092319602614, "loss": 0.1487, "num_input_tokens_seen": 117329552, "step": 54315 }, { "epoch": 8.861337683523654, "grad_norm": 0.04700141400098801, "learning_rate": 0.0006823429560975323, "loss": 0.066, "num_input_tokens_seen": 117340336, "step": 54320 }, { "epoch": 8.86215334420881, "grad_norm": 0.06457889080047607, "learning_rate": 0.0006822766765393919, "loss": 0.0271, "num_input_tokens_seen": 117352272, "step": 54325 }, { "epoch": 8.862969004893964, "grad_norm": 0.10260617733001709, "learning_rate": 0.0006822103932871832, "loss": 0.0536, "num_input_tokens_seen": 117362192, "step": 54330 }, { "epoch": 8.86378466557912, "grad_norm": 0.05182819440960884, "learning_rate": 0.00068214410634225, "loss": 0.037, "num_input_tokens_seen": 117373328, "step": 54335 }, { "epoch": 8.864600326264274, "grad_norm": 0.4678332209587097, "learning_rate": 0.0006820778157059353, "loss": 0.2525, "num_input_tokens_seen": 117383824, "step": 54340 }, { "epoch": 8.86541598694943, "grad_norm": 0.006274137180298567, "learning_rate": 0.0006820115213795827, "loss": 0.0557, "num_input_tokens_seen": 117395856, "step": 54345 }, { "epoch": 8.866231647634583, "grad_norm": 0.025420423597097397, "learning_rate": 0.0006819452233645357, "loss": 0.0221, "num_input_tokens_seen": 117406768, "step": 54350 }, { "epoch": 8.867047308319739, "grad_norm": 0.025930307805538177, "learning_rate": 0.0006818789216621379, "loss": 0.0554, "num_input_tokens_seen": 117418224, "step": 54355 }, { "epoch": 8.867862969004895, "grad_norm": 0.17869673669338226, "learning_rate": 0.0006818126162737332, "loss": 0.0779, "num_input_tokens_seen": 117429776, "step": 54360 }, { "epoch": 8.868678629690049, "grad_norm": 0.003503937041386962, "learning_rate": 0.000681746307200665, "loss": 0.0338, "num_input_tokens_seen": 117441296, "step": 54365 }, { "epoch": 8.869494290375204, "grad_norm": 0.18694375455379486, "learning_rate": 0.0006816799944442774, "loss": 0.1056, "num_input_tokens_seen": 117452880, "step": 54370 }, { "epoch": 8.870309951060358, "grad_norm": 0.08532639592885971, "learning_rate": 0.0006816136780059142, "loss": 0.145, "num_input_tokens_seen": 117464272, "step": 54375 }, { "epoch": 8.871125611745514, "grad_norm": 0.09267734736204147, "learning_rate": 0.0006815473578869194, "loss": 0.0598, "num_input_tokens_seen": 117476272, "step": 54380 }, { "epoch": 8.87194127243067, "grad_norm": 0.03818744048476219, "learning_rate": 0.0006814810340886372, "loss": 0.0146, "num_input_tokens_seen": 117487504, "step": 54385 }, { "epoch": 8.872756933115824, "grad_norm": 0.01406745333224535, "learning_rate": 0.0006814147066124116, "loss": 0.0289, "num_input_tokens_seen": 117497808, "step": 54390 }, { "epoch": 8.87357259380098, "grad_norm": 0.03349088132381439, "learning_rate": 0.0006813483754595867, "loss": 0.0173, "num_input_tokens_seen": 117510480, "step": 54395 }, { "epoch": 8.874388254486133, "grad_norm": 0.17432676255702972, "learning_rate": 0.000681282040631507, "loss": 0.0528, "num_input_tokens_seen": 117521872, "step": 54400 }, { "epoch": 8.875203915171289, "grad_norm": 0.07740618288516998, "learning_rate": 0.0006812157021295167, "loss": 0.2157, "num_input_tokens_seen": 117531760, "step": 54405 }, { "epoch": 8.876019575856443, "grad_norm": 0.20691031217575073, "learning_rate": 0.0006811493599549603, "loss": 0.0289, "num_input_tokens_seen": 117541872, "step": 54410 }, { "epoch": 8.876835236541599, "grad_norm": 0.12131853401660919, "learning_rate": 0.0006810830141091825, "loss": 0.1375, "num_input_tokens_seen": 117552336, "step": 54415 }, { "epoch": 8.877650897226754, "grad_norm": 0.36007246375083923, "learning_rate": 0.0006810166645935276, "loss": 0.1905, "num_input_tokens_seen": 117562704, "step": 54420 }, { "epoch": 8.878466557911908, "grad_norm": 0.028180794790387154, "learning_rate": 0.0006809503114093403, "loss": 0.0518, "num_input_tokens_seen": 117574032, "step": 54425 }, { "epoch": 8.879282218597064, "grad_norm": 0.009340302087366581, "learning_rate": 0.0006808839545579655, "loss": 0.0174, "num_input_tokens_seen": 117583984, "step": 54430 }, { "epoch": 8.880097879282218, "grad_norm": 0.04891321435570717, "learning_rate": 0.0006808175940407477, "loss": 0.0297, "num_input_tokens_seen": 117593200, "step": 54435 }, { "epoch": 8.880913539967374, "grad_norm": 0.3209296762943268, "learning_rate": 0.0006807512298590321, "loss": 0.0407, "num_input_tokens_seen": 117604688, "step": 54440 }, { "epoch": 8.88172920065253, "grad_norm": 0.3639463186264038, "learning_rate": 0.0006806848620141636, "loss": 0.0542, "num_input_tokens_seen": 117614960, "step": 54445 }, { "epoch": 8.882544861337683, "grad_norm": 0.02138776332139969, "learning_rate": 0.0006806184905074871, "loss": 0.0273, "num_input_tokens_seen": 117625616, "step": 54450 }, { "epoch": 8.883360522022839, "grad_norm": 0.9454212188720703, "learning_rate": 0.0006805521153403476, "loss": 0.1509, "num_input_tokens_seen": 117635216, "step": 54455 }, { "epoch": 8.884176182707993, "grad_norm": 0.02996288612484932, "learning_rate": 0.0006804857365140906, "loss": 0.0542, "num_input_tokens_seen": 117645008, "step": 54460 }, { "epoch": 8.884991843393149, "grad_norm": 0.016237854957580566, "learning_rate": 0.0006804193540300612, "loss": 0.1137, "num_input_tokens_seen": 117654992, "step": 54465 }, { "epoch": 8.885807504078304, "grad_norm": 0.13553477823734283, "learning_rate": 0.0006803529678896047, "loss": 0.0965, "num_input_tokens_seen": 117665520, "step": 54470 }, { "epoch": 8.886623164763458, "grad_norm": 0.21454428136348724, "learning_rate": 0.0006802865780940663, "loss": 0.1178, "num_input_tokens_seen": 117676240, "step": 54475 }, { "epoch": 8.887438825448614, "grad_norm": 0.005688629578799009, "learning_rate": 0.000680220184644792, "loss": 0.0257, "num_input_tokens_seen": 117686288, "step": 54480 }, { "epoch": 8.888254486133768, "grad_norm": 0.004805149510502815, "learning_rate": 0.0006801537875431269, "loss": 0.0261, "num_input_tokens_seen": 117696528, "step": 54485 }, { "epoch": 8.889070146818923, "grad_norm": 0.003945660311728716, "learning_rate": 0.0006800873867904167, "loss": 0.0208, "num_input_tokens_seen": 117707856, "step": 54490 }, { "epoch": 8.88988580750408, "grad_norm": 0.011920423246920109, "learning_rate": 0.0006800209823880072, "loss": 0.0189, "num_input_tokens_seen": 117719024, "step": 54495 }, { "epoch": 8.890701468189233, "grad_norm": 0.0690336748957634, "learning_rate": 0.0006799545743372442, "loss": 0.1799, "num_input_tokens_seen": 117728528, "step": 54500 }, { "epoch": 8.891517128874389, "grad_norm": 0.021867262199521065, "learning_rate": 0.0006798881626394734, "loss": 0.0189, "num_input_tokens_seen": 117738704, "step": 54505 }, { "epoch": 8.892332789559543, "grad_norm": 0.005222649779170752, "learning_rate": 0.0006798217472960407, "loss": 0.0125, "num_input_tokens_seen": 117750448, "step": 54510 }, { "epoch": 8.893148450244698, "grad_norm": 0.26809030771255493, "learning_rate": 0.0006797553283082922, "loss": 0.0942, "num_input_tokens_seen": 117760848, "step": 54515 }, { "epoch": 8.893964110929852, "grad_norm": 0.004433480557054281, "learning_rate": 0.000679688905677574, "loss": 0.0352, "num_input_tokens_seen": 117770960, "step": 54520 }, { "epoch": 8.894779771615008, "grad_norm": 0.006452389992773533, "learning_rate": 0.0006796224794052322, "loss": 0.0854, "num_input_tokens_seen": 117781744, "step": 54525 }, { "epoch": 8.895595432300164, "grad_norm": 0.13232830166816711, "learning_rate": 0.0006795560494926129, "loss": 0.1114, "num_input_tokens_seen": 117792368, "step": 54530 }, { "epoch": 8.896411092985318, "grad_norm": 0.0037049497477710247, "learning_rate": 0.0006794896159410625, "loss": 0.0158, "num_input_tokens_seen": 117802800, "step": 54535 }, { "epoch": 8.897226753670473, "grad_norm": 0.005620979238301516, "learning_rate": 0.0006794231787519274, "loss": 0.124, "num_input_tokens_seen": 117812624, "step": 54540 }, { "epoch": 8.898042414355627, "grad_norm": 0.004999668337404728, "learning_rate": 0.000679356737926554, "loss": 0.0204, "num_input_tokens_seen": 117823984, "step": 54545 }, { "epoch": 8.898858075040783, "grad_norm": 0.044155821204185486, "learning_rate": 0.0006792902934662885, "loss": 0.048, "num_input_tokens_seen": 117834448, "step": 54550 }, { "epoch": 8.899673735725939, "grad_norm": 0.031155651435256004, "learning_rate": 0.000679223845372478, "loss": 0.0429, "num_input_tokens_seen": 117845104, "step": 54555 }, { "epoch": 8.900489396411093, "grad_norm": 0.1095210388302803, "learning_rate": 0.0006791573936464689, "loss": 0.3007, "num_input_tokens_seen": 117854992, "step": 54560 }, { "epoch": 8.901305057096248, "grad_norm": 0.1805056929588318, "learning_rate": 0.0006790909382896079, "loss": 0.0367, "num_input_tokens_seen": 117864592, "step": 54565 }, { "epoch": 8.902120717781402, "grad_norm": 0.012045920826494694, "learning_rate": 0.0006790244793032418, "loss": 0.0524, "num_input_tokens_seen": 117875472, "step": 54570 }, { "epoch": 8.902936378466558, "grad_norm": 0.3896695375442505, "learning_rate": 0.0006789580166887176, "loss": 0.1069, "num_input_tokens_seen": 117886416, "step": 54575 }, { "epoch": 8.903752039151712, "grad_norm": 0.13602808117866516, "learning_rate": 0.0006788915504473822, "loss": 0.0464, "num_input_tokens_seen": 117897872, "step": 54580 }, { "epoch": 8.904567699836868, "grad_norm": 0.13074855506420135, "learning_rate": 0.0006788250805805824, "loss": 0.0688, "num_input_tokens_seen": 117908112, "step": 54585 }, { "epoch": 8.905383360522023, "grad_norm": 0.02051844261586666, "learning_rate": 0.0006787586070896657, "loss": 0.0161, "num_input_tokens_seen": 117919248, "step": 54590 }, { "epoch": 8.906199021207177, "grad_norm": 0.020327605307102203, "learning_rate": 0.0006786921299759789, "loss": 0.0123, "num_input_tokens_seen": 117928624, "step": 54595 }, { "epoch": 8.907014681892333, "grad_norm": 0.06539393216371536, "learning_rate": 0.0006786256492408694, "loss": 0.0319, "num_input_tokens_seen": 117940624, "step": 54600 }, { "epoch": 8.907830342577487, "grad_norm": 0.0312392208725214, "learning_rate": 0.0006785591648856846, "loss": 0.0969, "num_input_tokens_seen": 117952304, "step": 54605 }, { "epoch": 8.908646003262643, "grad_norm": 0.010261873714625835, "learning_rate": 0.0006784926769117717, "loss": 0.0455, "num_input_tokens_seen": 117964048, "step": 54610 }, { "epoch": 8.909461663947798, "grad_norm": 0.04237981513142586, "learning_rate": 0.0006784261853204783, "loss": 0.0133, "num_input_tokens_seen": 117975472, "step": 54615 }, { "epoch": 8.910277324632952, "grad_norm": 0.43529877066612244, "learning_rate": 0.0006783596901131521, "loss": 0.2285, "num_input_tokens_seen": 117986192, "step": 54620 }, { "epoch": 8.911092985318108, "grad_norm": 0.8742546439170837, "learning_rate": 0.0006782931912911402, "loss": 0.1212, "num_input_tokens_seen": 117997200, "step": 54625 }, { "epoch": 8.911908646003262, "grad_norm": 0.07040494680404663, "learning_rate": 0.0006782266888557909, "loss": 0.0251, "num_input_tokens_seen": 118007952, "step": 54630 }, { "epoch": 8.912724306688418, "grad_norm": 0.02308201789855957, "learning_rate": 0.0006781601828084513, "loss": 0.0147, "num_input_tokens_seen": 118019824, "step": 54635 }, { "epoch": 8.913539967373573, "grad_norm": 0.05983942374587059, "learning_rate": 0.0006780936731504699, "loss": 0.1136, "num_input_tokens_seen": 118030160, "step": 54640 }, { "epoch": 8.914355628058727, "grad_norm": 0.02567743882536888, "learning_rate": 0.0006780271598831942, "loss": 0.0497, "num_input_tokens_seen": 118040784, "step": 54645 }, { "epoch": 8.915171288743883, "grad_norm": 0.09421033412218094, "learning_rate": 0.0006779606430079723, "loss": 0.0594, "num_input_tokens_seen": 118050928, "step": 54650 }, { "epoch": 8.915986949429037, "grad_norm": 0.1930844634771347, "learning_rate": 0.0006778941225261522, "loss": 0.0538, "num_input_tokens_seen": 118062096, "step": 54655 }, { "epoch": 8.916802610114193, "grad_norm": 0.006250257138162851, "learning_rate": 0.0006778275984390819, "loss": 0.0855, "num_input_tokens_seen": 118073968, "step": 54660 }, { "epoch": 8.917618270799348, "grad_norm": 0.032026421278715134, "learning_rate": 0.0006777610707481099, "loss": 0.0834, "num_input_tokens_seen": 118084912, "step": 54665 }, { "epoch": 8.918433931484502, "grad_norm": 0.29081985354423523, "learning_rate": 0.0006776945394545841, "loss": 0.1938, "num_input_tokens_seen": 118095056, "step": 54670 }, { "epoch": 8.919249592169658, "grad_norm": 0.2211325615644455, "learning_rate": 0.0006776280045598533, "loss": 0.0852, "num_input_tokens_seen": 118106352, "step": 54675 }, { "epoch": 8.920065252854812, "grad_norm": 0.21044909954071045, "learning_rate": 0.0006775614660652655, "loss": 0.0505, "num_input_tokens_seen": 118118576, "step": 54680 }, { "epoch": 8.920880913539968, "grad_norm": 0.26746198534965515, "learning_rate": 0.0006774949239721692, "loss": 0.1183, "num_input_tokens_seen": 118130320, "step": 54685 }, { "epoch": 8.921696574225122, "grad_norm": 0.005159418564289808, "learning_rate": 0.0006774283782819133, "loss": 0.0218, "num_input_tokens_seen": 118141232, "step": 54690 }, { "epoch": 8.922512234910277, "grad_norm": 0.00886352825909853, "learning_rate": 0.0006773618289958462, "loss": 0.0372, "num_input_tokens_seen": 118152304, "step": 54695 }, { "epoch": 8.923327895595433, "grad_norm": 0.04698998108506203, "learning_rate": 0.0006772952761153167, "loss": 0.0688, "num_input_tokens_seen": 118162960, "step": 54700 }, { "epoch": 8.924143556280587, "grad_norm": 0.026449838653206825, "learning_rate": 0.0006772287196416733, "loss": 0.1556, "num_input_tokens_seen": 118173840, "step": 54705 }, { "epoch": 8.924959216965743, "grad_norm": 0.09392181783914566, "learning_rate": 0.0006771621595762652, "loss": 0.1458, "num_input_tokens_seen": 118184272, "step": 54710 }, { "epoch": 8.925774877650896, "grad_norm": 0.011575781740248203, "learning_rate": 0.0006770955959204412, "loss": 0.1433, "num_input_tokens_seen": 118195408, "step": 54715 }, { "epoch": 8.926590538336052, "grad_norm": 0.0434119813144207, "learning_rate": 0.0006770290286755503, "loss": 0.0752, "num_input_tokens_seen": 118204784, "step": 54720 }, { "epoch": 8.927406199021208, "grad_norm": 0.1033087745308876, "learning_rate": 0.0006769624578429414, "loss": 0.0674, "num_input_tokens_seen": 118214448, "step": 54725 }, { "epoch": 8.928221859706362, "grad_norm": 0.01915070414543152, "learning_rate": 0.0006768958834239639, "loss": 0.0969, "num_input_tokens_seen": 118223952, "step": 54730 }, { "epoch": 8.929037520391518, "grad_norm": 0.05163106694817543, "learning_rate": 0.0006768293054199669, "loss": 0.0264, "num_input_tokens_seen": 118235856, "step": 54735 }, { "epoch": 8.929853181076671, "grad_norm": 0.0274738110601902, "learning_rate": 0.0006767627238322998, "loss": 0.0579, "num_input_tokens_seen": 118247536, "step": 54740 }, { "epoch": 8.930668841761827, "grad_norm": 0.03914555907249451, "learning_rate": 0.0006766961386623118, "loss": 0.0684, "num_input_tokens_seen": 118258512, "step": 54745 }, { "epoch": 8.931484502446983, "grad_norm": 0.009585415944457054, "learning_rate": 0.0006766295499113524, "loss": 0.0216, "num_input_tokens_seen": 118268144, "step": 54750 }, { "epoch": 8.932300163132137, "grad_norm": 0.05026097968220711, "learning_rate": 0.000676562957580771, "loss": 0.0148, "num_input_tokens_seen": 118279216, "step": 54755 }, { "epoch": 8.933115823817293, "grad_norm": 0.08081215620040894, "learning_rate": 0.0006764963616719174, "loss": 0.0796, "num_input_tokens_seen": 118291312, "step": 54760 }, { "epoch": 8.933931484502446, "grad_norm": 0.08607519418001175, "learning_rate": 0.000676429762186141, "loss": 0.016, "num_input_tokens_seen": 118302832, "step": 54765 }, { "epoch": 8.934747145187602, "grad_norm": 0.5207202434539795, "learning_rate": 0.0006763631591247917, "loss": 0.1118, "num_input_tokens_seen": 118314224, "step": 54770 }, { "epoch": 8.935562805872756, "grad_norm": 0.007003180216997862, "learning_rate": 0.0006762965524892194, "loss": 0.0346, "num_input_tokens_seen": 118325552, "step": 54775 }, { "epoch": 8.936378466557912, "grad_norm": 0.030011018738150597, "learning_rate": 0.0006762299422807737, "loss": 0.0151, "num_input_tokens_seen": 118335376, "step": 54780 }, { "epoch": 8.937194127243067, "grad_norm": 0.006405920721590519, "learning_rate": 0.0006761633285008046, "loss": 0.0123, "num_input_tokens_seen": 118346640, "step": 54785 }, { "epoch": 8.938009787928221, "grad_norm": 0.49705770611763, "learning_rate": 0.0006760967111506623, "loss": 0.0813, "num_input_tokens_seen": 118356464, "step": 54790 }, { "epoch": 8.938825448613377, "grad_norm": 0.3860141932964325, "learning_rate": 0.0006760300902316967, "loss": 0.0731, "num_input_tokens_seen": 118365744, "step": 54795 }, { "epoch": 8.939641109298531, "grad_norm": 0.16810378432273865, "learning_rate": 0.000675963465745258, "loss": 0.0829, "num_input_tokens_seen": 118376240, "step": 54800 }, { "epoch": 8.940456769983687, "grad_norm": 0.009920637123286724, "learning_rate": 0.0006758968376926965, "loss": 0.0898, "num_input_tokens_seen": 118387440, "step": 54805 }, { "epoch": 8.941272430668842, "grad_norm": 0.024205774068832397, "learning_rate": 0.0006758302060753624, "loss": 0.0147, "num_input_tokens_seen": 118398576, "step": 54810 }, { "epoch": 8.942088091353996, "grad_norm": 0.006671852432191372, "learning_rate": 0.000675763570894606, "loss": 0.0175, "num_input_tokens_seen": 118410384, "step": 54815 }, { "epoch": 8.942903752039152, "grad_norm": 0.015929104760289192, "learning_rate": 0.0006756969321517781, "loss": 0.0418, "num_input_tokens_seen": 118420720, "step": 54820 }, { "epoch": 8.943719412724306, "grad_norm": 0.023308448493480682, "learning_rate": 0.0006756302898482288, "loss": 0.014, "num_input_tokens_seen": 118432688, "step": 54825 }, { "epoch": 8.944535073409462, "grad_norm": 0.016953030601143837, "learning_rate": 0.0006755636439853089, "loss": 0.0235, "num_input_tokens_seen": 118444144, "step": 54830 }, { "epoch": 8.945350734094617, "grad_norm": 0.1932719647884369, "learning_rate": 0.0006754969945643689, "loss": 0.1507, "num_input_tokens_seen": 118455184, "step": 54835 }, { "epoch": 8.946166394779771, "grad_norm": 0.028529340401291847, "learning_rate": 0.0006754303415867599, "loss": 0.0774, "num_input_tokens_seen": 118465936, "step": 54840 }, { "epoch": 8.946982055464927, "grad_norm": 0.041322022676467896, "learning_rate": 0.0006753636850538325, "loss": 0.0844, "num_input_tokens_seen": 118477392, "step": 54845 }, { "epoch": 8.947797716150081, "grad_norm": 0.04866543412208557, "learning_rate": 0.0006752970249669374, "loss": 0.0183, "num_input_tokens_seen": 118488816, "step": 54850 }, { "epoch": 8.948613376835237, "grad_norm": 0.028561508283019066, "learning_rate": 0.0006752303613274257, "loss": 0.2118, "num_input_tokens_seen": 118500848, "step": 54855 }, { "epoch": 8.949429037520392, "grad_norm": 0.5350882411003113, "learning_rate": 0.0006751636941366486, "loss": 0.0707, "num_input_tokens_seen": 118511376, "step": 54860 }, { "epoch": 8.950244698205546, "grad_norm": 0.08346492052078247, "learning_rate": 0.000675097023395957, "loss": 0.0234, "num_input_tokens_seen": 118521776, "step": 54865 }, { "epoch": 8.951060358890702, "grad_norm": 0.10249694436788559, "learning_rate": 0.0006750303491067021, "loss": 0.0456, "num_input_tokens_seen": 118532400, "step": 54870 }, { "epoch": 8.951876019575856, "grad_norm": 0.07672520726919174, "learning_rate": 0.0006749636712702349, "loss": 0.1437, "num_input_tokens_seen": 118541424, "step": 54875 }, { "epoch": 8.952691680261012, "grad_norm": 0.3835560381412506, "learning_rate": 0.0006748969898879071, "loss": 0.0844, "num_input_tokens_seen": 118551856, "step": 54880 }, { "epoch": 8.953507340946166, "grad_norm": 0.003387624863535166, "learning_rate": 0.00067483030496107, "loss": 0.0761, "num_input_tokens_seen": 118562192, "step": 54885 }, { "epoch": 8.954323001631321, "grad_norm": 0.007898389361798763, "learning_rate": 0.000674763616491075, "loss": 0.1076, "num_input_tokens_seen": 118572624, "step": 54890 }, { "epoch": 8.955138662316477, "grad_norm": 0.019596073776483536, "learning_rate": 0.0006746969244792734, "loss": 0.0177, "num_input_tokens_seen": 118584048, "step": 54895 }, { "epoch": 8.955954323001631, "grad_norm": 0.005080987699329853, "learning_rate": 0.0006746302289270172, "loss": 0.0103, "num_input_tokens_seen": 118595696, "step": 54900 }, { "epoch": 8.956769983686787, "grad_norm": 0.012511802837252617, "learning_rate": 0.0006745635298356579, "loss": 0.079, "num_input_tokens_seen": 118605392, "step": 54905 }, { "epoch": 8.95758564437194, "grad_norm": 0.11990555375814438, "learning_rate": 0.0006744968272065469, "loss": 0.0612, "num_input_tokens_seen": 118615024, "step": 54910 }, { "epoch": 8.958401305057096, "grad_norm": 0.019289560616016388, "learning_rate": 0.0006744301210410366, "loss": 0.0455, "num_input_tokens_seen": 118625968, "step": 54915 }, { "epoch": 8.959216965742252, "grad_norm": 0.0528937429189682, "learning_rate": 0.0006743634113404786, "loss": 0.0236, "num_input_tokens_seen": 118635824, "step": 54920 }, { "epoch": 8.960032626427406, "grad_norm": 0.04663698002696037, "learning_rate": 0.0006742966981062249, "loss": 0.0265, "num_input_tokens_seen": 118644624, "step": 54925 }, { "epoch": 8.960848287112562, "grad_norm": 0.0013387626968324184, "learning_rate": 0.0006742299813396274, "loss": 0.0941, "num_input_tokens_seen": 118653168, "step": 54930 }, { "epoch": 8.961663947797716, "grad_norm": 0.02348313294351101, "learning_rate": 0.0006741632610420384, "loss": 0.0951, "num_input_tokens_seen": 118663856, "step": 54935 }, { "epoch": 8.962479608482871, "grad_norm": 0.19825030863285065, "learning_rate": 0.0006740965372148098, "loss": 0.0429, "num_input_tokens_seen": 118674480, "step": 54940 }, { "epoch": 8.963295269168025, "grad_norm": 0.36576709151268005, "learning_rate": 0.0006740298098592941, "loss": 0.0643, "num_input_tokens_seen": 118684848, "step": 54945 }, { "epoch": 8.964110929853181, "grad_norm": 0.25725460052490234, "learning_rate": 0.0006739630789768436, "loss": 0.081, "num_input_tokens_seen": 118696400, "step": 54950 }, { "epoch": 8.964926590538337, "grad_norm": 0.06459327787160873, "learning_rate": 0.0006738963445688107, "loss": 0.0887, "num_input_tokens_seen": 118707120, "step": 54955 }, { "epoch": 8.96574225122349, "grad_norm": 0.043718330562114716, "learning_rate": 0.0006738296066365476, "loss": 0.0155, "num_input_tokens_seen": 118717200, "step": 54960 }, { "epoch": 8.966557911908646, "grad_norm": 0.09502162039279938, "learning_rate": 0.000673762865181407, "loss": 0.2014, "num_input_tokens_seen": 118728336, "step": 54965 }, { "epoch": 8.9673735725938, "grad_norm": 0.094657763838768, "learning_rate": 0.0006736961202047417, "loss": 0.0306, "num_input_tokens_seen": 118738928, "step": 54970 }, { "epoch": 8.968189233278956, "grad_norm": 0.017213786020874977, "learning_rate": 0.0006736293717079041, "loss": 0.0587, "num_input_tokens_seen": 118750096, "step": 54975 }, { "epoch": 8.969004893964112, "grad_norm": 0.019299479201436043, "learning_rate": 0.0006735626196922469, "loss": 0.0334, "num_input_tokens_seen": 118760176, "step": 54980 }, { "epoch": 8.969820554649266, "grad_norm": 0.21472111344337463, "learning_rate": 0.0006734958641591231, "loss": 0.1958, "num_input_tokens_seen": 118770928, "step": 54985 }, { "epoch": 8.970636215334421, "grad_norm": 0.022455843165516853, "learning_rate": 0.0006734291051098856, "loss": 0.0815, "num_input_tokens_seen": 118782576, "step": 54990 }, { "epoch": 8.971451876019575, "grad_norm": 0.003710412187501788, "learning_rate": 0.0006733623425458871, "loss": 0.0148, "num_input_tokens_seen": 118794128, "step": 54995 }, { "epoch": 8.97226753670473, "grad_norm": 0.005927415564656258, "learning_rate": 0.000673295576468481, "loss": 0.0632, "num_input_tokens_seen": 118804048, "step": 55000 }, { "epoch": 8.973083197389887, "grad_norm": 0.19900697469711304, "learning_rate": 0.00067322880687902, "loss": 0.0425, "num_input_tokens_seen": 118814800, "step": 55005 }, { "epoch": 8.97389885807504, "grad_norm": 0.2374364286661148, "learning_rate": 0.0006731620337788576, "loss": 0.0805, "num_input_tokens_seen": 118825616, "step": 55010 }, { "epoch": 8.974714518760196, "grad_norm": 0.05746473744511604, "learning_rate": 0.0006730952571693469, "loss": 0.0391, "num_input_tokens_seen": 118836208, "step": 55015 }, { "epoch": 8.97553017944535, "grad_norm": 0.020573321729898453, "learning_rate": 0.0006730284770518412, "loss": 0.0822, "num_input_tokens_seen": 118847344, "step": 55020 }, { "epoch": 8.976345840130506, "grad_norm": 0.009821916930377483, "learning_rate": 0.0006729616934276939, "loss": 0.0074, "num_input_tokens_seen": 118857840, "step": 55025 }, { "epoch": 8.977161500815662, "grad_norm": 0.005163253750652075, "learning_rate": 0.0006728949062982585, "loss": 0.1417, "num_input_tokens_seen": 118867984, "step": 55030 }, { "epoch": 8.977977161500815, "grad_norm": 0.0437258705496788, "learning_rate": 0.0006728281156648885, "loss": 0.0142, "num_input_tokens_seen": 118879696, "step": 55035 }, { "epoch": 8.978792822185971, "grad_norm": 0.02063642628490925, "learning_rate": 0.0006727613215289374, "loss": 0.0074, "num_input_tokens_seen": 118891088, "step": 55040 }, { "epoch": 8.979608482871125, "grad_norm": 0.00389106129296124, "learning_rate": 0.0006726945238917589, "loss": 0.0202, "num_input_tokens_seen": 118902672, "step": 55045 }, { "epoch": 8.98042414355628, "grad_norm": 0.3619442880153656, "learning_rate": 0.000672627722754707, "loss": 0.2283, "num_input_tokens_seen": 118913200, "step": 55050 }, { "epoch": 8.981239804241435, "grad_norm": 0.014173166826367378, "learning_rate": 0.0006725609181191352, "loss": 0.0756, "num_input_tokens_seen": 118924816, "step": 55055 }, { "epoch": 8.98205546492659, "grad_norm": 0.017785949632525444, "learning_rate": 0.0006724941099863975, "loss": 0.0982, "num_input_tokens_seen": 118934896, "step": 55060 }, { "epoch": 8.982871125611746, "grad_norm": 0.01947389915585518, "learning_rate": 0.0006724272983578478, "loss": 0.0191, "num_input_tokens_seen": 118943728, "step": 55065 }, { "epoch": 8.9836867862969, "grad_norm": 0.08733158558607101, "learning_rate": 0.0006723604832348403, "loss": 0.1048, "num_input_tokens_seen": 118955024, "step": 55070 }, { "epoch": 8.984502446982056, "grad_norm": 0.038850247859954834, "learning_rate": 0.0006722936646187288, "loss": 0.0178, "num_input_tokens_seen": 118966320, "step": 55075 }, { "epoch": 8.98531810766721, "grad_norm": 0.018704602494835854, "learning_rate": 0.0006722268425108675, "loss": 0.1408, "num_input_tokens_seen": 118976656, "step": 55080 }, { "epoch": 8.986133768352365, "grad_norm": 0.15029045939445496, "learning_rate": 0.000672160016912611, "loss": 0.0266, "num_input_tokens_seen": 118987472, "step": 55085 }, { "epoch": 8.986949429037521, "grad_norm": 0.4871756434440613, "learning_rate": 0.0006720931878253133, "loss": 0.0172, "num_input_tokens_seen": 118997904, "step": 55090 }, { "epoch": 8.987765089722675, "grad_norm": 0.00840507447719574, "learning_rate": 0.0006720263552503288, "loss": 0.0407, "num_input_tokens_seen": 119008080, "step": 55095 }, { "epoch": 8.98858075040783, "grad_norm": 0.2138519436120987, "learning_rate": 0.000671959519189012, "loss": 0.0609, "num_input_tokens_seen": 119018064, "step": 55100 }, { "epoch": 8.989396411092985, "grad_norm": 0.04366368055343628, "learning_rate": 0.0006718926796427174, "loss": 0.1852, "num_input_tokens_seen": 119027472, "step": 55105 }, { "epoch": 8.99021207177814, "grad_norm": 0.26440298557281494, "learning_rate": 0.0006718258366127995, "loss": 0.1308, "num_input_tokens_seen": 119038160, "step": 55110 }, { "epoch": 8.991027732463294, "grad_norm": 0.026875967159867287, "learning_rate": 0.0006717589901006131, "loss": 0.0477, "num_input_tokens_seen": 119048464, "step": 55115 }, { "epoch": 8.99184339314845, "grad_norm": 0.2834641933441162, "learning_rate": 0.0006716921401075129, "loss": 0.2825, "num_input_tokens_seen": 119059056, "step": 55120 }, { "epoch": 8.992659053833606, "grad_norm": 0.2926873564720154, "learning_rate": 0.0006716252866348537, "loss": 0.0781, "num_input_tokens_seen": 119069648, "step": 55125 }, { "epoch": 8.99347471451876, "grad_norm": 0.2594529986381531, "learning_rate": 0.0006715584296839903, "loss": 0.1189, "num_input_tokens_seen": 119079920, "step": 55130 }, { "epoch": 8.994290375203915, "grad_norm": 0.018395302817225456, "learning_rate": 0.0006714915692562777, "loss": 0.0279, "num_input_tokens_seen": 119090896, "step": 55135 }, { "epoch": 8.99510603588907, "grad_norm": 0.043946582823991776, "learning_rate": 0.0006714247053530709, "loss": 0.0472, "num_input_tokens_seen": 119101360, "step": 55140 }, { "epoch": 8.995921696574225, "grad_norm": 0.04027822986245155, "learning_rate": 0.0006713578379757251, "loss": 0.068, "num_input_tokens_seen": 119112336, "step": 55145 }, { "epoch": 8.99673735725938, "grad_norm": 0.14224760234355927, "learning_rate": 0.0006712909671255952, "loss": 0.0758, "num_input_tokens_seen": 119121712, "step": 55150 }, { "epoch": 8.997553017944535, "grad_norm": 0.085087850689888, "learning_rate": 0.0006712240928040363, "loss": 0.0845, "num_input_tokens_seen": 119133424, "step": 55155 }, { "epoch": 8.99836867862969, "grad_norm": 0.0085781030356884, "learning_rate": 0.0006711572150124043, "loss": 0.0217, "num_input_tokens_seen": 119145360, "step": 55160 }, { "epoch": 8.999184339314844, "grad_norm": 0.0073289647698402405, "learning_rate": 0.0006710903337520539, "loss": 0.0478, "num_input_tokens_seen": 119155568, "step": 55165 }, { "epoch": 9.0, "grad_norm": 0.013840760104358196, "learning_rate": 0.0006710234490243412, "loss": 0.2284, "num_input_tokens_seen": 119164864, "step": 55170 }, { "epoch": 9.0, "eval_loss": 0.1312951296567917, "eval_runtime": 103.641, "eval_samples_per_second": 26.293, "eval_steps_per_second": 6.58, "num_input_tokens_seen": 119164864, "step": 55170 }, { "epoch": 9.000815660685156, "grad_norm": 0.012224181555211544, "learning_rate": 0.0006709565608306212, "loss": 0.0546, "num_input_tokens_seen": 119176256, "step": 55175 }, { "epoch": 9.00163132137031, "grad_norm": 0.18823717534542084, "learning_rate": 0.0006708896691722495, "loss": 0.0402, "num_input_tokens_seen": 119187200, "step": 55180 }, { "epoch": 9.002446982055465, "grad_norm": 0.006239529699087143, "learning_rate": 0.0006708227740505822, "loss": 0.0164, "num_input_tokens_seen": 119199072, "step": 55185 }, { "epoch": 9.00326264274062, "grad_norm": 0.0993809700012207, "learning_rate": 0.0006707558754669744, "loss": 0.0127, "num_input_tokens_seen": 119210016, "step": 55190 }, { "epoch": 9.004078303425775, "grad_norm": 0.16244710981845856, "learning_rate": 0.0006706889734227823, "loss": 0.0424, "num_input_tokens_seen": 119221376, "step": 55195 }, { "epoch": 9.00489396411093, "grad_norm": 0.14778240025043488, "learning_rate": 0.0006706220679193614, "loss": 0.0585, "num_input_tokens_seen": 119232672, "step": 55200 }, { "epoch": 9.005709624796085, "grad_norm": 0.04779522493481636, "learning_rate": 0.000670555158958068, "loss": 0.2511, "num_input_tokens_seen": 119243072, "step": 55205 }, { "epoch": 9.00652528548124, "grad_norm": 0.05503462255001068, "learning_rate": 0.0006704882465402579, "loss": 0.0213, "num_input_tokens_seen": 119254016, "step": 55210 }, { "epoch": 9.007340946166394, "grad_norm": 0.02401323802769184, "learning_rate": 0.0006704213306672873, "loss": 0.0482, "num_input_tokens_seen": 119264448, "step": 55215 }, { "epoch": 9.00815660685155, "grad_norm": 0.11283843219280243, "learning_rate": 0.0006703544113405122, "loss": 0.0795, "num_input_tokens_seen": 119275424, "step": 55220 }, { "epoch": 9.008972267536704, "grad_norm": 0.09734462946653366, "learning_rate": 0.0006702874885612887, "loss": 0.071, "num_input_tokens_seen": 119287392, "step": 55225 }, { "epoch": 9.00978792822186, "grad_norm": 0.24100083112716675, "learning_rate": 0.0006702205623309734, "loss": 0.102, "num_input_tokens_seen": 119297472, "step": 55230 }, { "epoch": 9.010603588907015, "grad_norm": 0.011355135589838028, "learning_rate": 0.0006701536326509224, "loss": 0.0167, "num_input_tokens_seen": 119308416, "step": 55235 }, { "epoch": 9.01141924959217, "grad_norm": 0.004279524087905884, "learning_rate": 0.0006700866995224921, "loss": 0.1309, "num_input_tokens_seen": 119318592, "step": 55240 }, { "epoch": 9.012234910277325, "grad_norm": 0.1352081000804901, "learning_rate": 0.0006700197629470393, "loss": 0.0251, "num_input_tokens_seen": 119329696, "step": 55245 }, { "epoch": 9.013050570962479, "grad_norm": 0.013182039372622967, "learning_rate": 0.00066995282292592, "loss": 0.0778, "num_input_tokens_seen": 119339936, "step": 55250 }, { "epoch": 9.013866231647635, "grad_norm": 0.019128017127513885, "learning_rate": 0.0006698858794604914, "loss": 0.0305, "num_input_tokens_seen": 119350752, "step": 55255 }, { "epoch": 9.01468189233279, "grad_norm": 0.10801997780799866, "learning_rate": 0.0006698189325521097, "loss": 0.1493, "num_input_tokens_seen": 119362048, "step": 55260 }, { "epoch": 9.015497553017944, "grad_norm": 0.020492171868681908, "learning_rate": 0.000669751982202132, "loss": 0.016, "num_input_tokens_seen": 119371488, "step": 55265 }, { "epoch": 9.0163132137031, "grad_norm": 0.00517929857596755, "learning_rate": 0.0006696850284119151, "loss": 0.0194, "num_input_tokens_seen": 119382336, "step": 55270 }, { "epoch": 9.017128874388254, "grad_norm": 0.10492771118879318, "learning_rate": 0.0006696180711828159, "loss": 0.1248, "num_input_tokens_seen": 119392384, "step": 55275 }, { "epoch": 9.01794453507341, "grad_norm": 0.01910402998328209, "learning_rate": 0.0006695511105161913, "loss": 0.0182, "num_input_tokens_seen": 119404640, "step": 55280 }, { "epoch": 9.018760195758565, "grad_norm": 0.01113252155482769, "learning_rate": 0.0006694841464133981, "loss": 0.0633, "num_input_tokens_seen": 119414816, "step": 55285 }, { "epoch": 9.01957585644372, "grad_norm": 0.0077075595036149025, "learning_rate": 0.0006694171788757939, "loss": 0.0078, "num_input_tokens_seen": 119425824, "step": 55290 }, { "epoch": 9.020391517128875, "grad_norm": 0.29589682817459106, "learning_rate": 0.0006693502079047356, "loss": 0.0388, "num_input_tokens_seen": 119437344, "step": 55295 }, { "epoch": 9.021207177814029, "grad_norm": 0.08724862337112427, "learning_rate": 0.0006692832335015806, "loss": 0.2134, "num_input_tokens_seen": 119448896, "step": 55300 }, { "epoch": 9.022022838499185, "grad_norm": 0.045984670519828796, "learning_rate": 0.000669216255667686, "loss": 0.0944, "num_input_tokens_seen": 119459904, "step": 55305 }, { "epoch": 9.022838499184338, "grad_norm": 0.0032309559173882008, "learning_rate": 0.0006691492744044093, "loss": 0.0247, "num_input_tokens_seen": 119470624, "step": 55310 }, { "epoch": 9.023654159869494, "grad_norm": 0.6814609169960022, "learning_rate": 0.000669082289713108, "loss": 0.0539, "num_input_tokens_seen": 119481408, "step": 55315 }, { "epoch": 9.02446982055465, "grad_norm": 0.009352785535156727, "learning_rate": 0.0006690153015951397, "loss": 0.03, "num_input_tokens_seen": 119493376, "step": 55320 }, { "epoch": 9.025285481239804, "grad_norm": 0.06866900622844696, "learning_rate": 0.0006689483100518617, "loss": 0.0134, "num_input_tokens_seen": 119505056, "step": 55325 }, { "epoch": 9.02610114192496, "grad_norm": 0.06737705320119858, "learning_rate": 0.000668881315084632, "loss": 0.0599, "num_input_tokens_seen": 119516096, "step": 55330 }, { "epoch": 9.026916802610113, "grad_norm": 0.14733898639678955, "learning_rate": 0.0006688143166948082, "loss": 0.0697, "num_input_tokens_seen": 119526944, "step": 55335 }, { "epoch": 9.02773246329527, "grad_norm": 0.031115690246224403, "learning_rate": 0.0006687473148837482, "loss": 0.0326, "num_input_tokens_seen": 119537280, "step": 55340 }, { "epoch": 9.028548123980425, "grad_norm": 0.0266889575868845, "learning_rate": 0.0006686803096528096, "loss": 0.0681, "num_input_tokens_seen": 119547744, "step": 55345 }, { "epoch": 9.029363784665579, "grad_norm": 0.0063073355704545975, "learning_rate": 0.0006686133010033507, "loss": 0.1827, "num_input_tokens_seen": 119558784, "step": 55350 }, { "epoch": 9.030179445350734, "grad_norm": 0.15066297352313995, "learning_rate": 0.0006685462889367293, "loss": 0.0953, "num_input_tokens_seen": 119569056, "step": 55355 }, { "epoch": 9.030995106035888, "grad_norm": 0.6305835843086243, "learning_rate": 0.0006684792734543036, "loss": 0.0557, "num_input_tokens_seen": 119579808, "step": 55360 }, { "epoch": 9.031810766721044, "grad_norm": 0.05144287645816803, "learning_rate": 0.0006684122545574315, "loss": 0.0321, "num_input_tokens_seen": 119590816, "step": 55365 }, { "epoch": 9.0326264274062, "grad_norm": 0.07655679434537888, "learning_rate": 0.0006683452322474715, "loss": 0.1697, "num_input_tokens_seen": 119600640, "step": 55370 }, { "epoch": 9.033442088091354, "grad_norm": 0.08831831812858582, "learning_rate": 0.0006682782065257818, "loss": 0.007, "num_input_tokens_seen": 119611328, "step": 55375 }, { "epoch": 9.03425774877651, "grad_norm": 0.04714163392782211, "learning_rate": 0.000668211177393721, "loss": 0.0365, "num_input_tokens_seen": 119622368, "step": 55380 }, { "epoch": 9.035073409461663, "grad_norm": 0.17070084810256958, "learning_rate": 0.0006681441448526471, "loss": 0.0405, "num_input_tokens_seen": 119632960, "step": 55385 }, { "epoch": 9.035889070146819, "grad_norm": 0.1857834756374359, "learning_rate": 0.0006680771089039188, "loss": 0.0273, "num_input_tokens_seen": 119644096, "step": 55390 }, { "epoch": 9.036704730831975, "grad_norm": 0.14468571543693542, "learning_rate": 0.0006680100695488946, "loss": 0.088, "num_input_tokens_seen": 119655456, "step": 55395 }, { "epoch": 9.037520391517129, "grad_norm": 0.0060724965296685696, "learning_rate": 0.0006679430267889332, "loss": 0.0111, "num_input_tokens_seen": 119666240, "step": 55400 }, { "epoch": 9.038336052202284, "grad_norm": 0.08735064417123795, "learning_rate": 0.0006678759806253933, "loss": 0.1389, "num_input_tokens_seen": 119678272, "step": 55405 }, { "epoch": 9.039151712887438, "grad_norm": 0.08246026933193207, "learning_rate": 0.0006678089310596339, "loss": 0.0397, "num_input_tokens_seen": 119688224, "step": 55410 }, { "epoch": 9.039967373572594, "grad_norm": 0.011172840371727943, "learning_rate": 0.0006677418780930136, "loss": 0.0094, "num_input_tokens_seen": 119698752, "step": 55415 }, { "epoch": 9.040783034257748, "grad_norm": 0.007427356671541929, "learning_rate": 0.0006676748217268912, "loss": 0.0619, "num_input_tokens_seen": 119709600, "step": 55420 }, { "epoch": 9.041598694942904, "grad_norm": 0.22098654508590698, "learning_rate": 0.0006676077619626259, "loss": 0.0482, "num_input_tokens_seen": 119719552, "step": 55425 }, { "epoch": 9.04241435562806, "grad_norm": 0.717595636844635, "learning_rate": 0.0006675406988015766, "loss": 0.0553, "num_input_tokens_seen": 119730112, "step": 55430 }, { "epoch": 9.043230016313213, "grad_norm": 0.06307411193847656, "learning_rate": 0.0006674736322451027, "loss": 0.0242, "num_input_tokens_seen": 119741120, "step": 55435 }, { "epoch": 9.044045676998369, "grad_norm": 0.021658165380358696, "learning_rate": 0.000667406562294563, "loss": 0.0132, "num_input_tokens_seen": 119752480, "step": 55440 }, { "epoch": 9.044861337683523, "grad_norm": 0.03899122402071953, "learning_rate": 0.0006673394889513169, "loss": 0.0207, "num_input_tokens_seen": 119763552, "step": 55445 }, { "epoch": 9.045676998368679, "grad_norm": 0.1413194090127945, "learning_rate": 0.000667272412216724, "loss": 0.0306, "num_input_tokens_seen": 119774944, "step": 55450 }, { "epoch": 9.046492659053834, "grad_norm": 0.017150040715932846, "learning_rate": 0.0006672053320921433, "loss": 0.1126, "num_input_tokens_seen": 119786112, "step": 55455 }, { "epoch": 9.047308319738988, "grad_norm": 0.016458654776215553, "learning_rate": 0.0006671382485789344, "loss": 0.0098, "num_input_tokens_seen": 119797920, "step": 55460 }, { "epoch": 9.048123980424144, "grad_norm": 0.026537369936704636, "learning_rate": 0.0006670711616784571, "loss": 0.0144, "num_input_tokens_seen": 119808768, "step": 55465 }, { "epoch": 9.048939641109298, "grad_norm": 0.020560389384627342, "learning_rate": 0.0006670040713920704, "loss": 0.0329, "num_input_tokens_seen": 119818720, "step": 55470 }, { "epoch": 9.049755301794454, "grad_norm": 0.04738950356841087, "learning_rate": 0.0006669369777211344, "loss": 0.1724, "num_input_tokens_seen": 119830336, "step": 55475 }, { "epoch": 9.05057096247961, "grad_norm": 0.003236339660361409, "learning_rate": 0.000666869880667009, "loss": 0.0072, "num_input_tokens_seen": 119840416, "step": 55480 }, { "epoch": 9.051386623164763, "grad_norm": 0.005051568150520325, "learning_rate": 0.0006668027802310537, "loss": 0.0348, "num_input_tokens_seen": 119851104, "step": 55485 }, { "epoch": 9.052202283849919, "grad_norm": 0.0035680499859154224, "learning_rate": 0.0006667356764146284, "loss": 0.0067, "num_input_tokens_seen": 119862144, "step": 55490 }, { "epoch": 9.053017944535073, "grad_norm": 0.05445701628923416, "learning_rate": 0.0006666685692190931, "loss": 0.0102, "num_input_tokens_seen": 119873952, "step": 55495 }, { "epoch": 9.053833605220229, "grad_norm": 0.44816941022872925, "learning_rate": 0.0006666014586458079, "loss": 0.1066, "num_input_tokens_seen": 119884288, "step": 55500 }, { "epoch": 9.054649265905383, "grad_norm": 0.08904964476823807, "learning_rate": 0.0006665343446961327, "loss": 0.011, "num_input_tokens_seen": 119893696, "step": 55505 }, { "epoch": 9.055464926590538, "grad_norm": 0.0033945401664823294, "learning_rate": 0.0006664672273714278, "loss": 0.0059, "num_input_tokens_seen": 119905760, "step": 55510 }, { "epoch": 9.056280587275694, "grad_norm": 0.00631315354257822, "learning_rate": 0.0006664001066730532, "loss": 0.004, "num_input_tokens_seen": 119916992, "step": 55515 }, { "epoch": 9.057096247960848, "grad_norm": 0.01878887228667736, "learning_rate": 0.0006663329826023696, "loss": 0.0412, "num_input_tokens_seen": 119928192, "step": 55520 }, { "epoch": 9.057911908646004, "grad_norm": 0.010319680906832218, "learning_rate": 0.000666265855160737, "loss": 0.088, "num_input_tokens_seen": 119939360, "step": 55525 }, { "epoch": 9.058727569331158, "grad_norm": 0.12095552682876587, "learning_rate": 0.0006661987243495159, "loss": 0.0356, "num_input_tokens_seen": 119950432, "step": 55530 }, { "epoch": 9.059543230016313, "grad_norm": 0.029308486729860306, "learning_rate": 0.0006661315901700668, "loss": 0.0118, "num_input_tokens_seen": 119960768, "step": 55535 }, { "epoch": 9.060358890701469, "grad_norm": 0.03496725484728813, "learning_rate": 0.0006660644526237502, "loss": 0.0211, "num_input_tokens_seen": 119972032, "step": 55540 }, { "epoch": 9.061174551386623, "grad_norm": 0.020432081073522568, "learning_rate": 0.0006659973117119269, "loss": 0.0563, "num_input_tokens_seen": 119983072, "step": 55545 }, { "epoch": 9.061990212071779, "grad_norm": 0.005481477826833725, "learning_rate": 0.0006659301674359575, "loss": 0.0254, "num_input_tokens_seen": 119993472, "step": 55550 }, { "epoch": 9.062805872756933, "grad_norm": 0.04623018950223923, "learning_rate": 0.0006658630197972027, "loss": 0.0283, "num_input_tokens_seen": 120002656, "step": 55555 }, { "epoch": 9.063621533442088, "grad_norm": 0.06435194611549377, "learning_rate": 0.0006657958687970233, "loss": 0.1708, "num_input_tokens_seen": 120012224, "step": 55560 }, { "epoch": 9.064437194127244, "grad_norm": 0.021413441747426987, "learning_rate": 0.0006657287144367805, "loss": 0.0544, "num_input_tokens_seen": 120024128, "step": 55565 }, { "epoch": 9.065252854812398, "grad_norm": 0.020738402381539345, "learning_rate": 0.000665661556717835, "loss": 0.007, "num_input_tokens_seen": 120034112, "step": 55570 }, { "epoch": 9.066068515497554, "grad_norm": 0.0270835030823946, "learning_rate": 0.0006655943956415479, "loss": 0.011, "num_input_tokens_seen": 120045216, "step": 55575 }, { "epoch": 9.066884176182707, "grad_norm": 0.013338779099285603, "learning_rate": 0.0006655272312092802, "loss": 0.0455, "num_input_tokens_seen": 120056256, "step": 55580 }, { "epoch": 9.067699836867863, "grad_norm": 0.23012396693229675, "learning_rate": 0.0006654600634223933, "loss": 0.1355, "num_input_tokens_seen": 120067008, "step": 55585 }, { "epoch": 9.068515497553017, "grad_norm": 0.1102285236120224, "learning_rate": 0.0006653928922822482, "loss": 0.0208, "num_input_tokens_seen": 120078752, "step": 55590 }, { "epoch": 9.069331158238173, "grad_norm": 0.09096402674913406, "learning_rate": 0.0006653257177902063, "loss": 0.0282, "num_input_tokens_seen": 120090528, "step": 55595 }, { "epoch": 9.070146818923329, "grad_norm": 0.002108287997543812, "learning_rate": 0.0006652585399476292, "loss": 0.0119, "num_input_tokens_seen": 120101376, "step": 55600 }, { "epoch": 9.070962479608482, "grad_norm": 0.03092172183096409, "learning_rate": 0.000665191358755878, "loss": 0.0087, "num_input_tokens_seen": 120111648, "step": 55605 }, { "epoch": 9.071778140293638, "grad_norm": 0.29341599345207214, "learning_rate": 0.0006651241742163143, "loss": 0.05, "num_input_tokens_seen": 120121760, "step": 55610 }, { "epoch": 9.072593800978792, "grad_norm": 0.003775701392441988, "learning_rate": 0.0006650569863302999, "loss": 0.0041, "num_input_tokens_seen": 120133184, "step": 55615 }, { "epoch": 9.073409461663948, "grad_norm": 0.27860262989997864, "learning_rate": 0.0006649897950991962, "loss": 0.0834, "num_input_tokens_seen": 120143360, "step": 55620 }, { "epoch": 9.074225122349104, "grad_norm": 0.0017340558115392923, "learning_rate": 0.000664922600524365, "loss": 0.1483, "num_input_tokens_seen": 120153728, "step": 55625 }, { "epoch": 9.075040783034257, "grad_norm": 0.00711076008155942, "learning_rate": 0.000664855402607168, "loss": 0.0305, "num_input_tokens_seen": 120164160, "step": 55630 }, { "epoch": 9.075856443719413, "grad_norm": 0.009782101027667522, "learning_rate": 0.0006647882013489674, "loss": 0.0758, "num_input_tokens_seen": 120173472, "step": 55635 }, { "epoch": 9.076672104404567, "grad_norm": 0.0022454196587204933, "learning_rate": 0.0006647209967511245, "loss": 0.0883, "num_input_tokens_seen": 120184576, "step": 55640 }, { "epoch": 9.077487765089723, "grad_norm": 0.009879418648779392, "learning_rate": 0.0006646537888150019, "loss": 0.0323, "num_input_tokens_seen": 120196064, "step": 55645 }, { "epoch": 9.078303425774878, "grad_norm": 0.249136283993721, "learning_rate": 0.0006645865775419613, "loss": 0.1152, "num_input_tokens_seen": 120208096, "step": 55650 }, { "epoch": 9.079119086460032, "grad_norm": 0.225576713681221, "learning_rate": 0.0006645193629333649, "loss": 0.0238, "num_input_tokens_seen": 120218624, "step": 55655 }, { "epoch": 9.079934747145188, "grad_norm": 0.012649464420974255, "learning_rate": 0.0006644521449905749, "loss": 0.1596, "num_input_tokens_seen": 120230304, "step": 55660 }, { "epoch": 9.080750407830342, "grad_norm": 0.002151126740500331, "learning_rate": 0.0006643849237149536, "loss": 0.023, "num_input_tokens_seen": 120241248, "step": 55665 }, { "epoch": 9.081566068515498, "grad_norm": 0.004632554017007351, "learning_rate": 0.0006643176991078632, "loss": 0.0139, "num_input_tokens_seen": 120251584, "step": 55670 }, { "epoch": 9.082381729200652, "grad_norm": 0.016930658370256424, "learning_rate": 0.0006642504711706663, "loss": 0.0195, "num_input_tokens_seen": 120262336, "step": 55675 }, { "epoch": 9.083197389885807, "grad_norm": 0.010655293241143227, "learning_rate": 0.000664183239904725, "loss": 0.0365, "num_input_tokens_seen": 120274432, "step": 55680 }, { "epoch": 9.084013050570963, "grad_norm": 0.04127860441803932, "learning_rate": 0.0006641160053114021, "loss": 0.0365, "num_input_tokens_seen": 120283392, "step": 55685 }, { "epoch": 9.084828711256117, "grad_norm": 0.05015069618821144, "learning_rate": 0.0006640487673920605, "loss": 0.0164, "num_input_tokens_seen": 120293792, "step": 55690 }, { "epoch": 9.085644371941273, "grad_norm": 0.022326963022351265, "learning_rate": 0.0006639815261480622, "loss": 0.0747, "num_input_tokens_seen": 120305312, "step": 55695 }, { "epoch": 9.086460032626427, "grad_norm": 0.02016095258295536, "learning_rate": 0.0006639142815807704, "loss": 0.0068, "num_input_tokens_seen": 120315264, "step": 55700 }, { "epoch": 9.087275693311582, "grad_norm": 0.042222410440444946, "learning_rate": 0.0006638470336915477, "loss": 0.1165, "num_input_tokens_seen": 120326752, "step": 55705 }, { "epoch": 9.088091353996738, "grad_norm": 0.1003473624587059, "learning_rate": 0.0006637797824817569, "loss": 0.0373, "num_input_tokens_seen": 120337984, "step": 55710 }, { "epoch": 9.088907014681892, "grad_norm": 0.15232303738594055, "learning_rate": 0.000663712527952761, "loss": 0.0373, "num_input_tokens_seen": 120347872, "step": 55715 }, { "epoch": 9.089722675367048, "grad_norm": 0.004314785357564688, "learning_rate": 0.0006636452701059232, "loss": 0.0063, "num_input_tokens_seen": 120358656, "step": 55720 }, { "epoch": 9.090538336052202, "grad_norm": 0.20391501486301422, "learning_rate": 0.0006635780089426065, "loss": 0.0809, "num_input_tokens_seen": 120369696, "step": 55725 }, { "epoch": 9.091353996737357, "grad_norm": 0.019305676221847534, "learning_rate": 0.0006635107444641737, "loss": 0.0213, "num_input_tokens_seen": 120381440, "step": 55730 }, { "epoch": 9.092169657422513, "grad_norm": 0.24267306923866272, "learning_rate": 0.0006634434766719883, "loss": 0.0856, "num_input_tokens_seen": 120393344, "step": 55735 }, { "epoch": 9.092985318107667, "grad_norm": 0.05459204688668251, "learning_rate": 0.0006633762055674136, "loss": 0.181, "num_input_tokens_seen": 120403456, "step": 55740 }, { "epoch": 9.093800978792823, "grad_norm": 0.1435762494802475, "learning_rate": 0.0006633089311518128, "loss": 0.1155, "num_input_tokens_seen": 120414240, "step": 55745 }, { "epoch": 9.094616639477977, "grad_norm": 0.2441323846578598, "learning_rate": 0.0006632416534265493, "loss": 0.05, "num_input_tokens_seen": 120425696, "step": 55750 }, { "epoch": 9.095432300163132, "grad_norm": 0.0228540301322937, "learning_rate": 0.0006631743723929867, "loss": 0.0207, "num_input_tokens_seen": 120435424, "step": 55755 }, { "epoch": 9.096247960848286, "grad_norm": 0.020277827978134155, "learning_rate": 0.0006631070880524883, "loss": 0.0162, "num_input_tokens_seen": 120445184, "step": 55760 }, { "epoch": 9.097063621533442, "grad_norm": 0.19620034098625183, "learning_rate": 0.0006630398004064179, "loss": 0.0263, "num_input_tokens_seen": 120455648, "step": 55765 }, { "epoch": 9.097879282218598, "grad_norm": 0.013875322416424751, "learning_rate": 0.0006629725094561392, "loss": 0.04, "num_input_tokens_seen": 120467456, "step": 55770 }, { "epoch": 9.098694942903752, "grad_norm": 0.011110931634902954, "learning_rate": 0.0006629052152030158, "loss": 0.0151, "num_input_tokens_seen": 120477568, "step": 55775 }, { "epoch": 9.099510603588907, "grad_norm": 0.015466023236513138, "learning_rate": 0.0006628379176484115, "loss": 0.0942, "num_input_tokens_seen": 120487296, "step": 55780 }, { "epoch": 9.100326264274061, "grad_norm": 0.5825760364532471, "learning_rate": 0.0006627706167936903, "loss": 0.2598, "num_input_tokens_seen": 120498784, "step": 55785 }, { "epoch": 9.101141924959217, "grad_norm": 0.03466307371854782, "learning_rate": 0.0006627033126402159, "loss": 0.1472, "num_input_tokens_seen": 120509504, "step": 55790 }, { "epoch": 9.101957585644373, "grad_norm": 0.003099187510088086, "learning_rate": 0.0006626360051893526, "loss": 0.0501, "num_input_tokens_seen": 120520320, "step": 55795 }, { "epoch": 9.102773246329527, "grad_norm": 0.07016048580408096, "learning_rate": 0.0006625686944424642, "loss": 0.0455, "num_input_tokens_seen": 120530912, "step": 55800 }, { "epoch": 9.103588907014682, "grad_norm": 0.008897113613784313, "learning_rate": 0.0006625013804009152, "loss": 0.0188, "num_input_tokens_seen": 120541760, "step": 55805 }, { "epoch": 9.104404567699836, "grad_norm": 0.022372784093022346, "learning_rate": 0.0006624340630660695, "loss": 0.0859, "num_input_tokens_seen": 120552640, "step": 55810 }, { "epoch": 9.105220228384992, "grad_norm": 0.027728265151381493, "learning_rate": 0.0006623667424392914, "loss": 0.0288, "num_input_tokens_seen": 120564256, "step": 55815 }, { "epoch": 9.106035889070148, "grad_norm": 0.31177589297294617, "learning_rate": 0.0006622994185219453, "loss": 0.0578, "num_input_tokens_seen": 120574912, "step": 55820 }, { "epoch": 9.106851549755302, "grad_norm": 0.04539692774415016, "learning_rate": 0.0006622320913153957, "loss": 0.0162, "num_input_tokens_seen": 120585120, "step": 55825 }, { "epoch": 9.107667210440457, "grad_norm": 0.004602258093655109, "learning_rate": 0.0006621647608210068, "loss": 0.1413, "num_input_tokens_seen": 120595392, "step": 55830 }, { "epoch": 9.108482871125611, "grad_norm": 0.0793989822268486, "learning_rate": 0.0006620974270401434, "loss": 0.0847, "num_input_tokens_seen": 120607488, "step": 55835 }, { "epoch": 9.109298531810767, "grad_norm": 0.01831854321062565, "learning_rate": 0.00066203008997417, "loss": 0.0166, "num_input_tokens_seen": 120619136, "step": 55840 }, { "epoch": 9.11011419249592, "grad_norm": 0.2046804577112198, "learning_rate": 0.0006619627496244513, "loss": 0.1595, "num_input_tokens_seen": 120631808, "step": 55845 }, { "epoch": 9.110929853181077, "grad_norm": 0.6116275787353516, "learning_rate": 0.0006618954059923517, "loss": 0.0899, "num_input_tokens_seen": 120642752, "step": 55850 }, { "epoch": 9.111745513866232, "grad_norm": 0.0629836916923523, "learning_rate": 0.0006618280590792367, "loss": 0.0101, "num_input_tokens_seen": 120653312, "step": 55855 }, { "epoch": 9.112561174551386, "grad_norm": 0.05145289748907089, "learning_rate": 0.0006617607088864706, "loss": 0.0339, "num_input_tokens_seen": 120664384, "step": 55860 }, { "epoch": 9.113376835236542, "grad_norm": 0.009486453607678413, "learning_rate": 0.0006616933554154186, "loss": 0.0151, "num_input_tokens_seen": 120675520, "step": 55865 }, { "epoch": 9.114192495921696, "grad_norm": 0.07350229471921921, "learning_rate": 0.0006616259986674456, "loss": 0.0276, "num_input_tokens_seen": 120685856, "step": 55870 }, { "epoch": 9.115008156606851, "grad_norm": 0.11855614185333252, "learning_rate": 0.0006615586386439169, "loss": 0.1287, "num_input_tokens_seen": 120696864, "step": 55875 }, { "epoch": 9.115823817292007, "grad_norm": 0.46437859535217285, "learning_rate": 0.0006614912753461973, "loss": 0.1984, "num_input_tokens_seen": 120708352, "step": 55880 }, { "epoch": 9.116639477977161, "grad_norm": 0.007540303748100996, "learning_rate": 0.0006614239087756519, "loss": 0.0658, "num_input_tokens_seen": 120720128, "step": 55885 }, { "epoch": 9.117455138662317, "grad_norm": 0.017881179228425026, "learning_rate": 0.0006613565389336465, "loss": 0.0154, "num_input_tokens_seen": 120730048, "step": 55890 }, { "epoch": 9.11827079934747, "grad_norm": 0.29263004660606384, "learning_rate": 0.0006612891658215461, "loss": 0.0828, "num_input_tokens_seen": 120740576, "step": 55895 }, { "epoch": 9.119086460032626, "grad_norm": 0.01154404878616333, "learning_rate": 0.000661221789440716, "loss": 0.0091, "num_input_tokens_seen": 120752544, "step": 55900 }, { "epoch": 9.119902120717782, "grad_norm": 0.01328980177640915, "learning_rate": 0.0006611544097925219, "loss": 0.0233, "num_input_tokens_seen": 120763456, "step": 55905 }, { "epoch": 9.120717781402936, "grad_norm": 0.05561533570289612, "learning_rate": 0.0006610870268783292, "loss": 0.0545, "num_input_tokens_seen": 120774400, "step": 55910 }, { "epoch": 9.121533442088092, "grad_norm": 0.11454028636217117, "learning_rate": 0.0006610196406995038, "loss": 0.0478, "num_input_tokens_seen": 120785920, "step": 55915 }, { "epoch": 9.122349102773246, "grad_norm": 0.0025789556093513966, "learning_rate": 0.0006609522512574107, "loss": 0.0049, "num_input_tokens_seen": 120796960, "step": 55920 }, { "epoch": 9.123164763458401, "grad_norm": 0.048425693064928055, "learning_rate": 0.0006608848585534164, "loss": 0.0134, "num_input_tokens_seen": 120807840, "step": 55925 }, { "epoch": 9.123980424143557, "grad_norm": 0.031781259924173355, "learning_rate": 0.0006608174625888862, "loss": 0.0213, "num_input_tokens_seen": 120818944, "step": 55930 }, { "epoch": 9.124796084828711, "grad_norm": 0.004460108932107687, "learning_rate": 0.000660750063365186, "loss": 0.0087, "num_input_tokens_seen": 120830112, "step": 55935 }, { "epoch": 9.125611745513867, "grad_norm": 0.029495006427168846, "learning_rate": 0.000660682660883682, "loss": 0.0379, "num_input_tokens_seen": 120841248, "step": 55940 }, { "epoch": 9.12642740619902, "grad_norm": 0.010408415459096432, "learning_rate": 0.0006606152551457401, "loss": 0.0092, "num_input_tokens_seen": 120851840, "step": 55945 }, { "epoch": 9.127243066884176, "grad_norm": 0.004695413634181023, "learning_rate": 0.0006605478461527262, "loss": 0.0373, "num_input_tokens_seen": 120863520, "step": 55950 }, { "epoch": 9.12805872756933, "grad_norm": 0.03372404724359512, "learning_rate": 0.0006604804339060065, "loss": 0.0752, "num_input_tokens_seen": 120874176, "step": 55955 }, { "epoch": 9.128874388254486, "grad_norm": 0.12020575255155563, "learning_rate": 0.0006604130184069472, "loss": 0.0095, "num_input_tokens_seen": 120884736, "step": 55960 }, { "epoch": 9.129690048939642, "grad_norm": 0.033447980880737305, "learning_rate": 0.0006603455996569146, "loss": 0.0392, "num_input_tokens_seen": 120895296, "step": 55965 }, { "epoch": 9.130505709624796, "grad_norm": 0.015460162423551083, "learning_rate": 0.0006602781776572752, "loss": 0.0314, "num_input_tokens_seen": 120905472, "step": 55970 }, { "epoch": 9.131321370309951, "grad_norm": 0.023283349350094795, "learning_rate": 0.000660210752409395, "loss": 0.053, "num_input_tokens_seen": 120916576, "step": 55975 }, { "epoch": 9.132137030995105, "grad_norm": 0.007297954987734556, "learning_rate": 0.0006601433239146407, "loss": 0.0587, "num_input_tokens_seen": 120927232, "step": 55980 }, { "epoch": 9.132952691680261, "grad_norm": 0.004983537830412388, "learning_rate": 0.0006600758921743788, "loss": 0.0129, "num_input_tokens_seen": 120937280, "step": 55985 }, { "epoch": 9.133768352365417, "grad_norm": 0.6522960662841797, "learning_rate": 0.0006600084571899758, "loss": 0.0495, "num_input_tokens_seen": 120948992, "step": 55990 }, { "epoch": 9.13458401305057, "grad_norm": 0.0034484846983104944, "learning_rate": 0.0006599410189627985, "loss": 0.0183, "num_input_tokens_seen": 120958496, "step": 55995 }, { "epoch": 9.135399673735726, "grad_norm": 0.10356710851192474, "learning_rate": 0.0006598735774942135, "loss": 0.0603, "num_input_tokens_seen": 120971168, "step": 56000 }, { "epoch": 9.13621533442088, "grad_norm": 0.02262182906270027, "learning_rate": 0.0006598061327855876, "loss": 0.0101, "num_input_tokens_seen": 120982624, "step": 56005 }, { "epoch": 9.137030995106036, "grad_norm": 0.018539773300290108, "learning_rate": 0.0006597386848382878, "loss": 0.0677, "num_input_tokens_seen": 120994176, "step": 56010 }, { "epoch": 9.137846655791192, "grad_norm": 0.010305029340088367, "learning_rate": 0.000659671233653681, "loss": 0.0253, "num_input_tokens_seen": 121003968, "step": 56015 }, { "epoch": 9.138662316476346, "grad_norm": 0.013459831476211548, "learning_rate": 0.0006596037792331338, "loss": 0.0167, "num_input_tokens_seen": 121014112, "step": 56020 }, { "epoch": 9.139477977161501, "grad_norm": 0.002566079143434763, "learning_rate": 0.0006595363215780137, "loss": 0.0126, "num_input_tokens_seen": 121024800, "step": 56025 }, { "epoch": 9.140293637846655, "grad_norm": 0.0190932247787714, "learning_rate": 0.0006594688606896877, "loss": 0.0064, "num_input_tokens_seen": 121035552, "step": 56030 }, { "epoch": 9.141109298531811, "grad_norm": 0.008819978684186935, "learning_rate": 0.0006594013965695229, "loss": 0.1523, "num_input_tokens_seen": 121046208, "step": 56035 }, { "epoch": 9.141924959216965, "grad_norm": 0.25872063636779785, "learning_rate": 0.0006593339292188865, "loss": 0.0396, "num_input_tokens_seen": 121057312, "step": 56040 }, { "epoch": 9.14274061990212, "grad_norm": 0.05374641716480255, "learning_rate": 0.0006592664586391461, "loss": 0.031, "num_input_tokens_seen": 121068960, "step": 56045 }, { "epoch": 9.143556280587276, "grad_norm": 0.03318631649017334, "learning_rate": 0.0006591989848316687, "loss": 0.0055, "num_input_tokens_seen": 121079840, "step": 56050 }, { "epoch": 9.14437194127243, "grad_norm": 0.002823259448632598, "learning_rate": 0.0006591315077978221, "loss": 0.0721, "num_input_tokens_seen": 121089760, "step": 56055 }, { "epoch": 9.145187601957586, "grad_norm": 0.16577103734016418, "learning_rate": 0.0006590640275389734, "loss": 0.0111, "num_input_tokens_seen": 121100096, "step": 56060 }, { "epoch": 9.14600326264274, "grad_norm": 0.00289472215808928, "learning_rate": 0.0006589965440564905, "loss": 0.0649, "num_input_tokens_seen": 121110368, "step": 56065 }, { "epoch": 9.146818923327896, "grad_norm": 0.263703316450119, "learning_rate": 0.000658929057351741, "loss": 0.0508, "num_input_tokens_seen": 121119392, "step": 56070 }, { "epoch": 9.147634584013051, "grad_norm": 0.29043954610824585, "learning_rate": 0.0006588615674260925, "loss": 0.0644, "num_input_tokens_seen": 121129216, "step": 56075 }, { "epoch": 9.148450244698205, "grad_norm": 0.013290850445628166, "learning_rate": 0.0006587940742809127, "loss": 0.0177, "num_input_tokens_seen": 121139072, "step": 56080 }, { "epoch": 9.149265905383361, "grad_norm": 0.47222426533699036, "learning_rate": 0.0006587265779175696, "loss": 0.0709, "num_input_tokens_seen": 121149856, "step": 56085 }, { "epoch": 9.150081566068515, "grad_norm": 0.004278893116861582, "learning_rate": 0.0006586590783374311, "loss": 0.005, "num_input_tokens_seen": 121160608, "step": 56090 }, { "epoch": 9.15089722675367, "grad_norm": 0.3925560414791107, "learning_rate": 0.000658591575541865, "loss": 0.0331, "num_input_tokens_seen": 121170944, "step": 56095 }, { "epoch": 9.151712887438826, "grad_norm": 0.011311687529087067, "learning_rate": 0.0006585240695322395, "loss": 0.0404, "num_input_tokens_seen": 121181216, "step": 56100 }, { "epoch": 9.15252854812398, "grad_norm": 0.005556880030781031, "learning_rate": 0.0006584565603099227, "loss": 0.0364, "num_input_tokens_seen": 121192160, "step": 56105 }, { "epoch": 9.153344208809136, "grad_norm": 0.004971515852957964, "learning_rate": 0.0006583890478762824, "loss": 0.0628, "num_input_tokens_seen": 121203776, "step": 56110 }, { "epoch": 9.15415986949429, "grad_norm": 0.007574469782412052, "learning_rate": 0.0006583215322326874, "loss": 0.014, "num_input_tokens_seen": 121215040, "step": 56115 }, { "epoch": 9.154975530179446, "grad_norm": 0.3267376720905304, "learning_rate": 0.0006582540133805056, "loss": 0.027, "num_input_tokens_seen": 121226496, "step": 56120 }, { "epoch": 9.1557911908646, "grad_norm": 0.020117826759815216, "learning_rate": 0.0006581864913211055, "loss": 0.0179, "num_input_tokens_seen": 121237696, "step": 56125 }, { "epoch": 9.156606851549755, "grad_norm": 0.005197125021368265, "learning_rate": 0.0006581189660558554, "loss": 0.0285, "num_input_tokens_seen": 121248416, "step": 56130 }, { "epoch": 9.15742251223491, "grad_norm": 0.020636849105358124, "learning_rate": 0.000658051437586124, "loss": 0.028, "num_input_tokens_seen": 121259936, "step": 56135 }, { "epoch": 9.158238172920065, "grad_norm": 0.01939970813691616, "learning_rate": 0.0006579839059132796, "loss": 0.0126, "num_input_tokens_seen": 121271584, "step": 56140 }, { "epoch": 9.15905383360522, "grad_norm": 0.004743052180856466, "learning_rate": 0.000657916371038691, "loss": 0.0102, "num_input_tokens_seen": 121283072, "step": 56145 }, { "epoch": 9.159869494290374, "grad_norm": 1.1395015716552734, "learning_rate": 0.0006578488329637268, "loss": 0.1382, "num_input_tokens_seen": 121295264, "step": 56150 }, { "epoch": 9.16068515497553, "grad_norm": 0.0044103991240262985, "learning_rate": 0.0006577812916897558, "loss": 0.0073, "num_input_tokens_seen": 121306944, "step": 56155 }, { "epoch": 9.161500815660686, "grad_norm": 0.006278656888753176, "learning_rate": 0.0006577137472181466, "loss": 0.2067, "num_input_tokens_seen": 121316768, "step": 56160 }, { "epoch": 9.16231647634584, "grad_norm": 0.09036379307508469, "learning_rate": 0.0006576461995502682, "loss": 0.0281, "num_input_tokens_seen": 121327456, "step": 56165 }, { "epoch": 9.163132137030995, "grad_norm": 0.5136862397193909, "learning_rate": 0.0006575786486874897, "loss": 0.0629, "num_input_tokens_seen": 121339008, "step": 56170 }, { "epoch": 9.16394779771615, "grad_norm": 0.09326247125864029, "learning_rate": 0.0006575110946311801, "loss": 0.0446, "num_input_tokens_seen": 121350112, "step": 56175 }, { "epoch": 9.164763458401305, "grad_norm": 0.09577307105064392, "learning_rate": 0.0006574435373827083, "loss": 0.2164, "num_input_tokens_seen": 121361664, "step": 56180 }, { "epoch": 9.16557911908646, "grad_norm": 0.4209381639957428, "learning_rate": 0.0006573759769434433, "loss": 0.1213, "num_input_tokens_seen": 121372416, "step": 56185 }, { "epoch": 9.166394779771615, "grad_norm": 0.009589829482138157, "learning_rate": 0.0006573084133147547, "loss": 0.0675, "num_input_tokens_seen": 121381600, "step": 56190 }, { "epoch": 9.16721044045677, "grad_norm": 0.014700385741889477, "learning_rate": 0.0006572408464980115, "loss": 0.051, "num_input_tokens_seen": 121392000, "step": 56195 }, { "epoch": 9.168026101141924, "grad_norm": 0.1325574666261673, "learning_rate": 0.000657173276494583, "loss": 0.0061, "num_input_tokens_seen": 121402144, "step": 56200 }, { "epoch": 9.16884176182708, "grad_norm": 0.5163242816925049, "learning_rate": 0.0006571057033058386, "loss": 0.0892, "num_input_tokens_seen": 121413312, "step": 56205 }, { "epoch": 9.169657422512234, "grad_norm": 0.7403216361999512, "learning_rate": 0.000657038126933148, "loss": 0.0592, "num_input_tokens_seen": 121424384, "step": 56210 }, { "epoch": 9.17047308319739, "grad_norm": 0.003989236429333687, "learning_rate": 0.0006569705473778804, "loss": 0.094, "num_input_tokens_seen": 121434144, "step": 56215 }, { "epoch": 9.171288743882545, "grad_norm": 0.01190384291112423, "learning_rate": 0.0006569029646414055, "loss": 0.0207, "num_input_tokens_seen": 121445472, "step": 56220 }, { "epoch": 9.1721044045677, "grad_norm": 0.3108609914779663, "learning_rate": 0.0006568353787250931, "loss": 0.069, "num_input_tokens_seen": 121456704, "step": 56225 }, { "epoch": 9.172920065252855, "grad_norm": 0.002502848394215107, "learning_rate": 0.0006567677896303127, "loss": 0.0191, "num_input_tokens_seen": 121467264, "step": 56230 }, { "epoch": 9.173735725938009, "grad_norm": 0.024630989879369736, "learning_rate": 0.0006567001973584343, "loss": 0.025, "num_input_tokens_seen": 121477536, "step": 56235 }, { "epoch": 9.174551386623165, "grad_norm": 0.2676560580730438, "learning_rate": 0.0006566326019108275, "loss": 0.1971, "num_input_tokens_seen": 121489056, "step": 56240 }, { "epoch": 9.17536704730832, "grad_norm": 0.030201373621821404, "learning_rate": 0.0006565650032888624, "loss": 0.0254, "num_input_tokens_seen": 121500352, "step": 56245 }, { "epoch": 9.176182707993474, "grad_norm": 0.04962538927793503, "learning_rate": 0.0006564974014939088, "loss": 0.0213, "num_input_tokens_seen": 121511584, "step": 56250 }, { "epoch": 9.17699836867863, "grad_norm": 0.3099713623523712, "learning_rate": 0.0006564297965273369, "loss": 0.1156, "num_input_tokens_seen": 121521536, "step": 56255 }, { "epoch": 9.177814029363784, "grad_norm": 0.42166343331336975, "learning_rate": 0.0006563621883905167, "loss": 0.0634, "num_input_tokens_seen": 121531456, "step": 56260 }, { "epoch": 9.17862969004894, "grad_norm": 0.012773294001817703, "learning_rate": 0.0006562945770848183, "loss": 0.1584, "num_input_tokens_seen": 121542336, "step": 56265 }, { "epoch": 9.179445350734095, "grad_norm": 0.03006591834127903, "learning_rate": 0.0006562269626116122, "loss": 0.0446, "num_input_tokens_seen": 121553408, "step": 56270 }, { "epoch": 9.18026101141925, "grad_norm": 0.044128891080617905, "learning_rate": 0.0006561593449722683, "loss": 0.0175, "num_input_tokens_seen": 121563136, "step": 56275 }, { "epoch": 9.181076672104405, "grad_norm": 0.04296493902802467, "learning_rate": 0.0006560917241681573, "loss": 0.0417, "num_input_tokens_seen": 121573696, "step": 56280 }, { "epoch": 9.181892332789559, "grad_norm": 0.47510603070259094, "learning_rate": 0.0006560241002006495, "loss": 0.1153, "num_input_tokens_seen": 121584416, "step": 56285 }, { "epoch": 9.182707993474715, "grad_norm": 0.017805377021431923, "learning_rate": 0.0006559564730711153, "loss": 0.0205, "num_input_tokens_seen": 121595552, "step": 56290 }, { "epoch": 9.18352365415987, "grad_norm": 0.027597038075327873, "learning_rate": 0.0006558888427809255, "loss": 0.0935, "num_input_tokens_seen": 121606240, "step": 56295 }, { "epoch": 9.184339314845024, "grad_norm": 0.0038924135733395815, "learning_rate": 0.0006558212093314504, "loss": 0.2137, "num_input_tokens_seen": 121616352, "step": 56300 }, { "epoch": 9.18515497553018, "grad_norm": 0.15157592296600342, "learning_rate": 0.0006557535727240609, "loss": 0.0268, "num_input_tokens_seen": 121627776, "step": 56305 }, { "epoch": 9.185970636215334, "grad_norm": 0.018170544877648354, "learning_rate": 0.0006556859329601275, "loss": 0.0576, "num_input_tokens_seen": 121640672, "step": 56310 }, { "epoch": 9.18678629690049, "grad_norm": 0.005017032381147146, "learning_rate": 0.0006556182900410213, "loss": 0.0415, "num_input_tokens_seen": 121650976, "step": 56315 }, { "epoch": 9.187601957585644, "grad_norm": 0.004428547341376543, "learning_rate": 0.0006555506439681131, "loss": 0.0155, "num_input_tokens_seen": 121661856, "step": 56320 }, { "epoch": 9.1884176182708, "grad_norm": 0.004647783003747463, "learning_rate": 0.0006554829947427736, "loss": 0.0147, "num_input_tokens_seen": 121673024, "step": 56325 }, { "epoch": 9.189233278955955, "grad_norm": 0.0270043034106493, "learning_rate": 0.0006554153423663741, "loss": 0.0127, "num_input_tokens_seen": 121684000, "step": 56330 }, { "epoch": 9.190048939641109, "grad_norm": 0.16975529491901398, "learning_rate": 0.0006553476868402854, "loss": 0.096, "num_input_tokens_seen": 121693728, "step": 56335 }, { "epoch": 9.190864600326265, "grad_norm": 0.00935075618326664, "learning_rate": 0.0006552800281658789, "loss": 0.0386, "num_input_tokens_seen": 121703904, "step": 56340 }, { "epoch": 9.191680261011419, "grad_norm": 0.02370108850300312, "learning_rate": 0.0006552123663445255, "loss": 0.0064, "num_input_tokens_seen": 121714208, "step": 56345 }, { "epoch": 9.192495921696574, "grad_norm": 0.012804976664483547, "learning_rate": 0.0006551447013775967, "loss": 0.0598, "num_input_tokens_seen": 121723872, "step": 56350 }, { "epoch": 9.19331158238173, "grad_norm": 0.03224405273795128, "learning_rate": 0.0006550770332664637, "loss": 0.0208, "num_input_tokens_seen": 121735648, "step": 56355 }, { "epoch": 9.194127243066884, "grad_norm": 0.12478592246770859, "learning_rate": 0.0006550093620124979, "loss": 0.0258, "num_input_tokens_seen": 121747136, "step": 56360 }, { "epoch": 9.19494290375204, "grad_norm": 0.0019217646913602948, "learning_rate": 0.0006549416876170707, "loss": 0.0085, "num_input_tokens_seen": 121756544, "step": 56365 }, { "epoch": 9.195758564437194, "grad_norm": 0.0025901400949805975, "learning_rate": 0.0006548740100815537, "loss": 0.0084, "num_input_tokens_seen": 121767072, "step": 56370 }, { "epoch": 9.19657422512235, "grad_norm": 0.22183768451213837, "learning_rate": 0.0006548063294073183, "loss": 0.0344, "num_input_tokens_seen": 121777920, "step": 56375 }, { "epoch": 9.197389885807505, "grad_norm": 0.017956186085939407, "learning_rate": 0.0006547386455957364, "loss": 0.1983, "num_input_tokens_seen": 121788224, "step": 56380 }, { "epoch": 9.198205546492659, "grad_norm": 0.00347818317823112, "learning_rate": 0.0006546709586481794, "loss": 0.0875, "num_input_tokens_seen": 121799328, "step": 56385 }, { "epoch": 9.199021207177815, "grad_norm": 0.05604798346757889, "learning_rate": 0.0006546032685660193, "loss": 0.0579, "num_input_tokens_seen": 121810688, "step": 56390 }, { "epoch": 9.199836867862969, "grad_norm": 0.07729680836200714, "learning_rate": 0.000654535575350628, "loss": 0.161, "num_input_tokens_seen": 121821632, "step": 56395 }, { "epoch": 9.200652528548124, "grad_norm": 0.3953785002231598, "learning_rate": 0.0006544678790033769, "loss": 0.0892, "num_input_tokens_seen": 121832992, "step": 56400 }, { "epoch": 9.201468189233278, "grad_norm": 0.013939537107944489, "learning_rate": 0.0006544001795256385, "loss": 0.0812, "num_input_tokens_seen": 121842272, "step": 56405 }, { "epoch": 9.202283849918434, "grad_norm": 0.025160158053040504, "learning_rate": 0.0006543324769187844, "loss": 0.0137, "num_input_tokens_seen": 121852928, "step": 56410 }, { "epoch": 9.20309951060359, "grad_norm": 0.014282099902629852, "learning_rate": 0.0006542647711841869, "loss": 0.0093, "num_input_tokens_seen": 121864352, "step": 56415 }, { "epoch": 9.203915171288743, "grad_norm": 0.1687372922897339, "learning_rate": 0.0006541970623232183, "loss": 0.1539, "num_input_tokens_seen": 121875072, "step": 56420 }, { "epoch": 9.2047308319739, "grad_norm": 0.22237369418144226, "learning_rate": 0.0006541293503372506, "loss": 0.0284, "num_input_tokens_seen": 121886464, "step": 56425 }, { "epoch": 9.205546492659053, "grad_norm": 0.025881322100758553, "learning_rate": 0.0006540616352276558, "loss": 0.0186, "num_input_tokens_seen": 121897952, "step": 56430 }, { "epoch": 9.206362153344209, "grad_norm": 0.27931275963783264, "learning_rate": 0.0006539939169958067, "loss": 0.0438, "num_input_tokens_seen": 121907360, "step": 56435 }, { "epoch": 9.207177814029365, "grad_norm": 0.005891446024179459, "learning_rate": 0.0006539261956430755, "loss": 0.0202, "num_input_tokens_seen": 121918656, "step": 56440 }, { "epoch": 9.207993474714518, "grad_norm": 0.031058935448527336, "learning_rate": 0.0006538584711708348, "loss": 0.1126, "num_input_tokens_seen": 121929824, "step": 56445 }, { "epoch": 9.208809135399674, "grad_norm": 0.06801172345876694, "learning_rate": 0.0006537907435804569, "loss": 0.1866, "num_input_tokens_seen": 121939680, "step": 56450 }, { "epoch": 9.209624796084828, "grad_norm": 0.2498117983341217, "learning_rate": 0.0006537230128733144, "loss": 0.0488, "num_input_tokens_seen": 121950400, "step": 56455 }, { "epoch": 9.210440456769984, "grad_norm": 0.41223159432411194, "learning_rate": 0.0006536552790507802, "loss": 0.0561, "num_input_tokens_seen": 121960256, "step": 56460 }, { "epoch": 9.21125611745514, "grad_norm": 0.1966393142938614, "learning_rate": 0.0006535875421142267, "loss": 0.0122, "num_input_tokens_seen": 121970272, "step": 56465 }, { "epoch": 9.212071778140293, "grad_norm": 0.01200894545763731, "learning_rate": 0.0006535198020650269, "loss": 0.0291, "num_input_tokens_seen": 121979008, "step": 56470 }, { "epoch": 9.21288743882545, "grad_norm": 0.014986738562583923, "learning_rate": 0.0006534520589045537, "loss": 0.029, "num_input_tokens_seen": 121989504, "step": 56475 }, { "epoch": 9.213703099510603, "grad_norm": 0.014505613595247269, "learning_rate": 0.0006533843126341795, "loss": 0.0117, "num_input_tokens_seen": 122000320, "step": 56480 }, { "epoch": 9.214518760195759, "grad_norm": 0.03650680184364319, "learning_rate": 0.0006533165632552777, "loss": 0.0721, "num_input_tokens_seen": 122010592, "step": 56485 }, { "epoch": 9.215334420880913, "grad_norm": 0.022520599886775017, "learning_rate": 0.0006532488107692214, "loss": 0.01, "num_input_tokens_seen": 122020704, "step": 56490 }, { "epoch": 9.216150081566068, "grad_norm": 0.0102704968303442, "learning_rate": 0.0006531810551773836, "loss": 0.0339, "num_input_tokens_seen": 122031776, "step": 56495 }, { "epoch": 9.216965742251224, "grad_norm": 0.6189824938774109, "learning_rate": 0.0006531132964811374, "loss": 0.1069, "num_input_tokens_seen": 122042944, "step": 56500 }, { "epoch": 9.217781402936378, "grad_norm": 0.002442688215523958, "learning_rate": 0.0006530455346818559, "loss": 0.0429, "num_input_tokens_seen": 122053824, "step": 56505 }, { "epoch": 9.218597063621534, "grad_norm": 0.007630040869116783, "learning_rate": 0.0006529777697809125, "loss": 0.0152, "num_input_tokens_seen": 122064864, "step": 56510 }, { "epoch": 9.219412724306688, "grad_norm": 0.009317480958998203, "learning_rate": 0.0006529100017796805, "loss": 0.0344, "num_input_tokens_seen": 122075968, "step": 56515 }, { "epoch": 9.220228384991843, "grad_norm": 0.0036200080066919327, "learning_rate": 0.0006528422306795334, "loss": 0.0101, "num_input_tokens_seen": 122086912, "step": 56520 }, { "epoch": 9.221044045676999, "grad_norm": 0.1551111936569214, "learning_rate": 0.0006527744564818446, "loss": 0.0387, "num_input_tokens_seen": 122098368, "step": 56525 }, { "epoch": 9.221859706362153, "grad_norm": 0.02000151202082634, "learning_rate": 0.0006527066791879875, "loss": 0.159, "num_input_tokens_seen": 122108384, "step": 56530 }, { "epoch": 9.222675367047309, "grad_norm": 0.5325279235839844, "learning_rate": 0.000652638898799336, "loss": 0.0634, "num_input_tokens_seen": 122119808, "step": 56535 }, { "epoch": 9.223491027732463, "grad_norm": 0.03554704412817955, "learning_rate": 0.0006525711153172635, "loss": 0.0072, "num_input_tokens_seen": 122131424, "step": 56540 }, { "epoch": 9.224306688417618, "grad_norm": 0.043445706367492676, "learning_rate": 0.0006525033287431436, "loss": 0.2337, "num_input_tokens_seen": 122141536, "step": 56545 }, { "epoch": 9.225122349102774, "grad_norm": 0.20792369544506073, "learning_rate": 0.0006524355390783506, "loss": 0.0909, "num_input_tokens_seen": 122151840, "step": 56550 }, { "epoch": 9.225938009787928, "grad_norm": 0.0052780830301344395, "learning_rate": 0.0006523677463242579, "loss": 0.011, "num_input_tokens_seen": 122162624, "step": 56555 }, { "epoch": 9.226753670473084, "grad_norm": 0.15979614853858948, "learning_rate": 0.0006522999504822395, "loss": 0.0126, "num_input_tokens_seen": 122173664, "step": 56560 }, { "epoch": 9.227569331158238, "grad_norm": 0.20983552932739258, "learning_rate": 0.0006522321515536694, "loss": 0.2322, "num_input_tokens_seen": 122184480, "step": 56565 }, { "epoch": 9.228384991843393, "grad_norm": 0.007776240352541208, "learning_rate": 0.0006521643495399217, "loss": 0.0231, "num_input_tokens_seen": 122196032, "step": 56570 }, { "epoch": 9.229200652528547, "grad_norm": 0.30364108085632324, "learning_rate": 0.0006520965444423704, "loss": 0.2579, "num_input_tokens_seen": 122207392, "step": 56575 }, { "epoch": 9.230016313213703, "grad_norm": 1.1685848236083984, "learning_rate": 0.0006520287362623896, "loss": 0.083, "num_input_tokens_seen": 122218784, "step": 56580 }, { "epoch": 9.230831973898859, "grad_norm": 0.006376779638230801, "learning_rate": 0.0006519609250013538, "loss": 0.0232, "num_input_tokens_seen": 122230496, "step": 56585 }, { "epoch": 9.231647634584013, "grad_norm": 0.018893474712967873, "learning_rate": 0.000651893110660637, "loss": 0.0196, "num_input_tokens_seen": 122240512, "step": 56590 }, { "epoch": 9.232463295269168, "grad_norm": 0.0246660728007555, "learning_rate": 0.0006518252932416135, "loss": 0.0541, "num_input_tokens_seen": 122251200, "step": 56595 }, { "epoch": 9.233278955954322, "grad_norm": 0.018446508795022964, "learning_rate": 0.0006517574727456579, "loss": 0.0429, "num_input_tokens_seen": 122261184, "step": 56600 }, { "epoch": 9.234094616639478, "grad_norm": 0.017274249345064163, "learning_rate": 0.0006516896491741446, "loss": 0.0276, "num_input_tokens_seen": 122270048, "step": 56605 }, { "epoch": 9.234910277324634, "grad_norm": 0.03069995529949665, "learning_rate": 0.000651621822528448, "loss": 0.0279, "num_input_tokens_seen": 122280928, "step": 56610 }, { "epoch": 9.235725938009788, "grad_norm": 0.5565629005432129, "learning_rate": 0.000651553992809943, "loss": 0.0835, "num_input_tokens_seen": 122291488, "step": 56615 }, { "epoch": 9.236541598694943, "grad_norm": 0.03480347990989685, "learning_rate": 0.0006514861600200039, "loss": 0.0487, "num_input_tokens_seen": 122301408, "step": 56620 }, { "epoch": 9.237357259380097, "grad_norm": 0.013822119683027267, "learning_rate": 0.0006514183241600057, "loss": 0.1043, "num_input_tokens_seen": 122312416, "step": 56625 }, { "epoch": 9.238172920065253, "grad_norm": 0.42332392930984497, "learning_rate": 0.000651350485231323, "loss": 0.0659, "num_input_tokens_seen": 122324224, "step": 56630 }, { "epoch": 9.238988580750409, "grad_norm": 0.02906874567270279, "learning_rate": 0.0006512826432353308, "loss": 0.0768, "num_input_tokens_seen": 122335840, "step": 56635 }, { "epoch": 9.239804241435563, "grad_norm": 0.01386654656380415, "learning_rate": 0.000651214798173404, "loss": 0.0109, "num_input_tokens_seen": 122346208, "step": 56640 }, { "epoch": 9.240619902120718, "grad_norm": 0.0913848802447319, "learning_rate": 0.0006511469500469173, "loss": 0.0962, "num_input_tokens_seen": 122355808, "step": 56645 }, { "epoch": 9.241435562805872, "grad_norm": 0.4897230267524719, "learning_rate": 0.0006510790988572459, "loss": 0.0846, "num_input_tokens_seen": 122366656, "step": 56650 }, { "epoch": 9.242251223491028, "grad_norm": 0.006247374229133129, "learning_rate": 0.0006510112446057651, "loss": 0.038, "num_input_tokens_seen": 122375872, "step": 56655 }, { "epoch": 9.243066884176184, "grad_norm": 0.00599702401086688, "learning_rate": 0.0006509433872938497, "loss": 0.0065, "num_input_tokens_seen": 122387392, "step": 56660 }, { "epoch": 9.243882544861338, "grad_norm": 0.024100353941321373, "learning_rate": 0.0006508755269228752, "loss": 0.1065, "num_input_tokens_seen": 122396960, "step": 56665 }, { "epoch": 9.244698205546493, "grad_norm": 0.3259066343307495, "learning_rate": 0.0006508076634942167, "loss": 0.0854, "num_input_tokens_seen": 122408352, "step": 56670 }, { "epoch": 9.245513866231647, "grad_norm": 0.0508267804980278, "learning_rate": 0.0006507397970092496, "loss": 0.0238, "num_input_tokens_seen": 122420672, "step": 56675 }, { "epoch": 9.246329526916803, "grad_norm": 0.020270129665732384, "learning_rate": 0.0006506719274693492, "loss": 0.0313, "num_input_tokens_seen": 122431680, "step": 56680 }, { "epoch": 9.247145187601957, "grad_norm": 0.016229957342147827, "learning_rate": 0.0006506040548758911, "loss": 0.147, "num_input_tokens_seen": 122441984, "step": 56685 }, { "epoch": 9.247960848287113, "grad_norm": 0.004499287344515324, "learning_rate": 0.0006505361792302509, "loss": 0.0328, "num_input_tokens_seen": 122451616, "step": 56690 }, { "epoch": 9.248776508972268, "grad_norm": 0.006004804745316505, "learning_rate": 0.0006504683005338039, "loss": 0.0074, "num_input_tokens_seen": 122462848, "step": 56695 }, { "epoch": 9.249592169657422, "grad_norm": 0.12613023817539215, "learning_rate": 0.0006504004187879259, "loss": 0.0231, "num_input_tokens_seen": 122472960, "step": 56700 }, { "epoch": 9.250407830342578, "grad_norm": 0.00950097106397152, "learning_rate": 0.0006503325339939927, "loss": 0.0194, "num_input_tokens_seen": 122484064, "step": 56705 }, { "epoch": 9.251223491027732, "grad_norm": 0.33747807145118713, "learning_rate": 0.0006502646461533798, "loss": 0.1316, "num_input_tokens_seen": 122494816, "step": 56710 }, { "epoch": 9.252039151712887, "grad_norm": 0.07069870829582214, "learning_rate": 0.0006501967552674635, "loss": 0.0612, "num_input_tokens_seen": 122505024, "step": 56715 }, { "epoch": 9.252854812398043, "grad_norm": 0.22603590786457062, "learning_rate": 0.0006501288613376193, "loss": 0.1817, "num_input_tokens_seen": 122515680, "step": 56720 }, { "epoch": 9.253670473083197, "grad_norm": 0.21570582687854767, "learning_rate": 0.0006500609643652234, "loss": 0.2376, "num_input_tokens_seen": 122526368, "step": 56725 }, { "epoch": 9.254486133768353, "grad_norm": 0.09161341190338135, "learning_rate": 0.0006499930643516514, "loss": 0.2312, "num_input_tokens_seen": 122536448, "step": 56730 }, { "epoch": 9.255301794453507, "grad_norm": 0.017243504524230957, "learning_rate": 0.0006499251612982798, "loss": 0.0246, "num_input_tokens_seen": 122546464, "step": 56735 }, { "epoch": 9.256117455138662, "grad_norm": 0.22162210941314697, "learning_rate": 0.0006498572552064847, "loss": 0.0464, "num_input_tokens_seen": 122557152, "step": 56740 }, { "epoch": 9.256933115823816, "grad_norm": 0.008149663917720318, "learning_rate": 0.0006497893460776421, "loss": 0.0349, "num_input_tokens_seen": 122566848, "step": 56745 }, { "epoch": 9.257748776508972, "grad_norm": 0.07649563997983932, "learning_rate": 0.0006497214339131284, "loss": 0.0273, "num_input_tokens_seen": 122577568, "step": 56750 }, { "epoch": 9.258564437194128, "grad_norm": 0.004069112706929445, "learning_rate": 0.00064965351871432, "loss": 0.0477, "num_input_tokens_seen": 122587040, "step": 56755 }, { "epoch": 9.259380097879282, "grad_norm": 0.0017357214819639921, "learning_rate": 0.0006495856004825931, "loss": 0.0669, "num_input_tokens_seen": 122597856, "step": 56760 }, { "epoch": 9.260195758564437, "grad_norm": 0.032659392803907394, "learning_rate": 0.0006495176792193243, "loss": 0.0631, "num_input_tokens_seen": 122608032, "step": 56765 }, { "epoch": 9.261011419249591, "grad_norm": 0.01769702136516571, "learning_rate": 0.00064944975492589, "loss": 0.0282, "num_input_tokens_seen": 122620032, "step": 56770 }, { "epoch": 9.261827079934747, "grad_norm": 0.23506838083267212, "learning_rate": 0.0006493818276036669, "loss": 0.0572, "num_input_tokens_seen": 122629696, "step": 56775 }, { "epoch": 9.262642740619903, "grad_norm": 0.06442558020353317, "learning_rate": 0.0006493138972540316, "loss": 0.0164, "num_input_tokens_seen": 122639776, "step": 56780 }, { "epoch": 9.263458401305057, "grad_norm": 0.06317390501499176, "learning_rate": 0.0006492459638783606, "loss": 0.0266, "num_input_tokens_seen": 122650240, "step": 56785 }, { "epoch": 9.264274061990212, "grad_norm": 0.29172617197036743, "learning_rate": 0.0006491780274780308, "loss": 0.0292, "num_input_tokens_seen": 122661056, "step": 56790 }, { "epoch": 9.265089722675366, "grad_norm": 0.008959591388702393, "learning_rate": 0.0006491100880544191, "loss": 0.0448, "num_input_tokens_seen": 122671936, "step": 56795 }, { "epoch": 9.265905383360522, "grad_norm": 0.2516701817512512, "learning_rate": 0.0006490421456089023, "loss": 0.0195, "num_input_tokens_seen": 122683808, "step": 56800 }, { "epoch": 9.266721044045678, "grad_norm": 0.03728965297341347, "learning_rate": 0.0006489742001428573, "loss": 0.0137, "num_input_tokens_seen": 122695520, "step": 56805 }, { "epoch": 9.267536704730832, "grad_norm": 0.006289742887020111, "learning_rate": 0.0006489062516576613, "loss": 0.008, "num_input_tokens_seen": 122706624, "step": 56810 }, { "epoch": 9.268352365415987, "grad_norm": 0.03731833025813103, "learning_rate": 0.0006488383001546911, "loss": 0.0216, "num_input_tokens_seen": 122718016, "step": 56815 }, { "epoch": 9.269168026101141, "grad_norm": 0.004130576737225056, "learning_rate": 0.000648770345635324, "loss": 0.0111, "num_input_tokens_seen": 122727872, "step": 56820 }, { "epoch": 9.269983686786297, "grad_norm": 0.0015647370601072907, "learning_rate": 0.000648702388100937, "loss": 0.1372, "num_input_tokens_seen": 122737984, "step": 56825 }, { "epoch": 9.270799347471453, "grad_norm": 0.006820981856435537, "learning_rate": 0.0006486344275529076, "loss": 0.1507, "num_input_tokens_seen": 122749088, "step": 56830 }, { "epoch": 9.271615008156607, "grad_norm": 0.0026828646659851074, "learning_rate": 0.0006485664639926128, "loss": 0.0131, "num_input_tokens_seen": 122760128, "step": 56835 }, { "epoch": 9.272430668841762, "grad_norm": 0.2718150019645691, "learning_rate": 0.0006484984974214303, "loss": 0.0515, "num_input_tokens_seen": 122770592, "step": 56840 }, { "epoch": 9.273246329526916, "grad_norm": 0.030774256214499474, "learning_rate": 0.0006484305278407373, "loss": 0.0379, "num_input_tokens_seen": 122781536, "step": 56845 }, { "epoch": 9.274061990212072, "grad_norm": 0.13015435636043549, "learning_rate": 0.0006483625552519114, "loss": 0.045, "num_input_tokens_seen": 122791680, "step": 56850 }, { "epoch": 9.274877650897226, "grad_norm": 0.03489632159471512, "learning_rate": 0.00064829457965633, "loss": 0.0624, "num_input_tokens_seen": 122801728, "step": 56855 }, { "epoch": 9.275693311582382, "grad_norm": 0.5021911263465881, "learning_rate": 0.0006482266010553707, "loss": 0.0752, "num_input_tokens_seen": 122812896, "step": 56860 }, { "epoch": 9.276508972267537, "grad_norm": 0.012664350681006908, "learning_rate": 0.0006481586194504117, "loss": 0.0092, "num_input_tokens_seen": 122823488, "step": 56865 }, { "epoch": 9.277324632952691, "grad_norm": 0.07986239343881607, "learning_rate": 0.00064809063484283, "loss": 0.0187, "num_input_tokens_seen": 122834752, "step": 56870 }, { "epoch": 9.278140293637847, "grad_norm": 0.0034950373228639364, "learning_rate": 0.0006480226472340039, "loss": 0.1122, "num_input_tokens_seen": 122846080, "step": 56875 }, { "epoch": 9.278955954323001, "grad_norm": 0.044152747839689255, "learning_rate": 0.0006479546566253109, "loss": 0.0141, "num_input_tokens_seen": 122856896, "step": 56880 }, { "epoch": 9.279771615008157, "grad_norm": 0.10844366997480392, "learning_rate": 0.0006478866630181293, "loss": 0.0171, "num_input_tokens_seen": 122867456, "step": 56885 }, { "epoch": 9.280587275693312, "grad_norm": 0.03289483115077019, "learning_rate": 0.0006478186664138366, "loss": 0.0796, "num_input_tokens_seen": 122878944, "step": 56890 }, { "epoch": 9.281402936378466, "grad_norm": 0.020551707595586777, "learning_rate": 0.0006477506668138113, "loss": 0.0608, "num_input_tokens_seen": 122889408, "step": 56895 }, { "epoch": 9.282218597063622, "grad_norm": 0.022187283262610435, "learning_rate": 0.0006476826642194313, "loss": 0.0563, "num_input_tokens_seen": 122899584, "step": 56900 }, { "epoch": 9.283034257748776, "grad_norm": 0.006548890843987465, "learning_rate": 0.0006476146586320747, "loss": 0.0859, "num_input_tokens_seen": 122910976, "step": 56905 }, { "epoch": 9.283849918433932, "grad_norm": 0.17593686282634735, "learning_rate": 0.0006475466500531198, "loss": 0.0161, "num_input_tokens_seen": 122922336, "step": 56910 }, { "epoch": 9.284665579119087, "grad_norm": 0.007709556259214878, "learning_rate": 0.0006474786384839448, "loss": 0.0368, "num_input_tokens_seen": 122934208, "step": 56915 }, { "epoch": 9.285481239804241, "grad_norm": 0.03171587735414505, "learning_rate": 0.0006474106239259282, "loss": 0.054, "num_input_tokens_seen": 122945216, "step": 56920 }, { "epoch": 9.286296900489397, "grad_norm": 0.01960030384361744, "learning_rate": 0.0006473426063804483, "loss": 0.007, "num_input_tokens_seen": 122955904, "step": 56925 }, { "epoch": 9.28711256117455, "grad_norm": 0.0011323639191687107, "learning_rate": 0.0006472745858488835, "loss": 0.0624, "num_input_tokens_seen": 122967040, "step": 56930 }, { "epoch": 9.287928221859707, "grad_norm": 0.012678350321948528, "learning_rate": 0.0006472065623326123, "loss": 0.008, "num_input_tokens_seen": 122978080, "step": 56935 }, { "epoch": 9.28874388254486, "grad_norm": 0.0027061044238507748, "learning_rate": 0.0006471385358330135, "loss": 0.1473, "num_input_tokens_seen": 122988928, "step": 56940 }, { "epoch": 9.289559543230016, "grad_norm": 0.004366982262581587, "learning_rate": 0.0006470705063514656, "loss": 0.0111, "num_input_tokens_seen": 123000192, "step": 56945 }, { "epoch": 9.290375203915172, "grad_norm": 0.0069068376906216145, "learning_rate": 0.0006470024738893473, "loss": 0.0357, "num_input_tokens_seen": 123011264, "step": 56950 }, { "epoch": 9.291190864600326, "grad_norm": 0.03688400238752365, "learning_rate": 0.0006469344384480374, "loss": 0.0318, "num_input_tokens_seen": 123022336, "step": 56955 }, { "epoch": 9.292006525285482, "grad_norm": 0.035516850650310516, "learning_rate": 0.0006468664000289147, "loss": 0.0397, "num_input_tokens_seen": 123034304, "step": 56960 }, { "epoch": 9.292822185970635, "grad_norm": 0.01059263851493597, "learning_rate": 0.000646798358633358, "loss": 0.1205, "num_input_tokens_seen": 123044224, "step": 56965 }, { "epoch": 9.293637846655791, "grad_norm": 0.1201028898358345, "learning_rate": 0.0006467303142627465, "loss": 0.104, "num_input_tokens_seen": 123054624, "step": 56970 }, { "epoch": 9.294453507340947, "grad_norm": 0.3978259861469269, "learning_rate": 0.0006466622669184589, "loss": 0.0853, "num_input_tokens_seen": 123066368, "step": 56975 }, { "epoch": 9.2952691680261, "grad_norm": 0.007530895993113518, "learning_rate": 0.0006465942166018745, "loss": 0.1685, "num_input_tokens_seen": 123076224, "step": 56980 }, { "epoch": 9.296084828711257, "grad_norm": 0.0026840101927518845, "learning_rate": 0.0006465261633143722, "loss": 0.0423, "num_input_tokens_seen": 123088256, "step": 56985 }, { "epoch": 9.29690048939641, "grad_norm": 0.03933865576982498, "learning_rate": 0.0006464581070573315, "loss": 0.0176, "num_input_tokens_seen": 123098400, "step": 56990 }, { "epoch": 9.297716150081566, "grad_norm": 0.18984612822532654, "learning_rate": 0.0006463900478321314, "loss": 0.0416, "num_input_tokens_seen": 123108928, "step": 56995 }, { "epoch": 9.298531810766722, "grad_norm": 0.047432951629161835, "learning_rate": 0.0006463219856401513, "loss": 0.0538, "num_input_tokens_seen": 123120160, "step": 57000 }, { "epoch": 9.299347471451876, "grad_norm": 0.005672098137438297, "learning_rate": 0.0006462539204827705, "loss": 0.0088, "num_input_tokens_seen": 123129440, "step": 57005 }, { "epoch": 9.300163132137031, "grad_norm": 0.011451846919953823, "learning_rate": 0.0006461858523613684, "loss": 0.0286, "num_input_tokens_seen": 123139648, "step": 57010 }, { "epoch": 9.300978792822185, "grad_norm": 0.05032016709446907, "learning_rate": 0.0006461177812773246, "loss": 0.1426, "num_input_tokens_seen": 123150304, "step": 57015 }, { "epoch": 9.301794453507341, "grad_norm": 0.009700275957584381, "learning_rate": 0.0006460497072320186, "loss": 0.0465, "num_input_tokens_seen": 123160768, "step": 57020 }, { "epoch": 9.302610114192497, "grad_norm": 0.03388618305325508, "learning_rate": 0.00064598163022683, "loss": 0.0154, "num_input_tokens_seen": 123170336, "step": 57025 }, { "epoch": 9.30342577487765, "grad_norm": 0.0011625054758042097, "learning_rate": 0.0006459135502631386, "loss": 0.0869, "num_input_tokens_seen": 123181216, "step": 57030 }, { "epoch": 9.304241435562806, "grad_norm": 0.02638522908091545, "learning_rate": 0.0006458454673423238, "loss": 0.0566, "num_input_tokens_seen": 123192384, "step": 57035 }, { "epoch": 9.30505709624796, "grad_norm": 0.21478807926177979, "learning_rate": 0.0006457773814657657, "loss": 0.1093, "num_input_tokens_seen": 123203424, "step": 57040 }, { "epoch": 9.305872756933116, "grad_norm": 0.04131392389535904, "learning_rate": 0.000645709292634844, "loss": 0.0413, "num_input_tokens_seen": 123213152, "step": 57045 }, { "epoch": 9.30668841761827, "grad_norm": 0.04098490625619888, "learning_rate": 0.0006456412008509387, "loss": 0.0425, "num_input_tokens_seen": 123223616, "step": 57050 }, { "epoch": 9.307504078303426, "grad_norm": 0.24334684014320374, "learning_rate": 0.0006455731061154297, "loss": 0.0385, "num_input_tokens_seen": 123233632, "step": 57055 }, { "epoch": 9.308319738988581, "grad_norm": 0.03460830822587013, "learning_rate": 0.0006455050084296969, "loss": 0.0976, "num_input_tokens_seen": 123244608, "step": 57060 }, { "epoch": 9.309135399673735, "grad_norm": 0.06306884437799454, "learning_rate": 0.0006454369077951206, "loss": 0.0363, "num_input_tokens_seen": 123253952, "step": 57065 }, { "epoch": 9.309951060358891, "grad_norm": 0.0017151982756331563, "learning_rate": 0.0006453688042130808, "loss": 0.0099, "num_input_tokens_seen": 123263968, "step": 57070 }, { "epoch": 9.310766721044045, "grad_norm": 0.013023875653743744, "learning_rate": 0.0006453006976849578, "loss": 0.012, "num_input_tokens_seen": 123273312, "step": 57075 }, { "epoch": 9.3115823817292, "grad_norm": 0.5808905363082886, "learning_rate": 0.0006452325882121319, "loss": 0.1844, "num_input_tokens_seen": 123284864, "step": 57080 }, { "epoch": 9.312398042414356, "grad_norm": 0.020586438477039337, "learning_rate": 0.0006451644757959834, "loss": 0.0135, "num_input_tokens_seen": 123295840, "step": 57085 }, { "epoch": 9.31321370309951, "grad_norm": 0.007403132040053606, "learning_rate": 0.0006450963604378926, "loss": 0.0573, "num_input_tokens_seen": 123304992, "step": 57090 }, { "epoch": 9.314029363784666, "grad_norm": 0.05940420553088188, "learning_rate": 0.0006450282421392399, "loss": 0.0852, "num_input_tokens_seen": 123314720, "step": 57095 }, { "epoch": 9.31484502446982, "grad_norm": 0.15579479932785034, "learning_rate": 0.0006449601209014059, "loss": 0.0413, "num_input_tokens_seen": 123323168, "step": 57100 }, { "epoch": 9.315660685154976, "grad_norm": 0.29078125953674316, "learning_rate": 0.0006448919967257711, "loss": 0.0435, "num_input_tokens_seen": 123333952, "step": 57105 }, { "epoch": 9.31647634584013, "grad_norm": 0.008018561638891697, "learning_rate": 0.0006448238696137163, "loss": 0.0134, "num_input_tokens_seen": 123344928, "step": 57110 }, { "epoch": 9.317292006525285, "grad_norm": 0.0108210863545537, "learning_rate": 0.0006447557395666221, "loss": 0.0491, "num_input_tokens_seen": 123354720, "step": 57115 }, { "epoch": 9.318107667210441, "grad_norm": 0.21279096603393555, "learning_rate": 0.0006446876065858691, "loss": 0.1351, "num_input_tokens_seen": 123366272, "step": 57120 }, { "epoch": 9.318923327895595, "grad_norm": 0.2715965211391449, "learning_rate": 0.0006446194706728383, "loss": 0.0278, "num_input_tokens_seen": 123377472, "step": 57125 }, { "epoch": 9.31973898858075, "grad_norm": 0.0017662273021414876, "learning_rate": 0.0006445513318289104, "loss": 0.048, "num_input_tokens_seen": 123389280, "step": 57130 }, { "epoch": 9.320554649265905, "grad_norm": 0.01154270675033331, "learning_rate": 0.0006444831900554664, "loss": 0.0076, "num_input_tokens_seen": 123400352, "step": 57135 }, { "epoch": 9.32137030995106, "grad_norm": 0.004029486328363419, "learning_rate": 0.0006444150453538873, "loss": 0.036, "num_input_tokens_seen": 123411616, "step": 57140 }, { "epoch": 9.322185970636216, "grad_norm": 0.01075716968625784, "learning_rate": 0.000644346897725554, "loss": 0.0088, "num_input_tokens_seen": 123422304, "step": 57145 }, { "epoch": 9.32300163132137, "grad_norm": 0.003960131201893091, "learning_rate": 0.0006442787471718479, "loss": 0.0067, "num_input_tokens_seen": 123433024, "step": 57150 }, { "epoch": 9.323817292006526, "grad_norm": 0.005982367787510157, "learning_rate": 0.0006442105936941498, "loss": 0.0425, "num_input_tokens_seen": 123444416, "step": 57155 }, { "epoch": 9.32463295269168, "grad_norm": 0.038486577570438385, "learning_rate": 0.000644142437293841, "loss": 0.1482, "num_input_tokens_seen": 123454880, "step": 57160 }, { "epoch": 9.325448613376835, "grad_norm": 0.11881979554891586, "learning_rate": 0.000644074277972303, "loss": 0.0463, "num_input_tokens_seen": 123465472, "step": 57165 }, { "epoch": 9.326264274061991, "grad_norm": 0.010927199386060238, "learning_rate": 0.000644006115730917, "loss": 0.0554, "num_input_tokens_seen": 123477120, "step": 57170 }, { "epoch": 9.327079934747145, "grad_norm": 0.01006519515067339, "learning_rate": 0.000643937950571064, "loss": 0.0619, "num_input_tokens_seen": 123487296, "step": 57175 }, { "epoch": 9.3278955954323, "grad_norm": 0.008437362499535084, "learning_rate": 0.0006438697824941263, "loss": 0.0606, "num_input_tokens_seen": 123497440, "step": 57180 }, { "epoch": 9.328711256117455, "grad_norm": 0.011827422305941582, "learning_rate": 0.0006438016115014848, "loss": 0.009, "num_input_tokens_seen": 123508384, "step": 57185 }, { "epoch": 9.32952691680261, "grad_norm": 0.0015898183919489384, "learning_rate": 0.0006437334375945212, "loss": 0.0186, "num_input_tokens_seen": 123519296, "step": 57190 }, { "epoch": 9.330342577487766, "grad_norm": 0.019101936370134354, "learning_rate": 0.0006436652607746171, "loss": 0.0432, "num_input_tokens_seen": 123532256, "step": 57195 }, { "epoch": 9.33115823817292, "grad_norm": 0.0033209563698619604, "learning_rate": 0.0006435970810431544, "loss": 0.0032, "num_input_tokens_seen": 123542112, "step": 57200 }, { "epoch": 9.331973898858076, "grad_norm": 0.029729114845395088, "learning_rate": 0.0006435288984015146, "loss": 0.0626, "num_input_tokens_seen": 123552352, "step": 57205 }, { "epoch": 9.33278955954323, "grad_norm": 0.36488205194473267, "learning_rate": 0.0006434607128510796, "loss": 0.1586, "num_input_tokens_seen": 123562880, "step": 57210 }, { "epoch": 9.333605220228385, "grad_norm": 0.3097657561302185, "learning_rate": 0.0006433925243932312, "loss": 0.1643, "num_input_tokens_seen": 123573536, "step": 57215 }, { "epoch": 9.33442088091354, "grad_norm": 0.008041603490710258, "learning_rate": 0.0006433243330293514, "loss": 0.0461, "num_input_tokens_seen": 123584416, "step": 57220 }, { "epoch": 9.335236541598695, "grad_norm": 0.003848917316645384, "learning_rate": 0.0006432561387608222, "loss": 0.0302, "num_input_tokens_seen": 123596352, "step": 57225 }, { "epoch": 9.33605220228385, "grad_norm": 0.004652928560972214, "learning_rate": 0.0006431879415890256, "loss": 0.0794, "num_input_tokens_seen": 123607008, "step": 57230 }, { "epoch": 9.336867862969005, "grad_norm": 0.04283025488257408, "learning_rate": 0.0006431197415153437, "loss": 0.1123, "num_input_tokens_seen": 123617632, "step": 57235 }, { "epoch": 9.33768352365416, "grad_norm": 0.01733693666756153, "learning_rate": 0.0006430515385411588, "loss": 0.0486, "num_input_tokens_seen": 123628192, "step": 57240 }, { "epoch": 9.338499184339314, "grad_norm": 0.2398867905139923, "learning_rate": 0.0006429833326678529, "loss": 0.0316, "num_input_tokens_seen": 123638304, "step": 57245 }, { "epoch": 9.33931484502447, "grad_norm": 0.331961452960968, "learning_rate": 0.0006429151238968083, "loss": 0.0822, "num_input_tokens_seen": 123649792, "step": 57250 }, { "epoch": 9.340130505709626, "grad_norm": 0.1437341868877411, "learning_rate": 0.0006428469122294075, "loss": 0.0107, "num_input_tokens_seen": 123661024, "step": 57255 }, { "epoch": 9.34094616639478, "grad_norm": 0.09274806827306747, "learning_rate": 0.0006427786976670328, "loss": 0.0151, "num_input_tokens_seen": 123672672, "step": 57260 }, { "epoch": 9.341761827079935, "grad_norm": 0.0013064906233921647, "learning_rate": 0.0006427104802110667, "loss": 0.0185, "num_input_tokens_seen": 123683296, "step": 57265 }, { "epoch": 9.34257748776509, "grad_norm": 0.4128345549106598, "learning_rate": 0.0006426422598628916, "loss": 0.094, "num_input_tokens_seen": 123694848, "step": 57270 }, { "epoch": 9.343393148450245, "grad_norm": 0.6230624318122864, "learning_rate": 0.0006425740366238903, "loss": 0.1, "num_input_tokens_seen": 123705760, "step": 57275 }, { "epoch": 9.3442088091354, "grad_norm": 0.01017848402261734, "learning_rate": 0.0006425058104954451, "loss": 0.018, "num_input_tokens_seen": 123716000, "step": 57280 }, { "epoch": 9.345024469820554, "grad_norm": 0.18681271374225616, "learning_rate": 0.0006424375814789388, "loss": 0.1853, "num_input_tokens_seen": 123727616, "step": 57285 }, { "epoch": 9.34584013050571, "grad_norm": 0.419785737991333, "learning_rate": 0.0006423693495757545, "loss": 0.1258, "num_input_tokens_seen": 123738400, "step": 57290 }, { "epoch": 9.346655791190864, "grad_norm": 0.005531011614948511, "learning_rate": 0.0006423011147872745, "loss": 0.0515, "num_input_tokens_seen": 123747616, "step": 57295 }, { "epoch": 9.34747145187602, "grad_norm": 0.04268835484981537, "learning_rate": 0.000642232877114882, "loss": 0.0256, "num_input_tokens_seen": 123759424, "step": 57300 }, { "epoch": 9.348287112561174, "grad_norm": 0.012237378396093845, "learning_rate": 0.0006421646365599597, "loss": 0.0214, "num_input_tokens_seen": 123771072, "step": 57305 }, { "epoch": 9.34910277324633, "grad_norm": 0.05506362393498421, "learning_rate": 0.0006420963931238907, "loss": 0.0551, "num_input_tokens_seen": 123782816, "step": 57310 }, { "epoch": 9.349918433931485, "grad_norm": 0.0642736479640007, "learning_rate": 0.0006420281468080582, "loss": 0.0235, "num_input_tokens_seen": 123792416, "step": 57315 }, { "epoch": 9.350734094616639, "grad_norm": 0.2579999566078186, "learning_rate": 0.0006419598976138451, "loss": 0.0431, "num_input_tokens_seen": 123803680, "step": 57320 }, { "epoch": 9.351549755301795, "grad_norm": 0.009822947904467583, "learning_rate": 0.0006418916455426344, "loss": 0.0166, "num_input_tokens_seen": 123814912, "step": 57325 }, { "epoch": 9.352365415986949, "grad_norm": 0.0744229406118393, "learning_rate": 0.0006418233905958097, "loss": 0.0148, "num_input_tokens_seen": 123825856, "step": 57330 }, { "epoch": 9.353181076672104, "grad_norm": 0.012941932305693626, "learning_rate": 0.000641755132774754, "loss": 0.1114, "num_input_tokens_seen": 123836032, "step": 57335 }, { "epoch": 9.35399673735726, "grad_norm": 0.027866492047905922, "learning_rate": 0.0006416868720808507, "loss": 0.181, "num_input_tokens_seen": 123846080, "step": 57340 }, { "epoch": 9.354812398042414, "grad_norm": 0.007846455089747906, "learning_rate": 0.0006416186085154833, "loss": 0.1681, "num_input_tokens_seen": 123856928, "step": 57345 }, { "epoch": 9.35562805872757, "grad_norm": 0.029931560158729553, "learning_rate": 0.0006415503420800349, "loss": 0.1149, "num_input_tokens_seen": 123867072, "step": 57350 }, { "epoch": 9.356443719412724, "grad_norm": 0.036976058036088943, "learning_rate": 0.0006414820727758894, "loss": 0.1055, "num_input_tokens_seen": 123877280, "step": 57355 }, { "epoch": 9.35725938009788, "grad_norm": 0.08427825570106506, "learning_rate": 0.0006414138006044303, "loss": 0.0165, "num_input_tokens_seen": 123887040, "step": 57360 }, { "epoch": 9.358075040783035, "grad_norm": 0.009288780391216278, "learning_rate": 0.0006413455255670409, "loss": 0.0601, "num_input_tokens_seen": 123897632, "step": 57365 }, { "epoch": 9.358890701468189, "grad_norm": 0.007328356616199017, "learning_rate": 0.0006412772476651053, "loss": 0.0468, "num_input_tokens_seen": 123909024, "step": 57370 }, { "epoch": 9.359706362153345, "grad_norm": 0.012570452876389027, "learning_rate": 0.0006412089669000071, "loss": 0.0284, "num_input_tokens_seen": 123920864, "step": 57375 }, { "epoch": 9.360522022838499, "grad_norm": 0.01703028939664364, "learning_rate": 0.0006411406832731299, "loss": 0.0177, "num_input_tokens_seen": 123932224, "step": 57380 }, { "epoch": 9.361337683523654, "grad_norm": 0.01287777628749609, "learning_rate": 0.0006410723967858577, "loss": 0.0599, "num_input_tokens_seen": 123944064, "step": 57385 }, { "epoch": 9.362153344208808, "grad_norm": 0.10792047530412674, "learning_rate": 0.0006410041074395744, "loss": 0.0751, "num_input_tokens_seen": 123955168, "step": 57390 }, { "epoch": 9.362969004893964, "grad_norm": 0.2537194788455963, "learning_rate": 0.0006409358152356642, "loss": 0.1646, "num_input_tokens_seen": 123965920, "step": 57395 }, { "epoch": 9.36378466557912, "grad_norm": 0.04030577838420868, "learning_rate": 0.0006408675201755107, "loss": 0.0461, "num_input_tokens_seen": 123978048, "step": 57400 }, { "epoch": 9.364600326264274, "grad_norm": 0.7500656247138977, "learning_rate": 0.0006407992222604983, "loss": 0.0785, "num_input_tokens_seen": 123989216, "step": 57405 }, { "epoch": 9.36541598694943, "grad_norm": 0.0026482176035642624, "learning_rate": 0.000640730921492011, "loss": 0.0115, "num_input_tokens_seen": 124001408, "step": 57410 }, { "epoch": 9.366231647634583, "grad_norm": 0.09167847037315369, "learning_rate": 0.000640662617871433, "loss": 0.1328, "num_input_tokens_seen": 124011904, "step": 57415 }, { "epoch": 9.367047308319739, "grad_norm": 0.023537907749414444, "learning_rate": 0.0006405943114001486, "loss": 0.0702, "num_input_tokens_seen": 124023040, "step": 57420 }, { "epoch": 9.367862969004895, "grad_norm": 0.722507655620575, "learning_rate": 0.0006405260020795421, "loss": 0.0655, "num_input_tokens_seen": 124033728, "step": 57425 }, { "epoch": 9.368678629690049, "grad_norm": 0.07008686661720276, "learning_rate": 0.0006404576899109981, "loss": 0.0191, "num_input_tokens_seen": 124043840, "step": 57430 }, { "epoch": 9.369494290375204, "grad_norm": 0.392330527305603, "learning_rate": 0.0006403893748959007, "loss": 0.0943, "num_input_tokens_seen": 124055040, "step": 57435 }, { "epoch": 9.370309951060358, "grad_norm": 0.008758448995649815, "learning_rate": 0.0006403210570356346, "loss": 0.1311, "num_input_tokens_seen": 124065056, "step": 57440 }, { "epoch": 9.371125611745514, "grad_norm": 0.0045740483328700066, "learning_rate": 0.0006402527363315843, "loss": 0.0355, "num_input_tokens_seen": 124076224, "step": 57445 }, { "epoch": 9.37194127243067, "grad_norm": 0.002997696865350008, "learning_rate": 0.0006401844127851342, "loss": 0.0072, "num_input_tokens_seen": 124087424, "step": 57450 }, { "epoch": 9.372756933115824, "grad_norm": 0.001006297068670392, "learning_rate": 0.0006401160863976691, "loss": 0.0045, "num_input_tokens_seen": 124098144, "step": 57455 }, { "epoch": 9.37357259380098, "grad_norm": 0.30021172761917114, "learning_rate": 0.000640047757170574, "loss": 0.0892, "num_input_tokens_seen": 124109184, "step": 57460 }, { "epoch": 9.374388254486133, "grad_norm": 0.21571239829063416, "learning_rate": 0.0006399794251052333, "loss": 0.0363, "num_input_tokens_seen": 124120032, "step": 57465 }, { "epoch": 9.375203915171289, "grad_norm": 0.21111957728862762, "learning_rate": 0.000639911090203032, "loss": 0.028, "num_input_tokens_seen": 124130752, "step": 57470 }, { "epoch": 9.376019575856443, "grad_norm": 0.4067305028438568, "learning_rate": 0.000639842752465355, "loss": 0.0688, "num_input_tokens_seen": 124140896, "step": 57475 }, { "epoch": 9.376835236541599, "grad_norm": 0.1772114485502243, "learning_rate": 0.0006397744118935871, "loss": 0.0654, "num_input_tokens_seen": 124152000, "step": 57480 }, { "epoch": 9.377650897226754, "grad_norm": 0.057778965681791306, "learning_rate": 0.0006397060684891136, "loss": 0.0127, "num_input_tokens_seen": 124163296, "step": 57485 }, { "epoch": 9.378466557911908, "grad_norm": 0.009378801099956036, "learning_rate": 0.0006396377222533192, "loss": 0.0094, "num_input_tokens_seen": 124173824, "step": 57490 }, { "epoch": 9.379282218597064, "grad_norm": 0.004530887119472027, "learning_rate": 0.0006395693731875892, "loss": 0.0342, "num_input_tokens_seen": 124184384, "step": 57495 }, { "epoch": 9.380097879282218, "grad_norm": 0.0177314355969429, "learning_rate": 0.000639501021293309, "loss": 0.0721, "num_input_tokens_seen": 124196352, "step": 57500 }, { "epoch": 9.380913539967374, "grad_norm": 0.10000865161418915, "learning_rate": 0.0006394326665718635, "loss": 0.0947, "num_input_tokens_seen": 124206720, "step": 57505 }, { "epoch": 9.38172920065253, "grad_norm": 0.026358895003795624, "learning_rate": 0.0006393643090246381, "loss": 0.1997, "num_input_tokens_seen": 124217856, "step": 57510 }, { "epoch": 9.382544861337683, "grad_norm": 0.01959538832306862, "learning_rate": 0.0006392959486530183, "loss": 0.0319, "num_input_tokens_seen": 124227616, "step": 57515 }, { "epoch": 9.383360522022839, "grad_norm": 0.1922665238380432, "learning_rate": 0.0006392275854583894, "loss": 0.0153, "num_input_tokens_seen": 124236800, "step": 57520 }, { "epoch": 9.384176182707993, "grad_norm": 0.0030915914103388786, "learning_rate": 0.0006391592194421367, "loss": 0.0529, "num_input_tokens_seen": 124247456, "step": 57525 }, { "epoch": 9.384991843393149, "grad_norm": 0.02654971182346344, "learning_rate": 0.0006390908506056461, "loss": 0.0673, "num_input_tokens_seen": 124258752, "step": 57530 }, { "epoch": 9.385807504078304, "grad_norm": 0.008271587081253529, "learning_rate": 0.0006390224789503028, "loss": 0.0834, "num_input_tokens_seen": 124269856, "step": 57535 }, { "epoch": 9.386623164763458, "grad_norm": 0.31094446778297424, "learning_rate": 0.0006389541044774927, "loss": 0.0556, "num_input_tokens_seen": 124280640, "step": 57540 }, { "epoch": 9.387438825448614, "grad_norm": 0.20158900320529938, "learning_rate": 0.0006388857271886013, "loss": 0.0321, "num_input_tokens_seen": 124290976, "step": 57545 }, { "epoch": 9.388254486133768, "grad_norm": 0.26112285256385803, "learning_rate": 0.0006388173470850144, "loss": 0.0209, "num_input_tokens_seen": 124302752, "step": 57550 }, { "epoch": 9.389070146818923, "grad_norm": 0.003522562561556697, "learning_rate": 0.0006387489641681181, "loss": 0.0037, "num_input_tokens_seen": 124314208, "step": 57555 }, { "epoch": 9.38988580750408, "grad_norm": 0.08223313838243484, "learning_rate": 0.0006386805784392978, "loss": 0.1029, "num_input_tokens_seen": 124324992, "step": 57560 }, { "epoch": 9.390701468189233, "grad_norm": 0.09716961532831192, "learning_rate": 0.0006386121898999397, "loss": 0.0255, "num_input_tokens_seen": 124335264, "step": 57565 }, { "epoch": 9.391517128874389, "grad_norm": 0.1217547208070755, "learning_rate": 0.0006385437985514297, "loss": 0.1149, "num_input_tokens_seen": 124345952, "step": 57570 }, { "epoch": 9.392332789559543, "grad_norm": 0.005859457887709141, "learning_rate": 0.000638475404395154, "loss": 0.0781, "num_input_tokens_seen": 124357152, "step": 57575 }, { "epoch": 9.393148450244698, "grad_norm": 0.02815798856317997, "learning_rate": 0.0006384070074324984, "loss": 0.0335, "num_input_tokens_seen": 124368096, "step": 57580 }, { "epoch": 9.393964110929852, "grad_norm": 0.3322744369506836, "learning_rate": 0.0006383386076648494, "loss": 0.1213, "num_input_tokens_seen": 124378272, "step": 57585 }, { "epoch": 9.394779771615008, "grad_norm": 0.09620869159698486, "learning_rate": 0.0006382702050935929, "loss": 0.0363, "num_input_tokens_seen": 124389856, "step": 57590 }, { "epoch": 9.395595432300164, "grad_norm": 0.011701155453920364, "learning_rate": 0.0006382017997201152, "loss": 0.0438, "num_input_tokens_seen": 124402240, "step": 57595 }, { "epoch": 9.396411092985318, "grad_norm": 0.007712031714618206, "learning_rate": 0.000638133391545803, "loss": 0.0098, "num_input_tokens_seen": 124413344, "step": 57600 }, { "epoch": 9.397226753670473, "grad_norm": 0.1123395785689354, "learning_rate": 0.000638064980572042, "loss": 0.0149, "num_input_tokens_seen": 124425600, "step": 57605 }, { "epoch": 9.398042414355627, "grad_norm": 0.017627792432904243, "learning_rate": 0.0006379965668002192, "loss": 0.1074, "num_input_tokens_seen": 124435200, "step": 57610 }, { "epoch": 9.398858075040783, "grad_norm": 0.0041339038871228695, "learning_rate": 0.0006379281502317209, "loss": 0.0121, "num_input_tokens_seen": 124445888, "step": 57615 }, { "epoch": 9.399673735725939, "grad_norm": 0.07806552201509476, "learning_rate": 0.0006378597308679338, "loss": 0.0267, "num_input_tokens_seen": 124457696, "step": 57620 }, { "epoch": 9.400489396411093, "grad_norm": 0.016718747094273567, "learning_rate": 0.0006377913087102443, "loss": 0.0852, "num_input_tokens_seen": 124469312, "step": 57625 }, { "epoch": 9.401305057096248, "grad_norm": 0.016923511400818825, "learning_rate": 0.0006377228837600391, "loss": 0.1711, "num_input_tokens_seen": 124479584, "step": 57630 }, { "epoch": 9.402120717781402, "grad_norm": 0.002410069340839982, "learning_rate": 0.0006376544560187049, "loss": 0.0281, "num_input_tokens_seen": 124490432, "step": 57635 }, { "epoch": 9.402936378466558, "grad_norm": 0.00935316551476717, "learning_rate": 0.0006375860254876286, "loss": 0.0536, "num_input_tokens_seen": 124501312, "step": 57640 }, { "epoch": 9.403752039151712, "grad_norm": 0.05050384998321533, "learning_rate": 0.0006375175921681968, "loss": 0.0437, "num_input_tokens_seen": 124512256, "step": 57645 }, { "epoch": 9.404567699836868, "grad_norm": 0.002771280240267515, "learning_rate": 0.0006374491560617967, "loss": 0.2557, "num_input_tokens_seen": 124523328, "step": 57650 }, { "epoch": 9.405383360522023, "grad_norm": 0.01832154579460621, "learning_rate": 0.0006373807171698151, "loss": 0.1944, "num_input_tokens_seen": 124532832, "step": 57655 }, { "epoch": 9.406199021207177, "grad_norm": 0.22622382640838623, "learning_rate": 0.0006373122754936389, "loss": 0.1608, "num_input_tokens_seen": 124544288, "step": 57660 }, { "epoch": 9.407014681892333, "grad_norm": 0.1542602777481079, "learning_rate": 0.0006372438310346553, "loss": 0.0357, "num_input_tokens_seen": 124553952, "step": 57665 }, { "epoch": 9.407830342577487, "grad_norm": 0.1983586847782135, "learning_rate": 0.0006371753837942513, "loss": 0.0626, "num_input_tokens_seen": 124565344, "step": 57670 }, { "epoch": 9.408646003262643, "grad_norm": 0.370592325925827, "learning_rate": 0.0006371069337738142, "loss": 0.0444, "num_input_tokens_seen": 124576032, "step": 57675 }, { "epoch": 9.409461663947798, "grad_norm": 0.004081313032656908, "learning_rate": 0.000637038480974731, "loss": 0.0331, "num_input_tokens_seen": 124586976, "step": 57680 }, { "epoch": 9.410277324632952, "grad_norm": 0.019369378685951233, "learning_rate": 0.0006369700253983893, "loss": 0.1193, "num_input_tokens_seen": 124598368, "step": 57685 }, { "epoch": 9.411092985318108, "grad_norm": 0.027157841250300407, "learning_rate": 0.0006369015670461762, "loss": 0.0228, "num_input_tokens_seen": 124608832, "step": 57690 }, { "epoch": 9.411908646003262, "grad_norm": 0.0030664519872516394, "learning_rate": 0.0006368331059194792, "loss": 0.0632, "num_input_tokens_seen": 124620032, "step": 57695 }, { "epoch": 9.412724306688418, "grad_norm": 0.47488272190093994, "learning_rate": 0.0006367646420196857, "loss": 0.1051, "num_input_tokens_seen": 124630784, "step": 57700 }, { "epoch": 9.413539967373573, "grad_norm": 0.007694408297538757, "learning_rate": 0.0006366961753481832, "loss": 0.0419, "num_input_tokens_seen": 124641312, "step": 57705 }, { "epoch": 9.414355628058727, "grad_norm": 0.014576110988855362, "learning_rate": 0.0006366277059063594, "loss": 0.0442, "num_input_tokens_seen": 124652192, "step": 57710 }, { "epoch": 9.415171288743883, "grad_norm": 0.35242435336112976, "learning_rate": 0.0006365592336956017, "loss": 0.1291, "num_input_tokens_seen": 124663168, "step": 57715 }, { "epoch": 9.415986949429037, "grad_norm": 0.023442458361387253, "learning_rate": 0.0006364907587172978, "loss": 0.0275, "num_input_tokens_seen": 124672832, "step": 57720 }, { "epoch": 9.416802610114193, "grad_norm": 0.08906247466802597, "learning_rate": 0.0006364222809728358, "loss": 0.0782, "num_input_tokens_seen": 124684064, "step": 57725 }, { "epoch": 9.417618270799348, "grad_norm": 0.21500514447689056, "learning_rate": 0.0006363538004636032, "loss": 0.0316, "num_input_tokens_seen": 124695040, "step": 57730 }, { "epoch": 9.418433931484502, "grad_norm": 0.008945376612246037, "learning_rate": 0.0006362853171909876, "loss": 0.1661, "num_input_tokens_seen": 124705792, "step": 57735 }, { "epoch": 9.419249592169658, "grad_norm": 0.09412268549203873, "learning_rate": 0.0006362168311563773, "loss": 0.0496, "num_input_tokens_seen": 124716704, "step": 57740 }, { "epoch": 9.420065252854812, "grad_norm": 0.05867978557944298, "learning_rate": 0.00063614834236116, "loss": 0.0418, "num_input_tokens_seen": 124727936, "step": 57745 }, { "epoch": 9.420880913539968, "grad_norm": 0.32948651909828186, "learning_rate": 0.000636079850806724, "loss": 0.0881, "num_input_tokens_seen": 124738112, "step": 57750 }, { "epoch": 9.421696574225122, "grad_norm": 0.007615845184773207, "learning_rate": 0.0006360113564944571, "loss": 0.1154, "num_input_tokens_seen": 124748192, "step": 57755 }, { "epoch": 9.422512234910277, "grad_norm": 0.02363566681742668, "learning_rate": 0.0006359428594257476, "loss": 0.0052, "num_input_tokens_seen": 124758592, "step": 57760 }, { "epoch": 9.423327895595433, "grad_norm": 0.33245760202407837, "learning_rate": 0.0006358743596019836, "loss": 0.1601, "num_input_tokens_seen": 124768128, "step": 57765 }, { "epoch": 9.424143556280587, "grad_norm": 0.2626391053199768, "learning_rate": 0.0006358058570245532, "loss": 0.1659, "num_input_tokens_seen": 124777984, "step": 57770 }, { "epoch": 9.424959216965743, "grad_norm": 0.004645919892936945, "learning_rate": 0.0006357373516948451, "loss": 0.0493, "num_input_tokens_seen": 124788960, "step": 57775 }, { "epoch": 9.425774877650896, "grad_norm": 0.045954253524541855, "learning_rate": 0.0006356688436142471, "loss": 0.0464, "num_input_tokens_seen": 124799968, "step": 57780 }, { "epoch": 9.426590538336052, "grad_norm": 0.0628044605255127, "learning_rate": 0.000635600332784148, "loss": 0.0164, "num_input_tokens_seen": 124809888, "step": 57785 }, { "epoch": 9.427406199021208, "grad_norm": 0.003180962521582842, "learning_rate": 0.0006355318192059361, "loss": 0.0241, "num_input_tokens_seen": 124820544, "step": 57790 }, { "epoch": 9.428221859706362, "grad_norm": 0.005581281613558531, "learning_rate": 0.0006354633028809999, "loss": 0.0133, "num_input_tokens_seen": 124830752, "step": 57795 }, { "epoch": 9.429037520391518, "grad_norm": 0.014229382388293743, "learning_rate": 0.000635394783810728, "loss": 0.0255, "num_input_tokens_seen": 124842016, "step": 57800 }, { "epoch": 9.429853181076671, "grad_norm": 0.01770959608256817, "learning_rate": 0.0006353262619965091, "loss": 0.0568, "num_input_tokens_seen": 124854208, "step": 57805 }, { "epoch": 9.430668841761827, "grad_norm": 0.003146283095702529, "learning_rate": 0.000635257737439732, "loss": 0.0476, "num_input_tokens_seen": 124865600, "step": 57810 }, { "epoch": 9.431484502446983, "grad_norm": 0.010914292186498642, "learning_rate": 0.0006351892101417849, "loss": 0.0275, "num_input_tokens_seen": 124876736, "step": 57815 }, { "epoch": 9.432300163132137, "grad_norm": 0.03408697247505188, "learning_rate": 0.0006351206801040571, "loss": 0.112, "num_input_tokens_seen": 124887456, "step": 57820 }, { "epoch": 9.433115823817293, "grad_norm": 0.01934865489602089, "learning_rate": 0.0006350521473279374, "loss": 0.1592, "num_input_tokens_seen": 124899072, "step": 57825 }, { "epoch": 9.433931484502446, "grad_norm": 0.12198976427316666, "learning_rate": 0.0006349836118148146, "loss": 0.0581, "num_input_tokens_seen": 124910048, "step": 57830 }, { "epoch": 9.434747145187602, "grad_norm": 0.0035420190542936325, "learning_rate": 0.0006349150735660776, "loss": 0.0177, "num_input_tokens_seen": 124920736, "step": 57835 }, { "epoch": 9.435562805872756, "grad_norm": 0.006238948553800583, "learning_rate": 0.0006348465325831155, "loss": 0.012, "num_input_tokens_seen": 124931872, "step": 57840 }, { "epoch": 9.436378466557912, "grad_norm": 0.15444837510585785, "learning_rate": 0.0006347779888673175, "loss": 0.0754, "num_input_tokens_seen": 124941728, "step": 57845 }, { "epoch": 9.437194127243067, "grad_norm": 0.022020984441041946, "learning_rate": 0.0006347094424200724, "loss": 0.0198, "num_input_tokens_seen": 124953120, "step": 57850 }, { "epoch": 9.438009787928221, "grad_norm": 0.017220299690961838, "learning_rate": 0.0006346408932427696, "loss": 0.0385, "num_input_tokens_seen": 124963968, "step": 57855 }, { "epoch": 9.438825448613377, "grad_norm": 0.13935768604278564, "learning_rate": 0.0006345723413367983, "loss": 0.0712, "num_input_tokens_seen": 124975136, "step": 57860 }, { "epoch": 9.439641109298531, "grad_norm": 0.225833460688591, "learning_rate": 0.0006345037867035478, "loss": 0.0764, "num_input_tokens_seen": 124986240, "step": 57865 }, { "epoch": 9.440456769983687, "grad_norm": 0.005525338929146528, "learning_rate": 0.0006344352293444073, "loss": 0.0289, "num_input_tokens_seen": 124996960, "step": 57870 }, { "epoch": 9.441272430668842, "grad_norm": 0.04588594287633896, "learning_rate": 0.0006343666692607665, "loss": 0.1242, "num_input_tokens_seen": 125007584, "step": 57875 }, { "epoch": 9.442088091353996, "grad_norm": 0.2472897619009018, "learning_rate": 0.0006342981064540145, "loss": 0.0699, "num_input_tokens_seen": 125018880, "step": 57880 }, { "epoch": 9.442903752039152, "grad_norm": 0.02394697442650795, "learning_rate": 0.0006342295409255412, "loss": 0.1556, "num_input_tokens_seen": 125029312, "step": 57885 }, { "epoch": 9.443719412724306, "grad_norm": 0.006148052867501974, "learning_rate": 0.000634160972676736, "loss": 0.0163, "num_input_tokens_seen": 125040384, "step": 57890 }, { "epoch": 9.444535073409462, "grad_norm": 0.08018513023853302, "learning_rate": 0.0006340924017089884, "loss": 0.0276, "num_input_tokens_seen": 125051872, "step": 57895 }, { "epoch": 9.445350734094617, "grad_norm": 0.369053453207016, "learning_rate": 0.0006340238280236882, "loss": 0.1174, "num_input_tokens_seen": 125063264, "step": 57900 }, { "epoch": 9.446166394779771, "grad_norm": 0.04056015610694885, "learning_rate": 0.0006339552516222251, "loss": 0.0163, "num_input_tokens_seen": 125073696, "step": 57905 }, { "epoch": 9.446982055464927, "grad_norm": 0.010821258649230003, "learning_rate": 0.0006338866725059889, "loss": 0.1023, "num_input_tokens_seen": 125084896, "step": 57910 }, { "epoch": 9.447797716150081, "grad_norm": 0.003268955973908305, "learning_rate": 0.0006338180906763693, "loss": 0.0413, "num_input_tokens_seen": 125096224, "step": 57915 }, { "epoch": 9.448613376835237, "grad_norm": 0.055417194962501526, "learning_rate": 0.0006337495061347565, "loss": 0.018, "num_input_tokens_seen": 125106624, "step": 57920 }, { "epoch": 9.449429037520392, "grad_norm": 0.22811178863048553, "learning_rate": 0.0006336809188825401, "loss": 0.0576, "num_input_tokens_seen": 125116288, "step": 57925 }, { "epoch": 9.450244698205546, "grad_norm": 0.025044074282050133, "learning_rate": 0.0006336123289211104, "loss": 0.0189, "num_input_tokens_seen": 125125056, "step": 57930 }, { "epoch": 9.451060358890702, "grad_norm": 0.04807139188051224, "learning_rate": 0.0006335437362518574, "loss": 0.1698, "num_input_tokens_seen": 125136192, "step": 57935 }, { "epoch": 9.451876019575856, "grad_norm": 0.01002733688801527, "learning_rate": 0.0006334751408761712, "loss": 0.0116, "num_input_tokens_seen": 125147264, "step": 57940 }, { "epoch": 9.452691680261012, "grad_norm": 0.02477033995091915, "learning_rate": 0.0006334065427954418, "loss": 0.1792, "num_input_tokens_seen": 125158816, "step": 57945 }, { "epoch": 9.453507340946166, "grad_norm": 0.20859457552433014, "learning_rate": 0.0006333379420110597, "loss": 0.0511, "num_input_tokens_seen": 125168864, "step": 57950 }, { "epoch": 9.454323001631321, "grad_norm": 0.12974859774112701, "learning_rate": 0.000633269338524415, "loss": 0.0842, "num_input_tokens_seen": 125179968, "step": 57955 }, { "epoch": 9.455138662316477, "grad_norm": 0.171275794506073, "learning_rate": 0.0006332007323368983, "loss": 0.1068, "num_input_tokens_seen": 125191424, "step": 57960 }, { "epoch": 9.455954323001631, "grad_norm": 0.2113136202096939, "learning_rate": 0.0006331321234498995, "loss": 0.0314, "num_input_tokens_seen": 125200480, "step": 57965 }, { "epoch": 9.456769983686787, "grad_norm": 0.008856832049787045, "learning_rate": 0.0006330635118648093, "loss": 0.0104, "num_input_tokens_seen": 125212768, "step": 57970 }, { "epoch": 9.45758564437194, "grad_norm": 0.22293557226657867, "learning_rate": 0.0006329948975830184, "loss": 0.0636, "num_input_tokens_seen": 125223712, "step": 57975 }, { "epoch": 9.458401305057096, "grad_norm": 0.20843131840229034, "learning_rate": 0.0006329262806059173, "loss": 0.054, "num_input_tokens_seen": 125236224, "step": 57980 }, { "epoch": 9.459216965742252, "grad_norm": 0.04087536782026291, "learning_rate": 0.0006328576609348962, "loss": 0.0079, "num_input_tokens_seen": 125247296, "step": 57985 }, { "epoch": 9.460032626427406, "grad_norm": 0.002280786167830229, "learning_rate": 0.0006327890385713462, "loss": 0.0996, "num_input_tokens_seen": 125257632, "step": 57990 }, { "epoch": 9.460848287112562, "grad_norm": 0.3266606330871582, "learning_rate": 0.000632720413516658, "loss": 0.0486, "num_input_tokens_seen": 125268416, "step": 57995 }, { "epoch": 9.461663947797716, "grad_norm": 0.02909723110496998, "learning_rate": 0.000632651785772222, "loss": 0.0268, "num_input_tokens_seen": 125280064, "step": 58000 }, { "epoch": 9.462479608482871, "grad_norm": 0.002902657026425004, "learning_rate": 0.0006325831553394294, "loss": 0.005, "num_input_tokens_seen": 125291040, "step": 58005 }, { "epoch": 9.463295269168025, "grad_norm": 0.011866576969623566, "learning_rate": 0.000632514522219671, "loss": 0.0618, "num_input_tokens_seen": 125302400, "step": 58010 }, { "epoch": 9.464110929853181, "grad_norm": 0.0071860142052173615, "learning_rate": 0.0006324458864143377, "loss": 0.0143, "num_input_tokens_seen": 125314112, "step": 58015 }, { "epoch": 9.464926590538337, "grad_norm": 0.005821306258440018, "learning_rate": 0.0006323772479248204, "loss": 0.059, "num_input_tokens_seen": 125325088, "step": 58020 }, { "epoch": 9.46574225122349, "grad_norm": 0.30362415313720703, "learning_rate": 0.0006323086067525103, "loss": 0.1722, "num_input_tokens_seen": 125336256, "step": 58025 }, { "epoch": 9.466557911908646, "grad_norm": 0.00525606470182538, "learning_rate": 0.0006322399628987984, "loss": 0.0639, "num_input_tokens_seen": 125346848, "step": 58030 }, { "epoch": 9.4673735725938, "grad_norm": 0.23222877085208893, "learning_rate": 0.000632171316365076, "loss": 0.052, "num_input_tokens_seen": 125356544, "step": 58035 }, { "epoch": 9.468189233278956, "grad_norm": 0.025633899495005608, "learning_rate": 0.000632102667152734, "loss": 0.014, "num_input_tokens_seen": 125367200, "step": 58040 }, { "epoch": 9.469004893964112, "grad_norm": 0.014735725708305836, "learning_rate": 0.000632034015263164, "loss": 0.012, "num_input_tokens_seen": 125378688, "step": 58045 }, { "epoch": 9.469820554649266, "grad_norm": 0.01710418239235878, "learning_rate": 0.0006319653606977571, "loss": 0.0051, "num_input_tokens_seen": 125389664, "step": 58050 }, { "epoch": 9.470636215334421, "grad_norm": 0.2504916191101074, "learning_rate": 0.0006318967034579048, "loss": 0.0984, "num_input_tokens_seen": 125400096, "step": 58055 }, { "epoch": 9.471451876019575, "grad_norm": 0.0372481495141983, "learning_rate": 0.0006318280435449985, "loss": 0.0286, "num_input_tokens_seen": 125410656, "step": 58060 }, { "epoch": 9.47226753670473, "grad_norm": 0.28393882513046265, "learning_rate": 0.0006317593809604298, "loss": 0.1305, "num_input_tokens_seen": 125421152, "step": 58065 }, { "epoch": 9.473083197389887, "grad_norm": 0.029981283470988274, "learning_rate": 0.00063169071570559, "loss": 0.0188, "num_input_tokens_seen": 125432224, "step": 58070 }, { "epoch": 9.47389885807504, "grad_norm": 0.17756913602352142, "learning_rate": 0.0006316220477818707, "loss": 0.0405, "num_input_tokens_seen": 125442464, "step": 58075 }, { "epoch": 9.474714518760196, "grad_norm": 0.2382059395313263, "learning_rate": 0.0006315533771906638, "loss": 0.0382, "num_input_tokens_seen": 125454080, "step": 58080 }, { "epoch": 9.47553017944535, "grad_norm": 0.010461671277880669, "learning_rate": 0.0006314847039333607, "loss": 0.0242, "num_input_tokens_seen": 125464224, "step": 58085 }, { "epoch": 9.476345840130506, "grad_norm": 0.42559152841567993, "learning_rate": 0.0006314160280113532, "loss": 0.1037, "num_input_tokens_seen": 125475104, "step": 58090 }, { "epoch": 9.477161500815662, "grad_norm": 0.37911534309387207, "learning_rate": 0.0006313473494260333, "loss": 0.0369, "num_input_tokens_seen": 125484832, "step": 58095 }, { "epoch": 9.477977161500815, "grad_norm": 0.00439072959125042, "learning_rate": 0.0006312786681787928, "loss": 0.0161, "num_input_tokens_seen": 125496800, "step": 58100 }, { "epoch": 9.478792822185971, "grad_norm": 0.25645124912261963, "learning_rate": 0.0006312099842710234, "loss": 0.0362, "num_input_tokens_seen": 125507328, "step": 58105 }, { "epoch": 9.479608482871125, "grad_norm": 0.006362576503306627, "learning_rate": 0.0006311412977041172, "loss": 0.1536, "num_input_tokens_seen": 125517344, "step": 58110 }, { "epoch": 9.48042414355628, "grad_norm": 0.6152973175048828, "learning_rate": 0.0006310726084794663, "loss": 0.0942, "num_input_tokens_seen": 125528352, "step": 58115 }, { "epoch": 9.481239804241435, "grad_norm": 0.10060965269804001, "learning_rate": 0.0006310039165984628, "loss": 0.0544, "num_input_tokens_seen": 125539328, "step": 58120 }, { "epoch": 9.48205546492659, "grad_norm": 0.3432973623275757, "learning_rate": 0.0006309352220624986, "loss": 0.0893, "num_input_tokens_seen": 125550080, "step": 58125 }, { "epoch": 9.482871125611746, "grad_norm": 0.22590389847755432, "learning_rate": 0.0006308665248729662, "loss": 0.1392, "num_input_tokens_seen": 125559648, "step": 58130 }, { "epoch": 9.4836867862969, "grad_norm": 0.04668281227350235, "learning_rate": 0.0006307978250312574, "loss": 0.1853, "num_input_tokens_seen": 125570432, "step": 58135 }, { "epoch": 9.484502446982056, "grad_norm": 0.3730715811252594, "learning_rate": 0.0006307291225387648, "loss": 0.2305, "num_input_tokens_seen": 125580096, "step": 58140 }, { "epoch": 9.48531810766721, "grad_norm": 0.1770205795764923, "learning_rate": 0.0006306604173968808, "loss": 0.068, "num_input_tokens_seen": 125591008, "step": 58145 }, { "epoch": 9.486133768352365, "grad_norm": 0.20119500160217285, "learning_rate": 0.0006305917096069977, "loss": 0.019, "num_input_tokens_seen": 125601248, "step": 58150 }, { "epoch": 9.486949429037521, "grad_norm": 0.32379111647605896, "learning_rate": 0.000630522999170508, "loss": 0.0727, "num_input_tokens_seen": 125610944, "step": 58155 }, { "epoch": 9.487765089722675, "grad_norm": 0.0593860000371933, "learning_rate": 0.0006304542860888039, "loss": 0.036, "num_input_tokens_seen": 125622336, "step": 58160 }, { "epoch": 9.48858075040783, "grad_norm": 0.022446129471063614, "learning_rate": 0.0006303855703632783, "loss": 0.0938, "num_input_tokens_seen": 125633600, "step": 58165 }, { "epoch": 9.489396411092985, "grad_norm": 0.002450139494612813, "learning_rate": 0.0006303168519953238, "loss": 0.1368, "num_input_tokens_seen": 125645472, "step": 58170 }, { "epoch": 9.49021207177814, "grad_norm": 0.021858789026737213, "learning_rate": 0.0006302481309863329, "loss": 0.0204, "num_input_tokens_seen": 125656416, "step": 58175 }, { "epoch": 9.491027732463296, "grad_norm": 0.01752588339149952, "learning_rate": 0.0006301794073376985, "loss": 0.1022, "num_input_tokens_seen": 125667680, "step": 58180 }, { "epoch": 9.49184339314845, "grad_norm": 0.019962133839726448, "learning_rate": 0.0006301106810508131, "loss": 0.0627, "num_input_tokens_seen": 125678336, "step": 58185 }, { "epoch": 9.492659053833606, "grad_norm": 0.06252746284008026, "learning_rate": 0.0006300419521270697, "loss": 0.0113, "num_input_tokens_seen": 125688768, "step": 58190 }, { "epoch": 9.49347471451876, "grad_norm": 0.22887328267097473, "learning_rate": 0.0006299732205678613, "loss": 0.0288, "num_input_tokens_seen": 125699936, "step": 58195 }, { "epoch": 9.494290375203915, "grad_norm": 0.0024231132119894028, "learning_rate": 0.0006299044863745806, "loss": 0.0118, "num_input_tokens_seen": 125711136, "step": 58200 }, { "epoch": 9.49510603588907, "grad_norm": 0.003173418343067169, "learning_rate": 0.0006298357495486208, "loss": 0.0094, "num_input_tokens_seen": 125722848, "step": 58205 }, { "epoch": 9.495921696574225, "grad_norm": 0.003356012748554349, "learning_rate": 0.0006297670100913748, "loss": 0.2534, "num_input_tokens_seen": 125734464, "step": 58210 }, { "epoch": 9.49673735725938, "grad_norm": 0.06293818354606628, "learning_rate": 0.0006296982680042357, "loss": 0.0294, "num_input_tokens_seen": 125745632, "step": 58215 }, { "epoch": 9.497553017944535, "grad_norm": 0.015413629822432995, "learning_rate": 0.0006296295232885966, "loss": 0.0083, "num_input_tokens_seen": 125757856, "step": 58220 }, { "epoch": 9.49836867862969, "grad_norm": 0.011318295262753963, "learning_rate": 0.0006295607759458508, "loss": 0.1357, "num_input_tokens_seen": 125769248, "step": 58225 }, { "epoch": 9.499184339314844, "grad_norm": 0.14822854101657867, "learning_rate": 0.0006294920259773915, "loss": 0.016, "num_input_tokens_seen": 125780480, "step": 58230 }, { "epoch": 9.5, "grad_norm": 0.37363535165786743, "learning_rate": 0.0006294232733846121, "loss": 0.1286, "num_input_tokens_seen": 125791040, "step": 58235 }, { "epoch": 9.500815660685156, "grad_norm": 0.07917729765176773, "learning_rate": 0.0006293545181689057, "loss": 0.1329, "num_input_tokens_seen": 125801696, "step": 58240 }, { "epoch": 9.50163132137031, "grad_norm": 0.06792290508747101, "learning_rate": 0.000629285760331666, "loss": 0.0105, "num_input_tokens_seen": 125810816, "step": 58245 }, { "epoch": 9.502446982055465, "grad_norm": 0.007692431099712849, "learning_rate": 0.0006292169998742865, "loss": 0.0984, "num_input_tokens_seen": 125822272, "step": 58250 }, { "epoch": 9.50326264274062, "grad_norm": 0.008680978789925575, "learning_rate": 0.0006291482367981605, "loss": 0.0546, "num_input_tokens_seen": 125831808, "step": 58255 }, { "epoch": 9.504078303425775, "grad_norm": 0.013723728246986866, "learning_rate": 0.0006290794711046816, "loss": 0.0097, "num_input_tokens_seen": 125843360, "step": 58260 }, { "epoch": 9.50489396411093, "grad_norm": 0.06591471284627914, "learning_rate": 0.0006290107027952434, "loss": 0.0117, "num_input_tokens_seen": 125853408, "step": 58265 }, { "epoch": 9.505709624796085, "grad_norm": 0.23483747243881226, "learning_rate": 0.0006289419318712397, "loss": 0.1553, "num_input_tokens_seen": 125865152, "step": 58270 }, { "epoch": 9.50652528548124, "grad_norm": 0.0060275401920080185, "learning_rate": 0.0006288731583340642, "loss": 0.0189, "num_input_tokens_seen": 125875968, "step": 58275 }, { "epoch": 9.507340946166394, "grad_norm": 0.012370044365525246, "learning_rate": 0.0006288043821851107, "loss": 0.0119, "num_input_tokens_seen": 125887136, "step": 58280 }, { "epoch": 9.50815660685155, "grad_norm": 0.005109517835080624, "learning_rate": 0.000628735603425773, "loss": 0.0329, "num_input_tokens_seen": 125897792, "step": 58285 }, { "epoch": 9.508972267536706, "grad_norm": 0.005160059779882431, "learning_rate": 0.0006286668220574448, "loss": 0.1185, "num_input_tokens_seen": 125907360, "step": 58290 }, { "epoch": 9.50978792822186, "grad_norm": 0.014347260817885399, "learning_rate": 0.0006285980380815204, "loss": 0.0251, "num_input_tokens_seen": 125918240, "step": 58295 }, { "epoch": 9.510603588907015, "grad_norm": 0.2521634101867676, "learning_rate": 0.0006285292514993936, "loss": 0.1995, "num_input_tokens_seen": 125928928, "step": 58300 }, { "epoch": 9.51141924959217, "grad_norm": 0.037277307361364365, "learning_rate": 0.0006284604623124585, "loss": 0.1115, "num_input_tokens_seen": 125940096, "step": 58305 }, { "epoch": 9.512234910277325, "grad_norm": 0.047494374215602875, "learning_rate": 0.0006283916705221091, "loss": 0.0453, "num_input_tokens_seen": 125950880, "step": 58310 }, { "epoch": 9.513050570962479, "grad_norm": 0.011362025514245033, "learning_rate": 0.0006283228761297396, "loss": 0.014, "num_input_tokens_seen": 125962144, "step": 58315 }, { "epoch": 9.513866231647635, "grad_norm": 0.004715372808277607, "learning_rate": 0.0006282540791367442, "loss": 0.0192, "num_input_tokens_seen": 125973152, "step": 58320 }, { "epoch": 9.51468189233279, "grad_norm": 0.05988180637359619, "learning_rate": 0.0006281852795445173, "loss": 0.1561, "num_input_tokens_seen": 125984128, "step": 58325 }, { "epoch": 9.515497553017944, "grad_norm": 0.22990791499614716, "learning_rate": 0.000628116477354453, "loss": 0.0564, "num_input_tokens_seen": 125994304, "step": 58330 }, { "epoch": 9.5163132137031, "grad_norm": 0.01864621788263321, "learning_rate": 0.0006280476725679457, "loss": 0.0084, "num_input_tokens_seen": 126005472, "step": 58335 }, { "epoch": 9.517128874388254, "grad_norm": 0.040201228111982346, "learning_rate": 0.00062797886518639, "loss": 0.0595, "num_input_tokens_seen": 126015872, "step": 58340 }, { "epoch": 9.51794453507341, "grad_norm": 0.13366523385047913, "learning_rate": 0.0006279100552111803, "loss": 0.0392, "num_input_tokens_seen": 126026528, "step": 58345 }, { "epoch": 9.518760195758565, "grad_norm": 0.19599227607250214, "learning_rate": 0.0006278412426437109, "loss": 0.0216, "num_input_tokens_seen": 126037952, "step": 58350 }, { "epoch": 9.51957585644372, "grad_norm": 0.0015695245238021016, "learning_rate": 0.0006277724274853767, "loss": 0.1863, "num_input_tokens_seen": 126049376, "step": 58355 }, { "epoch": 9.520391517128875, "grad_norm": 1.7492622137069702, "learning_rate": 0.0006277036097375719, "loss": 0.1198, "num_input_tokens_seen": 126060160, "step": 58360 }, { "epoch": 9.521207177814029, "grad_norm": 0.008433963172137737, "learning_rate": 0.0006276347894016917, "loss": 0.0316, "num_input_tokens_seen": 126071744, "step": 58365 }, { "epoch": 9.522022838499185, "grad_norm": 0.007258435245603323, "learning_rate": 0.0006275659664791304, "loss": 0.0119, "num_input_tokens_seen": 126083744, "step": 58370 }, { "epoch": 9.522838499184338, "grad_norm": 0.012848546728491783, "learning_rate": 0.0006274971409712831, "loss": 0.0252, "num_input_tokens_seen": 126094016, "step": 58375 }, { "epoch": 9.523654159869494, "grad_norm": 0.39458566904067993, "learning_rate": 0.0006274283128795445, "loss": 0.2455, "num_input_tokens_seen": 126103808, "step": 58380 }, { "epoch": 9.52446982055465, "grad_norm": 0.03209485858678818, "learning_rate": 0.0006273594822053095, "loss": 0.1695, "num_input_tokens_seen": 126113120, "step": 58385 }, { "epoch": 9.525285481239804, "grad_norm": 0.011099644005298615, "learning_rate": 0.000627290648949973, "loss": 0.0235, "num_input_tokens_seen": 126123360, "step": 58390 }, { "epoch": 9.52610114192496, "grad_norm": 0.010915805585682392, "learning_rate": 0.00062722181311493, "loss": 0.0657, "num_input_tokens_seen": 126134240, "step": 58395 }, { "epoch": 9.526916802610113, "grad_norm": 0.0570676252245903, "learning_rate": 0.0006271529747015755, "loss": 0.0222, "num_input_tokens_seen": 126145952, "step": 58400 }, { "epoch": 9.52773246329527, "grad_norm": 0.19531919062137604, "learning_rate": 0.0006270841337113047, "loss": 0.2215, "num_input_tokens_seen": 126157024, "step": 58405 }, { "epoch": 9.528548123980425, "grad_norm": 0.10229338705539703, "learning_rate": 0.0006270152901455128, "loss": 0.0303, "num_input_tokens_seen": 126167648, "step": 58410 }, { "epoch": 9.529363784665579, "grad_norm": 0.009482331573963165, "learning_rate": 0.0006269464440055948, "loss": 0.1183, "num_input_tokens_seen": 126179424, "step": 58415 }, { "epoch": 9.530179445350734, "grad_norm": 0.055833905935287476, "learning_rate": 0.0006268775952929462, "loss": 0.0924, "num_input_tokens_seen": 126190048, "step": 58420 }, { "epoch": 9.530995106035888, "grad_norm": 0.0862567201256752, "learning_rate": 0.000626808744008962, "loss": 0.0198, "num_input_tokens_seen": 126199840, "step": 58425 }, { "epoch": 9.531810766721044, "grad_norm": 0.01423166785389185, "learning_rate": 0.0006267398901550379, "loss": 0.0231, "num_input_tokens_seen": 126211904, "step": 58430 }, { "epoch": 9.5326264274062, "grad_norm": 0.22748292982578278, "learning_rate": 0.000626671033732569, "loss": 0.0462, "num_input_tokens_seen": 126224160, "step": 58435 }, { "epoch": 9.533442088091354, "grad_norm": 0.03178076446056366, "learning_rate": 0.0006266021747429511, "loss": 0.0361, "num_input_tokens_seen": 126234432, "step": 58440 }, { "epoch": 9.53425774877651, "grad_norm": 0.00904083251953125, "learning_rate": 0.0006265333131875794, "loss": 0.0628, "num_input_tokens_seen": 126246176, "step": 58445 }, { "epoch": 9.535073409461663, "grad_norm": 0.0102092195302248, "learning_rate": 0.0006264644490678496, "loss": 0.0638, "num_input_tokens_seen": 126256992, "step": 58450 }, { "epoch": 9.535889070146819, "grad_norm": 0.2707527279853821, "learning_rate": 0.0006263955823851571, "loss": 0.2005, "num_input_tokens_seen": 126267968, "step": 58455 }, { "epoch": 9.536704730831975, "grad_norm": 0.1802579015493393, "learning_rate": 0.0006263267131408981, "loss": 0.0418, "num_input_tokens_seen": 126278976, "step": 58460 }, { "epoch": 9.537520391517129, "grad_norm": 0.007812032010406256, "learning_rate": 0.0006262578413364679, "loss": 0.0185, "num_input_tokens_seen": 126290464, "step": 58465 }, { "epoch": 9.538336052202284, "grad_norm": 0.03092452511191368, "learning_rate": 0.0006261889669732624, "loss": 0.0897, "num_input_tokens_seen": 126301888, "step": 58470 }, { "epoch": 9.539151712887438, "grad_norm": 0.04860683158040047, "learning_rate": 0.0006261200900526773, "loss": 0.0176, "num_input_tokens_seen": 126312736, "step": 58475 }, { "epoch": 9.539967373572594, "grad_norm": 0.02373124659061432, "learning_rate": 0.0006260512105761086, "loss": 0.0823, "num_input_tokens_seen": 126323840, "step": 58480 }, { "epoch": 9.540783034257748, "grad_norm": 0.2580413520336151, "learning_rate": 0.0006259823285449523, "loss": 0.0238, "num_input_tokens_seen": 126335360, "step": 58485 }, { "epoch": 9.541598694942904, "grad_norm": 0.008553222753107548, "learning_rate": 0.0006259134439606043, "loss": 0.0223, "num_input_tokens_seen": 126345056, "step": 58490 }, { "epoch": 9.54241435562806, "grad_norm": 0.3394491970539093, "learning_rate": 0.0006258445568244605, "loss": 0.1001, "num_input_tokens_seen": 126355168, "step": 58495 }, { "epoch": 9.543230016313213, "grad_norm": 0.007404628675431013, "learning_rate": 0.0006257756671379172, "loss": 0.0125, "num_input_tokens_seen": 126365600, "step": 58500 }, { "epoch": 9.544045676998369, "grad_norm": 0.01686800643801689, "learning_rate": 0.0006257067749023704, "loss": 0.0698, "num_input_tokens_seen": 126375008, "step": 58505 }, { "epoch": 9.544861337683523, "grad_norm": 0.31130531430244446, "learning_rate": 0.0006256378801192163, "loss": 0.046, "num_input_tokens_seen": 126386048, "step": 58510 }, { "epoch": 9.545676998368679, "grad_norm": 0.009447862394154072, "learning_rate": 0.0006255689827898512, "loss": 0.1425, "num_input_tokens_seen": 126396896, "step": 58515 }, { "epoch": 9.546492659053834, "grad_norm": 0.07970761507749557, "learning_rate": 0.0006255000829156714, "loss": 0.029, "num_input_tokens_seen": 126408480, "step": 58520 }, { "epoch": 9.547308319738988, "grad_norm": 0.19980250298976898, "learning_rate": 0.0006254311804980733, "loss": 0.0472, "num_input_tokens_seen": 126419552, "step": 58525 }, { "epoch": 9.548123980424144, "grad_norm": 0.03398562967777252, "learning_rate": 0.0006253622755384531, "loss": 0.0324, "num_input_tokens_seen": 126430240, "step": 58530 }, { "epoch": 9.548939641109298, "grad_norm": 0.18277190625667572, "learning_rate": 0.0006252933680382074, "loss": 0.0262, "num_input_tokens_seen": 126440736, "step": 58535 }, { "epoch": 9.549755301794454, "grad_norm": 0.020722072571516037, "learning_rate": 0.0006252244579987327, "loss": 0.0369, "num_input_tokens_seen": 126450944, "step": 58540 }, { "epoch": 9.550570962479608, "grad_norm": 0.13694825768470764, "learning_rate": 0.0006251555454214254, "loss": 0.0874, "num_input_tokens_seen": 126462112, "step": 58545 }, { "epoch": 9.551386623164763, "grad_norm": 0.018573446199297905, "learning_rate": 0.0006250866303076822, "loss": 0.191, "num_input_tokens_seen": 126472384, "step": 58550 }, { "epoch": 9.552202283849919, "grad_norm": 0.052989520132541656, "learning_rate": 0.0006250177126588998, "loss": 0.0962, "num_input_tokens_seen": 126483328, "step": 58555 }, { "epoch": 9.553017944535073, "grad_norm": 0.3053835332393646, "learning_rate": 0.0006249487924764747, "loss": 0.0618, "num_input_tokens_seen": 126494912, "step": 58560 }, { "epoch": 9.553833605220229, "grad_norm": 0.032893870025873184, "learning_rate": 0.000624879869761804, "loss": 0.0106, "num_input_tokens_seen": 126506944, "step": 58565 }, { "epoch": 9.554649265905383, "grad_norm": 0.030350077897310257, "learning_rate": 0.0006248109445162843, "loss": 0.0809, "num_input_tokens_seen": 126516160, "step": 58570 }, { "epoch": 9.555464926590538, "grad_norm": 0.25913652777671814, "learning_rate": 0.0006247420167413124, "loss": 0.3567, "num_input_tokens_seen": 126526144, "step": 58575 }, { "epoch": 9.556280587275694, "grad_norm": 0.016176484525203705, "learning_rate": 0.0006246730864382853, "loss": 0.0134, "num_input_tokens_seen": 126536608, "step": 58580 }, { "epoch": 9.557096247960848, "grad_norm": 0.012450575828552246, "learning_rate": 0.0006246041536086, "loss": 0.1263, "num_input_tokens_seen": 126547776, "step": 58585 }, { "epoch": 9.557911908646004, "grad_norm": 0.24023890495300293, "learning_rate": 0.0006245352182536535, "loss": 0.0604, "num_input_tokens_seen": 126558944, "step": 58590 }, { "epoch": 9.558727569331158, "grad_norm": 0.03461911529302597, "learning_rate": 0.0006244662803748427, "loss": 0.1038, "num_input_tokens_seen": 126569472, "step": 58595 }, { "epoch": 9.559543230016313, "grad_norm": 0.06925342977046967, "learning_rate": 0.0006243973399735649, "loss": 0.0175, "num_input_tokens_seen": 126580160, "step": 58600 }, { "epoch": 9.560358890701469, "grad_norm": 0.024349384009838104, "learning_rate": 0.0006243283970512172, "loss": 0.1112, "num_input_tokens_seen": 126590912, "step": 58605 }, { "epoch": 9.561174551386623, "grad_norm": 0.01365981251001358, "learning_rate": 0.0006242594516091967, "loss": 0.0242, "num_input_tokens_seen": 126601056, "step": 58610 }, { "epoch": 9.561990212071779, "grad_norm": 0.02429444156587124, "learning_rate": 0.000624190503648901, "loss": 0.0273, "num_input_tokens_seen": 126611936, "step": 58615 }, { "epoch": 9.562805872756933, "grad_norm": 0.14289309084415436, "learning_rate": 0.000624121553171727, "loss": 0.1001, "num_input_tokens_seen": 126622272, "step": 58620 }, { "epoch": 9.563621533442088, "grad_norm": 0.20121271908283234, "learning_rate": 0.0006240526001790723, "loss": 0.0847, "num_input_tokens_seen": 126634080, "step": 58625 }, { "epoch": 9.564437194127244, "grad_norm": 0.026418212801218033, "learning_rate": 0.0006239836446723343, "loss": 0.1158, "num_input_tokens_seen": 126644608, "step": 58630 }, { "epoch": 9.565252854812398, "grad_norm": 0.2299506664276123, "learning_rate": 0.0006239146866529105, "loss": 0.0914, "num_input_tokens_seen": 126654976, "step": 58635 }, { "epoch": 9.566068515497554, "grad_norm": 0.03591744229197502, "learning_rate": 0.0006238457261221983, "loss": 0.0996, "num_input_tokens_seen": 126666272, "step": 58640 }, { "epoch": 9.566884176182707, "grad_norm": 0.3727372884750366, "learning_rate": 0.0006237767630815955, "loss": 0.0848, "num_input_tokens_seen": 126677824, "step": 58645 }, { "epoch": 9.567699836867863, "grad_norm": 0.0906633585691452, "learning_rate": 0.0006237077975324994, "loss": 0.0559, "num_input_tokens_seen": 126688832, "step": 58650 }, { "epoch": 9.568515497553017, "grad_norm": 0.007482402957975864, "learning_rate": 0.0006236388294763079, "loss": 0.0113, "num_input_tokens_seen": 126700000, "step": 58655 }, { "epoch": 9.569331158238173, "grad_norm": 0.0202884990721941, "learning_rate": 0.0006235698589144188, "loss": 0.0524, "num_input_tokens_seen": 126711136, "step": 58660 }, { "epoch": 9.570146818923329, "grad_norm": 0.19632995128631592, "learning_rate": 0.0006235008858482295, "loss": 0.069, "num_input_tokens_seen": 126722944, "step": 58665 }, { "epoch": 9.570962479608482, "grad_norm": 0.02798754721879959, "learning_rate": 0.0006234319102791382, "loss": 0.0135, "num_input_tokens_seen": 126734240, "step": 58670 }, { "epoch": 9.571778140293638, "grad_norm": 0.06160164624452591, "learning_rate": 0.0006233629322085427, "loss": 0.1022, "num_input_tokens_seen": 126744000, "step": 58675 }, { "epoch": 9.572593800978792, "grad_norm": 0.18928822875022888, "learning_rate": 0.0006232939516378408, "loss": 0.0703, "num_input_tokens_seen": 126754592, "step": 58680 }, { "epoch": 9.573409461663948, "grad_norm": 0.0757313072681427, "learning_rate": 0.0006232249685684306, "loss": 0.0523, "num_input_tokens_seen": 126764544, "step": 58685 }, { "epoch": 9.574225122349104, "grad_norm": 0.16569207608699799, "learning_rate": 0.0006231559830017102, "loss": 0.042, "num_input_tokens_seen": 126775456, "step": 58690 }, { "epoch": 9.575040783034257, "grad_norm": 0.2607077956199646, "learning_rate": 0.0006230869949390774, "loss": 0.0709, "num_input_tokens_seen": 126787488, "step": 58695 }, { "epoch": 9.575856443719413, "grad_norm": 0.26447293162345886, "learning_rate": 0.0006230180043819306, "loss": 0.163, "num_input_tokens_seen": 126798944, "step": 58700 }, { "epoch": 9.576672104404567, "grad_norm": 0.019896386191248894, "learning_rate": 0.0006229490113316678, "loss": 0.0264, "num_input_tokens_seen": 126809696, "step": 58705 }, { "epoch": 9.577487765089723, "grad_norm": 0.012389651499688625, "learning_rate": 0.0006228800157896874, "loss": 0.0454, "num_input_tokens_seen": 126820288, "step": 58710 }, { "epoch": 9.578303425774878, "grad_norm": 0.020707737654447556, "learning_rate": 0.0006228110177573876, "loss": 0.0159, "num_input_tokens_seen": 126830400, "step": 58715 }, { "epoch": 9.579119086460032, "grad_norm": 0.026347359642386436, "learning_rate": 0.0006227420172361667, "loss": 0.0181, "num_input_tokens_seen": 126841440, "step": 58720 }, { "epoch": 9.579934747145188, "grad_norm": 0.3248952031135559, "learning_rate": 0.0006226730142274232, "loss": 0.1265, "num_input_tokens_seen": 126852256, "step": 58725 }, { "epoch": 9.580750407830342, "grad_norm": 0.013630959205329418, "learning_rate": 0.0006226040087325553, "loss": 0.025, "num_input_tokens_seen": 126863136, "step": 58730 }, { "epoch": 9.581566068515498, "grad_norm": 0.4216710329055786, "learning_rate": 0.0006225350007529616, "loss": 0.0738, "num_input_tokens_seen": 126874112, "step": 58735 }, { "epoch": 9.582381729200652, "grad_norm": 0.04262214154005051, "learning_rate": 0.0006224659902900408, "loss": 0.215, "num_input_tokens_seen": 126885024, "step": 58740 }, { "epoch": 9.583197389885807, "grad_norm": 0.23827561736106873, "learning_rate": 0.0006223969773451913, "loss": 0.0261, "num_input_tokens_seen": 126896384, "step": 58745 }, { "epoch": 9.584013050570963, "grad_norm": 0.1313164383172989, "learning_rate": 0.0006223279619198118, "loss": 0.033, "num_input_tokens_seen": 126906912, "step": 58750 }, { "epoch": 9.584828711256117, "grad_norm": 0.46747714281082153, "learning_rate": 0.000622258944015301, "loss": 0.1526, "num_input_tokens_seen": 126917568, "step": 58755 }, { "epoch": 9.585644371941273, "grad_norm": 0.031990889459848404, "learning_rate": 0.0006221899236330575, "loss": 0.0248, "num_input_tokens_seen": 126928384, "step": 58760 }, { "epoch": 9.586460032626427, "grad_norm": 0.35653167963027954, "learning_rate": 0.0006221209007744803, "loss": 0.096, "num_input_tokens_seen": 126939264, "step": 58765 }, { "epoch": 9.587275693311582, "grad_norm": 0.26260942220687866, "learning_rate": 0.0006220518754409681, "loss": 0.1236, "num_input_tokens_seen": 126949792, "step": 58770 }, { "epoch": 9.588091353996738, "grad_norm": 0.010245069861412048, "learning_rate": 0.0006219828476339195, "loss": 0.0452, "num_input_tokens_seen": 126961536, "step": 58775 }, { "epoch": 9.588907014681892, "grad_norm": 0.019048362970352173, "learning_rate": 0.0006219138173547341, "loss": 0.0958, "num_input_tokens_seen": 126971648, "step": 58780 }, { "epoch": 9.589722675367048, "grad_norm": 0.03487079590559006, "learning_rate": 0.0006218447846048106, "loss": 0.0147, "num_input_tokens_seen": 126982272, "step": 58785 }, { "epoch": 9.590538336052202, "grad_norm": 0.004236708395183086, "learning_rate": 0.0006217757493855477, "loss": 0.0175, "num_input_tokens_seen": 126993760, "step": 58790 }, { "epoch": 9.591353996737357, "grad_norm": 0.00880725122988224, "learning_rate": 0.0006217067116983449, "loss": 0.0221, "num_input_tokens_seen": 127004448, "step": 58795 }, { "epoch": 9.592169657422513, "grad_norm": 0.003701163223013282, "learning_rate": 0.0006216376715446011, "loss": 0.0328, "num_input_tokens_seen": 127014016, "step": 58800 }, { "epoch": 9.592985318107667, "grad_norm": 0.03611031547188759, "learning_rate": 0.0006215686289257156, "loss": 0.0361, "num_input_tokens_seen": 127024576, "step": 58805 }, { "epoch": 9.593800978792823, "grad_norm": 0.009452610276639462, "learning_rate": 0.0006214995838430878, "loss": 0.0154, "num_input_tokens_seen": 127035936, "step": 58810 }, { "epoch": 9.594616639477977, "grad_norm": 0.1101599782705307, "learning_rate": 0.0006214305362981167, "loss": 0.0382, "num_input_tokens_seen": 127046144, "step": 58815 }, { "epoch": 9.595432300163132, "grad_norm": 0.006794488988816738, "learning_rate": 0.0006213614862922015, "loss": 0.0077, "num_input_tokens_seen": 127056544, "step": 58820 }, { "epoch": 9.596247960848288, "grad_norm": 0.07736363261938095, "learning_rate": 0.0006212924338267421, "loss": 0.0186, "num_input_tokens_seen": 127067040, "step": 58825 }, { "epoch": 9.597063621533442, "grad_norm": 0.06632602959871292, "learning_rate": 0.0006212233789031376, "loss": 0.0108, "num_input_tokens_seen": 127076832, "step": 58830 }, { "epoch": 9.597879282218598, "grad_norm": 0.4707207977771759, "learning_rate": 0.0006211543215227874, "loss": 0.1821, "num_input_tokens_seen": 127088544, "step": 58835 }, { "epoch": 9.598694942903752, "grad_norm": 0.03901342302560806, "learning_rate": 0.0006210852616870913, "loss": 0.0088, "num_input_tokens_seen": 127099040, "step": 58840 }, { "epoch": 9.599510603588907, "grad_norm": 0.019769612699747086, "learning_rate": 0.0006210161993974488, "loss": 0.1643, "num_input_tokens_seen": 127109152, "step": 58845 }, { "epoch": 9.600326264274061, "grad_norm": 0.005478884559124708, "learning_rate": 0.0006209471346552594, "loss": 0.0079, "num_input_tokens_seen": 127121472, "step": 58850 }, { "epoch": 9.601141924959217, "grad_norm": 0.1525253802537918, "learning_rate": 0.000620878067461923, "loss": 0.015, "num_input_tokens_seen": 127133504, "step": 58855 }, { "epoch": 9.601957585644373, "grad_norm": 0.18319138884544373, "learning_rate": 0.0006208089978188392, "loss": 0.0226, "num_input_tokens_seen": 127143904, "step": 58860 }, { "epoch": 9.602773246329527, "grad_norm": 0.014515785500407219, "learning_rate": 0.0006207399257274077, "loss": 0.0462, "num_input_tokens_seen": 127151936, "step": 58865 }, { "epoch": 9.603588907014682, "grad_norm": 0.006759721785783768, "learning_rate": 0.0006206708511890286, "loss": 0.0826, "num_input_tokens_seen": 127162912, "step": 58870 }, { "epoch": 9.604404567699836, "grad_norm": 0.021830957382917404, "learning_rate": 0.0006206017742051014, "loss": 0.0114, "num_input_tokens_seen": 127174432, "step": 58875 }, { "epoch": 9.605220228384992, "grad_norm": 0.5838247537612915, "learning_rate": 0.0006205326947770263, "loss": 0.0761, "num_input_tokens_seen": 127184768, "step": 58880 }, { "epoch": 9.606035889070148, "grad_norm": 0.0159307811409235, "learning_rate": 0.0006204636129062034, "loss": 0.0072, "num_input_tokens_seen": 127195296, "step": 58885 }, { "epoch": 9.606851549755302, "grad_norm": 0.012928396463394165, "learning_rate": 0.0006203945285940325, "loss": 0.0046, "num_input_tokens_seen": 127205280, "step": 58890 }, { "epoch": 9.607667210440457, "grad_norm": 0.008373846299946308, "learning_rate": 0.0006203254418419137, "loss": 0.0993, "num_input_tokens_seen": 127214624, "step": 58895 }, { "epoch": 9.608482871125611, "grad_norm": 0.10752927511930466, "learning_rate": 0.0006202563526512471, "loss": 0.0775, "num_input_tokens_seen": 127225984, "step": 58900 }, { "epoch": 9.609298531810767, "grad_norm": 0.012208987027406693, "learning_rate": 0.0006201872610234331, "loss": 0.043, "num_input_tokens_seen": 127237472, "step": 58905 }, { "epoch": 9.61011419249592, "grad_norm": 0.4600610136985779, "learning_rate": 0.0006201181669598717, "loss": 0.0201, "num_input_tokens_seen": 127247328, "step": 58910 }, { "epoch": 9.610929853181077, "grad_norm": 0.0066894530318677425, "learning_rate": 0.0006200490704619633, "loss": 0.0109, "num_input_tokens_seen": 127259424, "step": 58915 }, { "epoch": 9.611745513866232, "grad_norm": 0.005828152410686016, "learning_rate": 0.0006199799715311083, "loss": 0.0071, "num_input_tokens_seen": 127270976, "step": 58920 }, { "epoch": 9.612561174551386, "grad_norm": 0.27778568863868713, "learning_rate": 0.0006199108701687068, "loss": 0.0803, "num_input_tokens_seen": 127282112, "step": 58925 }, { "epoch": 9.613376835236542, "grad_norm": 0.007226116955280304, "learning_rate": 0.0006198417663761596, "loss": 0.0037, "num_input_tokens_seen": 127293504, "step": 58930 }, { "epoch": 9.614192495921696, "grad_norm": 0.0546656958758831, "learning_rate": 0.0006197726601548667, "loss": 0.0204, "num_input_tokens_seen": 127305408, "step": 58935 }, { "epoch": 9.615008156606851, "grad_norm": 0.323722779750824, "learning_rate": 0.0006197035515062291, "loss": 0.0675, "num_input_tokens_seen": 127317344, "step": 58940 }, { "epoch": 9.615823817292007, "grad_norm": 0.006941157393157482, "learning_rate": 0.0006196344404316472, "loss": 0.007, "num_input_tokens_seen": 127328352, "step": 58945 }, { "epoch": 9.616639477977161, "grad_norm": 0.3557772636413574, "learning_rate": 0.0006195653269325214, "loss": 0.0374, "num_input_tokens_seen": 127340096, "step": 58950 }, { "epoch": 9.617455138662317, "grad_norm": 0.012230448424816132, "learning_rate": 0.0006194962110102528, "loss": 0.1167, "num_input_tokens_seen": 127351200, "step": 58955 }, { "epoch": 9.61827079934747, "grad_norm": 0.0034625427797436714, "learning_rate": 0.0006194270926662416, "loss": 0.0182, "num_input_tokens_seen": 127362400, "step": 58960 }, { "epoch": 9.619086460032626, "grad_norm": 0.0702400952577591, "learning_rate": 0.000619357971901889, "loss": 0.0783, "num_input_tokens_seen": 127371904, "step": 58965 }, { "epoch": 9.619902120717782, "grad_norm": 0.18260350823402405, "learning_rate": 0.0006192888487185958, "loss": 0.0812, "num_input_tokens_seen": 127381984, "step": 58970 }, { "epoch": 9.620717781402936, "grad_norm": 0.10353845357894897, "learning_rate": 0.0006192197231177627, "loss": 0.0111, "num_input_tokens_seen": 127391584, "step": 58975 }, { "epoch": 9.621533442088092, "grad_norm": 0.01583891175687313, "learning_rate": 0.0006191505951007906, "loss": 0.1023, "num_input_tokens_seen": 127401728, "step": 58980 }, { "epoch": 9.622349102773246, "grad_norm": 0.18626467883586884, "learning_rate": 0.0006190814646690805, "loss": 0.0142, "num_input_tokens_seen": 127413184, "step": 58985 }, { "epoch": 9.623164763458401, "grad_norm": 0.027829086408019066, "learning_rate": 0.0006190123318240335, "loss": 0.024, "num_input_tokens_seen": 127423264, "step": 58990 }, { "epoch": 9.623980424143557, "grad_norm": 0.0037336426321417093, "learning_rate": 0.0006189431965670507, "loss": 0.0074, "num_input_tokens_seen": 127435488, "step": 58995 }, { "epoch": 9.624796084828711, "grad_norm": 0.003280236152932048, "learning_rate": 0.0006188740588995331, "loss": 0.046, "num_input_tokens_seen": 127445472, "step": 59000 }, { "epoch": 9.625611745513867, "grad_norm": 0.036388151347637177, "learning_rate": 0.000618804918822882, "loss": 0.062, "num_input_tokens_seen": 127453984, "step": 59005 }, { "epoch": 9.62642740619902, "grad_norm": 0.1263275146484375, "learning_rate": 0.0006187357763384982, "loss": 0.0422, "num_input_tokens_seen": 127464960, "step": 59010 }, { "epoch": 9.627243066884176, "grad_norm": 0.006821201648563147, "learning_rate": 0.0006186666314477835, "loss": 0.0234, "num_input_tokens_seen": 127475040, "step": 59015 }, { "epoch": 9.62805872756933, "grad_norm": 0.04489858075976372, "learning_rate": 0.0006185974841521389, "loss": 0.1792, "num_input_tokens_seen": 127486496, "step": 59020 }, { "epoch": 9.628874388254486, "grad_norm": 0.005536007694900036, "learning_rate": 0.0006185283344529659, "loss": 0.0293, "num_input_tokens_seen": 127496416, "step": 59025 }, { "epoch": 9.629690048939642, "grad_norm": 0.0015038796700537205, "learning_rate": 0.0006184591823516658, "loss": 0.0551, "num_input_tokens_seen": 127508000, "step": 59030 }, { "epoch": 9.630505709624796, "grad_norm": 0.6168532371520996, "learning_rate": 0.00061839002784964, "loss": 0.0912, "num_input_tokens_seen": 127518016, "step": 59035 }, { "epoch": 9.631321370309951, "grad_norm": 0.021128684282302856, "learning_rate": 0.0006183208709482903, "loss": 0.0933, "num_input_tokens_seen": 127528576, "step": 59040 }, { "epoch": 9.632137030995105, "grad_norm": 0.01242469996213913, "learning_rate": 0.0006182517116490179, "loss": 0.0197, "num_input_tokens_seen": 127538336, "step": 59045 }, { "epoch": 9.632952691680261, "grad_norm": 0.4242917001247406, "learning_rate": 0.0006181825499532247, "loss": 0.0546, "num_input_tokens_seen": 127548672, "step": 59050 }, { "epoch": 9.633768352365417, "grad_norm": 0.06874293833971024, "learning_rate": 0.000618113385862312, "loss": 0.0584, "num_input_tokens_seen": 127559584, "step": 59055 }, { "epoch": 9.63458401305057, "grad_norm": 0.026289397850632668, "learning_rate": 0.0006180442193776818, "loss": 0.1536, "num_input_tokens_seen": 127570656, "step": 59060 }, { "epoch": 9.635399673735726, "grad_norm": 0.0054007284343242645, "learning_rate": 0.0006179750505007357, "loss": 0.0494, "num_input_tokens_seen": 127583008, "step": 59065 }, { "epoch": 9.63621533442088, "grad_norm": 0.39873865246772766, "learning_rate": 0.0006179058792328756, "loss": 0.2067, "num_input_tokens_seen": 127594464, "step": 59070 }, { "epoch": 9.637030995106036, "grad_norm": 0.007571790833026171, "learning_rate": 0.0006178367055755032, "loss": 0.0249, "num_input_tokens_seen": 127605568, "step": 59075 }, { "epoch": 9.63784665579119, "grad_norm": 0.20183655619621277, "learning_rate": 0.0006177675295300206, "loss": 0.0159, "num_input_tokens_seen": 127615328, "step": 59080 }, { "epoch": 9.638662316476346, "grad_norm": 0.133195161819458, "learning_rate": 0.0006176983510978296, "loss": 0.0184, "num_input_tokens_seen": 127625984, "step": 59085 }, { "epoch": 9.639477977161501, "grad_norm": 0.08000269532203674, "learning_rate": 0.000617629170280332, "loss": 0.0184, "num_input_tokens_seen": 127637056, "step": 59090 }, { "epoch": 9.640293637846655, "grad_norm": 0.2687270939350128, "learning_rate": 0.0006175599870789301, "loss": 0.0766, "num_input_tokens_seen": 127647648, "step": 59095 }, { "epoch": 9.641109298531811, "grad_norm": 0.012075605802237988, "learning_rate": 0.000617490801495026, "loss": 0.0204, "num_input_tokens_seen": 127657728, "step": 59100 }, { "epoch": 9.641924959216965, "grad_norm": 0.33024096488952637, "learning_rate": 0.0006174216135300219, "loss": 0.1815, "num_input_tokens_seen": 127669568, "step": 59105 }, { "epoch": 9.64274061990212, "grad_norm": 0.03295661136507988, "learning_rate": 0.0006173524231853197, "loss": 0.027, "num_input_tokens_seen": 127681056, "step": 59110 }, { "epoch": 9.643556280587276, "grad_norm": 0.006422259379178286, "learning_rate": 0.0006172832304623217, "loss": 0.0367, "num_input_tokens_seen": 127692000, "step": 59115 }, { "epoch": 9.64437194127243, "grad_norm": 0.6102940440177917, "learning_rate": 0.0006172140353624304, "loss": 0.2075, "num_input_tokens_seen": 127704352, "step": 59120 }, { "epoch": 9.645187601957586, "grad_norm": 0.01776854135096073, "learning_rate": 0.0006171448378870479, "loss": 0.0831, "num_input_tokens_seen": 127715520, "step": 59125 }, { "epoch": 9.64600326264274, "grad_norm": 0.08590822666883469, "learning_rate": 0.0006170756380375766, "loss": 0.1248, "num_input_tokens_seen": 127726656, "step": 59130 }, { "epoch": 9.646818923327896, "grad_norm": 0.009952404536306858, "learning_rate": 0.000617006435815419, "loss": 0.0376, "num_input_tokens_seen": 127738784, "step": 59135 }, { "epoch": 9.647634584013051, "grad_norm": 0.014745515771210194, "learning_rate": 0.0006169372312219777, "loss": 0.0935, "num_input_tokens_seen": 127748992, "step": 59140 }, { "epoch": 9.648450244698205, "grad_norm": 0.06781429797410965, "learning_rate": 0.0006168680242586549, "loss": 0.2448, "num_input_tokens_seen": 127760992, "step": 59145 }, { "epoch": 9.649265905383361, "grad_norm": 0.08127600699663162, "learning_rate": 0.0006167988149268533, "loss": 0.0143, "num_input_tokens_seen": 127772000, "step": 59150 }, { "epoch": 9.650081566068515, "grad_norm": 0.11132414638996124, "learning_rate": 0.0006167296032279757, "loss": 0.0186, "num_input_tokens_seen": 127781984, "step": 59155 }, { "epoch": 9.65089722675367, "grad_norm": 0.11625031381845474, "learning_rate": 0.0006166603891634245, "loss": 0.0966, "num_input_tokens_seen": 127793440, "step": 59160 }, { "epoch": 9.651712887438826, "grad_norm": 0.01904509961605072, "learning_rate": 0.0006165911727346025, "loss": 0.0056, "num_input_tokens_seen": 127803232, "step": 59165 }, { "epoch": 9.65252854812398, "grad_norm": 0.017921043559908867, "learning_rate": 0.0006165219539429126, "loss": 0.0542, "num_input_tokens_seen": 127813696, "step": 59170 }, { "epoch": 9.653344208809136, "grad_norm": 0.30042189359664917, "learning_rate": 0.0006164527327897574, "loss": 0.0398, "num_input_tokens_seen": 127824096, "step": 59175 }, { "epoch": 9.65415986949429, "grad_norm": 0.18783047795295715, "learning_rate": 0.0006163835092765399, "loss": 0.1257, "num_input_tokens_seen": 127833856, "step": 59180 }, { "epoch": 9.654975530179446, "grad_norm": 0.033547911792993546, "learning_rate": 0.0006163142834046629, "loss": 0.0244, "num_input_tokens_seen": 127845248, "step": 59185 }, { "epoch": 9.655791190864601, "grad_norm": 0.2385920286178589, "learning_rate": 0.0006162450551755295, "loss": 0.0306, "num_input_tokens_seen": 127855936, "step": 59190 }, { "epoch": 9.656606851549755, "grad_norm": 0.018533451482653618, "learning_rate": 0.0006161758245905423, "loss": 0.0113, "num_input_tokens_seen": 127866688, "step": 59195 }, { "epoch": 9.65742251223491, "grad_norm": 0.0036566208582371473, "learning_rate": 0.0006161065916511047, "loss": 0.0683, "num_input_tokens_seen": 127877568, "step": 59200 }, { "epoch": 9.658238172920065, "grad_norm": 0.08195324242115021, "learning_rate": 0.0006160373563586199, "loss": 0.0547, "num_input_tokens_seen": 127887808, "step": 59205 }, { "epoch": 9.65905383360522, "grad_norm": 0.018927576020359993, "learning_rate": 0.0006159681187144909, "loss": 0.0134, "num_input_tokens_seen": 127898816, "step": 59210 }, { "epoch": 9.659869494290374, "grad_norm": 0.01739303395152092, "learning_rate": 0.0006158988787201208, "loss": 0.0732, "num_input_tokens_seen": 127909856, "step": 59215 }, { "epoch": 9.66068515497553, "grad_norm": 0.006371657829731703, "learning_rate": 0.0006158296363769128, "loss": 0.1002, "num_input_tokens_seen": 127920000, "step": 59220 }, { "epoch": 9.661500815660686, "grad_norm": 0.20840153098106384, "learning_rate": 0.0006157603916862703, "loss": 0.1262, "num_input_tokens_seen": 127931040, "step": 59225 }, { "epoch": 9.66231647634584, "grad_norm": 0.0057899076491594315, "learning_rate": 0.0006156911446495967, "loss": 0.038, "num_input_tokens_seen": 127941888, "step": 59230 }, { "epoch": 9.663132137030995, "grad_norm": 0.02108006179332733, "learning_rate": 0.0006156218952682953, "loss": 0.014, "num_input_tokens_seen": 127951328, "step": 59235 }, { "epoch": 9.66394779771615, "grad_norm": 0.07076103985309601, "learning_rate": 0.0006155526435437694, "loss": 0.1026, "num_input_tokens_seen": 127962816, "step": 59240 }, { "epoch": 9.664763458401305, "grad_norm": 0.0183942299336195, "learning_rate": 0.0006154833894774226, "loss": 0.0736, "num_input_tokens_seen": 127974016, "step": 59245 }, { "epoch": 9.66557911908646, "grad_norm": 0.038768794387578964, "learning_rate": 0.0006154141330706586, "loss": 0.0123, "num_input_tokens_seen": 127983808, "step": 59250 }, { "epoch": 9.666394779771615, "grad_norm": 0.18464896082878113, "learning_rate": 0.0006153448743248805, "loss": 0.0517, "num_input_tokens_seen": 127994912, "step": 59255 }, { "epoch": 9.66721044045677, "grad_norm": 0.010973125696182251, "learning_rate": 0.0006152756132414924, "loss": 0.1439, "num_input_tokens_seen": 128003488, "step": 59260 }, { "epoch": 9.668026101141924, "grad_norm": 0.09602286666631699, "learning_rate": 0.0006152063498218977, "loss": 0.091, "num_input_tokens_seen": 128014272, "step": 59265 }, { "epoch": 9.66884176182708, "grad_norm": 0.25912317633628845, "learning_rate": 0.0006151370840675001, "loss": 0.0373, "num_input_tokens_seen": 128023808, "step": 59270 }, { "epoch": 9.669657422512234, "grad_norm": 0.046629659831523895, "learning_rate": 0.0006150678159797034, "loss": 0.04, "num_input_tokens_seen": 128034624, "step": 59275 }, { "epoch": 9.67047308319739, "grad_norm": 0.052462734282016754, "learning_rate": 0.0006149985455599115, "loss": 0.1757, "num_input_tokens_seen": 128045440, "step": 59280 }, { "epoch": 9.671288743882545, "grad_norm": 0.06739270687103271, "learning_rate": 0.0006149292728095283, "loss": 0.1342, "num_input_tokens_seen": 128054624, "step": 59285 }, { "epoch": 9.6721044045677, "grad_norm": 0.02121824584901333, "learning_rate": 0.0006148599977299575, "loss": 0.0641, "num_input_tokens_seen": 128065984, "step": 59290 }, { "epoch": 9.672920065252855, "grad_norm": 0.0886927992105484, "learning_rate": 0.0006147907203226031, "loss": 0.0409, "num_input_tokens_seen": 128076768, "step": 59295 }, { "epoch": 9.673735725938009, "grad_norm": 0.013269363902509212, "learning_rate": 0.0006147214405888692, "loss": 0.0179, "num_input_tokens_seen": 128087840, "step": 59300 }, { "epoch": 9.674551386623165, "grad_norm": 0.06566104292869568, "learning_rate": 0.0006146521585301596, "loss": 0.1646, "num_input_tokens_seen": 128099168, "step": 59305 }, { "epoch": 9.67536704730832, "grad_norm": 0.013543189503252506, "learning_rate": 0.0006145828741478788, "loss": 0.0231, "num_input_tokens_seen": 128109696, "step": 59310 }, { "epoch": 9.676182707993474, "grad_norm": 0.010334925726056099, "learning_rate": 0.0006145135874434305, "loss": 0.0411, "num_input_tokens_seen": 128120768, "step": 59315 }, { "epoch": 9.67699836867863, "grad_norm": 0.003125895978882909, "learning_rate": 0.0006144442984182193, "loss": 0.0101, "num_input_tokens_seen": 128132768, "step": 59320 }, { "epoch": 9.677814029363784, "grad_norm": 0.025807317346334457, "learning_rate": 0.0006143750070736491, "loss": 0.0181, "num_input_tokens_seen": 128144768, "step": 59325 }, { "epoch": 9.67862969004894, "grad_norm": 0.016329145058989525, "learning_rate": 0.0006143057134111243, "loss": 0.058, "num_input_tokens_seen": 128155616, "step": 59330 }, { "epoch": 9.679445350734095, "grad_norm": 0.21162718534469604, "learning_rate": 0.0006142364174320492, "loss": 0.1422, "num_input_tokens_seen": 128166240, "step": 59335 }, { "epoch": 9.68026101141925, "grad_norm": 0.03283459693193436, "learning_rate": 0.0006141671191378281, "loss": 0.0244, "num_input_tokens_seen": 128177408, "step": 59340 }, { "epoch": 9.681076672104405, "grad_norm": 0.05063774436712265, "learning_rate": 0.0006140978185298656, "loss": 0.0938, "num_input_tokens_seen": 128188672, "step": 59345 }, { "epoch": 9.681892332789559, "grad_norm": 0.0327952615916729, "learning_rate": 0.0006140285156095661, "loss": 0.0925, "num_input_tokens_seen": 128199520, "step": 59350 }, { "epoch": 9.682707993474715, "grad_norm": 0.3350673019886017, "learning_rate": 0.0006139592103783339, "loss": 0.0744, "num_input_tokens_seen": 128211232, "step": 59355 }, { "epoch": 9.68352365415987, "grad_norm": 0.008121310733258724, "learning_rate": 0.000613889902837574, "loss": 0.0152, "num_input_tokens_seen": 128220672, "step": 59360 }, { "epoch": 9.684339314845024, "grad_norm": 0.2720755338668823, "learning_rate": 0.0006138205929886905, "loss": 0.1245, "num_input_tokens_seen": 128231680, "step": 59365 }, { "epoch": 9.68515497553018, "grad_norm": 0.02992718666791916, "learning_rate": 0.0006137512808330884, "loss": 0.2193, "num_input_tokens_seen": 128242016, "step": 59370 }, { "epoch": 9.685970636215334, "grad_norm": 0.023438721895217896, "learning_rate": 0.0006136819663721722, "loss": 0.0225, "num_input_tokens_seen": 128252736, "step": 59375 }, { "epoch": 9.68678629690049, "grad_norm": 0.07434836030006409, "learning_rate": 0.0006136126496073469, "loss": 0.0126, "num_input_tokens_seen": 128262016, "step": 59380 }, { "epoch": 9.687601957585644, "grad_norm": 0.004536564461886883, "learning_rate": 0.0006135433305400169, "loss": 0.0208, "num_input_tokens_seen": 128273440, "step": 59385 }, { "epoch": 9.6884176182708, "grad_norm": 0.12715773284435272, "learning_rate": 0.0006134740091715875, "loss": 0.0186, "num_input_tokens_seen": 128284096, "step": 59390 }, { "epoch": 9.689233278955955, "grad_norm": 0.1364831030368805, "learning_rate": 0.0006134046855034631, "loss": 0.0775, "num_input_tokens_seen": 128295264, "step": 59395 }, { "epoch": 9.690048939641109, "grad_norm": 0.0033251584973186255, "learning_rate": 0.0006133353595370491, "loss": 0.0707, "num_input_tokens_seen": 128305856, "step": 59400 }, { "epoch": 9.690864600326265, "grad_norm": 0.1248481348156929, "learning_rate": 0.0006132660312737502, "loss": 0.1275, "num_input_tokens_seen": 128316928, "step": 59405 }, { "epoch": 9.691680261011419, "grad_norm": 0.0053417375311255455, "learning_rate": 0.0006131967007149716, "loss": 0.0222, "num_input_tokens_seen": 128327168, "step": 59410 }, { "epoch": 9.692495921696574, "grad_norm": 0.03325970470905304, "learning_rate": 0.000613127367862118, "loss": 0.0222, "num_input_tokens_seen": 128338720, "step": 59415 }, { "epoch": 9.69331158238173, "grad_norm": 0.1174701601266861, "learning_rate": 0.0006130580327165949, "loss": 0.0693, "num_input_tokens_seen": 128349216, "step": 59420 }, { "epoch": 9.694127243066884, "grad_norm": 0.002682819264009595, "learning_rate": 0.0006129886952798074, "loss": 0.0506, "num_input_tokens_seen": 128359552, "step": 59425 }, { "epoch": 9.69494290375204, "grad_norm": 0.00796019472181797, "learning_rate": 0.0006129193555531606, "loss": 0.0339, "num_input_tokens_seen": 128370976, "step": 59430 }, { "epoch": 9.695758564437194, "grad_norm": 0.21734458208084106, "learning_rate": 0.0006128500135380598, "loss": 0.029, "num_input_tokens_seen": 128381472, "step": 59435 }, { "epoch": 9.69657422512235, "grad_norm": 0.2784644365310669, "learning_rate": 0.0006127806692359103, "loss": 0.0328, "num_input_tokens_seen": 128393088, "step": 59440 }, { "epoch": 9.697389885807503, "grad_norm": 0.010838964022696018, "learning_rate": 0.0006127113226481175, "loss": 0.044, "num_input_tokens_seen": 128403520, "step": 59445 }, { "epoch": 9.698205546492659, "grad_norm": 0.0185268372297287, "learning_rate": 0.0006126419737760868, "loss": 0.0095, "num_input_tokens_seen": 128415040, "step": 59450 }, { "epoch": 9.699021207177815, "grad_norm": 0.29471975564956665, "learning_rate": 0.0006125726226212236, "loss": 0.2644, "num_input_tokens_seen": 128425376, "step": 59455 }, { "epoch": 9.699836867862969, "grad_norm": 0.2613866627216339, "learning_rate": 0.0006125032691849333, "loss": 0.1377, "num_input_tokens_seen": 128435520, "step": 59460 }, { "epoch": 9.700652528548124, "grad_norm": 0.01687040366232395, "learning_rate": 0.0006124339134686216, "loss": 0.0096, "num_input_tokens_seen": 128446368, "step": 59465 }, { "epoch": 9.701468189233278, "grad_norm": 0.07034385949373245, "learning_rate": 0.0006123645554736941, "loss": 0.0466, "num_input_tokens_seen": 128456736, "step": 59470 }, { "epoch": 9.702283849918434, "grad_norm": 0.13933435082435608, "learning_rate": 0.0006122951952015562, "loss": 0.0233, "num_input_tokens_seen": 128467680, "step": 59475 }, { "epoch": 9.70309951060359, "grad_norm": 0.008489076048135757, "learning_rate": 0.0006122258326536138, "loss": 0.037, "num_input_tokens_seen": 128478720, "step": 59480 }, { "epoch": 9.703915171288743, "grad_norm": 0.2870648205280304, "learning_rate": 0.0006121564678312724, "loss": 0.0918, "num_input_tokens_seen": 128489344, "step": 59485 }, { "epoch": 9.7047308319739, "grad_norm": 0.0050661033019423485, "learning_rate": 0.0006120871007359381, "loss": 0.0205, "num_input_tokens_seen": 128498816, "step": 59490 }, { "epoch": 9.705546492659053, "grad_norm": 0.004017225466668606, "learning_rate": 0.0006120177313690164, "loss": 0.023, "num_input_tokens_seen": 128509504, "step": 59495 }, { "epoch": 9.706362153344209, "grad_norm": 0.1601206660270691, "learning_rate": 0.0006119483597319132, "loss": 0.0454, "num_input_tokens_seen": 128521856, "step": 59500 }, { "epoch": 9.707177814029365, "grad_norm": 0.008326910436153412, "learning_rate": 0.0006118789858260347, "loss": 0.1383, "num_input_tokens_seen": 128532672, "step": 59505 }, { "epoch": 9.707993474714518, "grad_norm": 0.06540261209011078, "learning_rate": 0.0006118096096527863, "loss": 0.0317, "num_input_tokens_seen": 128543392, "step": 59510 }, { "epoch": 9.708809135399674, "grad_norm": 0.034490231424570084, "learning_rate": 0.0006117402312135746, "loss": 0.0547, "num_input_tokens_seen": 128554464, "step": 59515 }, { "epoch": 9.709624796084828, "grad_norm": 0.01300547644495964, "learning_rate": 0.0006116708505098051, "loss": 0.1313, "num_input_tokens_seen": 128565472, "step": 59520 }, { "epoch": 9.710440456769984, "grad_norm": 0.009446562267839909, "learning_rate": 0.0006116014675428842, "loss": 0.0744, "num_input_tokens_seen": 128576832, "step": 59525 }, { "epoch": 9.71125611745514, "grad_norm": 0.007366952486336231, "learning_rate": 0.0006115320823142182, "loss": 0.1082, "num_input_tokens_seen": 128587136, "step": 59530 }, { "epoch": 9.712071778140293, "grad_norm": 0.011812661774456501, "learning_rate": 0.000611462694825213, "loss": 0.1366, "num_input_tokens_seen": 128598176, "step": 59535 }, { "epoch": 9.71288743882545, "grad_norm": 0.08814537525177002, "learning_rate": 0.0006113933050772749, "loss": 0.0737, "num_input_tokens_seen": 128608736, "step": 59540 }, { "epoch": 9.713703099510603, "grad_norm": 0.0336206778883934, "learning_rate": 0.00061132391307181, "loss": 0.0218, "num_input_tokens_seen": 128619104, "step": 59545 }, { "epoch": 9.714518760195759, "grad_norm": 0.06537744402885437, "learning_rate": 0.0006112545188102249, "loss": 0.0687, "num_input_tokens_seen": 128629088, "step": 59550 }, { "epoch": 9.715334420880914, "grad_norm": 0.03816675767302513, "learning_rate": 0.0006111851222939257, "loss": 0.1662, "num_input_tokens_seen": 128640224, "step": 59555 }, { "epoch": 9.716150081566068, "grad_norm": 0.09322861582040787, "learning_rate": 0.0006111157235243192, "loss": 0.1321, "num_input_tokens_seen": 128651040, "step": 59560 }, { "epoch": 9.716965742251224, "grad_norm": 0.0767819806933403, "learning_rate": 0.0006110463225028114, "loss": 0.137, "num_input_tokens_seen": 128660928, "step": 59565 }, { "epoch": 9.717781402936378, "grad_norm": 0.04609353095293045, "learning_rate": 0.0006109769192308091, "loss": 0.1022, "num_input_tokens_seen": 128670144, "step": 59570 }, { "epoch": 9.718597063621534, "grad_norm": 0.014793830923736095, "learning_rate": 0.0006109075137097188, "loss": 0.0489, "num_input_tokens_seen": 128680352, "step": 59575 }, { "epoch": 9.719412724306688, "grad_norm": 0.07300538569688797, "learning_rate": 0.0006108381059409469, "loss": 0.0268, "num_input_tokens_seen": 128690240, "step": 59580 }, { "epoch": 9.720228384991843, "grad_norm": 0.0640680119395256, "learning_rate": 0.0006107686959259003, "loss": 0.0205, "num_input_tokens_seen": 128700032, "step": 59585 }, { "epoch": 9.721044045676999, "grad_norm": 0.01870567351579666, "learning_rate": 0.0006106992836659853, "loss": 0.0472, "num_input_tokens_seen": 128710016, "step": 59590 }, { "epoch": 9.721859706362153, "grad_norm": 0.06907132267951965, "learning_rate": 0.0006106298691626091, "loss": 0.0095, "num_input_tokens_seen": 128719040, "step": 59595 }, { "epoch": 9.722675367047309, "grad_norm": 0.29290202260017395, "learning_rate": 0.0006105604524171782, "loss": 0.0434, "num_input_tokens_seen": 128729824, "step": 59600 }, { "epoch": 9.723491027732463, "grad_norm": 0.029978347942233086, "learning_rate": 0.0006104910334310996, "loss": 0.031, "num_input_tokens_seen": 128740960, "step": 59605 }, { "epoch": 9.724306688417618, "grad_norm": 0.008729356341063976, "learning_rate": 0.0006104216122057799, "loss": 0.0101, "num_input_tokens_seen": 128752224, "step": 59610 }, { "epoch": 9.725122349102774, "grad_norm": 0.057942282408475876, "learning_rate": 0.0006103521887426262, "loss": 0.0909, "num_input_tokens_seen": 128762304, "step": 59615 }, { "epoch": 9.725938009787928, "grad_norm": 0.005804257933050394, "learning_rate": 0.0006102827630430454, "loss": 0.055, "num_input_tokens_seen": 128772448, "step": 59620 }, { "epoch": 9.726753670473084, "grad_norm": 0.27918195724487305, "learning_rate": 0.0006102133351084443, "loss": 0.0455, "num_input_tokens_seen": 128784320, "step": 59625 }, { "epoch": 9.727569331158238, "grad_norm": 0.39616507291793823, "learning_rate": 0.0006101439049402304, "loss": 0.1558, "num_input_tokens_seen": 128794784, "step": 59630 }, { "epoch": 9.728384991843393, "grad_norm": 0.0009002613369375467, "learning_rate": 0.0006100744725398105, "loss": 0.0125, "num_input_tokens_seen": 128806688, "step": 59635 }, { "epoch": 9.729200652528547, "grad_norm": 0.1437094509601593, "learning_rate": 0.0006100050379085918, "loss": 0.0356, "num_input_tokens_seen": 128816768, "step": 59640 }, { "epoch": 9.730016313213703, "grad_norm": 0.03397361934185028, "learning_rate": 0.0006099356010479814, "loss": 0.104, "num_input_tokens_seen": 128827104, "step": 59645 }, { "epoch": 9.730831973898859, "grad_norm": 0.06053118407726288, "learning_rate": 0.0006098661619593866, "loss": 0.0096, "num_input_tokens_seen": 128838592, "step": 59650 }, { "epoch": 9.731647634584013, "grad_norm": 0.0009688441641628742, "learning_rate": 0.0006097967206442147, "loss": 0.0316, "num_input_tokens_seen": 128847936, "step": 59655 }, { "epoch": 9.732463295269168, "grad_norm": 0.010555686429142952, "learning_rate": 0.0006097272771038728, "loss": 0.008, "num_input_tokens_seen": 128858432, "step": 59660 }, { "epoch": 9.733278955954322, "grad_norm": 0.02458445355296135, "learning_rate": 0.0006096578313397687, "loss": 0.0158, "num_input_tokens_seen": 128869888, "step": 59665 }, { "epoch": 9.734094616639478, "grad_norm": 0.19622117280960083, "learning_rate": 0.0006095883833533094, "loss": 0.109, "num_input_tokens_seen": 128879648, "step": 59670 }, { "epoch": 9.734910277324634, "grad_norm": 0.007972361519932747, "learning_rate": 0.0006095189331459024, "loss": 0.0179, "num_input_tokens_seen": 128891072, "step": 59675 }, { "epoch": 9.735725938009788, "grad_norm": 0.003296657232567668, "learning_rate": 0.0006094494807189555, "loss": 0.0588, "num_input_tokens_seen": 128903008, "step": 59680 }, { "epoch": 9.736541598694943, "grad_norm": 0.08102521300315857, "learning_rate": 0.0006093800260738758, "loss": 0.0472, "num_input_tokens_seen": 128913632, "step": 59685 }, { "epoch": 9.737357259380097, "grad_norm": 0.014509606175124645, "learning_rate": 0.0006093105692120712, "loss": 0.1708, "num_input_tokens_seen": 128924064, "step": 59690 }, { "epoch": 9.738172920065253, "grad_norm": 0.0067308759316802025, "learning_rate": 0.0006092411101349492, "loss": 0.0577, "num_input_tokens_seen": 128935104, "step": 59695 }, { "epoch": 9.738988580750409, "grad_norm": 0.01863669790327549, "learning_rate": 0.0006091716488439177, "loss": 0.0272, "num_input_tokens_seen": 128945952, "step": 59700 }, { "epoch": 9.739804241435563, "grad_norm": 0.3315466344356537, "learning_rate": 0.0006091021853403841, "loss": 0.0688, "num_input_tokens_seen": 128955808, "step": 59705 }, { "epoch": 9.740619902120718, "grad_norm": 0.0017027267022058368, "learning_rate": 0.0006090327196257562, "loss": 0.0629, "num_input_tokens_seen": 128966304, "step": 59710 }, { "epoch": 9.741435562805872, "grad_norm": 0.08503128588199615, "learning_rate": 0.000608963251701442, "loss": 0.0178, "num_input_tokens_seen": 128976768, "step": 59715 }, { "epoch": 9.742251223491028, "grad_norm": 0.017233064398169518, "learning_rate": 0.0006088937815688495, "loss": 0.0258, "num_input_tokens_seen": 128988416, "step": 59720 }, { "epoch": 9.743066884176184, "grad_norm": 0.4578186571598053, "learning_rate": 0.0006088243092293861, "loss": 0.1782, "num_input_tokens_seen": 129000032, "step": 59725 }, { "epoch": 9.743882544861338, "grad_norm": 0.0711468756198883, "learning_rate": 0.0006087548346844601, "loss": 0.0207, "num_input_tokens_seen": 129010464, "step": 59730 }, { "epoch": 9.744698205546493, "grad_norm": 0.0229730773717165, "learning_rate": 0.0006086853579354793, "loss": 0.0431, "num_input_tokens_seen": 129021920, "step": 59735 }, { "epoch": 9.745513866231647, "grad_norm": 0.008088946342468262, "learning_rate": 0.0006086158789838519, "loss": 0.0827, "num_input_tokens_seen": 129032480, "step": 59740 }, { "epoch": 9.746329526916803, "grad_norm": 0.018943075090646744, "learning_rate": 0.0006085463978309861, "loss": 0.045, "num_input_tokens_seen": 129043712, "step": 59745 }, { "epoch": 9.747145187601957, "grad_norm": 0.02913905493915081, "learning_rate": 0.0006084769144782897, "loss": 0.0292, "num_input_tokens_seen": 129054560, "step": 59750 }, { "epoch": 9.747960848287113, "grad_norm": 0.003888669889420271, "learning_rate": 0.0006084074289271711, "loss": 0.0071, "num_input_tokens_seen": 129065504, "step": 59755 }, { "epoch": 9.748776508972268, "grad_norm": 0.08220919221639633, "learning_rate": 0.0006083379411790383, "loss": 0.0388, "num_input_tokens_seen": 129075936, "step": 59760 }, { "epoch": 9.749592169657422, "grad_norm": 0.13562801480293274, "learning_rate": 0.0006082684512352997, "loss": 0.08, "num_input_tokens_seen": 129087168, "step": 59765 }, { "epoch": 9.750407830342578, "grad_norm": 0.0025733276270329952, "learning_rate": 0.0006081989590973637, "loss": 0.0074, "num_input_tokens_seen": 129096832, "step": 59770 }, { "epoch": 9.751223491027732, "grad_norm": 0.019972285255789757, "learning_rate": 0.0006081294647666385, "loss": 0.3124, "num_input_tokens_seen": 129107616, "step": 59775 }, { "epoch": 9.752039151712887, "grad_norm": 0.36964648962020874, "learning_rate": 0.0006080599682445325, "loss": 0.0386, "num_input_tokens_seen": 129119296, "step": 59780 }, { "epoch": 9.752854812398043, "grad_norm": 0.11169246584177017, "learning_rate": 0.000607990469532454, "loss": 0.0647, "num_input_tokens_seen": 129129792, "step": 59785 }, { "epoch": 9.753670473083197, "grad_norm": 0.010639806278049946, "learning_rate": 0.0006079209686318119, "loss": 0.0163, "num_input_tokens_seen": 129140320, "step": 59790 }, { "epoch": 9.754486133768353, "grad_norm": 0.05021905153989792, "learning_rate": 0.0006078514655440144, "loss": 0.0626, "num_input_tokens_seen": 129152288, "step": 59795 }, { "epoch": 9.755301794453507, "grad_norm": 0.042118266224861145, "learning_rate": 0.0006077819602704702, "loss": 0.035, "num_input_tokens_seen": 129162528, "step": 59800 }, { "epoch": 9.756117455138662, "grad_norm": 0.8312237858772278, "learning_rate": 0.0006077124528125877, "loss": 0.0373, "num_input_tokens_seen": 129172576, "step": 59805 }, { "epoch": 9.756933115823816, "grad_norm": 0.0063532376661896706, "learning_rate": 0.0006076429431717757, "loss": 0.0285, "num_input_tokens_seen": 129183744, "step": 59810 }, { "epoch": 9.757748776508972, "grad_norm": 0.01858111470937729, "learning_rate": 0.000607573431349443, "loss": 0.1904, "num_input_tokens_seen": 129194048, "step": 59815 }, { "epoch": 9.758564437194128, "grad_norm": 0.23809611797332764, "learning_rate": 0.0006075039173469982, "loss": 0.1462, "num_input_tokens_seen": 129204416, "step": 59820 }, { "epoch": 9.759380097879282, "grad_norm": 0.006448432803153992, "learning_rate": 0.0006074344011658501, "loss": 0.0679, "num_input_tokens_seen": 129214880, "step": 59825 }, { "epoch": 9.760195758564437, "grad_norm": 0.0030931164510548115, "learning_rate": 0.0006073648828074077, "loss": 0.1305, "num_input_tokens_seen": 129226176, "step": 59830 }, { "epoch": 9.761011419249591, "grad_norm": 0.03220542520284653, "learning_rate": 0.0006072953622730796, "loss": 0.0369, "num_input_tokens_seen": 129236896, "step": 59835 }, { "epoch": 9.761827079934747, "grad_norm": 0.06129692122340202, "learning_rate": 0.0006072258395642748, "loss": 0.0273, "num_input_tokens_seen": 129249088, "step": 59840 }, { "epoch": 9.762642740619903, "grad_norm": 0.1948278546333313, "learning_rate": 0.0006071563146824024, "loss": 0.0644, "num_input_tokens_seen": 129260096, "step": 59845 }, { "epoch": 9.763458401305057, "grad_norm": 0.20120300352573395, "learning_rate": 0.0006070867876288715, "loss": 0.0493, "num_input_tokens_seen": 129271744, "step": 59850 }, { "epoch": 9.764274061990212, "grad_norm": 0.010271182283759117, "learning_rate": 0.0006070172584050908, "loss": 0.0582, "num_input_tokens_seen": 129282688, "step": 59855 }, { "epoch": 9.765089722675366, "grad_norm": 0.004881757777184248, "learning_rate": 0.0006069477270124697, "loss": 0.0399, "num_input_tokens_seen": 129292576, "step": 59860 }, { "epoch": 9.765905383360522, "grad_norm": 0.009391111321747303, "learning_rate": 0.0006068781934524172, "loss": 0.011, "num_input_tokens_seen": 129302784, "step": 59865 }, { "epoch": 9.766721044045678, "grad_norm": 0.014628876000642776, "learning_rate": 0.0006068086577263426, "loss": 0.1397, "num_input_tokens_seen": 129314304, "step": 59870 }, { "epoch": 9.767536704730832, "grad_norm": 0.29984965920448303, "learning_rate": 0.0006067391198356551, "loss": 0.0568, "num_input_tokens_seen": 129325856, "step": 59875 }, { "epoch": 9.768352365415987, "grad_norm": 0.06933252513408661, "learning_rate": 0.0006066695797817638, "loss": 0.0323, "num_input_tokens_seen": 129337504, "step": 59880 }, { "epoch": 9.769168026101141, "grad_norm": 0.3127744495868683, "learning_rate": 0.0006066000375660782, "loss": 0.1433, "num_input_tokens_seen": 129350208, "step": 59885 }, { "epoch": 9.769983686786297, "grad_norm": 0.02711273729801178, "learning_rate": 0.0006065304931900076, "loss": 0.0513, "num_input_tokens_seen": 129362048, "step": 59890 }, { "epoch": 9.770799347471453, "grad_norm": 0.18783365190029144, "learning_rate": 0.0006064609466549614, "loss": 0.0665, "num_input_tokens_seen": 129372768, "step": 59895 }, { "epoch": 9.771615008156607, "grad_norm": 0.928313672542572, "learning_rate": 0.0006063913979623491, "loss": 0.0947, "num_input_tokens_seen": 129383584, "step": 59900 }, { "epoch": 9.772430668841762, "grad_norm": 0.41291940212249756, "learning_rate": 0.0006063218471135801, "loss": 0.1186, "num_input_tokens_seen": 129395136, "step": 59905 }, { "epoch": 9.773246329526916, "grad_norm": 0.016780929639935493, "learning_rate": 0.0006062522941100639, "loss": 0.0408, "num_input_tokens_seen": 129406720, "step": 59910 }, { "epoch": 9.774061990212072, "grad_norm": 0.20784629881381989, "learning_rate": 0.0006061827389532103, "loss": 0.1078, "num_input_tokens_seen": 129417888, "step": 59915 }, { "epoch": 9.774877650897226, "grad_norm": 0.544096052646637, "learning_rate": 0.0006061131816444287, "loss": 0.071, "num_input_tokens_seen": 129429088, "step": 59920 }, { "epoch": 9.775693311582382, "grad_norm": 0.05459734797477722, "learning_rate": 0.000606043622185129, "loss": 0.0259, "num_input_tokens_seen": 129439008, "step": 59925 }, { "epoch": 9.776508972267537, "grad_norm": 0.05036766454577446, "learning_rate": 0.0006059740605767207, "loss": 0.1057, "num_input_tokens_seen": 129450528, "step": 59930 }, { "epoch": 9.777324632952691, "grad_norm": 0.010146408341825008, "learning_rate": 0.0006059044968206136, "loss": 0.0881, "num_input_tokens_seen": 129460864, "step": 59935 }, { "epoch": 9.778140293637847, "grad_norm": 0.00299668638035655, "learning_rate": 0.0006058349309182176, "loss": 0.0243, "num_input_tokens_seen": 129471264, "step": 59940 }, { "epoch": 9.778955954323001, "grad_norm": 0.00625614495947957, "learning_rate": 0.0006057653628709424, "loss": 0.1429, "num_input_tokens_seen": 129482656, "step": 59945 }, { "epoch": 9.779771615008157, "grad_norm": 0.15388129651546478, "learning_rate": 0.0006056957926801979, "loss": 0.0511, "num_input_tokens_seen": 129493888, "step": 59950 }, { "epoch": 9.780587275693312, "grad_norm": 0.35571151971817017, "learning_rate": 0.0006056262203473941, "loss": 0.2147, "num_input_tokens_seen": 129505216, "step": 59955 }, { "epoch": 9.781402936378466, "grad_norm": 0.028294721618294716, "learning_rate": 0.000605556645873941, "loss": 0.0188, "num_input_tokens_seen": 129517216, "step": 59960 }, { "epoch": 9.782218597063622, "grad_norm": 0.20071174204349518, "learning_rate": 0.0006054870692612487, "loss": 0.1073, "num_input_tokens_seen": 129528896, "step": 59965 }, { "epoch": 9.783034257748776, "grad_norm": 0.03279627487063408, "learning_rate": 0.0006054174905107269, "loss": 0.0299, "num_input_tokens_seen": 129541088, "step": 59970 }, { "epoch": 9.783849918433932, "grad_norm": 0.04848659038543701, "learning_rate": 0.0006053479096237859, "loss": 0.0347, "num_input_tokens_seen": 129552320, "step": 59975 }, { "epoch": 9.784665579119086, "grad_norm": 0.262071430683136, "learning_rate": 0.000605278326601836, "loss": 0.1985, "num_input_tokens_seen": 129563104, "step": 59980 }, { "epoch": 9.785481239804241, "grad_norm": 0.08661609143018723, "learning_rate": 0.0006052087414462873, "loss": 0.0616, "num_input_tokens_seen": 129575168, "step": 59985 }, { "epoch": 9.786296900489397, "grad_norm": 0.4128914177417755, "learning_rate": 0.00060513915415855, "loss": 0.0687, "num_input_tokens_seen": 129586592, "step": 59990 }, { "epoch": 9.78711256117455, "grad_norm": 0.101948581635952, "learning_rate": 0.0006050695647400342, "loss": 0.0472, "num_input_tokens_seen": 129596416, "step": 59995 }, { "epoch": 9.787928221859707, "grad_norm": 0.008494734764099121, "learning_rate": 0.0006049999731921504, "loss": 0.0759, "num_input_tokens_seen": 129608256, "step": 60000 }, { "epoch": 9.78874388254486, "grad_norm": 0.1565508395433426, "learning_rate": 0.0006049303795163091, "loss": 0.0226, "num_input_tokens_seen": 129620160, "step": 60005 }, { "epoch": 9.789559543230016, "grad_norm": 0.0611579567193985, "learning_rate": 0.0006048607837139204, "loss": 0.0151, "num_input_tokens_seen": 129631264, "step": 60010 }, { "epoch": 9.790375203915172, "grad_norm": 0.013933761976659298, "learning_rate": 0.0006047911857863949, "loss": 0.0899, "num_input_tokens_seen": 129642048, "step": 60015 }, { "epoch": 9.791190864600326, "grad_norm": 0.07861107587814331, "learning_rate": 0.0006047215857351431, "loss": 0.021, "num_input_tokens_seen": 129652928, "step": 60020 }, { "epoch": 9.792006525285482, "grad_norm": 0.005836360156536102, "learning_rate": 0.0006046519835615756, "loss": 0.036, "num_input_tokens_seen": 129663008, "step": 60025 }, { "epoch": 9.792822185970635, "grad_norm": 0.025735294446349144, "learning_rate": 0.0006045823792671029, "loss": 0.1123, "num_input_tokens_seen": 129674688, "step": 60030 }, { "epoch": 9.793637846655791, "grad_norm": 0.07211139798164368, "learning_rate": 0.0006045127728531354, "loss": 0.0211, "num_input_tokens_seen": 129685312, "step": 60035 }, { "epoch": 9.794453507340947, "grad_norm": 0.01239249762147665, "learning_rate": 0.0006044431643210842, "loss": 0.046, "num_input_tokens_seen": 129697120, "step": 60040 }, { "epoch": 9.7952691680261, "grad_norm": 0.15044008195400238, "learning_rate": 0.0006043735536723595, "loss": 0.0516, "num_input_tokens_seen": 129708512, "step": 60045 }, { "epoch": 9.796084828711257, "grad_norm": 0.1912791132926941, "learning_rate": 0.0006043039409083726, "loss": 0.0484, "num_input_tokens_seen": 129719232, "step": 60050 }, { "epoch": 9.79690048939641, "grad_norm": 0.013628056272864342, "learning_rate": 0.0006042343260305339, "loss": 0.0108, "num_input_tokens_seen": 129729216, "step": 60055 }, { "epoch": 9.797716150081566, "grad_norm": 0.024676060304045677, "learning_rate": 0.0006041647090402544, "loss": 0.0833, "num_input_tokens_seen": 129740384, "step": 60060 }, { "epoch": 9.798531810766722, "grad_norm": 0.018948128446936607, "learning_rate": 0.0006040950899389449, "loss": 0.0182, "num_input_tokens_seen": 129750080, "step": 60065 }, { "epoch": 9.799347471451876, "grad_norm": 0.06415722519159317, "learning_rate": 0.0006040254687280163, "loss": 0.037, "num_input_tokens_seen": 129761216, "step": 60070 }, { "epoch": 9.800163132137031, "grad_norm": 1.0381970405578613, "learning_rate": 0.0006039558454088796, "loss": 0.0945, "num_input_tokens_seen": 129771872, "step": 60075 }, { "epoch": 9.800978792822185, "grad_norm": 0.29000842571258545, "learning_rate": 0.0006038862199829459, "loss": 0.2056, "num_input_tokens_seen": 129782240, "step": 60080 }, { "epoch": 9.801794453507341, "grad_norm": 0.10297328978776932, "learning_rate": 0.0006038165924516262, "loss": 0.0503, "num_input_tokens_seen": 129793664, "step": 60085 }, { "epoch": 9.802610114192497, "grad_norm": 0.05766294151544571, "learning_rate": 0.0006037469628163315, "loss": 0.0227, "num_input_tokens_seen": 129803904, "step": 60090 }, { "epoch": 9.80342577487765, "grad_norm": 0.12936429679393768, "learning_rate": 0.000603677331078473, "loss": 0.0057, "num_input_tokens_seen": 129814400, "step": 60095 }, { "epoch": 9.804241435562806, "grad_norm": 0.004301194101572037, "learning_rate": 0.0006036076972394618, "loss": 0.0709, "num_input_tokens_seen": 129825856, "step": 60100 }, { "epoch": 9.80505709624796, "grad_norm": 0.008284702897071838, "learning_rate": 0.0006035380613007093, "loss": 0.1158, "num_input_tokens_seen": 129836192, "step": 60105 }, { "epoch": 9.805872756933116, "grad_norm": 0.03883660212159157, "learning_rate": 0.0006034684232636266, "loss": 0.0156, "num_input_tokens_seen": 129847424, "step": 60110 }, { "epoch": 9.80668841761827, "grad_norm": 0.0031957624014467, "learning_rate": 0.0006033987831296251, "loss": 0.0549, "num_input_tokens_seen": 129858944, "step": 60115 }, { "epoch": 9.807504078303426, "grad_norm": 0.0031466346699744463, "learning_rate": 0.0006033291409001159, "loss": 0.0405, "num_input_tokens_seen": 129869056, "step": 60120 }, { "epoch": 9.808319738988581, "grad_norm": 0.011443628929555416, "learning_rate": 0.0006032594965765107, "loss": 0.027, "num_input_tokens_seen": 129878976, "step": 60125 }, { "epoch": 9.809135399673735, "grad_norm": 0.006320877466350794, "learning_rate": 0.0006031898501602207, "loss": 0.1365, "num_input_tokens_seen": 129889984, "step": 60130 }, { "epoch": 9.809951060358891, "grad_norm": 0.25152286887168884, "learning_rate": 0.0006031202016526576, "loss": 0.0353, "num_input_tokens_seen": 129900832, "step": 60135 }, { "epoch": 9.810766721044045, "grad_norm": 0.09581360220909119, "learning_rate": 0.0006030505510552329, "loss": 0.1428, "num_input_tokens_seen": 129911872, "step": 60140 }, { "epoch": 9.8115823817292, "grad_norm": 0.02002377063035965, "learning_rate": 0.0006029808983693579, "loss": 0.0112, "num_input_tokens_seen": 129919808, "step": 60145 }, { "epoch": 9.812398042414356, "grad_norm": 0.03476516902446747, "learning_rate": 0.0006029112435964444, "loss": 0.016, "num_input_tokens_seen": 129930848, "step": 60150 }, { "epoch": 9.81321370309951, "grad_norm": 0.005062747281044722, "learning_rate": 0.0006028415867379039, "loss": 0.1012, "num_input_tokens_seen": 129940576, "step": 60155 }, { "epoch": 9.814029363784666, "grad_norm": 0.003687590127810836, "learning_rate": 0.0006027719277951482, "loss": 0.042, "num_input_tokens_seen": 129952448, "step": 60160 }, { "epoch": 9.81484502446982, "grad_norm": 0.006466528866440058, "learning_rate": 0.000602702266769589, "loss": 0.1049, "num_input_tokens_seen": 129962752, "step": 60165 }, { "epoch": 9.815660685154976, "grad_norm": 0.004529135767370462, "learning_rate": 0.0006026326036626382, "loss": 0.0156, "num_input_tokens_seen": 129973184, "step": 60170 }, { "epoch": 9.81647634584013, "grad_norm": 0.007038953714072704, "learning_rate": 0.0006025629384757075, "loss": 0.0083, "num_input_tokens_seen": 129984608, "step": 60175 }, { "epoch": 9.817292006525285, "grad_norm": 0.0019257472595199943, "learning_rate": 0.0006024932712102085, "loss": 0.0659, "num_input_tokens_seen": 129995616, "step": 60180 }, { "epoch": 9.818107667210441, "grad_norm": 0.004110215697437525, "learning_rate": 0.0006024236018675537, "loss": 0.0442, "num_input_tokens_seen": 130007360, "step": 60185 }, { "epoch": 9.818923327895595, "grad_norm": 0.15984342992305756, "learning_rate": 0.0006023539304491544, "loss": 0.0569, "num_input_tokens_seen": 130018592, "step": 60190 }, { "epoch": 9.81973898858075, "grad_norm": 0.11752087622880936, "learning_rate": 0.000602284256956423, "loss": 0.0347, "num_input_tokens_seen": 130026720, "step": 60195 }, { "epoch": 9.820554649265905, "grad_norm": 0.007008385378867388, "learning_rate": 0.0006022145813907713, "loss": 0.0883, "num_input_tokens_seen": 130037728, "step": 60200 }, { "epoch": 9.82137030995106, "grad_norm": 0.04539079964160919, "learning_rate": 0.0006021449037536114, "loss": 0.0214, "num_input_tokens_seen": 130048928, "step": 60205 }, { "epoch": 9.822185970636216, "grad_norm": 0.024069270119071007, "learning_rate": 0.0006020752240463555, "loss": 0.0293, "num_input_tokens_seen": 130059968, "step": 60210 }, { "epoch": 9.82300163132137, "grad_norm": 0.06953584402799606, "learning_rate": 0.0006020055422704156, "loss": 0.0119, "num_input_tokens_seen": 130071712, "step": 60215 }, { "epoch": 9.823817292006526, "grad_norm": 0.012529253028333187, "learning_rate": 0.0006019358584272042, "loss": 0.0079, "num_input_tokens_seen": 130083360, "step": 60220 }, { "epoch": 9.82463295269168, "grad_norm": 0.38807380199432373, "learning_rate": 0.0006018661725181332, "loss": 0.1423, "num_input_tokens_seen": 130094368, "step": 60225 }, { "epoch": 9.825448613376835, "grad_norm": 0.08964411914348602, "learning_rate": 0.0006017964845446149, "loss": 0.0273, "num_input_tokens_seen": 130105184, "step": 60230 }, { "epoch": 9.826264274061991, "grad_norm": 0.06751274317502975, "learning_rate": 0.0006017267945080618, "loss": 0.0312, "num_input_tokens_seen": 130115584, "step": 60235 }, { "epoch": 9.827079934747145, "grad_norm": 0.007398010231554508, "learning_rate": 0.000601657102409886, "loss": 0.0225, "num_input_tokens_seen": 130126432, "step": 60240 }, { "epoch": 9.8278955954323, "grad_norm": 0.4739130735397339, "learning_rate": 0.0006015874082515003, "loss": 0.1254, "num_input_tokens_seen": 130137344, "step": 60245 }, { "epoch": 9.828711256117455, "grad_norm": 0.06454982608556747, "learning_rate": 0.0006015177120343168, "loss": 0.0456, "num_input_tokens_seen": 130148064, "step": 60250 }, { "epoch": 9.82952691680261, "grad_norm": 0.035548772662878036, "learning_rate": 0.000601448013759748, "loss": 0.069, "num_input_tokens_seen": 130159744, "step": 60255 }, { "epoch": 9.830342577487766, "grad_norm": 0.07255587726831436, "learning_rate": 0.0006013783134292067, "loss": 0.0417, "num_input_tokens_seen": 130171072, "step": 60260 }, { "epoch": 9.83115823817292, "grad_norm": 0.0018127447692677379, "learning_rate": 0.0006013086110441049, "loss": 0.0485, "num_input_tokens_seen": 130182400, "step": 60265 }, { "epoch": 9.831973898858076, "grad_norm": 0.034299176186323166, "learning_rate": 0.0006012389066058559, "loss": 0.0827, "num_input_tokens_seen": 130193568, "step": 60270 }, { "epoch": 9.83278955954323, "grad_norm": 0.08214818686246872, "learning_rate": 0.0006011692001158719, "loss": 0.0305, "num_input_tokens_seen": 130203680, "step": 60275 }, { "epoch": 9.833605220228385, "grad_norm": 0.0023437021300196648, "learning_rate": 0.0006010994915755659, "loss": 0.006, "num_input_tokens_seen": 130213632, "step": 60280 }, { "epoch": 9.83442088091354, "grad_norm": 0.007850533351302147, "learning_rate": 0.0006010297809863503, "loss": 0.0241, "num_input_tokens_seen": 130224128, "step": 60285 }, { "epoch": 9.835236541598695, "grad_norm": 0.040578775107860565, "learning_rate": 0.000600960068349638, "loss": 0.1383, "num_input_tokens_seen": 130235648, "step": 60290 }, { "epoch": 9.83605220228385, "grad_norm": 0.08046291023492813, "learning_rate": 0.000600890353666842, "loss": 0.0305, "num_input_tokens_seen": 130245792, "step": 60295 }, { "epoch": 9.836867862969005, "grad_norm": 0.002211250364780426, "learning_rate": 0.0006008206369393748, "loss": 0.1303, "num_input_tokens_seen": 130255744, "step": 60300 }, { "epoch": 9.83768352365416, "grad_norm": 0.1887092888355255, "learning_rate": 0.0006007509181686496, "loss": 0.15, "num_input_tokens_seen": 130266080, "step": 60305 }, { "epoch": 9.838499184339314, "grad_norm": 0.5672429800033569, "learning_rate": 0.0006006811973560792, "loss": 0.1197, "num_input_tokens_seen": 130277344, "step": 60310 }, { "epoch": 9.83931484502447, "grad_norm": 0.398388534784317, "learning_rate": 0.0006006114745030766, "loss": 0.0883, "num_input_tokens_seen": 130286656, "step": 60315 }, { "epoch": 9.840130505709626, "grad_norm": 0.003656855085864663, "learning_rate": 0.0006005417496110549, "loss": 0.0308, "num_input_tokens_seen": 130297344, "step": 60320 }, { "epoch": 9.84094616639478, "grad_norm": 0.02871028333902359, "learning_rate": 0.0006004720226814271, "loss": 0.0243, "num_input_tokens_seen": 130307200, "step": 60325 }, { "epoch": 9.841761827079935, "grad_norm": 0.3796071410179138, "learning_rate": 0.0006004022937156062, "loss": 0.1448, "num_input_tokens_seen": 130318144, "step": 60330 }, { "epoch": 9.84257748776509, "grad_norm": 0.018963851034641266, "learning_rate": 0.0006003325627150054, "loss": 0.0183, "num_input_tokens_seen": 130327904, "step": 60335 }, { "epoch": 9.843393148450245, "grad_norm": 0.046277206391096115, "learning_rate": 0.0006002628296810381, "loss": 0.0121, "num_input_tokens_seen": 130338176, "step": 60340 }, { "epoch": 9.844208809135399, "grad_norm": 0.0019071658607572317, "learning_rate": 0.0006001930946151172, "loss": 0.0116, "num_input_tokens_seen": 130349600, "step": 60345 }, { "epoch": 9.845024469820554, "grad_norm": 0.010552327148616314, "learning_rate": 0.0006001233575186563, "loss": 0.1436, "num_input_tokens_seen": 130360352, "step": 60350 }, { "epoch": 9.84584013050571, "grad_norm": 0.023848099634051323, "learning_rate": 0.0006000536183930684, "loss": 0.0938, "num_input_tokens_seen": 130370944, "step": 60355 }, { "epoch": 9.846655791190864, "grad_norm": 0.08024164289236069, "learning_rate": 0.000599983877239767, "loss": 0.0648, "num_input_tokens_seen": 130382336, "step": 60360 }, { "epoch": 9.84747145187602, "grad_norm": 0.018898021429777145, "learning_rate": 0.0005999141340601657, "loss": 0.01, "num_input_tokens_seen": 130392480, "step": 60365 }, { "epoch": 9.848287112561174, "grad_norm": 0.03573465347290039, "learning_rate": 0.0005998443888556776, "loss": 0.0339, "num_input_tokens_seen": 130403360, "step": 60370 }, { "epoch": 9.84910277324633, "grad_norm": 0.09511330723762512, "learning_rate": 0.0005997746416277162, "loss": 0.0313, "num_input_tokens_seen": 130414432, "step": 60375 }, { "epoch": 9.849918433931485, "grad_norm": 0.00512855825945735, "learning_rate": 0.0005997048923776953, "loss": 0.0605, "num_input_tokens_seen": 130424992, "step": 60380 }, { "epoch": 9.850734094616639, "grad_norm": 0.26799216866493225, "learning_rate": 0.000599635141107028, "loss": 0.0399, "num_input_tokens_seen": 130433920, "step": 60385 }, { "epoch": 9.851549755301795, "grad_norm": 0.007397581823170185, "learning_rate": 0.0005995653878171283, "loss": 0.0136, "num_input_tokens_seen": 130444544, "step": 60390 }, { "epoch": 9.852365415986949, "grad_norm": 0.6185172200202942, "learning_rate": 0.0005994956325094099, "loss": 0.0705, "num_input_tokens_seen": 130455360, "step": 60395 }, { "epoch": 9.853181076672104, "grad_norm": 0.03895857185125351, "learning_rate": 0.000599425875185286, "loss": 0.2108, "num_input_tokens_seen": 130465824, "step": 60400 }, { "epoch": 9.85399673735726, "grad_norm": 0.0034981996286660433, "learning_rate": 0.0005993561158461708, "loss": 0.0399, "num_input_tokens_seen": 130477376, "step": 60405 }, { "epoch": 9.854812398042414, "grad_norm": 0.003301647724583745, "learning_rate": 0.0005992863544934777, "loss": 0.1199, "num_input_tokens_seen": 130487328, "step": 60410 }, { "epoch": 9.85562805872757, "grad_norm": 0.02783522754907608, "learning_rate": 0.000599216591128621, "loss": 0.031, "num_input_tokens_seen": 130497248, "step": 60415 }, { "epoch": 9.856443719412724, "grad_norm": 0.004357766360044479, "learning_rate": 0.000599146825753014, "loss": 0.1704, "num_input_tokens_seen": 130507520, "step": 60420 }, { "epoch": 9.85725938009788, "grad_norm": 0.004188814666122198, "learning_rate": 0.0005990770583680707, "loss": 0.0326, "num_input_tokens_seen": 130519488, "step": 60425 }, { "epoch": 9.858075040783035, "grad_norm": 0.10563724488019943, "learning_rate": 0.0005990072889752052, "loss": 0.0153, "num_input_tokens_seen": 130530304, "step": 60430 }, { "epoch": 9.858890701468189, "grad_norm": 0.052509855479002, "learning_rate": 0.0005989375175758315, "loss": 0.0174, "num_input_tokens_seen": 130540544, "step": 60435 }, { "epoch": 9.859706362153345, "grad_norm": 0.013773845508694649, "learning_rate": 0.0005988677441713633, "loss": 0.0949, "num_input_tokens_seen": 130550336, "step": 60440 }, { "epoch": 9.860522022838499, "grad_norm": 0.20639842748641968, "learning_rate": 0.000598797968763215, "loss": 0.0226, "num_input_tokens_seen": 130560896, "step": 60445 }, { "epoch": 9.861337683523654, "grad_norm": 0.009240848943591118, "learning_rate": 0.0005987281913528006, "loss": 0.0283, "num_input_tokens_seen": 130572064, "step": 60450 }, { "epoch": 9.86215334420881, "grad_norm": 0.025932233780622482, "learning_rate": 0.0005986584119415339, "loss": 0.1094, "num_input_tokens_seen": 130583328, "step": 60455 }, { "epoch": 9.862969004893964, "grad_norm": 0.009651134721934795, "learning_rate": 0.0005985886305308295, "loss": 0.0137, "num_input_tokens_seen": 130591840, "step": 60460 }, { "epoch": 9.86378466557912, "grad_norm": 0.030490349978208542, "learning_rate": 0.0005985188471221014, "loss": 0.0965, "num_input_tokens_seen": 130603040, "step": 60465 }, { "epoch": 9.864600326264274, "grad_norm": 0.05847940593957901, "learning_rate": 0.0005984490617167639, "loss": 0.0361, "num_input_tokens_seen": 130614208, "step": 60470 }, { "epoch": 9.86541598694943, "grad_norm": 0.13259384036064148, "learning_rate": 0.0005983792743162313, "loss": 0.033, "num_input_tokens_seen": 130624064, "step": 60475 }, { "epoch": 9.866231647634583, "grad_norm": 0.007011461537331343, "learning_rate": 0.0005983094849219177, "loss": 0.0917, "num_input_tokens_seen": 130633984, "step": 60480 }, { "epoch": 9.867047308319739, "grad_norm": 0.007801242638379335, "learning_rate": 0.0005982396935352379, "loss": 0.0606, "num_input_tokens_seen": 130644896, "step": 60485 }, { "epoch": 9.867862969004895, "grad_norm": 0.04473032057285309, "learning_rate": 0.000598169900157606, "loss": 0.1836, "num_input_tokens_seen": 130654752, "step": 60490 }, { "epoch": 9.868678629690049, "grad_norm": 0.29122939705848694, "learning_rate": 0.0005981001047904365, "loss": 0.0549, "num_input_tokens_seen": 130665568, "step": 60495 }, { "epoch": 9.869494290375204, "grad_norm": 0.007103382144123316, "learning_rate": 0.000598030307435144, "loss": 0.0387, "num_input_tokens_seen": 130675904, "step": 60500 }, { "epoch": 9.870309951060358, "grad_norm": 0.04765298217535019, "learning_rate": 0.000597960508093143, "loss": 0.0485, "num_input_tokens_seen": 130686464, "step": 60505 }, { "epoch": 9.871125611745514, "grad_norm": 0.009073898196220398, "learning_rate": 0.0005978907067658479, "loss": 0.0195, "num_input_tokens_seen": 130697568, "step": 60510 }, { "epoch": 9.87194127243067, "grad_norm": 0.01489250548183918, "learning_rate": 0.0005978209034546736, "loss": 0.0561, "num_input_tokens_seen": 130708480, "step": 60515 }, { "epoch": 9.872756933115824, "grad_norm": 0.3078218400478363, "learning_rate": 0.0005977510981610344, "loss": 0.1301, "num_input_tokens_seen": 130719136, "step": 60520 }, { "epoch": 9.87357259380098, "grad_norm": 0.0026563978753983974, "learning_rate": 0.0005976812908863454, "loss": 0.0459, "num_input_tokens_seen": 130730368, "step": 60525 }, { "epoch": 9.874388254486133, "grad_norm": 0.02801201492547989, "learning_rate": 0.0005976114816320208, "loss": 0.0951, "num_input_tokens_seen": 130740544, "step": 60530 }, { "epoch": 9.875203915171289, "grad_norm": 0.03558781370520592, "learning_rate": 0.000597541670399476, "loss": 0.0783, "num_input_tokens_seen": 130752448, "step": 60535 }, { "epoch": 9.876019575856443, "grad_norm": 0.15019097924232483, "learning_rate": 0.0005974718571901254, "loss": 0.0271, "num_input_tokens_seen": 130763520, "step": 60540 }, { "epoch": 9.876835236541599, "grad_norm": 0.005682834889739752, "learning_rate": 0.0005974020420053841, "loss": 0.0042, "num_input_tokens_seen": 130774560, "step": 60545 }, { "epoch": 9.877650897226754, "grad_norm": 0.40169715881347656, "learning_rate": 0.0005973322248466666, "loss": 0.1045, "num_input_tokens_seen": 130784000, "step": 60550 }, { "epoch": 9.878466557911908, "grad_norm": 0.014299996197223663, "learning_rate": 0.0005972624057153882, "loss": 0.0572, "num_input_tokens_seen": 130793600, "step": 60555 }, { "epoch": 9.879282218597064, "grad_norm": 0.006953614763915539, "learning_rate": 0.0005971925846129639, "loss": 0.0291, "num_input_tokens_seen": 130805728, "step": 60560 }, { "epoch": 9.880097879282218, "grad_norm": 0.007064024917781353, "learning_rate": 0.0005971227615408084, "loss": 0.015, "num_input_tokens_seen": 130815936, "step": 60565 }, { "epoch": 9.880913539967374, "grad_norm": 0.15870408713817596, "learning_rate": 0.0005970529365003371, "loss": 0.1804, "num_input_tokens_seen": 130827456, "step": 60570 }, { "epoch": 9.88172920065253, "grad_norm": 0.0761968344449997, "learning_rate": 0.0005969831094929648, "loss": 0.0119, "num_input_tokens_seen": 130837664, "step": 60575 }, { "epoch": 9.882544861337683, "grad_norm": 0.0183928981423378, "learning_rate": 0.0005969132805201067, "loss": 0.0249, "num_input_tokens_seen": 130848928, "step": 60580 }, { "epoch": 9.883360522022839, "grad_norm": 0.3906119465827942, "learning_rate": 0.0005968434495831781, "loss": 0.029, "num_input_tokens_seen": 130860640, "step": 60585 }, { "epoch": 9.884176182707993, "grad_norm": 0.004583487752825022, "learning_rate": 0.000596773616683594, "loss": 0.0393, "num_input_tokens_seen": 130872768, "step": 60590 }, { "epoch": 9.884991843393149, "grad_norm": 0.020378025248646736, "learning_rate": 0.0005967037818227701, "loss": 0.0306, "num_input_tokens_seen": 130883360, "step": 60595 }, { "epoch": 9.885807504078304, "grad_norm": 0.018993549048900604, "learning_rate": 0.0005966339450021212, "loss": 0.016, "num_input_tokens_seen": 130893984, "step": 60600 }, { "epoch": 9.886623164763458, "grad_norm": 0.0059882765635848045, "learning_rate": 0.0005965641062230627, "loss": 0.0176, "num_input_tokens_seen": 130905152, "step": 60605 }, { "epoch": 9.887438825448614, "grad_norm": 0.024164140224456787, "learning_rate": 0.0005964942654870103, "loss": 0.0178, "num_input_tokens_seen": 130914368, "step": 60610 }, { "epoch": 9.888254486133768, "grad_norm": 0.1327124685049057, "learning_rate": 0.0005964244227953791, "loss": 0.0153, "num_input_tokens_seen": 130925088, "step": 60615 }, { "epoch": 9.889070146818923, "grad_norm": 0.027247849851846695, "learning_rate": 0.0005963545781495847, "loss": 0.1065, "num_input_tokens_seen": 130933344, "step": 60620 }, { "epoch": 9.88988580750408, "grad_norm": 0.1004333570599556, "learning_rate": 0.0005962847315510426, "loss": 0.0409, "num_input_tokens_seen": 130944448, "step": 60625 }, { "epoch": 9.890701468189233, "grad_norm": 0.01161018293350935, "learning_rate": 0.0005962148830011681, "loss": 0.0105, "num_input_tokens_seen": 130955040, "step": 60630 }, { "epoch": 9.891517128874389, "grad_norm": 0.4277346134185791, "learning_rate": 0.0005961450325013771, "loss": 0.0289, "num_input_tokens_seen": 130966400, "step": 60635 }, { "epoch": 9.892332789559543, "grad_norm": 0.1819687783718109, "learning_rate": 0.0005960751800530849, "loss": 0.0179, "num_input_tokens_seen": 130978176, "step": 60640 }, { "epoch": 9.893148450244698, "grad_norm": 0.027873460203409195, "learning_rate": 0.0005960053256577073, "loss": 0.0477, "num_input_tokens_seen": 130988352, "step": 60645 }, { "epoch": 9.893964110929852, "grad_norm": 0.0183260440826416, "learning_rate": 0.0005959354693166601, "loss": 0.0455, "num_input_tokens_seen": 130998560, "step": 60650 }, { "epoch": 9.894779771615008, "grad_norm": 0.2565465271472931, "learning_rate": 0.0005958656110313589, "loss": 0.189, "num_input_tokens_seen": 131008352, "step": 60655 }, { "epoch": 9.895595432300164, "grad_norm": 0.08927177637815475, "learning_rate": 0.0005957957508032194, "loss": 0.0242, "num_input_tokens_seen": 131019232, "step": 60660 }, { "epoch": 9.896411092985318, "grad_norm": 0.0050674197264015675, "learning_rate": 0.0005957258886336575, "loss": 0.0053, "num_input_tokens_seen": 131030336, "step": 60665 }, { "epoch": 9.897226753670473, "grad_norm": 0.06827183067798615, "learning_rate": 0.0005956560245240891, "loss": 0.1387, "num_input_tokens_seen": 131040416, "step": 60670 }, { "epoch": 9.898042414355627, "grad_norm": 0.04122605174779892, "learning_rate": 0.0005955861584759298, "loss": 0.0608, "num_input_tokens_seen": 131050624, "step": 60675 }, { "epoch": 9.898858075040783, "grad_norm": 0.7276883125305176, "learning_rate": 0.0005955162904905959, "loss": 0.0778, "num_input_tokens_seen": 131061600, "step": 60680 }, { "epoch": 9.899673735725939, "grad_norm": 0.1265845149755478, "learning_rate": 0.0005954464205695033, "loss": 0.0996, "num_input_tokens_seen": 131070848, "step": 60685 }, { "epoch": 9.900489396411093, "grad_norm": 0.01593145728111267, "learning_rate": 0.0005953765487140678, "loss": 0.0745, "num_input_tokens_seen": 131081888, "step": 60690 }, { "epoch": 9.901305057096248, "grad_norm": 0.002230089157819748, "learning_rate": 0.0005953066749257055, "loss": 0.0098, "num_input_tokens_seen": 131092896, "step": 60695 }, { "epoch": 9.902120717781402, "grad_norm": 0.010627939365804195, "learning_rate": 0.0005952367992058326, "loss": 0.0212, "num_input_tokens_seen": 131103936, "step": 60700 }, { "epoch": 9.902936378466558, "grad_norm": 0.08064654469490051, "learning_rate": 0.0005951669215558651, "loss": 0.0163, "num_input_tokens_seen": 131115424, "step": 60705 }, { "epoch": 9.903752039151712, "grad_norm": 0.006806948687881231, "learning_rate": 0.0005950970419772192, "loss": 0.089, "num_input_tokens_seen": 131126752, "step": 60710 }, { "epoch": 9.904567699836868, "grad_norm": 0.1305762231349945, "learning_rate": 0.0005950271604713111, "loss": 0.1594, "num_input_tokens_seen": 131138688, "step": 60715 }, { "epoch": 9.905383360522023, "grad_norm": 0.056780025362968445, "learning_rate": 0.000594957277039557, "loss": 0.0415, "num_input_tokens_seen": 131149440, "step": 60720 }, { "epoch": 9.906199021207177, "grad_norm": 0.0119969192892313, "learning_rate": 0.0005948873916833733, "loss": 0.0214, "num_input_tokens_seen": 131160768, "step": 60725 }, { "epoch": 9.907014681892333, "grad_norm": 0.0028076330199837685, "learning_rate": 0.0005948175044041764, "loss": 0.0179, "num_input_tokens_seen": 131172096, "step": 60730 }, { "epoch": 9.907830342577487, "grad_norm": 0.4447990357875824, "learning_rate": 0.0005947476152033822, "loss": 0.0369, "num_input_tokens_seen": 131182016, "step": 60735 }, { "epoch": 9.908646003262643, "grad_norm": 0.6924632787704468, "learning_rate": 0.0005946777240824076, "loss": 0.1668, "num_input_tokens_seen": 131193536, "step": 60740 }, { "epoch": 9.909461663947798, "grad_norm": 0.015939053148031235, "learning_rate": 0.0005946078310426687, "loss": 0.1175, "num_input_tokens_seen": 131204448, "step": 60745 }, { "epoch": 9.910277324632952, "grad_norm": 0.0262970682233572, "learning_rate": 0.000594537936085582, "loss": 0.0461, "num_input_tokens_seen": 131216384, "step": 60750 }, { "epoch": 9.911092985318108, "grad_norm": 0.05035187304019928, "learning_rate": 0.0005944680392125643, "loss": 0.0649, "num_input_tokens_seen": 131228064, "step": 60755 }, { "epoch": 9.911908646003262, "grad_norm": 0.487958163022995, "learning_rate": 0.0005943981404250318, "loss": 0.0762, "num_input_tokens_seen": 131239264, "step": 60760 }, { "epoch": 9.912724306688418, "grad_norm": 0.003221668768674135, "learning_rate": 0.0005943282397244013, "loss": 0.0295, "num_input_tokens_seen": 131251040, "step": 60765 }, { "epoch": 9.913539967373573, "grad_norm": 0.16377022862434387, "learning_rate": 0.0005942583371120893, "loss": 0.0312, "num_input_tokens_seen": 131261568, "step": 60770 }, { "epoch": 9.914355628058727, "grad_norm": 0.024691110476851463, "learning_rate": 0.0005941884325895127, "loss": 0.0818, "num_input_tokens_seen": 131272768, "step": 60775 }, { "epoch": 9.915171288743883, "grad_norm": 0.026219695806503296, "learning_rate": 0.0005941185261580878, "loss": 0.0304, "num_input_tokens_seen": 131282240, "step": 60780 }, { "epoch": 9.915986949429037, "grad_norm": 0.003964452538639307, "learning_rate": 0.0005940486178192317, "loss": 0.1644, "num_input_tokens_seen": 131292608, "step": 60785 }, { "epoch": 9.916802610114193, "grad_norm": 0.03120097890496254, "learning_rate": 0.000593978707574361, "loss": 0.011, "num_input_tokens_seen": 131303040, "step": 60790 }, { "epoch": 9.917618270799348, "grad_norm": 0.05793861672282219, "learning_rate": 0.0005939087954248926, "loss": 0.0545, "num_input_tokens_seen": 131314208, "step": 60795 }, { "epoch": 9.918433931484502, "grad_norm": 0.005540280602872372, "learning_rate": 0.0005938388813722432, "loss": 0.0708, "num_input_tokens_seen": 131324832, "step": 60800 }, { "epoch": 9.919249592169658, "grad_norm": 0.431779146194458, "learning_rate": 0.0005937689654178298, "loss": 0.0786, "num_input_tokens_seen": 131335904, "step": 60805 }, { "epoch": 9.920065252854812, "grad_norm": 0.2070673108100891, "learning_rate": 0.0005936990475630696, "loss": 0.0929, "num_input_tokens_seen": 131346368, "step": 60810 }, { "epoch": 9.920880913539968, "grad_norm": 0.06501749902963638, "learning_rate": 0.0005936291278093793, "loss": 0.1148, "num_input_tokens_seen": 131357216, "step": 60815 }, { "epoch": 9.921696574225122, "grad_norm": 0.0018129857489839196, "learning_rate": 0.0005935592061581758, "loss": 0.0093, "num_input_tokens_seen": 131367200, "step": 60820 }, { "epoch": 9.922512234910277, "grad_norm": 0.028841711580753326, "learning_rate": 0.0005934892826108764, "loss": 0.0269, "num_input_tokens_seen": 131379264, "step": 60825 }, { "epoch": 9.923327895595433, "grad_norm": 0.01920981891453266, "learning_rate": 0.0005934193571688981, "loss": 0.0094, "num_input_tokens_seen": 131389888, "step": 60830 }, { "epoch": 9.924143556280587, "grad_norm": 0.013067704625427723, "learning_rate": 0.0005933494298336579, "loss": 0.0357, "num_input_tokens_seen": 131401088, "step": 60835 }, { "epoch": 9.924959216965743, "grad_norm": 0.0020066085271537304, "learning_rate": 0.0005932795006065732, "loss": 0.0285, "num_input_tokens_seen": 131412320, "step": 60840 }, { "epoch": 9.925774877650896, "grad_norm": 0.04752558097243309, "learning_rate": 0.000593209569489061, "loss": 0.02, "num_input_tokens_seen": 131422880, "step": 60845 }, { "epoch": 9.926590538336052, "grad_norm": 0.045283108949661255, "learning_rate": 0.0005931396364825387, "loss": 0.0159, "num_input_tokens_seen": 131434272, "step": 60850 }, { "epoch": 9.927406199021208, "grad_norm": 0.021290739998221397, "learning_rate": 0.0005930697015884234, "loss": 0.0388, "num_input_tokens_seen": 131444736, "step": 60855 }, { "epoch": 9.928221859706362, "grad_norm": 0.011512097902595997, "learning_rate": 0.0005929997648081327, "loss": 0.1208, "num_input_tokens_seen": 131455104, "step": 60860 }, { "epoch": 9.929037520391518, "grad_norm": 0.03910503908991814, "learning_rate": 0.0005929298261430837, "loss": 0.0365, "num_input_tokens_seen": 131466272, "step": 60865 }, { "epoch": 9.929853181076671, "grad_norm": 0.21563324332237244, "learning_rate": 0.0005928598855946939, "loss": 0.0183, "num_input_tokens_seen": 131477856, "step": 60870 }, { "epoch": 9.930668841761827, "grad_norm": 0.12936514616012573, "learning_rate": 0.0005927899431643807, "loss": 0.0206, "num_input_tokens_seen": 131488288, "step": 60875 }, { "epoch": 9.931484502446983, "grad_norm": 0.04893166944384575, "learning_rate": 0.0005927199988535616, "loss": 0.0832, "num_input_tokens_seen": 131498848, "step": 60880 }, { "epoch": 9.932300163132137, "grad_norm": 0.006314895115792751, "learning_rate": 0.0005926500526636542, "loss": 0.0453, "num_input_tokens_seen": 131509440, "step": 60885 }, { "epoch": 9.933115823817293, "grad_norm": 0.018990257754921913, "learning_rate": 0.0005925801045960757, "loss": 0.0191, "num_input_tokens_seen": 131519840, "step": 60890 }, { "epoch": 9.933931484502446, "grad_norm": 0.17692513763904572, "learning_rate": 0.0005925101546522441, "loss": 0.072, "num_input_tokens_seen": 131530400, "step": 60895 }, { "epoch": 9.934747145187602, "grad_norm": 0.008013364858925343, "learning_rate": 0.0005924402028335769, "loss": 0.0433, "num_input_tokens_seen": 131540000, "step": 60900 }, { "epoch": 9.935562805872756, "grad_norm": 0.1491565853357315, "learning_rate": 0.0005923702491414916, "loss": 0.0657, "num_input_tokens_seen": 131552320, "step": 60905 }, { "epoch": 9.936378466557912, "grad_norm": 0.11109715700149536, "learning_rate": 0.000592300293577406, "loss": 0.0222, "num_input_tokens_seen": 131563840, "step": 60910 }, { "epoch": 9.937194127243067, "grad_norm": 0.08512434363365173, "learning_rate": 0.0005922303361427379, "loss": 0.1019, "num_input_tokens_seen": 131575488, "step": 60915 }, { "epoch": 9.938009787928221, "grad_norm": 0.058436449617147446, "learning_rate": 0.0005921603768389051, "loss": 0.0415, "num_input_tokens_seen": 131586080, "step": 60920 }, { "epoch": 9.938825448613377, "grad_norm": 0.02144043892621994, "learning_rate": 0.0005920904156673254, "loss": 0.0409, "num_input_tokens_seen": 131596096, "step": 60925 }, { "epoch": 9.939641109298531, "grad_norm": 0.04910271614789963, "learning_rate": 0.0005920204526294165, "loss": 0.0446, "num_input_tokens_seen": 131606304, "step": 60930 }, { "epoch": 9.940456769983687, "grad_norm": 0.09139198809862137, "learning_rate": 0.0005919504877265965, "loss": 0.0119, "num_input_tokens_seen": 131617312, "step": 60935 }, { "epoch": 9.941272430668842, "grad_norm": 0.020040472969412804, "learning_rate": 0.000591880520960283, "loss": 0.1875, "num_input_tokens_seen": 131627840, "step": 60940 }, { "epoch": 9.942088091353996, "grad_norm": 0.07437156140804291, "learning_rate": 0.0005918105523318944, "loss": 0.0518, "num_input_tokens_seen": 131639168, "step": 60945 }, { "epoch": 9.942903752039152, "grad_norm": 0.009552833624184132, "learning_rate": 0.0005917405818428484, "loss": 0.1345, "num_input_tokens_seen": 131649728, "step": 60950 }, { "epoch": 9.943719412724306, "grad_norm": 0.12387213110923767, "learning_rate": 0.0005916706094945631, "loss": 0.2201, "num_input_tokens_seen": 131659616, "step": 60955 }, { "epoch": 9.944535073409462, "grad_norm": 0.010083312168717384, "learning_rate": 0.0005916006352884567, "loss": 0.0324, "num_input_tokens_seen": 131669952, "step": 60960 }, { "epoch": 9.945350734094617, "grad_norm": 0.19099733233451843, "learning_rate": 0.0005915306592259471, "loss": 0.0257, "num_input_tokens_seen": 131679872, "step": 60965 }, { "epoch": 9.946166394779771, "grad_norm": 0.08524459600448608, "learning_rate": 0.0005914606813084526, "loss": 0.0378, "num_input_tokens_seen": 131690496, "step": 60970 }, { "epoch": 9.946982055464927, "grad_norm": 0.3759321868419647, "learning_rate": 0.0005913907015373915, "loss": 0.0468, "num_input_tokens_seen": 131701248, "step": 60975 }, { "epoch": 9.947797716150081, "grad_norm": 0.00874224305152893, "learning_rate": 0.0005913207199141818, "loss": 0.0293, "num_input_tokens_seen": 131712640, "step": 60980 }, { "epoch": 9.948613376835237, "grad_norm": 0.49674367904663086, "learning_rate": 0.0005912507364402419, "loss": 0.0602, "num_input_tokens_seen": 131723680, "step": 60985 }, { "epoch": 9.949429037520392, "grad_norm": 0.010632969439029694, "learning_rate": 0.0005911807511169899, "loss": 0.1221, "num_input_tokens_seen": 131734336, "step": 60990 }, { "epoch": 9.950244698205546, "grad_norm": 0.24199523031711578, "learning_rate": 0.0005911107639458444, "loss": 0.0444, "num_input_tokens_seen": 131744864, "step": 60995 }, { "epoch": 9.951060358890702, "grad_norm": 0.0021362698171287775, "learning_rate": 0.0005910407749282237, "loss": 0.0504, "num_input_tokens_seen": 131756128, "step": 61000 }, { "epoch": 9.951876019575856, "grad_norm": 0.01978922076523304, "learning_rate": 0.0005909707840655462, "loss": 0.0828, "num_input_tokens_seen": 131766528, "step": 61005 }, { "epoch": 9.952691680261012, "grad_norm": 0.5361757278442383, "learning_rate": 0.0005909007913592304, "loss": 0.0965, "num_input_tokens_seen": 131776096, "step": 61010 }, { "epoch": 9.953507340946166, "grad_norm": 0.02593553252518177, "learning_rate": 0.0005908307968106948, "loss": 0.1404, "num_input_tokens_seen": 131787328, "step": 61015 }, { "epoch": 9.954323001631321, "grad_norm": 0.3522297441959381, "learning_rate": 0.0005907608004213577, "loss": 0.1902, "num_input_tokens_seen": 131797856, "step": 61020 }, { "epoch": 9.955138662316477, "grad_norm": 0.29401418566703796, "learning_rate": 0.0005906908021926379, "loss": 0.0445, "num_input_tokens_seen": 131808224, "step": 61025 }, { "epoch": 9.955954323001631, "grad_norm": 0.022986039519309998, "learning_rate": 0.000590620802125954, "loss": 0.0731, "num_input_tokens_seen": 131818976, "step": 61030 }, { "epoch": 9.956769983686787, "grad_norm": 0.43107253313064575, "learning_rate": 0.0005905508002227247, "loss": 0.0791, "num_input_tokens_seen": 131829184, "step": 61035 }, { "epoch": 9.95758564437194, "grad_norm": 0.009766715578734875, "learning_rate": 0.0005904807964843684, "loss": 0.0151, "num_input_tokens_seen": 131840608, "step": 61040 }, { "epoch": 9.958401305057096, "grad_norm": 0.27253955602645874, "learning_rate": 0.0005904107909123039, "loss": 0.1741, "num_input_tokens_seen": 131850336, "step": 61045 }, { "epoch": 9.959216965742252, "grad_norm": 0.35473403334617615, "learning_rate": 0.0005903407835079502, "loss": 0.0686, "num_input_tokens_seen": 131860800, "step": 61050 }, { "epoch": 9.960032626427406, "grad_norm": 0.0052872272208333015, "learning_rate": 0.000590270774272726, "loss": 0.0473, "num_input_tokens_seen": 131871552, "step": 61055 }, { "epoch": 9.960848287112562, "grad_norm": 0.04657332971692085, "learning_rate": 0.0005902007632080499, "loss": 0.0308, "num_input_tokens_seen": 131881760, "step": 61060 }, { "epoch": 9.961663947797716, "grad_norm": 0.22349655628204346, "learning_rate": 0.0005901307503153408, "loss": 0.0449, "num_input_tokens_seen": 131892416, "step": 61065 }, { "epoch": 9.962479608482871, "grad_norm": 0.0609329491853714, "learning_rate": 0.0005900607355960178, "loss": 0.0128, "num_input_tokens_seen": 131903744, "step": 61070 }, { "epoch": 9.963295269168025, "grad_norm": 0.22006216645240784, "learning_rate": 0.0005899907190514999, "loss": 0.1003, "num_input_tokens_seen": 131914112, "step": 61075 }, { "epoch": 9.964110929853181, "grad_norm": 0.04818188399076462, "learning_rate": 0.0005899207006832056, "loss": 0.0405, "num_input_tokens_seen": 131924896, "step": 61080 }, { "epoch": 9.964926590538337, "grad_norm": 0.2611391842365265, "learning_rate": 0.0005898506804925545, "loss": 0.1429, "num_input_tokens_seen": 131936192, "step": 61085 }, { "epoch": 9.96574225122349, "grad_norm": 0.2683767080307007, "learning_rate": 0.0005897806584809653, "loss": 0.0817, "num_input_tokens_seen": 131947168, "step": 61090 }, { "epoch": 9.966557911908646, "grad_norm": 0.005635687615722418, "learning_rate": 0.0005897106346498571, "loss": 0.0096, "num_input_tokens_seen": 131958080, "step": 61095 }, { "epoch": 9.9673735725938, "grad_norm": 0.005916200112551451, "learning_rate": 0.0005896406090006491, "loss": 0.0107, "num_input_tokens_seen": 131969920, "step": 61100 }, { "epoch": 9.968189233278956, "grad_norm": 0.021341202780604362, "learning_rate": 0.0005895705815347605, "loss": 0.0181, "num_input_tokens_seen": 131979680, "step": 61105 }, { "epoch": 9.969004893964112, "grad_norm": 0.19587284326553345, "learning_rate": 0.0005895005522536104, "loss": 0.0203, "num_input_tokens_seen": 131991776, "step": 61110 }, { "epoch": 9.969820554649266, "grad_norm": 0.0793571025133133, "learning_rate": 0.000589430521158618, "loss": 0.0262, "num_input_tokens_seen": 132002656, "step": 61115 }, { "epoch": 9.970636215334421, "grad_norm": 0.10754761099815369, "learning_rate": 0.0005893604882512027, "loss": 0.0962, "num_input_tokens_seen": 132012928, "step": 61120 }, { "epoch": 9.971451876019575, "grad_norm": 0.4493594765663147, "learning_rate": 0.0005892904535327837, "loss": 0.0467, "num_input_tokens_seen": 132023520, "step": 61125 }, { "epoch": 9.97226753670473, "grad_norm": 0.2553941011428833, "learning_rate": 0.0005892204170047804, "loss": 0.0181, "num_input_tokens_seen": 132033216, "step": 61130 }, { "epoch": 9.973083197389887, "grad_norm": 0.33169591426849365, "learning_rate": 0.0005891503786686123, "loss": 0.1709, "num_input_tokens_seen": 132044736, "step": 61135 }, { "epoch": 9.97389885807504, "grad_norm": 0.005872929468750954, "learning_rate": 0.0005890803385256985, "loss": 0.0358, "num_input_tokens_seen": 132055232, "step": 61140 }, { "epoch": 9.974714518760196, "grad_norm": 0.0037564358208328485, "learning_rate": 0.0005890102965774587, "loss": 0.0128, "num_input_tokens_seen": 132066176, "step": 61145 }, { "epoch": 9.97553017944535, "grad_norm": 0.04645087197422981, "learning_rate": 0.0005889402528253124, "loss": 0.0163, "num_input_tokens_seen": 132076320, "step": 61150 }, { "epoch": 9.976345840130506, "grad_norm": 0.002770586172118783, "learning_rate": 0.0005888702072706788, "loss": 0.0154, "num_input_tokens_seen": 132086816, "step": 61155 }, { "epoch": 9.977161500815662, "grad_norm": 0.286471962928772, "learning_rate": 0.0005888001599149781, "loss": 0.0986, "num_input_tokens_seen": 132097056, "step": 61160 }, { "epoch": 9.977977161500815, "grad_norm": 0.32440730929374695, "learning_rate": 0.0005887301107596292, "loss": 0.0263, "num_input_tokens_seen": 132107424, "step": 61165 }, { "epoch": 9.978792822185971, "grad_norm": 0.07186294347047806, "learning_rate": 0.0005886600598060522, "loss": 0.0543, "num_input_tokens_seen": 132117376, "step": 61170 }, { "epoch": 9.979608482871125, "grad_norm": 0.08263949304819107, "learning_rate": 0.0005885900070556665, "loss": 0.0171, "num_input_tokens_seen": 132127776, "step": 61175 }, { "epoch": 9.98042414355628, "grad_norm": 0.002160472795367241, "learning_rate": 0.0005885199525098919, "loss": 0.0293, "num_input_tokens_seen": 132138720, "step": 61180 }, { "epoch": 9.981239804241435, "grad_norm": 0.005913755390793085, "learning_rate": 0.0005884498961701483, "loss": 0.0074, "num_input_tokens_seen": 132149632, "step": 61185 }, { "epoch": 9.98205546492659, "grad_norm": 0.0032838478218764067, "learning_rate": 0.0005883798380378554, "loss": 0.1135, "num_input_tokens_seen": 132161536, "step": 61190 }, { "epoch": 9.982871125611746, "grad_norm": 0.21511363983154297, "learning_rate": 0.0005883097781144329, "loss": 0.234, "num_input_tokens_seen": 132172992, "step": 61195 }, { "epoch": 9.9836867862969, "grad_norm": 0.0015851340722292662, "learning_rate": 0.0005882397164013005, "loss": 0.0314, "num_input_tokens_seen": 132183808, "step": 61200 }, { "epoch": 9.984502446982056, "grad_norm": 0.02231002226471901, "learning_rate": 0.0005881696528998785, "loss": 0.0092, "num_input_tokens_seen": 132194048, "step": 61205 }, { "epoch": 9.98531810766721, "grad_norm": 0.014449645765125751, "learning_rate": 0.0005880995876115868, "loss": 0.0419, "num_input_tokens_seen": 132205120, "step": 61210 }, { "epoch": 9.986133768352365, "grad_norm": 0.01147771067917347, "learning_rate": 0.0005880295205378449, "loss": 0.0656, "num_input_tokens_seen": 132214880, "step": 61215 }, { "epoch": 9.986949429037521, "grad_norm": 0.061700109392404556, "learning_rate": 0.0005879594516800732, "loss": 0.0892, "num_input_tokens_seen": 132225408, "step": 61220 }, { "epoch": 9.987765089722675, "grad_norm": 0.010661949403584003, "learning_rate": 0.0005878893810396916, "loss": 0.1024, "num_input_tokens_seen": 132236288, "step": 61225 }, { "epoch": 9.98858075040783, "grad_norm": 0.08864899724721909, "learning_rate": 0.0005878193086181203, "loss": 0.0417, "num_input_tokens_seen": 132247680, "step": 61230 }, { "epoch": 9.989396411092985, "grad_norm": 0.3488578200340271, "learning_rate": 0.0005877492344167792, "loss": 0.1304, "num_input_tokens_seen": 132257632, "step": 61235 }, { "epoch": 9.99021207177814, "grad_norm": 0.0029131618794053793, "learning_rate": 0.0005876791584370886, "loss": 0.0816, "num_input_tokens_seen": 132268288, "step": 61240 }, { "epoch": 9.991027732463294, "grad_norm": 0.03511757031083107, "learning_rate": 0.0005876090806804686, "loss": 0.0618, "num_input_tokens_seen": 132278432, "step": 61245 }, { "epoch": 9.99184339314845, "grad_norm": 0.28010329604148865, "learning_rate": 0.0005875390011483394, "loss": 0.0442, "num_input_tokens_seen": 132288512, "step": 61250 }, { "epoch": 9.992659053833606, "grad_norm": 0.0038962685503065586, "learning_rate": 0.0005874689198421214, "loss": 0.0094, "num_input_tokens_seen": 132300096, "step": 61255 }, { "epoch": 9.99347471451876, "grad_norm": 0.13746079802513123, "learning_rate": 0.0005873988367632347, "loss": 0.1822, "num_input_tokens_seen": 132309824, "step": 61260 }, { "epoch": 9.994290375203915, "grad_norm": 0.16750729084014893, "learning_rate": 0.0005873287519130997, "loss": 0.1562, "num_input_tokens_seen": 132320512, "step": 61265 }, { "epoch": 9.99510603588907, "grad_norm": 0.09587028622627258, "learning_rate": 0.0005872586652931368, "loss": 0.0437, "num_input_tokens_seen": 132331072, "step": 61270 }, { "epoch": 9.995921696574225, "grad_norm": 0.22070851922035217, "learning_rate": 0.0005871885769047664, "loss": 0.0955, "num_input_tokens_seen": 132340960, "step": 61275 }, { "epoch": 9.99673735725938, "grad_norm": 0.021899957209825516, "learning_rate": 0.0005871184867494088, "loss": 0.0462, "num_input_tokens_seen": 132351648, "step": 61280 }, { "epoch": 9.997553017944535, "grad_norm": 0.18221528828144073, "learning_rate": 0.0005870483948284845, "loss": 0.0415, "num_input_tokens_seen": 132363040, "step": 61285 }, { "epoch": 9.99836867862969, "grad_norm": 0.2870374619960785, "learning_rate": 0.0005869783011434141, "loss": 0.0339, "num_input_tokens_seen": 132372544, "step": 61290 }, { "epoch": 9.999184339314844, "grad_norm": 0.04292251169681549, "learning_rate": 0.0005869082056956181, "loss": 0.1169, "num_input_tokens_seen": 132383936, "step": 61295 }, { "epoch": 10.0, "grad_norm": 0.024575114250183105, "learning_rate": 0.000586838108486517, "loss": 0.0154, "num_input_tokens_seen": 132392640, "step": 61300 }, { "epoch": 10.0, "eval_loss": 0.14106982946395874, "eval_runtime": 103.848, "eval_samples_per_second": 26.24, "eval_steps_per_second": 6.567, "num_input_tokens_seen": 132392640, "step": 61300 }, { "epoch": 10.000815660685156, "grad_norm": 0.09867502748966217, "learning_rate": 0.0005867680095175315, "loss": 0.0202, "num_input_tokens_seen": 132403680, "step": 61305 }, { "epoch": 10.00163132137031, "grad_norm": 0.09049554169178009, "learning_rate": 0.0005866979087900822, "loss": 0.1042, "num_input_tokens_seen": 132415040, "step": 61310 }, { "epoch": 10.002446982055465, "grad_norm": 0.025646690279245377, "learning_rate": 0.0005866278063055898, "loss": 0.0278, "num_input_tokens_seen": 132426496, "step": 61315 }, { "epoch": 10.00326264274062, "grad_norm": 0.043658383190631866, "learning_rate": 0.0005865577020654751, "loss": 0.211, "num_input_tokens_seen": 132436640, "step": 61320 }, { "epoch": 10.004078303425775, "grad_norm": 0.10407885164022446, "learning_rate": 0.0005864875960711588, "loss": 0.0403, "num_input_tokens_seen": 132447840, "step": 61325 }, { "epoch": 10.00489396411093, "grad_norm": 0.010089286603033543, "learning_rate": 0.0005864174883240614, "loss": 0.0598, "num_input_tokens_seen": 132458336, "step": 61330 }, { "epoch": 10.005709624796085, "grad_norm": 0.1301630288362503, "learning_rate": 0.0005863473788256042, "loss": 0.1366, "num_input_tokens_seen": 132469856, "step": 61335 }, { "epoch": 10.00652528548124, "grad_norm": 0.051968734711408615, "learning_rate": 0.0005862772675772076, "loss": 0.0374, "num_input_tokens_seen": 132479104, "step": 61340 }, { "epoch": 10.007340946166394, "grad_norm": 0.017379827797412872, "learning_rate": 0.000586207154580293, "loss": 0.01, "num_input_tokens_seen": 132490240, "step": 61345 }, { "epoch": 10.00815660685155, "grad_norm": 0.002726492937654257, "learning_rate": 0.0005861370398362809, "loss": 0.0225, "num_input_tokens_seen": 132500128, "step": 61350 }, { "epoch": 10.008972267536704, "grad_norm": 0.035465896129608154, "learning_rate": 0.0005860669233465925, "loss": 0.1204, "num_input_tokens_seen": 132511584, "step": 61355 }, { "epoch": 10.00978792822186, "grad_norm": 0.18338146805763245, "learning_rate": 0.0005859968051126486, "loss": 0.0472, "num_input_tokens_seen": 132521824, "step": 61360 }, { "epoch": 10.010603588907015, "grad_norm": 0.02428065799176693, "learning_rate": 0.0005859266851358704, "loss": 0.0335, "num_input_tokens_seen": 132533952, "step": 61365 }, { "epoch": 10.01141924959217, "grad_norm": 0.0904858410358429, "learning_rate": 0.0005858565634176789, "loss": 0.0157, "num_input_tokens_seen": 132544960, "step": 61370 }, { "epoch": 10.012234910277325, "grad_norm": 0.019217217341065407, "learning_rate": 0.0005857864399594953, "loss": 0.0082, "num_input_tokens_seen": 132556096, "step": 61375 }, { "epoch": 10.013050570962479, "grad_norm": 0.20956331491470337, "learning_rate": 0.0005857163147627406, "loss": 0.055, "num_input_tokens_seen": 132567264, "step": 61380 }, { "epoch": 10.013866231647635, "grad_norm": 0.0058461870066821575, "learning_rate": 0.000585646187828836, "loss": 0.0078, "num_input_tokens_seen": 132577376, "step": 61385 }, { "epoch": 10.01468189233279, "grad_norm": 0.020629743114113808, "learning_rate": 0.000585576059159203, "loss": 0.0471, "num_input_tokens_seen": 132587968, "step": 61390 }, { "epoch": 10.015497553017944, "grad_norm": 0.023331712931394577, "learning_rate": 0.0005855059287552623, "loss": 0.0168, "num_input_tokens_seen": 132600064, "step": 61395 }, { "epoch": 10.0163132137031, "grad_norm": 0.0030110483057796955, "learning_rate": 0.0005854357966184356, "loss": 0.1784, "num_input_tokens_seen": 132611008, "step": 61400 }, { "epoch": 10.017128874388254, "grad_norm": 0.2905384600162506, "learning_rate": 0.0005853656627501442, "loss": 0.1317, "num_input_tokens_seen": 132622592, "step": 61405 }, { "epoch": 10.01794453507341, "grad_norm": 0.2557068467140198, "learning_rate": 0.0005852955271518092, "loss": 0.0358, "num_input_tokens_seen": 132633120, "step": 61410 }, { "epoch": 10.018760195758565, "grad_norm": 0.035086777061223984, "learning_rate": 0.0005852253898248522, "loss": 0.0359, "num_input_tokens_seen": 132643712, "step": 61415 }, { "epoch": 10.01957585644372, "grad_norm": 0.08694098889827728, "learning_rate": 0.0005851552507706945, "loss": 0.0205, "num_input_tokens_seen": 132654144, "step": 61420 }, { "epoch": 10.020391517128875, "grad_norm": 0.011595198884606361, "learning_rate": 0.0005850851099907577, "loss": 0.0072, "num_input_tokens_seen": 132664096, "step": 61425 }, { "epoch": 10.021207177814029, "grad_norm": 0.015262764878571033, "learning_rate": 0.0005850149674864631, "loss": 0.0379, "num_input_tokens_seen": 132674528, "step": 61430 }, { "epoch": 10.022022838499185, "grad_norm": 0.003421793458983302, "learning_rate": 0.0005849448232592324, "loss": 0.0696, "num_input_tokens_seen": 132684896, "step": 61435 }, { "epoch": 10.022838499184338, "grad_norm": 0.1386803835630417, "learning_rate": 0.0005848746773104871, "loss": 0.1301, "num_input_tokens_seen": 132695904, "step": 61440 }, { "epoch": 10.023654159869494, "grad_norm": 0.0038962659891694784, "learning_rate": 0.0005848045296416488, "loss": 0.0296, "num_input_tokens_seen": 132706112, "step": 61445 }, { "epoch": 10.02446982055465, "grad_norm": 0.10918844491243362, "learning_rate": 0.0005847343802541391, "loss": 0.0186, "num_input_tokens_seen": 132716160, "step": 61450 }, { "epoch": 10.025285481239804, "grad_norm": 0.18185824155807495, "learning_rate": 0.0005846642291493796, "loss": 0.0326, "num_input_tokens_seen": 132728672, "step": 61455 }, { "epoch": 10.02610114192496, "grad_norm": 0.036952123045921326, "learning_rate": 0.0005845940763287923, "loss": 0.0283, "num_input_tokens_seen": 132739776, "step": 61460 }, { "epoch": 10.026916802610113, "grad_norm": 0.16382429003715515, "learning_rate": 0.0005845239217937986, "loss": 0.0539, "num_input_tokens_seen": 132751264, "step": 61465 }, { "epoch": 10.02773246329527, "grad_norm": 0.04407104477286339, "learning_rate": 0.0005844537655458203, "loss": 0.0261, "num_input_tokens_seen": 132762208, "step": 61470 }, { "epoch": 10.028548123980425, "grad_norm": 0.0067809708416461945, "learning_rate": 0.0005843836075862794, "loss": 0.0284, "num_input_tokens_seen": 132771040, "step": 61475 }, { "epoch": 10.029363784665579, "grad_norm": 0.017581727355718613, "learning_rate": 0.0005843134479165977, "loss": 0.0743, "num_input_tokens_seen": 132782528, "step": 61480 }, { "epoch": 10.030179445350734, "grad_norm": 0.10218022763729095, "learning_rate": 0.0005842432865381971, "loss": 0.0171, "num_input_tokens_seen": 132792704, "step": 61485 }, { "epoch": 10.030995106035888, "grad_norm": 0.33561691641807556, "learning_rate": 0.0005841731234524993, "loss": 0.1242, "num_input_tokens_seen": 132803904, "step": 61490 }, { "epoch": 10.031810766721044, "grad_norm": 0.36867034435272217, "learning_rate": 0.0005841029586609263, "loss": 0.0274, "num_input_tokens_seen": 132813664, "step": 61495 }, { "epoch": 10.0326264274062, "grad_norm": 0.003252339782193303, "learning_rate": 0.0005840327921649003, "loss": 0.0351, "num_input_tokens_seen": 132824640, "step": 61500 }, { "epoch": 10.033442088091354, "grad_norm": 0.0059877620078623295, "learning_rate": 0.0005839626239658431, "loss": 0.0097, "num_input_tokens_seen": 132836128, "step": 61505 }, { "epoch": 10.03425774877651, "grad_norm": 0.002705489983782172, "learning_rate": 0.0005838924540651769, "loss": 0.0107, "num_input_tokens_seen": 132847392, "step": 61510 }, { "epoch": 10.035073409461663, "grad_norm": 0.005862162448465824, "learning_rate": 0.0005838222824643235, "loss": 0.065, "num_input_tokens_seen": 132857152, "step": 61515 }, { "epoch": 10.035889070146819, "grad_norm": 0.13416972756385803, "learning_rate": 0.0005837521091647054, "loss": 0.0183, "num_input_tokens_seen": 132866560, "step": 61520 }, { "epoch": 10.036704730831975, "grad_norm": 0.024556193500757217, "learning_rate": 0.0005836819341677444, "loss": 0.0198, "num_input_tokens_seen": 132877952, "step": 61525 }, { "epoch": 10.037520391517129, "grad_norm": 0.025332549586892128, "learning_rate": 0.0005836117574748629, "loss": 0.0488, "num_input_tokens_seen": 132887744, "step": 61530 }, { "epoch": 10.038336052202284, "grad_norm": 0.024207156151533127, "learning_rate": 0.0005835415790874832, "loss": 0.0257, "num_input_tokens_seen": 132897696, "step": 61535 }, { "epoch": 10.039151712887438, "grad_norm": 0.029303235933184624, "learning_rate": 0.0005834713990070273, "loss": 0.0162, "num_input_tokens_seen": 132908704, "step": 61540 }, { "epoch": 10.039967373572594, "grad_norm": 0.008033953607082367, "learning_rate": 0.0005834012172349174, "loss": 0.022, "num_input_tokens_seen": 132919168, "step": 61545 }, { "epoch": 10.040783034257748, "grad_norm": 0.5109202861785889, "learning_rate": 0.0005833310337725764, "loss": 0.1255, "num_input_tokens_seen": 132931136, "step": 61550 }, { "epoch": 10.041598694942904, "grad_norm": 0.008069335483014584, "learning_rate": 0.0005832608486214261, "loss": 0.0371, "num_input_tokens_seen": 132942432, "step": 61555 }, { "epoch": 10.04241435562806, "grad_norm": 0.007886008359491825, "learning_rate": 0.0005831906617828892, "loss": 0.0128, "num_input_tokens_seen": 132952480, "step": 61560 }, { "epoch": 10.043230016313213, "grad_norm": 0.005050900857895613, "learning_rate": 0.0005831204732583879, "loss": 0.0104, "num_input_tokens_seen": 132963584, "step": 61565 }, { "epoch": 10.044045676998369, "grad_norm": 0.007038873620331287, "learning_rate": 0.0005830502830493447, "loss": 0.0113, "num_input_tokens_seen": 132973888, "step": 61570 }, { "epoch": 10.044861337683523, "grad_norm": 0.00195617089048028, "learning_rate": 0.0005829800911571824, "loss": 0.0114, "num_input_tokens_seen": 132984224, "step": 61575 }, { "epoch": 10.045676998368679, "grad_norm": 0.020171010866761208, "learning_rate": 0.000582909897583323, "loss": 0.0065, "num_input_tokens_seen": 132995392, "step": 61580 }, { "epoch": 10.046492659053834, "grad_norm": 0.005067338235676289, "learning_rate": 0.0005828397023291895, "loss": 0.0929, "num_input_tokens_seen": 133006432, "step": 61585 }, { "epoch": 10.047308319738988, "grad_norm": 0.048682741820812225, "learning_rate": 0.0005827695053962043, "loss": 0.0268, "num_input_tokens_seen": 133017280, "step": 61590 }, { "epoch": 10.048123980424144, "grad_norm": 0.07136274874210358, "learning_rate": 0.0005826993067857901, "loss": 0.1019, "num_input_tokens_seen": 133027936, "step": 61595 }, { "epoch": 10.048939641109298, "grad_norm": 0.004224831238389015, "learning_rate": 0.0005826291064993695, "loss": 0.0079, "num_input_tokens_seen": 133039200, "step": 61600 }, { "epoch": 10.049755301794454, "grad_norm": 0.32198604941368103, "learning_rate": 0.0005825589045383654, "loss": 0.1041, "num_input_tokens_seen": 133050528, "step": 61605 }, { "epoch": 10.05057096247961, "grad_norm": 0.054331421852111816, "learning_rate": 0.0005824887009042002, "loss": 0.0091, "num_input_tokens_seen": 133061248, "step": 61610 }, { "epoch": 10.051386623164763, "grad_norm": 0.10078129172325134, "learning_rate": 0.0005824184955982967, "loss": 0.0125, "num_input_tokens_seen": 133072544, "step": 61615 }, { "epoch": 10.052202283849919, "grad_norm": 0.0027622587513178587, "learning_rate": 0.000582348288622078, "loss": 0.0034, "num_input_tokens_seen": 133083616, "step": 61620 }, { "epoch": 10.053017944535073, "grad_norm": 0.03783497214317322, "learning_rate": 0.0005822780799769667, "loss": 0.006, "num_input_tokens_seen": 133094912, "step": 61625 }, { "epoch": 10.053833605220229, "grad_norm": 0.06255345791578293, "learning_rate": 0.0005822078696643859, "loss": 0.0445, "num_input_tokens_seen": 133106048, "step": 61630 }, { "epoch": 10.054649265905383, "grad_norm": 0.02925264462828636, "learning_rate": 0.0005821376576857582, "loss": 0.1186, "num_input_tokens_seen": 133116640, "step": 61635 }, { "epoch": 10.055464926590538, "grad_norm": 0.014886287972331047, "learning_rate": 0.0005820674440425067, "loss": 0.0199, "num_input_tokens_seen": 133127104, "step": 61640 }, { "epoch": 10.056280587275694, "grad_norm": 0.08674079179763794, "learning_rate": 0.0005819972287360543, "loss": 0.0493, "num_input_tokens_seen": 133138336, "step": 61645 }, { "epoch": 10.057096247960848, "grad_norm": 0.03542179614305496, "learning_rate": 0.0005819270117678239, "loss": 0.0037, "num_input_tokens_seen": 133147904, "step": 61650 }, { "epoch": 10.057911908646004, "grad_norm": 0.3200596868991852, "learning_rate": 0.0005818567931392389, "loss": 0.0617, "num_input_tokens_seen": 133158528, "step": 61655 }, { "epoch": 10.058727569331158, "grad_norm": 0.0011431258171796799, "learning_rate": 0.000581786572851722, "loss": 0.0048, "num_input_tokens_seen": 133169472, "step": 61660 }, { "epoch": 10.059543230016313, "grad_norm": 0.011449893936514854, "learning_rate": 0.0005817163509066966, "loss": 0.009, "num_input_tokens_seen": 133178880, "step": 61665 }, { "epoch": 10.060358890701469, "grad_norm": 0.11256249994039536, "learning_rate": 0.0005816461273055857, "loss": 0.0151, "num_input_tokens_seen": 133190464, "step": 61670 }, { "epoch": 10.061174551386623, "grad_norm": 0.11716876924037933, "learning_rate": 0.0005815759020498122, "loss": 0.0041, "num_input_tokens_seen": 133201920, "step": 61675 }, { "epoch": 10.061990212071779, "grad_norm": 0.04273887351155281, "learning_rate": 0.0005815056751407999, "loss": 0.0298, "num_input_tokens_seen": 133211648, "step": 61680 }, { "epoch": 10.062805872756933, "grad_norm": 0.006083001848310232, "learning_rate": 0.0005814354465799715, "loss": 0.1231, "num_input_tokens_seen": 133223072, "step": 61685 }, { "epoch": 10.063621533442088, "grad_norm": 0.0192166268825531, "learning_rate": 0.0005813652163687504, "loss": 0.0106, "num_input_tokens_seen": 133234784, "step": 61690 }, { "epoch": 10.064437194127244, "grad_norm": 0.00653346860781312, "learning_rate": 0.0005812949845085601, "loss": 0.2327, "num_input_tokens_seen": 133245504, "step": 61695 }, { "epoch": 10.065252854812398, "grad_norm": 0.012311974540352821, "learning_rate": 0.0005812247510008238, "loss": 0.1, "num_input_tokens_seen": 133255136, "step": 61700 }, { "epoch": 10.066068515497554, "grad_norm": 0.1300489455461502, "learning_rate": 0.0005811545158469649, "loss": 0.0179, "num_input_tokens_seen": 133267680, "step": 61705 }, { "epoch": 10.066884176182707, "grad_norm": 0.6340866684913635, "learning_rate": 0.0005810842790484066, "loss": 0.2703, "num_input_tokens_seen": 133279168, "step": 61710 }, { "epoch": 10.067699836867863, "grad_norm": 0.026448015123605728, "learning_rate": 0.0005810140406065727, "loss": 0.0207, "num_input_tokens_seen": 133289632, "step": 61715 }, { "epoch": 10.068515497553017, "grad_norm": 0.08431733399629593, "learning_rate": 0.0005809438005228866, "loss": 0.0162, "num_input_tokens_seen": 133300704, "step": 61720 }, { "epoch": 10.069331158238173, "grad_norm": 0.008198346011340618, "learning_rate": 0.0005808735587987714, "loss": 0.0219, "num_input_tokens_seen": 133311936, "step": 61725 }, { "epoch": 10.070146818923329, "grad_norm": 0.06876257807016373, "learning_rate": 0.0005808033154356511, "loss": 0.0664, "num_input_tokens_seen": 133322688, "step": 61730 }, { "epoch": 10.070962479608482, "grad_norm": 0.03216838836669922, "learning_rate": 0.0005807330704349492, "loss": 0.069, "num_input_tokens_seen": 133334304, "step": 61735 }, { "epoch": 10.071778140293638, "grad_norm": 0.009478418156504631, "learning_rate": 0.0005806628237980891, "loss": 0.0064, "num_input_tokens_seen": 133345568, "step": 61740 }, { "epoch": 10.072593800978792, "grad_norm": 0.02647106908261776, "learning_rate": 0.0005805925755264945, "loss": 0.0087, "num_input_tokens_seen": 133357472, "step": 61745 }, { "epoch": 10.073409461663948, "grad_norm": 0.005002718418836594, "learning_rate": 0.0005805223256215891, "loss": 0.0052, "num_input_tokens_seen": 133367712, "step": 61750 }, { "epoch": 10.074225122349104, "grad_norm": 0.014104902744293213, "learning_rate": 0.0005804520740847966, "loss": 0.0377, "num_input_tokens_seen": 133379840, "step": 61755 }, { "epoch": 10.075040783034257, "grad_norm": 0.2684130072593689, "learning_rate": 0.0005803818209175409, "loss": 0.1045, "num_input_tokens_seen": 133390688, "step": 61760 }, { "epoch": 10.075856443719413, "grad_norm": 0.010500526987016201, "learning_rate": 0.0005803115661212456, "loss": 0.0658, "num_input_tokens_seen": 133401632, "step": 61765 }, { "epoch": 10.076672104404567, "grad_norm": 0.008303754031658173, "learning_rate": 0.0005802413096973345, "loss": 0.0029, "num_input_tokens_seen": 133411808, "step": 61770 }, { "epoch": 10.077487765089723, "grad_norm": 0.03415251150727272, "learning_rate": 0.0005801710516472315, "loss": 0.0039, "num_input_tokens_seen": 133423616, "step": 61775 }, { "epoch": 10.078303425774878, "grad_norm": 0.0074006495997309685, "learning_rate": 0.0005801007919723605, "loss": 0.0943, "num_input_tokens_seen": 133435424, "step": 61780 }, { "epoch": 10.079119086460032, "grad_norm": 0.002399343764409423, "learning_rate": 0.000580030530674145, "loss": 0.0127, "num_input_tokens_seen": 133445472, "step": 61785 }, { "epoch": 10.079934747145188, "grad_norm": 0.28245264291763306, "learning_rate": 0.0005799602677540095, "loss": 0.0462, "num_input_tokens_seen": 133459008, "step": 61790 }, { "epoch": 10.080750407830342, "grad_norm": 0.023097572848200798, "learning_rate": 0.0005798900032133778, "loss": 0.0592, "num_input_tokens_seen": 133469440, "step": 61795 }, { "epoch": 10.081566068515498, "grad_norm": 0.09736547619104385, "learning_rate": 0.0005798197370536737, "loss": 0.0638, "num_input_tokens_seen": 133480832, "step": 61800 }, { "epoch": 10.082381729200652, "grad_norm": 0.1749315857887268, "learning_rate": 0.0005797494692763215, "loss": 0.0504, "num_input_tokens_seen": 133491552, "step": 61805 }, { "epoch": 10.083197389885807, "grad_norm": 0.026390263810753822, "learning_rate": 0.0005796791998827451, "loss": 0.1452, "num_input_tokens_seen": 133502720, "step": 61810 }, { "epoch": 10.084013050570963, "grad_norm": 0.0233746450394392, "learning_rate": 0.0005796089288743687, "loss": 0.0221, "num_input_tokens_seen": 133511936, "step": 61815 }, { "epoch": 10.084828711256117, "grad_norm": 0.029851028695702553, "learning_rate": 0.0005795386562526163, "loss": 0.0236, "num_input_tokens_seen": 133522432, "step": 61820 }, { "epoch": 10.085644371941273, "grad_norm": 0.007268915418535471, "learning_rate": 0.000579468382018912, "loss": 0.0073, "num_input_tokens_seen": 133533184, "step": 61825 }, { "epoch": 10.086460032626427, "grad_norm": 0.033539608120918274, "learning_rate": 0.0005793981061746802, "loss": 0.0168, "num_input_tokens_seen": 133544352, "step": 61830 }, { "epoch": 10.087275693311582, "grad_norm": 0.006324341986328363, "learning_rate": 0.0005793278287213453, "loss": 0.0047, "num_input_tokens_seen": 133554944, "step": 61835 }, { "epoch": 10.088091353996738, "grad_norm": 0.28266897797584534, "learning_rate": 0.000579257549660331, "loss": 0.0245, "num_input_tokens_seen": 133566048, "step": 61840 }, { "epoch": 10.088907014681892, "grad_norm": 0.01207935530692339, "learning_rate": 0.0005791872689930621, "loss": 0.0218, "num_input_tokens_seen": 133577760, "step": 61845 }, { "epoch": 10.089722675367048, "grad_norm": 0.019263241440057755, "learning_rate": 0.0005791169867209626, "loss": 0.0166, "num_input_tokens_seen": 133587424, "step": 61850 }, { "epoch": 10.090538336052202, "grad_norm": 0.001974908635020256, "learning_rate": 0.0005790467028454571, "loss": 0.0168, "num_input_tokens_seen": 133597024, "step": 61855 }, { "epoch": 10.091353996737357, "grad_norm": 0.13980016112327576, "learning_rate": 0.0005789764173679698, "loss": 0.0141, "num_input_tokens_seen": 133608704, "step": 61860 }, { "epoch": 10.092169657422513, "grad_norm": 0.01586383581161499, "learning_rate": 0.0005789061302899252, "loss": 0.0035, "num_input_tokens_seen": 133617504, "step": 61865 }, { "epoch": 10.092985318107667, "grad_norm": 0.007646430283784866, "learning_rate": 0.0005788358416127478, "loss": 0.1635, "num_input_tokens_seen": 133628448, "step": 61870 }, { "epoch": 10.093800978792823, "grad_norm": 0.02338665910065174, "learning_rate": 0.0005787655513378622, "loss": 0.0679, "num_input_tokens_seen": 133639328, "step": 61875 }, { "epoch": 10.094616639477977, "grad_norm": 0.04463246837258339, "learning_rate": 0.0005786952594666925, "loss": 0.008, "num_input_tokens_seen": 133649568, "step": 61880 }, { "epoch": 10.095432300163132, "grad_norm": 0.17297907173633575, "learning_rate": 0.0005786249660006638, "loss": 0.0381, "num_input_tokens_seen": 133660384, "step": 61885 }, { "epoch": 10.096247960848286, "grad_norm": 0.0007581968093290925, "learning_rate": 0.0005785546709412004, "loss": 0.0061, "num_input_tokens_seen": 133671232, "step": 61890 }, { "epoch": 10.097063621533442, "grad_norm": 0.007879073731601238, "learning_rate": 0.0005784843742897268, "loss": 0.0031, "num_input_tokens_seen": 133682080, "step": 61895 }, { "epoch": 10.097879282218598, "grad_norm": 0.380930095911026, "learning_rate": 0.0005784140760476679, "loss": 0.1592, "num_input_tokens_seen": 133692896, "step": 61900 }, { "epoch": 10.098694942903752, "grad_norm": 0.017733553424477577, "learning_rate": 0.0005783437762164483, "loss": 0.0068, "num_input_tokens_seen": 133702944, "step": 61905 }, { "epoch": 10.099510603588907, "grad_norm": 0.015353784896433353, "learning_rate": 0.0005782734747974926, "loss": 0.1314, "num_input_tokens_seen": 133714208, "step": 61910 }, { "epoch": 10.100326264274061, "grad_norm": 0.004601314663887024, "learning_rate": 0.0005782031717922256, "loss": 0.0085, "num_input_tokens_seen": 133725248, "step": 61915 }, { "epoch": 10.101141924959217, "grad_norm": 0.005082305055111647, "learning_rate": 0.0005781328672020723, "loss": 0.0492, "num_input_tokens_seen": 133736224, "step": 61920 }, { "epoch": 10.101957585644373, "grad_norm": 0.07641912996768951, "learning_rate": 0.0005780625610284572, "loss": 0.0136, "num_input_tokens_seen": 133747040, "step": 61925 }, { "epoch": 10.102773246329527, "grad_norm": 0.1645813286304474, "learning_rate": 0.000577992253272805, "loss": 0.1875, "num_input_tokens_seen": 133756896, "step": 61930 }, { "epoch": 10.103588907014682, "grad_norm": 0.3278946280479431, "learning_rate": 0.0005779219439365411, "loss": 0.0501, "num_input_tokens_seen": 133766720, "step": 61935 }, { "epoch": 10.104404567699836, "grad_norm": 0.01809718646109104, "learning_rate": 0.0005778516330210902, "loss": 0.026, "num_input_tokens_seen": 133778240, "step": 61940 }, { "epoch": 10.105220228384992, "grad_norm": 0.006549215409904718, "learning_rate": 0.0005777813205278772, "loss": 0.0067, "num_input_tokens_seen": 133788352, "step": 61945 }, { "epoch": 10.106035889070148, "grad_norm": 0.002958184340968728, "learning_rate": 0.0005777110064583271, "loss": 0.0551, "num_input_tokens_seen": 133798304, "step": 61950 }, { "epoch": 10.106851549755302, "grad_norm": 0.003981069661676884, "learning_rate": 0.0005776406908138648, "loss": 0.0053, "num_input_tokens_seen": 133809728, "step": 61955 }, { "epoch": 10.107667210440457, "grad_norm": 0.04357610270380974, "learning_rate": 0.0005775703735959155, "loss": 0.0126, "num_input_tokens_seen": 133820608, "step": 61960 }, { "epoch": 10.108482871125611, "grad_norm": 0.3853638470172882, "learning_rate": 0.000577500054805904, "loss": 0.0787, "num_input_tokens_seen": 133831904, "step": 61965 }, { "epoch": 10.109298531810767, "grad_norm": 0.005354811903089285, "learning_rate": 0.0005774297344452556, "loss": 0.0685, "num_input_tokens_seen": 133843392, "step": 61970 }, { "epoch": 10.11011419249592, "grad_norm": 0.05633273720741272, "learning_rate": 0.0005773594125153955, "loss": 0.0138, "num_input_tokens_seen": 133855008, "step": 61975 }, { "epoch": 10.110929853181077, "grad_norm": 0.015436145476996899, "learning_rate": 0.0005772890890177487, "loss": 0.0687, "num_input_tokens_seen": 133866304, "step": 61980 }, { "epoch": 10.111745513866232, "grad_norm": 0.25812065601348877, "learning_rate": 0.0005772187639537405, "loss": 0.1369, "num_input_tokens_seen": 133877952, "step": 61985 }, { "epoch": 10.112561174551386, "grad_norm": 0.03582283854484558, "learning_rate": 0.000577148437324796, "loss": 0.0172, "num_input_tokens_seen": 133888800, "step": 61990 }, { "epoch": 10.113376835236542, "grad_norm": 0.004719121847301722, "learning_rate": 0.0005770781091323407, "loss": 0.0038, "num_input_tokens_seen": 133900448, "step": 61995 }, { "epoch": 10.114192495921696, "grad_norm": 0.0029625471215695143, "learning_rate": 0.0005770077793777996, "loss": 0.0131, "num_input_tokens_seen": 133910304, "step": 62000 }, { "epoch": 10.115008156606851, "grad_norm": 0.006111313123255968, "learning_rate": 0.0005769374480625983, "loss": 0.1681, "num_input_tokens_seen": 133920640, "step": 62005 }, { "epoch": 10.115823817292007, "grad_norm": 0.018650345504283905, "learning_rate": 0.000576867115188162, "loss": 0.0207, "num_input_tokens_seen": 133932800, "step": 62010 }, { "epoch": 10.116639477977161, "grad_norm": 0.002345480490475893, "learning_rate": 0.000576796780755916, "loss": 0.1642, "num_input_tokens_seen": 133944608, "step": 62015 }, { "epoch": 10.117455138662317, "grad_norm": 0.0071258461102843285, "learning_rate": 0.0005767264447672859, "loss": 0.0406, "num_input_tokens_seen": 133956352, "step": 62020 }, { "epoch": 10.11827079934747, "grad_norm": 0.020247265696525574, "learning_rate": 0.000576656107223697, "loss": 0.0582, "num_input_tokens_seen": 133967200, "step": 62025 }, { "epoch": 10.119086460032626, "grad_norm": 0.006692581344395876, "learning_rate": 0.0005765857681265749, "loss": 0.0067, "num_input_tokens_seen": 133977216, "step": 62030 }, { "epoch": 10.119902120717782, "grad_norm": 0.04609968885779381, "learning_rate": 0.000576515427477345, "loss": 0.0105, "num_input_tokens_seen": 133988256, "step": 62035 }, { "epoch": 10.120717781402936, "grad_norm": 0.016456104815006256, "learning_rate": 0.0005764450852774329, "loss": 0.0139, "num_input_tokens_seen": 134000160, "step": 62040 }, { "epoch": 10.121533442088092, "grad_norm": 0.01033777091652155, "learning_rate": 0.0005763747415282642, "loss": 0.0045, "num_input_tokens_seen": 134010368, "step": 62045 }, { "epoch": 10.122349102773246, "grad_norm": 0.2594705820083618, "learning_rate": 0.0005763043962312644, "loss": 0.0173, "num_input_tokens_seen": 134020576, "step": 62050 }, { "epoch": 10.123164763458401, "grad_norm": 0.0942777767777443, "learning_rate": 0.0005762340493878593, "loss": 0.095, "num_input_tokens_seen": 134030656, "step": 62055 }, { "epoch": 10.123980424143557, "grad_norm": 0.013030118308961391, "learning_rate": 0.0005761637009994745, "loss": 0.0844, "num_input_tokens_seen": 134042176, "step": 62060 }, { "epoch": 10.124796084828711, "grad_norm": 0.001847845152951777, "learning_rate": 0.0005760933510675356, "loss": 0.0658, "num_input_tokens_seen": 134052736, "step": 62065 }, { "epoch": 10.125611745513867, "grad_norm": 0.019251855090260506, "learning_rate": 0.0005760229995934684, "loss": 0.0617, "num_input_tokens_seen": 134064032, "step": 62070 }, { "epoch": 10.12642740619902, "grad_norm": 0.07578039169311523, "learning_rate": 0.0005759526465786986, "loss": 0.0156, "num_input_tokens_seen": 134075936, "step": 62075 }, { "epoch": 10.127243066884176, "grad_norm": 0.027117077261209488, "learning_rate": 0.0005758822920246523, "loss": 0.0148, "num_input_tokens_seen": 134086560, "step": 62080 }, { "epoch": 10.12805872756933, "grad_norm": 0.0019536991603672504, "learning_rate": 0.000575811935932755, "loss": 0.0139, "num_input_tokens_seen": 134097728, "step": 62085 }, { "epoch": 10.128874388254486, "grad_norm": 0.00918999221175909, "learning_rate": 0.0005757415783044325, "loss": 0.027, "num_input_tokens_seen": 134107712, "step": 62090 }, { "epoch": 10.129690048939642, "grad_norm": 0.029966186732053757, "learning_rate": 0.0005756712191411109, "loss": 0.0047, "num_input_tokens_seen": 134119456, "step": 62095 }, { "epoch": 10.130505709624796, "grad_norm": 0.3509294390678406, "learning_rate": 0.0005756008584442161, "loss": 0.0582, "num_input_tokens_seen": 134129696, "step": 62100 }, { "epoch": 10.131321370309951, "grad_norm": 0.08585608005523682, "learning_rate": 0.0005755304962151739, "loss": 0.0674, "num_input_tokens_seen": 134140096, "step": 62105 }, { "epoch": 10.132137030995105, "grad_norm": 0.0772661566734314, "learning_rate": 0.0005754601324554104, "loss": 0.0148, "num_input_tokens_seen": 134151232, "step": 62110 }, { "epoch": 10.132952691680261, "grad_norm": 0.002000660402700305, "learning_rate": 0.0005753897671663518, "loss": 0.0131, "num_input_tokens_seen": 134161920, "step": 62115 }, { "epoch": 10.133768352365417, "grad_norm": 0.06940358877182007, "learning_rate": 0.0005753194003494237, "loss": 0.025, "num_input_tokens_seen": 134172448, "step": 62120 }, { "epoch": 10.13458401305057, "grad_norm": 0.031187528744339943, "learning_rate": 0.0005752490320060524, "loss": 0.023, "num_input_tokens_seen": 134182944, "step": 62125 }, { "epoch": 10.135399673735726, "grad_norm": 0.00791481975466013, "learning_rate": 0.0005751786621376641, "loss": 0.1079, "num_input_tokens_seen": 134194176, "step": 62130 }, { "epoch": 10.13621533442088, "grad_norm": 0.03876109793782234, "learning_rate": 0.0005751082907456849, "loss": 0.0192, "num_input_tokens_seen": 134205344, "step": 62135 }, { "epoch": 10.137030995106036, "grad_norm": 0.01723620668053627, "learning_rate": 0.0005750379178315408, "loss": 0.0652, "num_input_tokens_seen": 134216032, "step": 62140 }, { "epoch": 10.137846655791192, "grad_norm": 0.059835128486156464, "learning_rate": 0.0005749675433966581, "loss": 0.0057, "num_input_tokens_seen": 134227840, "step": 62145 }, { "epoch": 10.138662316476346, "grad_norm": 0.0036580360028892756, "learning_rate": 0.0005748971674424631, "loss": 0.122, "num_input_tokens_seen": 134237888, "step": 62150 }, { "epoch": 10.139477977161501, "grad_norm": 0.09279145300388336, "learning_rate": 0.0005748267899703819, "loss": 0.0136, "num_input_tokens_seen": 134249568, "step": 62155 }, { "epoch": 10.140293637846655, "grad_norm": 0.00488590681925416, "learning_rate": 0.000574756410981841, "loss": 0.0038, "num_input_tokens_seen": 134260736, "step": 62160 }, { "epoch": 10.141109298531811, "grad_norm": 0.03295081481337547, "learning_rate": 0.0005746860304782665, "loss": 0.0761, "num_input_tokens_seen": 134270784, "step": 62165 }, { "epoch": 10.141924959216965, "grad_norm": 0.048429399728775024, "learning_rate": 0.0005746156484610849, "loss": 0.0327, "num_input_tokens_seen": 134280928, "step": 62170 }, { "epoch": 10.14274061990212, "grad_norm": 0.004736216738820076, "learning_rate": 0.0005745452649317225, "loss": 0.0293, "num_input_tokens_seen": 134292352, "step": 62175 }, { "epoch": 10.143556280587276, "grad_norm": 0.021222488954663277, "learning_rate": 0.0005744748798916057, "loss": 0.0179, "num_input_tokens_seen": 134302880, "step": 62180 }, { "epoch": 10.14437194127243, "grad_norm": 0.003893442451953888, "learning_rate": 0.0005744044933421609, "loss": 0.0136, "num_input_tokens_seen": 134314240, "step": 62185 }, { "epoch": 10.145187601957586, "grad_norm": 0.004684086889028549, "learning_rate": 0.0005743341052848147, "loss": 0.0761, "num_input_tokens_seen": 134326144, "step": 62190 }, { "epoch": 10.14600326264274, "grad_norm": 0.0026035832706838846, "learning_rate": 0.0005742637157209936, "loss": 0.1189, "num_input_tokens_seen": 134335808, "step": 62195 }, { "epoch": 10.146818923327896, "grad_norm": 0.0014494028873741627, "learning_rate": 0.0005741933246521243, "loss": 0.0027, "num_input_tokens_seen": 134347072, "step": 62200 }, { "epoch": 10.147634584013051, "grad_norm": 0.032787173986434937, "learning_rate": 0.0005741229320796329, "loss": 0.0069, "num_input_tokens_seen": 134358080, "step": 62205 }, { "epoch": 10.148450244698205, "grad_norm": 0.019167525693774223, "learning_rate": 0.0005740525380049464, "loss": 0.0263, "num_input_tokens_seen": 134369504, "step": 62210 }, { "epoch": 10.149265905383361, "grad_norm": 0.031887758523225784, "learning_rate": 0.0005739821424294911, "loss": 0.0095, "num_input_tokens_seen": 134379008, "step": 62215 }, { "epoch": 10.150081566068515, "grad_norm": 0.018726017326116562, "learning_rate": 0.000573911745354694, "loss": 0.0169, "num_input_tokens_seen": 134390624, "step": 62220 }, { "epoch": 10.15089722675367, "grad_norm": 0.15560075640678406, "learning_rate": 0.0005738413467819816, "loss": 0.0187, "num_input_tokens_seen": 134402304, "step": 62225 }, { "epoch": 10.151712887438826, "grad_norm": 0.026487508788704872, "learning_rate": 0.0005737709467127805, "loss": 0.1375, "num_input_tokens_seen": 134412320, "step": 62230 }, { "epoch": 10.15252854812398, "grad_norm": 0.023451926186680794, "learning_rate": 0.0005737005451485177, "loss": 0.1566, "num_input_tokens_seen": 134423072, "step": 62235 }, { "epoch": 10.153344208809136, "grad_norm": 0.04784998297691345, "learning_rate": 0.0005736301420906196, "loss": 0.0129, "num_input_tokens_seen": 134434880, "step": 62240 }, { "epoch": 10.15415986949429, "grad_norm": 0.25198274850845337, "learning_rate": 0.0005735597375405135, "loss": 0.0616, "num_input_tokens_seen": 134446688, "step": 62245 }, { "epoch": 10.154975530179446, "grad_norm": 0.10130038857460022, "learning_rate": 0.000573489331499626, "loss": 0.2152, "num_input_tokens_seen": 134457632, "step": 62250 }, { "epoch": 10.1557911908646, "grad_norm": 0.011092791333794594, "learning_rate": 0.000573418923969384, "loss": 0.0103, "num_input_tokens_seen": 134467456, "step": 62255 }, { "epoch": 10.156606851549755, "grad_norm": 0.03461114689707756, "learning_rate": 0.0005733485149512143, "loss": 0.1422, "num_input_tokens_seen": 134478880, "step": 62260 }, { "epoch": 10.15742251223491, "grad_norm": 0.47994375228881836, "learning_rate": 0.000573278104446544, "loss": 0.1253, "num_input_tokens_seen": 134490912, "step": 62265 }, { "epoch": 10.158238172920065, "grad_norm": 0.013990727253258228, "learning_rate": 0.0005732076924567999, "loss": 0.0087, "num_input_tokens_seen": 134499872, "step": 62270 }, { "epoch": 10.15905383360522, "grad_norm": 0.007365802302956581, "learning_rate": 0.0005731372789834089, "loss": 0.0073, "num_input_tokens_seen": 134510944, "step": 62275 }, { "epoch": 10.159869494290374, "grad_norm": 0.010104432702064514, "learning_rate": 0.0005730668640277983, "loss": 0.1117, "num_input_tokens_seen": 134522464, "step": 62280 }, { "epoch": 10.16068515497553, "grad_norm": 0.026980755850672722, "learning_rate": 0.0005729964475913949, "loss": 0.157, "num_input_tokens_seen": 134532992, "step": 62285 }, { "epoch": 10.161500815660686, "grad_norm": 0.003204572247341275, "learning_rate": 0.0005729260296756259, "loss": 0.1405, "num_input_tokens_seen": 134542688, "step": 62290 }, { "epoch": 10.16231647634584, "grad_norm": 0.13098019361495972, "learning_rate": 0.0005728556102819185, "loss": 0.0433, "num_input_tokens_seen": 134553888, "step": 62295 }, { "epoch": 10.163132137030995, "grad_norm": 0.0058778622187674046, "learning_rate": 0.0005727851894116997, "loss": 0.0117, "num_input_tokens_seen": 134565088, "step": 62300 }, { "epoch": 10.16394779771615, "grad_norm": 0.017019178718328476, "learning_rate": 0.0005727147670663967, "loss": 0.0125, "num_input_tokens_seen": 134574080, "step": 62305 }, { "epoch": 10.164763458401305, "grad_norm": 0.007292016409337521, "learning_rate": 0.0005726443432474366, "loss": 0.0079, "num_input_tokens_seen": 134585536, "step": 62310 }, { "epoch": 10.16557911908646, "grad_norm": 0.002903412329033017, "learning_rate": 0.0005725739179562469, "loss": 0.1014, "num_input_tokens_seen": 134596512, "step": 62315 }, { "epoch": 10.166394779771615, "grad_norm": 0.06075170263648033, "learning_rate": 0.0005725034911942546, "loss": 0.0459, "num_input_tokens_seen": 134608192, "step": 62320 }, { "epoch": 10.16721044045677, "grad_norm": 0.15462207794189453, "learning_rate": 0.0005724330629628871, "loss": 0.0748, "num_input_tokens_seen": 134618688, "step": 62325 }, { "epoch": 10.168026101141924, "grad_norm": 0.015479236841201782, "learning_rate": 0.0005723626332635717, "loss": 0.0093, "num_input_tokens_seen": 134628128, "step": 62330 }, { "epoch": 10.16884176182708, "grad_norm": 0.010428624227643013, "learning_rate": 0.0005722922020977356, "loss": 0.0492, "num_input_tokens_seen": 134639200, "step": 62335 }, { "epoch": 10.169657422512234, "grad_norm": 0.00993596762418747, "learning_rate": 0.0005722217694668065, "loss": 0.0162, "num_input_tokens_seen": 134648192, "step": 62340 }, { "epoch": 10.17047308319739, "grad_norm": 0.2954118549823761, "learning_rate": 0.0005721513353722116, "loss": 0.1768, "num_input_tokens_seen": 134658784, "step": 62345 }, { "epoch": 10.171288743882545, "grad_norm": 0.34019842743873596, "learning_rate": 0.0005720808998153782, "loss": 0.031, "num_input_tokens_seen": 134670496, "step": 62350 }, { "epoch": 10.1721044045677, "grad_norm": 0.055857911705970764, "learning_rate": 0.000572010462797734, "loss": 0.0216, "num_input_tokens_seen": 134682272, "step": 62355 }, { "epoch": 10.172920065252855, "grad_norm": 0.0796642005443573, "learning_rate": 0.0005719400243207065, "loss": 0.0347, "num_input_tokens_seen": 134695008, "step": 62360 }, { "epoch": 10.173735725938009, "grad_norm": 0.051125604659318924, "learning_rate": 0.0005718695843857231, "loss": 0.0959, "num_input_tokens_seen": 134706144, "step": 62365 }, { "epoch": 10.174551386623165, "grad_norm": 0.6277033686637878, "learning_rate": 0.0005717991429942114, "loss": 0.0287, "num_input_tokens_seen": 134716896, "step": 62370 }, { "epoch": 10.17536704730832, "grad_norm": 0.1190236285328865, "learning_rate": 0.000571728700147599, "loss": 0.1365, "num_input_tokens_seen": 134726368, "step": 62375 }, { "epoch": 10.176182707993474, "grad_norm": 0.5035422444343567, "learning_rate": 0.0005716582558473136, "loss": 0.054, "num_input_tokens_seen": 134737568, "step": 62380 }, { "epoch": 10.17699836867863, "grad_norm": 0.10199093073606491, "learning_rate": 0.0005715878100947824, "loss": 0.0832, "num_input_tokens_seen": 134747168, "step": 62385 }, { "epoch": 10.177814029363784, "grad_norm": 0.09676895290613174, "learning_rate": 0.0005715173628914336, "loss": 0.0742, "num_input_tokens_seen": 134758464, "step": 62390 }, { "epoch": 10.17862969004894, "grad_norm": 0.040533117949962616, "learning_rate": 0.0005714469142386948, "loss": 0.0183, "num_input_tokens_seen": 134769760, "step": 62395 }, { "epoch": 10.179445350734095, "grad_norm": 0.05296805128455162, "learning_rate": 0.0005713764641379936, "loss": 0.0145, "num_input_tokens_seen": 134780896, "step": 62400 }, { "epoch": 10.18026101141925, "grad_norm": 0.03004343807697296, "learning_rate": 0.0005713060125907578, "loss": 0.0247, "num_input_tokens_seen": 134790624, "step": 62405 }, { "epoch": 10.181076672104405, "grad_norm": 0.009422010742127895, "learning_rate": 0.0005712355595984151, "loss": 0.1624, "num_input_tokens_seen": 134801952, "step": 62410 }, { "epoch": 10.181892332789559, "grad_norm": 0.009660206735134125, "learning_rate": 0.0005711651051623935, "loss": 0.0074, "num_input_tokens_seen": 134813472, "step": 62415 }, { "epoch": 10.182707993474715, "grad_norm": 0.07207388430833817, "learning_rate": 0.0005710946492841208, "loss": 0.1007, "num_input_tokens_seen": 134824512, "step": 62420 }, { "epoch": 10.18352365415987, "grad_norm": 0.05100369080901146, "learning_rate": 0.0005710241919650248, "loss": 0.0882, "num_input_tokens_seen": 134834816, "step": 62425 }, { "epoch": 10.184339314845024, "grad_norm": 0.3938085436820984, "learning_rate": 0.0005709537332065335, "loss": 0.0666, "num_input_tokens_seen": 134845280, "step": 62430 }, { "epoch": 10.18515497553018, "grad_norm": 0.007621200289577246, "learning_rate": 0.0005708832730100747, "loss": 0.0403, "num_input_tokens_seen": 134855744, "step": 62435 }, { "epoch": 10.185970636215334, "grad_norm": 0.12467098236083984, "learning_rate": 0.0005708128113770765, "loss": 0.0412, "num_input_tokens_seen": 134865568, "step": 62440 }, { "epoch": 10.18678629690049, "grad_norm": 0.007261715363711119, "learning_rate": 0.0005707423483089669, "loss": 0.0355, "num_input_tokens_seen": 134877344, "step": 62445 }, { "epoch": 10.187601957585644, "grad_norm": 0.11845030635595322, "learning_rate": 0.0005706718838071738, "loss": 0.1115, "num_input_tokens_seen": 134888896, "step": 62450 }, { "epoch": 10.1884176182708, "grad_norm": 0.002159345429390669, "learning_rate": 0.0005706014178731253, "loss": 0.0134, "num_input_tokens_seen": 134900000, "step": 62455 }, { "epoch": 10.189233278955955, "grad_norm": 0.0020519730169326067, "learning_rate": 0.0005705309505082496, "loss": 0.0123, "num_input_tokens_seen": 134909568, "step": 62460 }, { "epoch": 10.190048939641109, "grad_norm": 0.1217883750796318, "learning_rate": 0.0005704604817139747, "loss": 0.0287, "num_input_tokens_seen": 134920704, "step": 62465 }, { "epoch": 10.190864600326265, "grad_norm": 0.011312576942145824, "learning_rate": 0.0005703900114917286, "loss": 0.0129, "num_input_tokens_seen": 134932736, "step": 62470 }, { "epoch": 10.191680261011419, "grad_norm": 0.004563915077596903, "learning_rate": 0.0005703195398429397, "loss": 0.0948, "num_input_tokens_seen": 134943616, "step": 62475 }, { "epoch": 10.192495921696574, "grad_norm": 0.10664792358875275, "learning_rate": 0.0005702490667690363, "loss": 0.0306, "num_input_tokens_seen": 134954560, "step": 62480 }, { "epoch": 10.19331158238173, "grad_norm": 0.212308868765831, "learning_rate": 0.0005701785922714461, "loss": 0.1525, "num_input_tokens_seen": 134964192, "step": 62485 }, { "epoch": 10.194127243066884, "grad_norm": 0.04527709260582924, "learning_rate": 0.000570108116351598, "loss": 0.036, "num_input_tokens_seen": 134974976, "step": 62490 }, { "epoch": 10.19494290375204, "grad_norm": 0.009464547038078308, "learning_rate": 0.0005700376390109198, "loss": 0.0156, "num_input_tokens_seen": 134985696, "step": 62495 }, { "epoch": 10.195758564437194, "grad_norm": 0.0898800790309906, "learning_rate": 0.00056996716025084, "loss": 0.0191, "num_input_tokens_seen": 134996736, "step": 62500 }, { "epoch": 10.19657422512235, "grad_norm": 0.05027703940868378, "learning_rate": 0.000569896680072787, "loss": 0.0106, "num_input_tokens_seen": 135005856, "step": 62505 }, { "epoch": 10.197389885807505, "grad_norm": 0.06976497173309326, "learning_rate": 0.0005698261984781891, "loss": 0.0189, "num_input_tokens_seen": 135018400, "step": 62510 }, { "epoch": 10.198205546492659, "grad_norm": 0.0337468683719635, "learning_rate": 0.0005697557154684749, "loss": 0.0147, "num_input_tokens_seen": 135028448, "step": 62515 }, { "epoch": 10.199021207177815, "grad_norm": 0.18263459205627441, "learning_rate": 0.0005696852310450723, "loss": 0.0178, "num_input_tokens_seen": 135038080, "step": 62520 }, { "epoch": 10.199836867862969, "grad_norm": 0.45159977674484253, "learning_rate": 0.0005696147452094102, "loss": 0.0384, "num_input_tokens_seen": 135047392, "step": 62525 }, { "epoch": 10.200652528548124, "grad_norm": 0.010472620837390423, "learning_rate": 0.000569544257962917, "loss": 0.0159, "num_input_tokens_seen": 135057856, "step": 62530 }, { "epoch": 10.201468189233278, "grad_norm": 0.06547081470489502, "learning_rate": 0.0005694737693070213, "loss": 0.0129, "num_input_tokens_seen": 135069632, "step": 62535 }, { "epoch": 10.202283849918434, "grad_norm": 0.005394492298364639, "learning_rate": 0.0005694032792431515, "loss": 0.0796, "num_input_tokens_seen": 135080448, "step": 62540 }, { "epoch": 10.20309951060359, "grad_norm": 0.00928778387606144, "learning_rate": 0.0005693327877727361, "loss": 0.1307, "num_input_tokens_seen": 135091904, "step": 62545 }, { "epoch": 10.203915171288743, "grad_norm": 0.003517286153510213, "learning_rate": 0.0005692622948972039, "loss": 0.1866, "num_input_tokens_seen": 135102176, "step": 62550 }, { "epoch": 10.2047308319739, "grad_norm": 0.008070231415331364, "learning_rate": 0.0005691918006179833, "loss": 0.0158, "num_input_tokens_seen": 135113568, "step": 62555 }, { "epoch": 10.205546492659053, "grad_norm": 0.021824125200510025, "learning_rate": 0.0005691213049365031, "loss": 0.0186, "num_input_tokens_seen": 135124896, "step": 62560 }, { "epoch": 10.206362153344209, "grad_norm": 0.12865550816059113, "learning_rate": 0.000569050807854192, "loss": 0.0138, "num_input_tokens_seen": 135136768, "step": 62565 }, { "epoch": 10.207177814029365, "grad_norm": 0.02046484500169754, "learning_rate": 0.0005689803093724788, "loss": 0.0095, "num_input_tokens_seen": 135147488, "step": 62570 }, { "epoch": 10.207993474714518, "grad_norm": 0.15102674067020416, "learning_rate": 0.0005689098094927921, "loss": 0.0391, "num_input_tokens_seen": 135158240, "step": 62575 }, { "epoch": 10.208809135399674, "grad_norm": 0.005512281786650419, "learning_rate": 0.0005688393082165605, "loss": 0.0349, "num_input_tokens_seen": 135169984, "step": 62580 }, { "epoch": 10.209624796084828, "grad_norm": 0.004724299535155296, "learning_rate": 0.0005687688055452132, "loss": 0.0119, "num_input_tokens_seen": 135180832, "step": 62585 }, { "epoch": 10.210440456769984, "grad_norm": 0.01652977615594864, "learning_rate": 0.0005686983014801787, "loss": 0.0129, "num_input_tokens_seen": 135191584, "step": 62590 }, { "epoch": 10.21125611745514, "grad_norm": 0.0027591027319431305, "learning_rate": 0.000568627796022886, "loss": 0.0149, "num_input_tokens_seen": 135202048, "step": 62595 }, { "epoch": 10.212071778140293, "grad_norm": 0.11414124071598053, "learning_rate": 0.0005685572891747639, "loss": 0.0483, "num_input_tokens_seen": 135212384, "step": 62600 }, { "epoch": 10.21288743882545, "grad_norm": 0.018032826483249664, "learning_rate": 0.0005684867809372415, "loss": 0.0211, "num_input_tokens_seen": 135223456, "step": 62605 }, { "epoch": 10.213703099510603, "grad_norm": 0.2531827688217163, "learning_rate": 0.0005684162713117473, "loss": 0.1153, "num_input_tokens_seen": 135235488, "step": 62610 }, { "epoch": 10.214518760195759, "grad_norm": 0.5033522248268127, "learning_rate": 0.0005683457602997108, "loss": 0.0361, "num_input_tokens_seen": 135245376, "step": 62615 }, { "epoch": 10.215334420880913, "grad_norm": 0.06980666518211365, "learning_rate": 0.0005682752479025608, "loss": 0.0058, "num_input_tokens_seen": 135255968, "step": 62620 }, { "epoch": 10.216150081566068, "grad_norm": 0.0026550409384071827, "learning_rate": 0.0005682047341217262, "loss": 0.0324, "num_input_tokens_seen": 135267392, "step": 62625 }, { "epoch": 10.216965742251224, "grad_norm": 0.23229344189167023, "learning_rate": 0.0005681342189586362, "loss": 0.0303, "num_input_tokens_seen": 135277568, "step": 62630 }, { "epoch": 10.217781402936378, "grad_norm": 0.19436459243297577, "learning_rate": 0.0005680637024147199, "loss": 0.0422, "num_input_tokens_seen": 135286944, "step": 62635 }, { "epoch": 10.218597063621534, "grad_norm": 0.008587691932916641, "learning_rate": 0.0005679931844914061, "loss": 0.0498, "num_input_tokens_seen": 135296960, "step": 62640 }, { "epoch": 10.219412724306688, "grad_norm": 0.5537342429161072, "learning_rate": 0.0005679226651901243, "loss": 0.1044, "num_input_tokens_seen": 135307520, "step": 62645 }, { "epoch": 10.220228384991843, "grad_norm": 0.17528803646564484, "learning_rate": 0.0005678521445123036, "loss": 0.0279, "num_input_tokens_seen": 135315936, "step": 62650 }, { "epoch": 10.221044045676999, "grad_norm": 0.0012471231166273355, "learning_rate": 0.0005677816224593731, "loss": 0.029, "num_input_tokens_seen": 135326656, "step": 62655 }, { "epoch": 10.221859706362153, "grad_norm": 0.7135857343673706, "learning_rate": 0.0005677110990327618, "loss": 0.1871, "num_input_tokens_seen": 135337184, "step": 62660 }, { "epoch": 10.222675367047309, "grad_norm": 0.4663824141025543, "learning_rate": 0.0005676405742338995, "loss": 0.0679, "num_input_tokens_seen": 135348096, "step": 62665 }, { "epoch": 10.223491027732463, "grad_norm": 0.010357555001974106, "learning_rate": 0.0005675700480642149, "loss": 0.0056, "num_input_tokens_seen": 135358688, "step": 62670 }, { "epoch": 10.224306688417618, "grad_norm": 0.002061615465208888, "learning_rate": 0.0005674995205251376, "loss": 0.025, "num_input_tokens_seen": 135367840, "step": 62675 }, { "epoch": 10.225122349102774, "grad_norm": 0.011426280252635479, "learning_rate": 0.000567428991618097, "loss": 0.0245, "num_input_tokens_seen": 135378336, "step": 62680 }, { "epoch": 10.225938009787928, "grad_norm": 0.005902221892029047, "learning_rate": 0.0005673584613445223, "loss": 0.0179, "num_input_tokens_seen": 135389312, "step": 62685 }, { "epoch": 10.226753670473084, "grad_norm": 0.015893463045358658, "learning_rate": 0.000567287929705843, "loss": 0.0067, "num_input_tokens_seen": 135398496, "step": 62690 }, { "epoch": 10.227569331158238, "grad_norm": 0.3865486979484558, "learning_rate": 0.0005672173967034883, "loss": 0.2104, "num_input_tokens_seen": 135409024, "step": 62695 }, { "epoch": 10.228384991843393, "grad_norm": 0.0027782840188592672, "learning_rate": 0.0005671468623388878, "loss": 0.0176, "num_input_tokens_seen": 135419168, "step": 62700 }, { "epoch": 10.229200652528547, "grad_norm": 0.010938719846308231, "learning_rate": 0.000567076326613471, "loss": 0.0374, "num_input_tokens_seen": 135430368, "step": 62705 }, { "epoch": 10.230016313213703, "grad_norm": 0.027225926518440247, "learning_rate": 0.0005670057895286674, "loss": 0.0756, "num_input_tokens_seen": 135441408, "step": 62710 }, { "epoch": 10.230831973898859, "grad_norm": 0.09897179901599884, "learning_rate": 0.0005669352510859063, "loss": 0.0988, "num_input_tokens_seen": 135452160, "step": 62715 }, { "epoch": 10.231647634584013, "grad_norm": 0.021300874650478363, "learning_rate": 0.0005668647112866175, "loss": 0.005, "num_input_tokens_seen": 135463296, "step": 62720 }, { "epoch": 10.232463295269168, "grad_norm": 0.00661829486489296, "learning_rate": 0.0005667941701322305, "loss": 0.0571, "num_input_tokens_seen": 135473984, "step": 62725 }, { "epoch": 10.233278955954322, "grad_norm": 0.007071008440107107, "learning_rate": 0.000566723627624175, "loss": 0.0259, "num_input_tokens_seen": 135485632, "step": 62730 }, { "epoch": 10.234094616639478, "grad_norm": 0.04191211238503456, "learning_rate": 0.0005666530837638805, "loss": 0.0137, "num_input_tokens_seen": 135495968, "step": 62735 }, { "epoch": 10.234910277324634, "grad_norm": 0.0013803663896396756, "learning_rate": 0.0005665825385527766, "loss": 0.0056, "num_input_tokens_seen": 135506304, "step": 62740 }, { "epoch": 10.235725938009788, "grad_norm": 0.02503710426390171, "learning_rate": 0.0005665119919922932, "loss": 0.0062, "num_input_tokens_seen": 135516640, "step": 62745 }, { "epoch": 10.236541598694943, "grad_norm": 0.0530230738222599, "learning_rate": 0.0005664414440838598, "loss": 0.1141, "num_input_tokens_seen": 135527840, "step": 62750 }, { "epoch": 10.237357259380097, "grad_norm": 0.23326678574085236, "learning_rate": 0.0005663708948289065, "loss": 0.0281, "num_input_tokens_seen": 135537888, "step": 62755 }, { "epoch": 10.238172920065253, "grad_norm": 0.019284890964627266, "learning_rate": 0.0005663003442288626, "loss": 0.0147, "num_input_tokens_seen": 135548736, "step": 62760 }, { "epoch": 10.238988580750409, "grad_norm": 0.010017280466854572, "learning_rate": 0.0005662297922851583, "loss": 0.0994, "num_input_tokens_seen": 135557376, "step": 62765 }, { "epoch": 10.239804241435563, "grad_norm": 0.022153059020638466, "learning_rate": 0.0005661592389992231, "loss": 0.0235, "num_input_tokens_seen": 135567328, "step": 62770 }, { "epoch": 10.240619902120718, "grad_norm": 0.0728738009929657, "learning_rate": 0.0005660886843724869, "loss": 0.0451, "num_input_tokens_seen": 135578624, "step": 62775 }, { "epoch": 10.241435562805872, "grad_norm": 0.01032840833067894, "learning_rate": 0.0005660181284063798, "loss": 0.0168, "num_input_tokens_seen": 135588800, "step": 62780 }, { "epoch": 10.242251223491028, "grad_norm": 0.009232861921191216, "learning_rate": 0.0005659475711023317, "loss": 0.0637, "num_input_tokens_seen": 135599360, "step": 62785 }, { "epoch": 10.243066884176184, "grad_norm": 0.021632635965943336, "learning_rate": 0.0005658770124617722, "loss": 0.164, "num_input_tokens_seen": 135608960, "step": 62790 }, { "epoch": 10.243882544861338, "grad_norm": 0.005081352312117815, "learning_rate": 0.0005658064524861315, "loss": 0.0099, "num_input_tokens_seen": 135619744, "step": 62795 }, { "epoch": 10.244698205546493, "grad_norm": 0.13025851547718048, "learning_rate": 0.0005657358911768395, "loss": 0.0129, "num_input_tokens_seen": 135630016, "step": 62800 }, { "epoch": 10.245513866231647, "grad_norm": 0.006488516461104155, "learning_rate": 0.0005656653285353265, "loss": 0.0647, "num_input_tokens_seen": 135640480, "step": 62805 }, { "epoch": 10.246329526916803, "grad_norm": 0.01548718847334385, "learning_rate": 0.0005655947645630222, "loss": 0.0346, "num_input_tokens_seen": 135651328, "step": 62810 }, { "epoch": 10.247145187601957, "grad_norm": 0.031630516052246094, "learning_rate": 0.0005655241992613566, "loss": 0.0084, "num_input_tokens_seen": 135662976, "step": 62815 }, { "epoch": 10.247960848287113, "grad_norm": 0.17202773690223694, "learning_rate": 0.0005654536326317602, "loss": 0.0239, "num_input_tokens_seen": 135673920, "step": 62820 }, { "epoch": 10.248776508972268, "grad_norm": 0.016138330101966858, "learning_rate": 0.0005653830646756629, "loss": 0.0047, "num_input_tokens_seen": 135684672, "step": 62825 }, { "epoch": 10.249592169657422, "grad_norm": 0.00821363739669323, "learning_rate": 0.0005653124953944947, "loss": 0.0151, "num_input_tokens_seen": 135695232, "step": 62830 }, { "epoch": 10.250407830342578, "grad_norm": 0.0317390076816082, "learning_rate": 0.0005652419247896861, "loss": 0.0118, "num_input_tokens_seen": 135706432, "step": 62835 }, { "epoch": 10.251223491027732, "grad_norm": 0.0038922594394534826, "learning_rate": 0.000565171352862667, "loss": 0.0434, "num_input_tokens_seen": 135717536, "step": 62840 }, { "epoch": 10.252039151712887, "grad_norm": 0.03252030164003372, "learning_rate": 0.0005651007796148678, "loss": 0.0082, "num_input_tokens_seen": 135729856, "step": 62845 }, { "epoch": 10.252854812398043, "grad_norm": 0.025612108409404755, "learning_rate": 0.0005650302050477187, "loss": 0.0097, "num_input_tokens_seen": 135740000, "step": 62850 }, { "epoch": 10.253670473083197, "grad_norm": 0.2749195992946625, "learning_rate": 0.0005649596291626501, "loss": 0.0855, "num_input_tokens_seen": 135749888, "step": 62855 }, { "epoch": 10.254486133768353, "grad_norm": 0.20973838865756989, "learning_rate": 0.0005648890519610921, "loss": 0.0515, "num_input_tokens_seen": 135760992, "step": 62860 }, { "epoch": 10.255301794453507, "grad_norm": 0.3340967893600464, "learning_rate": 0.0005648184734444753, "loss": 0.122, "num_input_tokens_seen": 135769792, "step": 62865 }, { "epoch": 10.256117455138662, "grad_norm": 0.01708339713513851, "learning_rate": 0.0005647478936142296, "loss": 0.0249, "num_input_tokens_seen": 135781568, "step": 62870 }, { "epoch": 10.256933115823816, "grad_norm": 0.01038318034261465, "learning_rate": 0.0005646773124717858, "loss": 0.0541, "num_input_tokens_seen": 135792256, "step": 62875 }, { "epoch": 10.257748776508972, "grad_norm": 0.004717283882200718, "learning_rate": 0.0005646067300185744, "loss": 0.0263, "num_input_tokens_seen": 135803424, "step": 62880 }, { "epoch": 10.258564437194128, "grad_norm": 0.03085217997431755, "learning_rate": 0.0005645361462560256, "loss": 0.0809, "num_input_tokens_seen": 135814048, "step": 62885 }, { "epoch": 10.259380097879282, "grad_norm": 0.006718597374856472, "learning_rate": 0.0005644655611855698, "loss": 0.0866, "num_input_tokens_seen": 135825536, "step": 62890 }, { "epoch": 10.260195758564437, "grad_norm": 0.004600659478455782, "learning_rate": 0.0005643949748086377, "loss": 0.0735, "num_input_tokens_seen": 135837248, "step": 62895 }, { "epoch": 10.261011419249591, "grad_norm": 0.008998743258416653, "learning_rate": 0.0005643243871266598, "loss": 0.0995, "num_input_tokens_seen": 135847936, "step": 62900 }, { "epoch": 10.261827079934747, "grad_norm": 0.013277465477585793, "learning_rate": 0.0005642537981410665, "loss": 0.1557, "num_input_tokens_seen": 135859680, "step": 62905 }, { "epoch": 10.262642740619903, "grad_norm": 0.0034990364219993353, "learning_rate": 0.0005641832078532886, "loss": 0.0127, "num_input_tokens_seen": 135870880, "step": 62910 }, { "epoch": 10.263458401305057, "grad_norm": 0.2049998939037323, "learning_rate": 0.0005641126162647564, "loss": 0.0452, "num_input_tokens_seen": 135881568, "step": 62915 }, { "epoch": 10.264274061990212, "grad_norm": 0.1066952794790268, "learning_rate": 0.0005640420233769008, "loss": 0.0869, "num_input_tokens_seen": 135892800, "step": 62920 }, { "epoch": 10.265089722675366, "grad_norm": 0.03311437368392944, "learning_rate": 0.0005639714291911524, "loss": 0.0305, "num_input_tokens_seen": 135903712, "step": 62925 }, { "epoch": 10.265905383360522, "grad_norm": 0.022299086675047874, "learning_rate": 0.0005639008337089416, "loss": 0.0416, "num_input_tokens_seen": 135914240, "step": 62930 }, { "epoch": 10.266721044045678, "grad_norm": 0.018949246034026146, "learning_rate": 0.0005638302369316995, "loss": 0.0721, "num_input_tokens_seen": 135925760, "step": 62935 }, { "epoch": 10.267536704730832, "grad_norm": 0.019130868837237358, "learning_rate": 0.0005637596388608567, "loss": 0.0275, "num_input_tokens_seen": 135936096, "step": 62940 }, { "epoch": 10.268352365415987, "grad_norm": 0.36915987730026245, "learning_rate": 0.0005636890394978439, "loss": 0.1173, "num_input_tokens_seen": 135946144, "step": 62945 }, { "epoch": 10.269168026101141, "grad_norm": 0.15957792103290558, "learning_rate": 0.0005636184388440919, "loss": 0.0772, "num_input_tokens_seen": 135957152, "step": 62950 }, { "epoch": 10.269983686786297, "grad_norm": 0.010827888734638691, "learning_rate": 0.0005635478369010316, "loss": 0.0212, "num_input_tokens_seen": 135967328, "step": 62955 }, { "epoch": 10.270799347471453, "grad_norm": 0.02170558087527752, "learning_rate": 0.0005634772336700937, "loss": 0.0751, "num_input_tokens_seen": 135977472, "step": 62960 }, { "epoch": 10.271615008156607, "grad_norm": 0.012610095553100109, "learning_rate": 0.0005634066291527092, "loss": 0.0074, "num_input_tokens_seen": 135988032, "step": 62965 }, { "epoch": 10.272430668841762, "grad_norm": 0.24561123549938202, "learning_rate": 0.000563336023350309, "loss": 0.0205, "num_input_tokens_seen": 135998432, "step": 62970 }, { "epoch": 10.273246329526916, "grad_norm": 0.007883837446570396, "learning_rate": 0.0005632654162643239, "loss": 0.0414, "num_input_tokens_seen": 136009536, "step": 62975 }, { "epoch": 10.274061990212072, "grad_norm": 0.010281133465468884, "learning_rate": 0.0005631948078961847, "loss": 0.0081, "num_input_tokens_seen": 136020480, "step": 62980 }, { "epoch": 10.274877650897226, "grad_norm": 0.019367830827832222, "learning_rate": 0.0005631241982473227, "loss": 0.0205, "num_input_tokens_seen": 136031136, "step": 62985 }, { "epoch": 10.275693311582382, "grad_norm": 0.20234939455986023, "learning_rate": 0.0005630535873191687, "loss": 0.0314, "num_input_tokens_seen": 136043232, "step": 62990 }, { "epoch": 10.276508972267537, "grad_norm": 0.2768293619155884, "learning_rate": 0.0005629829751131538, "loss": 0.0386, "num_input_tokens_seen": 136053888, "step": 62995 }, { "epoch": 10.277324632952691, "grad_norm": 0.3425898253917694, "learning_rate": 0.0005629123616307089, "loss": 0.168, "num_input_tokens_seen": 136064160, "step": 63000 }, { "epoch": 10.278140293637847, "grad_norm": 0.03600388392806053, "learning_rate": 0.0005628417468732653, "loss": 0.1116, "num_input_tokens_seen": 136075584, "step": 63005 }, { "epoch": 10.278955954323001, "grad_norm": 0.18759030103683472, "learning_rate": 0.0005627711308422539, "loss": 0.0283, "num_input_tokens_seen": 136085312, "step": 63010 }, { "epoch": 10.279771615008157, "grad_norm": 0.09952437877655029, "learning_rate": 0.000562700513539106, "loss": 0.1074, "num_input_tokens_seen": 136096576, "step": 63015 }, { "epoch": 10.280587275693312, "grad_norm": 0.07147414237260818, "learning_rate": 0.0005626298949652524, "loss": 0.0206, "num_input_tokens_seen": 136107168, "step": 63020 }, { "epoch": 10.281402936378466, "grad_norm": 0.19256940484046936, "learning_rate": 0.0005625592751221248, "loss": 0.1364, "num_input_tokens_seen": 136118592, "step": 63025 }, { "epoch": 10.282218597063622, "grad_norm": 0.0532815121114254, "learning_rate": 0.000562488654011154, "loss": 0.0341, "num_input_tokens_seen": 136128960, "step": 63030 }, { "epoch": 10.283034257748776, "grad_norm": 0.3896867632865906, "learning_rate": 0.0005624180316337715, "loss": 0.0315, "num_input_tokens_seen": 136139584, "step": 63035 }, { "epoch": 10.283849918433932, "grad_norm": 0.07998025417327881, "learning_rate": 0.0005623474079914082, "loss": 0.0164, "num_input_tokens_seen": 136150560, "step": 63040 }, { "epoch": 10.284665579119087, "grad_norm": 0.007289467379450798, "learning_rate": 0.0005622767830854957, "loss": 0.0069, "num_input_tokens_seen": 136161216, "step": 63045 }, { "epoch": 10.285481239804241, "grad_norm": 0.018334923312067986, "learning_rate": 0.0005622061569174651, "loss": 0.0083, "num_input_tokens_seen": 136172192, "step": 63050 }, { "epoch": 10.286296900489397, "grad_norm": 0.36530670523643494, "learning_rate": 0.0005621355294887479, "loss": 0.0248, "num_input_tokens_seen": 136182240, "step": 63055 }, { "epoch": 10.28711256117455, "grad_norm": 0.0075960480608046055, "learning_rate": 0.0005620649008007755, "loss": 0.0148, "num_input_tokens_seen": 136193312, "step": 63060 }, { "epoch": 10.287928221859707, "grad_norm": 0.0044697243720293045, "learning_rate": 0.0005619942708549789, "loss": 0.0277, "num_input_tokens_seen": 136205280, "step": 63065 }, { "epoch": 10.28874388254486, "grad_norm": 0.007282007485628128, "learning_rate": 0.0005619236396527899, "loss": 0.0035, "num_input_tokens_seen": 136216768, "step": 63070 }, { "epoch": 10.289559543230016, "grad_norm": 0.0313313864171505, "learning_rate": 0.0005618530071956397, "loss": 0.0065, "num_input_tokens_seen": 136228608, "step": 63075 }, { "epoch": 10.290375203915172, "grad_norm": 0.6887944340705872, "learning_rate": 0.00056178237348496, "loss": 0.0458, "num_input_tokens_seen": 136239328, "step": 63080 }, { "epoch": 10.291190864600326, "grad_norm": 0.056814152747392654, "learning_rate": 0.0005617117385221819, "loss": 0.0233, "num_input_tokens_seen": 136249632, "step": 63085 }, { "epoch": 10.292006525285482, "grad_norm": 0.00974601786583662, "learning_rate": 0.0005616411023087373, "loss": 0.0129, "num_input_tokens_seen": 136260352, "step": 63090 }, { "epoch": 10.292822185970635, "grad_norm": 0.260468453168869, "learning_rate": 0.0005615704648460575, "loss": 0.0706, "num_input_tokens_seen": 136270752, "step": 63095 }, { "epoch": 10.293637846655791, "grad_norm": 0.5153801441192627, "learning_rate": 0.0005614998261355741, "loss": 0.1921, "num_input_tokens_seen": 136281664, "step": 63100 }, { "epoch": 10.294453507340947, "grad_norm": 0.0031059994362294674, "learning_rate": 0.0005614291861787188, "loss": 0.0344, "num_input_tokens_seen": 136292544, "step": 63105 }, { "epoch": 10.2952691680261, "grad_norm": 0.2620164155960083, "learning_rate": 0.0005613585449769232, "loss": 0.1277, "num_input_tokens_seen": 136303744, "step": 63110 }, { "epoch": 10.296084828711257, "grad_norm": 0.07174117118120193, "learning_rate": 0.0005612879025316186, "loss": 0.0239, "num_input_tokens_seen": 136314240, "step": 63115 }, { "epoch": 10.29690048939641, "grad_norm": 0.018266484141349792, "learning_rate": 0.000561217258844237, "loss": 0.0191, "num_input_tokens_seen": 136325824, "step": 63120 }, { "epoch": 10.297716150081566, "grad_norm": 0.010049085132777691, "learning_rate": 0.0005611466139162101, "loss": 0.0193, "num_input_tokens_seen": 136336864, "step": 63125 }, { "epoch": 10.298531810766722, "grad_norm": 0.005402869079262018, "learning_rate": 0.0005610759677489694, "loss": 0.0324, "num_input_tokens_seen": 136347456, "step": 63130 }, { "epoch": 10.299347471451876, "grad_norm": 0.008500440046191216, "learning_rate": 0.0005610053203439467, "loss": 0.2237, "num_input_tokens_seen": 136357408, "step": 63135 }, { "epoch": 10.300163132137031, "grad_norm": 0.0620102696120739, "learning_rate": 0.0005609346717025737, "loss": 0.0534, "num_input_tokens_seen": 136368640, "step": 63140 }, { "epoch": 10.300978792822185, "grad_norm": 0.006548778153955936, "learning_rate": 0.0005608640218262825, "loss": 0.0972, "num_input_tokens_seen": 136378144, "step": 63145 }, { "epoch": 10.301794453507341, "grad_norm": 0.005673635751008987, "learning_rate": 0.0005607933707165046, "loss": 0.0815, "num_input_tokens_seen": 136388672, "step": 63150 }, { "epoch": 10.302610114192497, "grad_norm": 0.29767847061157227, "learning_rate": 0.000560722718374672, "loss": 0.1562, "num_input_tokens_seen": 136398976, "step": 63155 }, { "epoch": 10.30342577487765, "grad_norm": 0.00508272647857666, "learning_rate": 0.0005606520648022164, "loss": 0.0116, "num_input_tokens_seen": 136410624, "step": 63160 }, { "epoch": 10.304241435562806, "grad_norm": 0.01566510647535324, "learning_rate": 0.0005605814100005696, "loss": 0.0087, "num_input_tokens_seen": 136420864, "step": 63165 }, { "epoch": 10.30505709624796, "grad_norm": 0.020303290337324142, "learning_rate": 0.0005605107539711639, "loss": 0.0049, "num_input_tokens_seen": 136430752, "step": 63170 }, { "epoch": 10.305872756933116, "grad_norm": 0.1528480499982834, "learning_rate": 0.000560440096715431, "loss": 0.0131, "num_input_tokens_seen": 136440736, "step": 63175 }, { "epoch": 10.30668841761827, "grad_norm": 0.17993883788585663, "learning_rate": 0.0005603694382348027, "loss": 0.0398, "num_input_tokens_seen": 136450240, "step": 63180 }, { "epoch": 10.307504078303426, "grad_norm": 0.024285180494189262, "learning_rate": 0.0005602987785307112, "loss": 0.0801, "num_input_tokens_seen": 136461376, "step": 63185 }, { "epoch": 10.308319738988581, "grad_norm": 0.00804990902543068, "learning_rate": 0.0005602281176045885, "loss": 0.0407, "num_input_tokens_seen": 136473504, "step": 63190 }, { "epoch": 10.309135399673735, "grad_norm": 0.008934085257351398, "learning_rate": 0.0005601574554578666, "loss": 0.069, "num_input_tokens_seen": 136483520, "step": 63195 }, { "epoch": 10.309951060358891, "grad_norm": 0.004447769373655319, "learning_rate": 0.0005600867920919775, "loss": 0.0074, "num_input_tokens_seen": 136493824, "step": 63200 }, { "epoch": 10.310766721044045, "grad_norm": 0.007732720114290714, "learning_rate": 0.0005600161275083535, "loss": 0.0509, "num_input_tokens_seen": 136503936, "step": 63205 }, { "epoch": 10.3115823817292, "grad_norm": 0.0947844460606575, "learning_rate": 0.0005599454617084264, "loss": 0.0136, "num_input_tokens_seen": 136515488, "step": 63210 }, { "epoch": 10.312398042414356, "grad_norm": 0.0417468324303627, "learning_rate": 0.0005598747946936285, "loss": 0.0505, "num_input_tokens_seen": 136526464, "step": 63215 }, { "epoch": 10.31321370309951, "grad_norm": 0.0020598669070750475, "learning_rate": 0.0005598041264653919, "loss": 0.0385, "num_input_tokens_seen": 136537248, "step": 63220 }, { "epoch": 10.314029363784666, "grad_norm": 0.01746521145105362, "learning_rate": 0.0005597334570251489, "loss": 0.0156, "num_input_tokens_seen": 136548320, "step": 63225 }, { "epoch": 10.31484502446982, "grad_norm": 0.0852559357881546, "learning_rate": 0.0005596627863743316, "loss": 0.0217, "num_input_tokens_seen": 136559360, "step": 63230 }, { "epoch": 10.315660685154976, "grad_norm": 0.01237891148775816, "learning_rate": 0.0005595921145143722, "loss": 0.0091, "num_input_tokens_seen": 136569760, "step": 63235 }, { "epoch": 10.31647634584013, "grad_norm": 0.3985021710395813, "learning_rate": 0.0005595214414467029, "loss": 0.1213, "num_input_tokens_seen": 136579904, "step": 63240 }, { "epoch": 10.317292006525285, "grad_norm": 0.04400103539228439, "learning_rate": 0.0005594507671727563, "loss": 0.1041, "num_input_tokens_seen": 136592064, "step": 63245 }, { "epoch": 10.318107667210441, "grad_norm": 0.04320823401212692, "learning_rate": 0.0005593800916939642, "loss": 0.0836, "num_input_tokens_seen": 136603424, "step": 63250 }, { "epoch": 10.318923327895595, "grad_norm": 0.27597784996032715, "learning_rate": 0.0005593094150117595, "loss": 0.0342, "num_input_tokens_seen": 136615232, "step": 63255 }, { "epoch": 10.31973898858075, "grad_norm": 0.04080792888998985, "learning_rate": 0.0005592387371275741, "loss": 0.0607, "num_input_tokens_seen": 136626240, "step": 63260 }, { "epoch": 10.320554649265905, "grad_norm": 0.05028015002608299, "learning_rate": 0.0005591680580428406, "loss": 0.042, "num_input_tokens_seen": 136637408, "step": 63265 }, { "epoch": 10.32137030995106, "grad_norm": 0.04935172200202942, "learning_rate": 0.0005590973777589912, "loss": 0.0162, "num_input_tokens_seen": 136647648, "step": 63270 }, { "epoch": 10.322185970636216, "grad_norm": 0.004445977509021759, "learning_rate": 0.0005590266962774588, "loss": 0.0064, "num_input_tokens_seen": 136658944, "step": 63275 }, { "epoch": 10.32300163132137, "grad_norm": 0.11220485717058182, "learning_rate": 0.0005589560135996752, "loss": 0.1099, "num_input_tokens_seen": 136669792, "step": 63280 }, { "epoch": 10.323817292006526, "grad_norm": 0.28762274980545044, "learning_rate": 0.0005588853297270734, "loss": 0.054, "num_input_tokens_seen": 136680608, "step": 63285 }, { "epoch": 10.32463295269168, "grad_norm": 0.006721612997353077, "learning_rate": 0.0005588146446610855, "loss": 0.039, "num_input_tokens_seen": 136691296, "step": 63290 }, { "epoch": 10.325448613376835, "grad_norm": 0.007691043894737959, "learning_rate": 0.0005587439584031444, "loss": 0.0152, "num_input_tokens_seen": 136701376, "step": 63295 }, { "epoch": 10.326264274061991, "grad_norm": 0.00728636747226119, "learning_rate": 0.0005586732709546824, "loss": 0.006, "num_input_tokens_seen": 136712832, "step": 63300 }, { "epoch": 10.327079934747145, "grad_norm": 0.0071546598337590694, "learning_rate": 0.0005586025823171321, "loss": 0.2496, "num_input_tokens_seen": 136722368, "step": 63305 }, { "epoch": 10.3278955954323, "grad_norm": 0.009742275811731815, "learning_rate": 0.0005585318924919262, "loss": 0.0236, "num_input_tokens_seen": 136733472, "step": 63310 }, { "epoch": 10.328711256117455, "grad_norm": 0.008231345564126968, "learning_rate": 0.0005584612014804972, "loss": 0.0179, "num_input_tokens_seen": 136744576, "step": 63315 }, { "epoch": 10.32952691680261, "grad_norm": 0.016715819016098976, "learning_rate": 0.0005583905092842777, "loss": 0.0645, "num_input_tokens_seen": 136756480, "step": 63320 }, { "epoch": 10.330342577487766, "grad_norm": 0.15237416326999664, "learning_rate": 0.0005583198159047005, "loss": 0.0334, "num_input_tokens_seen": 136766720, "step": 63325 }, { "epoch": 10.33115823817292, "grad_norm": 0.015316602773964405, "learning_rate": 0.0005582491213431983, "loss": 0.0147, "num_input_tokens_seen": 136777280, "step": 63330 }, { "epoch": 10.331973898858076, "grad_norm": 0.009626131504774094, "learning_rate": 0.0005581784256012037, "loss": 0.0615, "num_input_tokens_seen": 136788768, "step": 63335 }, { "epoch": 10.33278955954323, "grad_norm": 0.025972846895456314, "learning_rate": 0.0005581077286801495, "loss": 0.1756, "num_input_tokens_seen": 136799584, "step": 63340 }, { "epoch": 10.333605220228385, "grad_norm": 0.02969386987388134, "learning_rate": 0.0005580370305814686, "loss": 0.1976, "num_input_tokens_seen": 136811488, "step": 63345 }, { "epoch": 10.33442088091354, "grad_norm": 0.18062162399291992, "learning_rate": 0.0005579663313065935, "loss": 0.0207, "num_input_tokens_seen": 136822784, "step": 63350 }, { "epoch": 10.335236541598695, "grad_norm": 0.15014511346817017, "learning_rate": 0.0005578956308569572, "loss": 0.0212, "num_input_tokens_seen": 136833888, "step": 63355 }, { "epoch": 10.33605220228385, "grad_norm": 0.011584420688450336, "learning_rate": 0.0005578249292339924, "loss": 0.0704, "num_input_tokens_seen": 136844416, "step": 63360 }, { "epoch": 10.336867862969005, "grad_norm": 0.004050334449857473, "learning_rate": 0.0005577542264391322, "loss": 0.0629, "num_input_tokens_seen": 136854336, "step": 63365 }, { "epoch": 10.33768352365416, "grad_norm": 0.035820942372083664, "learning_rate": 0.0005576835224738092, "loss": 0.0538, "num_input_tokens_seen": 136865440, "step": 63370 }, { "epoch": 10.338499184339314, "grad_norm": 0.0067452918738126755, "learning_rate": 0.0005576128173394567, "loss": 0.0962, "num_input_tokens_seen": 136877248, "step": 63375 }, { "epoch": 10.33931484502447, "grad_norm": 0.19582054018974304, "learning_rate": 0.0005575421110375072, "loss": 0.0485, "num_input_tokens_seen": 136887360, "step": 63380 }, { "epoch": 10.340130505709626, "grad_norm": 0.005524530075490475, "learning_rate": 0.0005574714035693938, "loss": 0.1338, "num_input_tokens_seen": 136899008, "step": 63385 }, { "epoch": 10.34094616639478, "grad_norm": 0.1523488163948059, "learning_rate": 0.0005574006949365496, "loss": 0.1644, "num_input_tokens_seen": 136908832, "step": 63390 }, { "epoch": 10.341761827079935, "grad_norm": 0.33566051721572876, "learning_rate": 0.0005573299851404074, "loss": 0.0468, "num_input_tokens_seen": 136919168, "step": 63395 }, { "epoch": 10.34257748776509, "grad_norm": 0.0225025936961174, "learning_rate": 0.0005572592741824003, "loss": 0.0136, "num_input_tokens_seen": 136929312, "step": 63400 }, { "epoch": 10.343393148450245, "grad_norm": 0.009087719023227692, "learning_rate": 0.0005571885620639614, "loss": 0.089, "num_input_tokens_seen": 136940576, "step": 63405 }, { "epoch": 10.3442088091354, "grad_norm": 0.02980167046189308, "learning_rate": 0.0005571178487865238, "loss": 0.1111, "num_input_tokens_seen": 136951296, "step": 63410 }, { "epoch": 10.345024469820554, "grad_norm": 0.012627690099179745, "learning_rate": 0.0005570471343515205, "loss": 0.0463, "num_input_tokens_seen": 136962208, "step": 63415 }, { "epoch": 10.34584013050571, "grad_norm": 0.5630673170089722, "learning_rate": 0.0005569764187603846, "loss": 0.1932, "num_input_tokens_seen": 136973184, "step": 63420 }, { "epoch": 10.346655791190864, "grad_norm": 0.16642948985099792, "learning_rate": 0.0005569057020145494, "loss": 0.0289, "num_input_tokens_seen": 136983936, "step": 63425 }, { "epoch": 10.34747145187602, "grad_norm": 0.04962924122810364, "learning_rate": 0.0005568349841154479, "loss": 0.01, "num_input_tokens_seen": 136994848, "step": 63430 }, { "epoch": 10.348287112561174, "grad_norm": 0.28208860754966736, "learning_rate": 0.0005567642650645134, "loss": 0.0792, "num_input_tokens_seen": 137005760, "step": 63435 }, { "epoch": 10.34910277324633, "grad_norm": 0.754994809627533, "learning_rate": 0.000556693544863179, "loss": 0.1711, "num_input_tokens_seen": 137016192, "step": 63440 }, { "epoch": 10.349918433931485, "grad_norm": 0.013265586458146572, "learning_rate": 0.000556622823512878, "loss": 0.0094, "num_input_tokens_seen": 137027296, "step": 63445 }, { "epoch": 10.350734094616639, "grad_norm": 0.028474433347582817, "learning_rate": 0.0005565521010150436, "loss": 0.021, "num_input_tokens_seen": 137038592, "step": 63450 }, { "epoch": 10.351549755301795, "grad_norm": 0.0459626168012619, "learning_rate": 0.0005564813773711092, "loss": 0.1247, "num_input_tokens_seen": 137048768, "step": 63455 }, { "epoch": 10.352365415986949, "grad_norm": 0.007897719740867615, "learning_rate": 0.0005564106525825079, "loss": 0.0831, "num_input_tokens_seen": 137059520, "step": 63460 }, { "epoch": 10.353181076672104, "grad_norm": 0.01814207434654236, "learning_rate": 0.0005563399266506734, "loss": 0.0318, "num_input_tokens_seen": 137070912, "step": 63465 }, { "epoch": 10.35399673735726, "grad_norm": 0.06560403853654861, "learning_rate": 0.0005562691995770386, "loss": 0.0657, "num_input_tokens_seen": 137081472, "step": 63470 }, { "epoch": 10.354812398042414, "grad_norm": 0.016114749014377594, "learning_rate": 0.0005561984713630373, "loss": 0.0155, "num_input_tokens_seen": 137091520, "step": 63475 }, { "epoch": 10.35562805872757, "grad_norm": 0.05790586769580841, "learning_rate": 0.0005561277420101026, "loss": 0.0392, "num_input_tokens_seen": 137103680, "step": 63480 }, { "epoch": 10.356443719412724, "grad_norm": 0.17016306519508362, "learning_rate": 0.0005560570115196679, "loss": 0.053, "num_input_tokens_seen": 137115488, "step": 63485 }, { "epoch": 10.35725938009788, "grad_norm": 0.009363112039864063, "learning_rate": 0.0005559862798931668, "loss": 0.0265, "num_input_tokens_seen": 137126944, "step": 63490 }, { "epoch": 10.358075040783035, "grad_norm": 0.2349333018064499, "learning_rate": 0.0005559155471320326, "loss": 0.041, "num_input_tokens_seen": 137138112, "step": 63495 }, { "epoch": 10.358890701468189, "grad_norm": 0.17752444744110107, "learning_rate": 0.0005558448132376991, "loss": 0.0234, "num_input_tokens_seen": 137149216, "step": 63500 }, { "epoch": 10.359706362153345, "grad_norm": 0.03303788974881172, "learning_rate": 0.0005557740782115995, "loss": 0.0323, "num_input_tokens_seen": 137160640, "step": 63505 }, { "epoch": 10.360522022838499, "grad_norm": 0.3540240228176117, "learning_rate": 0.0005557033420551676, "loss": 0.1315, "num_input_tokens_seen": 137171808, "step": 63510 }, { "epoch": 10.361337683523654, "grad_norm": 0.004490839783102274, "learning_rate": 0.0005556326047698367, "loss": 0.0513, "num_input_tokens_seen": 137182912, "step": 63515 }, { "epoch": 10.362153344208808, "grad_norm": 0.11256757378578186, "learning_rate": 0.0005555618663570405, "loss": 0.0545, "num_input_tokens_seen": 137193792, "step": 63520 }, { "epoch": 10.362969004893964, "grad_norm": 0.011594374664127827, "learning_rate": 0.0005554911268182126, "loss": 0.0333, "num_input_tokens_seen": 137204160, "step": 63525 }, { "epoch": 10.36378466557912, "grad_norm": 0.004073978401720524, "learning_rate": 0.0005554203861547866, "loss": 0.0157, "num_input_tokens_seen": 137214720, "step": 63530 }, { "epoch": 10.364600326264274, "grad_norm": 0.16724653542041779, "learning_rate": 0.0005553496443681961, "loss": 0.0675, "num_input_tokens_seen": 137224096, "step": 63535 }, { "epoch": 10.36541598694943, "grad_norm": 0.04027742147445679, "learning_rate": 0.000555278901459875, "loss": 0.1207, "num_input_tokens_seen": 137235296, "step": 63540 }, { "epoch": 10.366231647634583, "grad_norm": 0.006478854920715094, "learning_rate": 0.0005552081574312568, "loss": 0.0184, "num_input_tokens_seen": 137247200, "step": 63545 }, { "epoch": 10.367047308319739, "grad_norm": 0.01771964132785797, "learning_rate": 0.0005551374122837752, "loss": 0.0222, "num_input_tokens_seen": 137257952, "step": 63550 }, { "epoch": 10.367862969004895, "grad_norm": 0.01356664951890707, "learning_rate": 0.000555066666018864, "loss": 0.0282, "num_input_tokens_seen": 137269088, "step": 63555 }, { "epoch": 10.368678629690049, "grad_norm": 0.3984331786632538, "learning_rate": 0.0005549959186379569, "loss": 0.1308, "num_input_tokens_seen": 137279520, "step": 63560 }, { "epoch": 10.369494290375204, "grad_norm": 0.007946250960230827, "learning_rate": 0.0005549251701424878, "loss": 0.0346, "num_input_tokens_seen": 137291008, "step": 63565 }, { "epoch": 10.370309951060358, "grad_norm": 0.022877560928463936, "learning_rate": 0.0005548544205338905, "loss": 0.0125, "num_input_tokens_seen": 137300832, "step": 63570 }, { "epoch": 10.371125611745514, "grad_norm": 0.021704500541090965, "learning_rate": 0.0005547836698135987, "loss": 0.0746, "num_input_tokens_seen": 137310208, "step": 63575 }, { "epoch": 10.37194127243067, "grad_norm": 0.17134937644004822, "learning_rate": 0.0005547129179830463, "loss": 0.0237, "num_input_tokens_seen": 137321504, "step": 63580 }, { "epoch": 10.372756933115824, "grad_norm": 0.0015580940525978804, "learning_rate": 0.0005546421650436674, "loss": 0.0157, "num_input_tokens_seen": 137331648, "step": 63585 }, { "epoch": 10.37357259380098, "grad_norm": 0.006557346321642399, "learning_rate": 0.0005545714109968956, "loss": 0.0066, "num_input_tokens_seen": 137342464, "step": 63590 }, { "epoch": 10.374388254486133, "grad_norm": 0.01257567573338747, "learning_rate": 0.0005545006558441649, "loss": 0.0655, "num_input_tokens_seen": 137353504, "step": 63595 }, { "epoch": 10.375203915171289, "grad_norm": 0.04626630246639252, "learning_rate": 0.0005544298995869093, "loss": 0.0389, "num_input_tokens_seen": 137365504, "step": 63600 }, { "epoch": 10.376019575856443, "grad_norm": 0.00850379467010498, "learning_rate": 0.0005543591422265627, "loss": 0.0076, "num_input_tokens_seen": 137376160, "step": 63605 }, { "epoch": 10.376835236541599, "grad_norm": 0.033676404505968094, "learning_rate": 0.0005542883837645592, "loss": 0.0659, "num_input_tokens_seen": 137387200, "step": 63610 }, { "epoch": 10.377650897226754, "grad_norm": 0.08765646070241928, "learning_rate": 0.0005542176242023326, "loss": 0.0249, "num_input_tokens_seen": 137399552, "step": 63615 }, { "epoch": 10.378466557911908, "grad_norm": 0.10746024549007416, "learning_rate": 0.0005541468635413172, "loss": 0.0237, "num_input_tokens_seen": 137411904, "step": 63620 }, { "epoch": 10.379282218597064, "grad_norm": 0.007602016907185316, "learning_rate": 0.0005540761017829468, "loss": 0.0067, "num_input_tokens_seen": 137421856, "step": 63625 }, { "epoch": 10.380097879282218, "grad_norm": 0.00871317833662033, "learning_rate": 0.0005540053389286556, "loss": 0.0111, "num_input_tokens_seen": 137432896, "step": 63630 }, { "epoch": 10.380913539967374, "grad_norm": 0.011335165239870548, "learning_rate": 0.0005539345749798778, "loss": 0.0507, "num_input_tokens_seen": 137444544, "step": 63635 }, { "epoch": 10.38172920065253, "grad_norm": 0.03143840283155441, "learning_rate": 0.0005538638099380473, "loss": 0.0591, "num_input_tokens_seen": 137454272, "step": 63640 }, { "epoch": 10.382544861337683, "grad_norm": 0.2992579936981201, "learning_rate": 0.0005537930438045984, "loss": 0.0203, "num_input_tokens_seen": 137464416, "step": 63645 }, { "epoch": 10.383360522022839, "grad_norm": 0.0029851419385522604, "learning_rate": 0.0005537222765809653, "loss": 0.0984, "num_input_tokens_seen": 137473472, "step": 63650 }, { "epoch": 10.384176182707993, "grad_norm": 0.3246956169605255, "learning_rate": 0.000553651508268582, "loss": 0.0578, "num_input_tokens_seen": 137484800, "step": 63655 }, { "epoch": 10.384991843393149, "grad_norm": 0.006309543736279011, "learning_rate": 0.000553580738868883, "loss": 0.0208, "num_input_tokens_seen": 137494592, "step": 63660 }, { "epoch": 10.385807504078304, "grad_norm": 0.42174792289733887, "learning_rate": 0.0005535099683833021, "loss": 0.2332, "num_input_tokens_seen": 137504352, "step": 63665 }, { "epoch": 10.386623164763458, "grad_norm": 0.04054646193981171, "learning_rate": 0.0005534391968132741, "loss": 0.0298, "num_input_tokens_seen": 137515680, "step": 63670 }, { "epoch": 10.387438825448614, "grad_norm": 0.08717795461416245, "learning_rate": 0.0005533684241602327, "loss": 0.1018, "num_input_tokens_seen": 137526592, "step": 63675 }, { "epoch": 10.388254486133768, "grad_norm": 0.04790165647864342, "learning_rate": 0.0005532976504256127, "loss": 0.1052, "num_input_tokens_seen": 137536864, "step": 63680 }, { "epoch": 10.389070146818923, "grad_norm": 0.546216607093811, "learning_rate": 0.000553226875610848, "loss": 0.0735, "num_input_tokens_seen": 137547232, "step": 63685 }, { "epoch": 10.38988580750408, "grad_norm": 0.00997911486774683, "learning_rate": 0.0005531560997173733, "loss": 0.0209, "num_input_tokens_seen": 137558016, "step": 63690 }, { "epoch": 10.390701468189233, "grad_norm": 0.0133741470053792, "learning_rate": 0.0005530853227466229, "loss": 0.0082, "num_input_tokens_seen": 137568896, "step": 63695 }, { "epoch": 10.391517128874389, "grad_norm": 0.0015816029626876116, "learning_rate": 0.0005530145447000308, "loss": 0.0075, "num_input_tokens_seen": 137578528, "step": 63700 }, { "epoch": 10.392332789559543, "grad_norm": 0.09255795925855637, "learning_rate": 0.0005529437655790319, "loss": 0.1526, "num_input_tokens_seen": 137589568, "step": 63705 }, { "epoch": 10.393148450244698, "grad_norm": 0.09927723556756973, "learning_rate": 0.0005528729853850604, "loss": 0.0198, "num_input_tokens_seen": 137600096, "step": 63710 }, { "epoch": 10.393964110929852, "grad_norm": 0.1837208867073059, "learning_rate": 0.0005528022041195507, "loss": 0.0387, "num_input_tokens_seen": 137611616, "step": 63715 }, { "epoch": 10.394779771615008, "grad_norm": 0.0033106612972915173, "learning_rate": 0.0005527314217839375, "loss": 0.0115, "num_input_tokens_seen": 137622304, "step": 63720 }, { "epoch": 10.395595432300164, "grad_norm": 0.010321944952011108, "learning_rate": 0.0005526606383796551, "loss": 0.0358, "num_input_tokens_seen": 137633344, "step": 63725 }, { "epoch": 10.396411092985318, "grad_norm": 0.10109658539295197, "learning_rate": 0.000552589853908138, "loss": 0.1396, "num_input_tokens_seen": 137643264, "step": 63730 }, { "epoch": 10.397226753670473, "grad_norm": 0.0041798497550189495, "learning_rate": 0.0005525190683708207, "loss": 0.004, "num_input_tokens_seen": 137654464, "step": 63735 }, { "epoch": 10.398042414355627, "grad_norm": 0.48795971274375916, "learning_rate": 0.0005524482817691381, "loss": 0.0697, "num_input_tokens_seen": 137664224, "step": 63740 }, { "epoch": 10.398858075040783, "grad_norm": 0.014303839765489101, "learning_rate": 0.0005523774941045244, "loss": 0.0325, "num_input_tokens_seen": 137675872, "step": 63745 }, { "epoch": 10.399673735725939, "grad_norm": 0.006388854701071978, "learning_rate": 0.0005523067053784143, "loss": 0.013, "num_input_tokens_seen": 137687328, "step": 63750 }, { "epoch": 10.400489396411093, "grad_norm": 0.02298627234995365, "learning_rate": 0.0005522359155922425, "loss": 0.0322, "num_input_tokens_seen": 137699072, "step": 63755 }, { "epoch": 10.401305057096248, "grad_norm": 0.003545205108821392, "learning_rate": 0.0005521651247474436, "loss": 0.0531, "num_input_tokens_seen": 137710720, "step": 63760 }, { "epoch": 10.402120717781402, "grad_norm": 0.10142495483160019, "learning_rate": 0.0005520943328454523, "loss": 0.2223, "num_input_tokens_seen": 137720640, "step": 63765 }, { "epoch": 10.402936378466558, "grad_norm": 0.007538018748164177, "learning_rate": 0.0005520235398877032, "loss": 0.0385, "num_input_tokens_seen": 137732000, "step": 63770 }, { "epoch": 10.403752039151712, "grad_norm": 0.018357079476118088, "learning_rate": 0.0005519527458756312, "loss": 0.1069, "num_input_tokens_seen": 137741760, "step": 63775 }, { "epoch": 10.404567699836868, "grad_norm": 0.0528842993080616, "learning_rate": 0.0005518819508106706, "loss": 0.0097, "num_input_tokens_seen": 137753408, "step": 63780 }, { "epoch": 10.405383360522023, "grad_norm": 0.00990887638181448, "learning_rate": 0.0005518111546942567, "loss": 0.1179, "num_input_tokens_seen": 137764608, "step": 63785 }, { "epoch": 10.406199021207177, "grad_norm": 0.023492760956287384, "learning_rate": 0.000551740357527824, "loss": 0.0076, "num_input_tokens_seen": 137775712, "step": 63790 }, { "epoch": 10.407014681892333, "grad_norm": 0.11711253225803375, "learning_rate": 0.0005516695593128073, "loss": 0.0947, "num_input_tokens_seen": 137786304, "step": 63795 }, { "epoch": 10.407830342577487, "grad_norm": 0.00303363474085927, "learning_rate": 0.0005515987600506414, "loss": 0.0079, "num_input_tokens_seen": 137797376, "step": 63800 }, { "epoch": 10.408646003262643, "grad_norm": 0.008306864649057388, "learning_rate": 0.0005515279597427612, "loss": 0.0036, "num_input_tokens_seen": 137808928, "step": 63805 }, { "epoch": 10.409461663947798, "grad_norm": 0.029468778520822525, "learning_rate": 0.0005514571583906014, "loss": 0.0067, "num_input_tokens_seen": 137819872, "step": 63810 }, { "epoch": 10.410277324632952, "grad_norm": 0.3117620348930359, "learning_rate": 0.0005513863559955971, "loss": 0.1481, "num_input_tokens_seen": 137830528, "step": 63815 }, { "epoch": 10.411092985318108, "grad_norm": 0.11586478352546692, "learning_rate": 0.0005513155525591831, "loss": 0.016, "num_input_tokens_seen": 137841920, "step": 63820 }, { "epoch": 10.411908646003262, "grad_norm": 0.1153964027762413, "learning_rate": 0.0005512447480827945, "loss": 0.1048, "num_input_tokens_seen": 137853280, "step": 63825 }, { "epoch": 10.412724306688418, "grad_norm": 0.00367523985914886, "learning_rate": 0.0005511739425678658, "loss": 0.1209, "num_input_tokens_seen": 137861984, "step": 63830 }, { "epoch": 10.413539967373573, "grad_norm": 0.02312796749174595, "learning_rate": 0.0005511031360158324, "loss": 0.1354, "num_input_tokens_seen": 137872192, "step": 63835 }, { "epoch": 10.414355628058727, "grad_norm": 0.005195770412683487, "learning_rate": 0.0005510323284281291, "loss": 0.1633, "num_input_tokens_seen": 137884160, "step": 63840 }, { "epoch": 10.415171288743883, "grad_norm": 0.01457708328962326, "learning_rate": 0.0005509615198061909, "loss": 0.0152, "num_input_tokens_seen": 137894912, "step": 63845 }, { "epoch": 10.415986949429037, "grad_norm": 0.4979305863380432, "learning_rate": 0.0005508907101514529, "loss": 0.0577, "num_input_tokens_seen": 137903648, "step": 63850 }, { "epoch": 10.416802610114193, "grad_norm": 0.028883550316095352, "learning_rate": 0.0005508198994653501, "loss": 0.0111, "num_input_tokens_seen": 137915680, "step": 63855 }, { "epoch": 10.417618270799348, "grad_norm": 0.015348607674241066, "learning_rate": 0.0005507490877493176, "loss": 0.0235, "num_input_tokens_seen": 137926912, "step": 63860 }, { "epoch": 10.418433931484502, "grad_norm": 0.09224829822778702, "learning_rate": 0.0005506782750047903, "loss": 0.0134, "num_input_tokens_seen": 137936192, "step": 63865 }, { "epoch": 10.419249592169658, "grad_norm": 0.011172234080731869, "learning_rate": 0.0005506074612332035, "loss": 0.0402, "num_input_tokens_seen": 137945856, "step": 63870 }, { "epoch": 10.420065252854812, "grad_norm": 0.010413877665996552, "learning_rate": 0.0005505366464359924, "loss": 0.0652, "num_input_tokens_seen": 137956864, "step": 63875 }, { "epoch": 10.420880913539968, "grad_norm": 0.030237272381782532, "learning_rate": 0.000550465830614592, "loss": 0.0112, "num_input_tokens_seen": 137969088, "step": 63880 }, { "epoch": 10.421696574225122, "grad_norm": 0.04134310036897659, "learning_rate": 0.0005503950137704374, "loss": 0.1144, "num_input_tokens_seen": 137979072, "step": 63885 }, { "epoch": 10.422512234910277, "grad_norm": 0.6153962016105652, "learning_rate": 0.0005503241959049641, "loss": 0.2252, "num_input_tokens_seen": 137990240, "step": 63890 }, { "epoch": 10.423327895595433, "grad_norm": 0.30122601985931396, "learning_rate": 0.000550253377019607, "loss": 0.0448, "num_input_tokens_seen": 138000832, "step": 63895 }, { "epoch": 10.424143556280587, "grad_norm": 0.00784077774733305, "learning_rate": 0.0005501825571158016, "loss": 0.0227, "num_input_tokens_seen": 138012576, "step": 63900 }, { "epoch": 10.424959216965743, "grad_norm": 0.0033007084857672453, "learning_rate": 0.000550111736194983, "loss": 0.0132, "num_input_tokens_seen": 138024000, "step": 63905 }, { "epoch": 10.425774877650896, "grad_norm": 0.043511830270290375, "learning_rate": 0.0005500409142585864, "loss": 0.0384, "num_input_tokens_seen": 138034464, "step": 63910 }, { "epoch": 10.426590538336052, "grad_norm": 0.02446580119431019, "learning_rate": 0.0005499700913080472, "loss": 0.0242, "num_input_tokens_seen": 138044448, "step": 63915 }, { "epoch": 10.427406199021208, "grad_norm": 0.0117116067558527, "learning_rate": 0.0005498992673448008, "loss": 0.0354, "num_input_tokens_seen": 138055424, "step": 63920 }, { "epoch": 10.428221859706362, "grad_norm": 0.16764822602272034, "learning_rate": 0.0005498284423702824, "loss": 0.0158, "num_input_tokens_seen": 138066560, "step": 63925 }, { "epoch": 10.429037520391518, "grad_norm": 0.04869011417031288, "learning_rate": 0.0005497576163859273, "loss": 0.0856, "num_input_tokens_seen": 138076992, "step": 63930 }, { "epoch": 10.429853181076671, "grad_norm": 0.32588332891464233, "learning_rate": 0.0005496867893931711, "loss": 0.0419, "num_input_tokens_seen": 138087296, "step": 63935 }, { "epoch": 10.430668841761827, "grad_norm": 0.012518075294792652, "learning_rate": 0.0005496159613934492, "loss": 0.1043, "num_input_tokens_seen": 138097184, "step": 63940 }, { "epoch": 10.431484502446983, "grad_norm": 0.11210685223340988, "learning_rate": 0.0005495451323881967, "loss": 0.0525, "num_input_tokens_seen": 138109216, "step": 63945 }, { "epoch": 10.432300163132137, "grad_norm": 0.07045716047286987, "learning_rate": 0.0005494743023788493, "loss": 0.0087, "num_input_tokens_seen": 138120032, "step": 63950 }, { "epoch": 10.433115823817293, "grad_norm": 0.006280902773141861, "learning_rate": 0.0005494034713668423, "loss": 0.0157, "num_input_tokens_seen": 138129696, "step": 63955 }, { "epoch": 10.433931484502446, "grad_norm": 0.022228620946407318, "learning_rate": 0.0005493326393536113, "loss": 0.005, "num_input_tokens_seen": 138141984, "step": 63960 }, { "epoch": 10.434747145187602, "grad_norm": 0.41118839383125305, "learning_rate": 0.000549261806340592, "loss": 0.0481, "num_input_tokens_seen": 138152512, "step": 63965 }, { "epoch": 10.435562805872756, "grad_norm": 0.0460282601416111, "learning_rate": 0.0005491909723292196, "loss": 0.0952, "num_input_tokens_seen": 138164160, "step": 63970 }, { "epoch": 10.436378466557912, "grad_norm": 0.007916356436908245, "learning_rate": 0.0005491201373209295, "loss": 0.007, "num_input_tokens_seen": 138175200, "step": 63975 }, { "epoch": 10.437194127243067, "grad_norm": 0.006717904936522245, "learning_rate": 0.0005490493013171578, "loss": 0.0115, "num_input_tokens_seen": 138185568, "step": 63980 }, { "epoch": 10.438009787928221, "grad_norm": 0.03154471516609192, "learning_rate": 0.0005489784643193397, "loss": 0.0226, "num_input_tokens_seen": 138195904, "step": 63985 }, { "epoch": 10.438825448613377, "grad_norm": 0.021439258009195328, "learning_rate": 0.0005489076263289109, "loss": 0.0061, "num_input_tokens_seen": 138206112, "step": 63990 }, { "epoch": 10.439641109298531, "grad_norm": 0.013829846866428852, "learning_rate": 0.000548836787347307, "loss": 0.0246, "num_input_tokens_seen": 138216320, "step": 63995 }, { "epoch": 10.440456769983687, "grad_norm": 0.03632638603448868, "learning_rate": 0.0005487659473759635, "loss": 0.1465, "num_input_tokens_seen": 138226656, "step": 64000 }, { "epoch": 10.441272430668842, "grad_norm": 0.007183171808719635, "learning_rate": 0.0005486951064163164, "loss": 0.1501, "num_input_tokens_seen": 138237664, "step": 64005 }, { "epoch": 10.442088091353996, "grad_norm": 0.00477360375225544, "learning_rate": 0.0005486242644698011, "loss": 0.0094, "num_input_tokens_seen": 138249568, "step": 64010 }, { "epoch": 10.442903752039152, "grad_norm": 0.013287726789712906, "learning_rate": 0.0005485534215378535, "loss": 0.0705, "num_input_tokens_seen": 138260128, "step": 64015 }, { "epoch": 10.443719412724306, "grad_norm": 0.013567056506872177, "learning_rate": 0.0005484825776219092, "loss": 0.059, "num_input_tokens_seen": 138270464, "step": 64020 }, { "epoch": 10.444535073409462, "grad_norm": 0.02812664769589901, "learning_rate": 0.0005484117327234038, "loss": 0.0055, "num_input_tokens_seen": 138280672, "step": 64025 }, { "epoch": 10.445350734094617, "grad_norm": 0.0012703530956059694, "learning_rate": 0.0005483408868437734, "loss": 0.0095, "num_input_tokens_seen": 138290944, "step": 64030 }, { "epoch": 10.446166394779771, "grad_norm": 0.9293310046195984, "learning_rate": 0.0005482700399844536, "loss": 0.0507, "num_input_tokens_seen": 138301728, "step": 64035 }, { "epoch": 10.446982055464927, "grad_norm": 0.056482378393411636, "learning_rate": 0.0005481991921468801, "loss": 0.0048, "num_input_tokens_seen": 138313248, "step": 64040 }, { "epoch": 10.447797716150081, "grad_norm": 0.2966460585594177, "learning_rate": 0.0005481283433324888, "loss": 0.0907, "num_input_tokens_seen": 138324032, "step": 64045 }, { "epoch": 10.448613376835237, "grad_norm": 0.23112505674362183, "learning_rate": 0.0005480574935427157, "loss": 0.0209, "num_input_tokens_seen": 138335424, "step": 64050 }, { "epoch": 10.449429037520392, "grad_norm": 0.3356492221355438, "learning_rate": 0.0005479866427789965, "loss": 0.0301, "num_input_tokens_seen": 138346400, "step": 64055 }, { "epoch": 10.450244698205546, "grad_norm": 0.003506778972223401, "learning_rate": 0.0005479157910427672, "loss": 0.0095, "num_input_tokens_seen": 138357600, "step": 64060 }, { "epoch": 10.451060358890702, "grad_norm": 0.2725575268268585, "learning_rate": 0.0005478449383354634, "loss": 0.114, "num_input_tokens_seen": 138368160, "step": 64065 }, { "epoch": 10.451876019575856, "grad_norm": 0.003526828018948436, "learning_rate": 0.0005477740846585213, "loss": 0.0247, "num_input_tokens_seen": 138379008, "step": 64070 }, { "epoch": 10.452691680261012, "grad_norm": 0.06649752706289291, "learning_rate": 0.0005477032300133768, "loss": 0.0175, "num_input_tokens_seen": 138389568, "step": 64075 }, { "epoch": 10.453507340946166, "grad_norm": 0.25740283727645874, "learning_rate": 0.0005476323744014658, "loss": 0.0609, "num_input_tokens_seen": 138400160, "step": 64080 }, { "epoch": 10.454323001631321, "grad_norm": 0.03396669030189514, "learning_rate": 0.0005475615178242244, "loss": 0.0875, "num_input_tokens_seen": 138410688, "step": 64085 }, { "epoch": 10.455138662316477, "grad_norm": 0.0125178387388587, "learning_rate": 0.0005474906602830884, "loss": 0.039, "num_input_tokens_seen": 138420320, "step": 64090 }, { "epoch": 10.455954323001631, "grad_norm": 0.16476449370384216, "learning_rate": 0.0005474198017794939, "loss": 0.1703, "num_input_tokens_seen": 138431680, "step": 64095 }, { "epoch": 10.456769983686787, "grad_norm": 0.004711966495960951, "learning_rate": 0.000547348942314877, "loss": 0.1373, "num_input_tokens_seen": 138442016, "step": 64100 }, { "epoch": 10.45758564437194, "grad_norm": 0.02316650189459324, "learning_rate": 0.0005472780818906736, "loss": 0.0743, "num_input_tokens_seen": 138452096, "step": 64105 }, { "epoch": 10.458401305057096, "grad_norm": 0.0701381042599678, "learning_rate": 0.00054720722050832, "loss": 0.072, "num_input_tokens_seen": 138460288, "step": 64110 }, { "epoch": 10.459216965742252, "grad_norm": 0.22336918115615845, "learning_rate": 0.0005471363581692523, "loss": 0.0152, "num_input_tokens_seen": 138471904, "step": 64115 }, { "epoch": 10.460032626427406, "grad_norm": 0.003313810098916292, "learning_rate": 0.0005470654948749065, "loss": 0.0656, "num_input_tokens_seen": 138483072, "step": 64120 }, { "epoch": 10.460848287112562, "grad_norm": 0.041474759578704834, "learning_rate": 0.0005469946306267185, "loss": 0.0052, "num_input_tokens_seen": 138494496, "step": 64125 }, { "epoch": 10.461663947797716, "grad_norm": 0.003572209272533655, "learning_rate": 0.0005469237654261249, "loss": 0.0202, "num_input_tokens_seen": 138505856, "step": 64130 }, { "epoch": 10.462479608482871, "grad_norm": 0.1137644425034523, "learning_rate": 0.0005468528992745615, "loss": 0.021, "num_input_tokens_seen": 138517440, "step": 64135 }, { "epoch": 10.463295269168025, "grad_norm": 0.002851669443771243, "learning_rate": 0.0005467820321734647, "loss": 0.0179, "num_input_tokens_seen": 138528320, "step": 64140 }, { "epoch": 10.464110929853181, "grad_norm": 0.01338632870465517, "learning_rate": 0.0005467111641242709, "loss": 0.0815, "num_input_tokens_seen": 138537888, "step": 64145 }, { "epoch": 10.464926590538337, "grad_norm": 0.2335215061903, "learning_rate": 0.000546640295128416, "loss": 0.0825, "num_input_tokens_seen": 138548672, "step": 64150 }, { "epoch": 10.46574225122349, "grad_norm": 0.017317278310656548, "learning_rate": 0.0005465694251873362, "loss": 0.0199, "num_input_tokens_seen": 138560032, "step": 64155 }, { "epoch": 10.466557911908646, "grad_norm": 0.03315971791744232, "learning_rate": 0.000546498554302468, "loss": 0.0263, "num_input_tokens_seen": 138571872, "step": 64160 }, { "epoch": 10.4673735725938, "grad_norm": 0.4206830561161041, "learning_rate": 0.0005464276824752477, "loss": 0.1329, "num_input_tokens_seen": 138583328, "step": 64165 }, { "epoch": 10.468189233278956, "grad_norm": 0.007223771885037422, "learning_rate": 0.0005463568097071115, "loss": 0.0393, "num_input_tokens_seen": 138593664, "step": 64170 }, { "epoch": 10.469004893964112, "grad_norm": 0.01341505441814661, "learning_rate": 0.0005462859359994957, "loss": 0.0588, "num_input_tokens_seen": 138605408, "step": 64175 }, { "epoch": 10.469820554649266, "grad_norm": 0.005490301642566919, "learning_rate": 0.0005462150613538366, "loss": 0.0055, "num_input_tokens_seen": 138615936, "step": 64180 }, { "epoch": 10.470636215334421, "grad_norm": 0.10406633466482162, "learning_rate": 0.0005461441857715708, "loss": 0.0171, "num_input_tokens_seen": 138626528, "step": 64185 }, { "epoch": 10.471451876019575, "grad_norm": 0.004958090838044882, "learning_rate": 0.0005460733092541345, "loss": 0.0126, "num_input_tokens_seen": 138636288, "step": 64190 }, { "epoch": 10.47226753670473, "grad_norm": 0.3021959364414215, "learning_rate": 0.000546002431802964, "loss": 0.0472, "num_input_tokens_seen": 138647904, "step": 64195 }, { "epoch": 10.473083197389887, "grad_norm": 0.002978770760819316, "learning_rate": 0.0005459315534194959, "loss": 0.0803, "num_input_tokens_seen": 138657792, "step": 64200 }, { "epoch": 10.47389885807504, "grad_norm": 0.007034600712358952, "learning_rate": 0.0005458606741051667, "loss": 0.013, "num_input_tokens_seen": 138668672, "step": 64205 }, { "epoch": 10.474714518760196, "grad_norm": 0.14886121451854706, "learning_rate": 0.0005457897938614127, "loss": 0.0536, "num_input_tokens_seen": 138679584, "step": 64210 }, { "epoch": 10.47553017944535, "grad_norm": 0.11041484028100967, "learning_rate": 0.0005457189126896704, "loss": 0.0166, "num_input_tokens_seen": 138691712, "step": 64215 }, { "epoch": 10.476345840130506, "grad_norm": 0.004423327744007111, "learning_rate": 0.0005456480305913765, "loss": 0.0999, "num_input_tokens_seen": 138703648, "step": 64220 }, { "epoch": 10.477161500815662, "grad_norm": 0.018787806853652, "learning_rate": 0.0005455771475679673, "loss": 0.0406, "num_input_tokens_seen": 138713920, "step": 64225 }, { "epoch": 10.477977161500815, "grad_norm": 0.023285958915948868, "learning_rate": 0.0005455062636208793, "loss": 0.005, "num_input_tokens_seen": 138724896, "step": 64230 }, { "epoch": 10.478792822185971, "grad_norm": 0.002442354802042246, "learning_rate": 0.0005454353787515493, "loss": 0.0312, "num_input_tokens_seen": 138735584, "step": 64235 }, { "epoch": 10.479608482871125, "grad_norm": 0.011334599927067757, "learning_rate": 0.0005453644929614136, "loss": 0.0171, "num_input_tokens_seen": 138745664, "step": 64240 }, { "epoch": 10.48042414355628, "grad_norm": 0.017016872763633728, "learning_rate": 0.0005452936062519088, "loss": 0.1233, "num_input_tokens_seen": 138756576, "step": 64245 }, { "epoch": 10.481239804241435, "grad_norm": 0.06044808775186539, "learning_rate": 0.0005452227186244717, "loss": 0.0104, "num_input_tokens_seen": 138766816, "step": 64250 }, { "epoch": 10.48205546492659, "grad_norm": 0.0027733854949474335, "learning_rate": 0.0005451518300805389, "loss": 0.0224, "num_input_tokens_seen": 138777376, "step": 64255 }, { "epoch": 10.482871125611746, "grad_norm": 0.001782201579771936, "learning_rate": 0.0005450809406215469, "loss": 0.0042, "num_input_tokens_seen": 138787648, "step": 64260 }, { "epoch": 10.4836867862969, "grad_norm": 0.012305756099522114, "learning_rate": 0.0005450100502489324, "loss": 0.049, "num_input_tokens_seen": 138798752, "step": 64265 }, { "epoch": 10.484502446982056, "grad_norm": 0.008276435546576977, "learning_rate": 0.0005449391589641321, "loss": 0.0068, "num_input_tokens_seen": 138810016, "step": 64270 }, { "epoch": 10.48531810766721, "grad_norm": 0.07297209650278091, "learning_rate": 0.0005448682667685829, "loss": 0.0123, "num_input_tokens_seen": 138821248, "step": 64275 }, { "epoch": 10.486133768352365, "grad_norm": 0.047419674694538116, "learning_rate": 0.0005447973736637214, "loss": 0.0081, "num_input_tokens_seen": 138832320, "step": 64280 }, { "epoch": 10.486949429037521, "grad_norm": 0.0257173590362072, "learning_rate": 0.0005447264796509841, "loss": 0.0423, "num_input_tokens_seen": 138842464, "step": 64285 }, { "epoch": 10.487765089722675, "grad_norm": 0.004003862384706736, "learning_rate": 0.0005446555847318081, "loss": 0.0081, "num_input_tokens_seen": 138852960, "step": 64290 }, { "epoch": 10.48858075040783, "grad_norm": 0.10330451279878616, "learning_rate": 0.00054458468890763, "loss": 0.0104, "num_input_tokens_seen": 138864384, "step": 64295 }, { "epoch": 10.489396411092985, "grad_norm": 0.056239236146211624, "learning_rate": 0.0005445137921798866, "loss": 0.0616, "num_input_tokens_seen": 138876416, "step": 64300 }, { "epoch": 10.49021207177814, "grad_norm": 0.0013666888698935509, "learning_rate": 0.0005444428945500147, "loss": 0.0684, "num_input_tokens_seen": 138886592, "step": 64305 }, { "epoch": 10.491027732463296, "grad_norm": 0.1663975864648819, "learning_rate": 0.0005443719960194513, "loss": 0.0411, "num_input_tokens_seen": 138896864, "step": 64310 }, { "epoch": 10.49184339314845, "grad_norm": 0.008711322210729122, "learning_rate": 0.0005443010965896327, "loss": 0.0068, "num_input_tokens_seen": 138907680, "step": 64315 }, { "epoch": 10.492659053833606, "grad_norm": 0.01615816168487072, "learning_rate": 0.0005442301962619965, "loss": 0.0203, "num_input_tokens_seen": 138917472, "step": 64320 }, { "epoch": 10.49347471451876, "grad_norm": 0.028369436040520668, "learning_rate": 0.0005441592950379792, "loss": 0.0259, "num_input_tokens_seen": 138927392, "step": 64325 }, { "epoch": 10.494290375203915, "grad_norm": 0.09108876436948776, "learning_rate": 0.0005440883929190179, "loss": 0.0313, "num_input_tokens_seen": 138937792, "step": 64330 }, { "epoch": 10.49510603588907, "grad_norm": 0.2788495421409607, "learning_rate": 0.0005440174899065493, "loss": 0.0666, "num_input_tokens_seen": 138949120, "step": 64335 }, { "epoch": 10.495921696574225, "grad_norm": 0.003757023485377431, "learning_rate": 0.0005439465860020104, "loss": 0.0199, "num_input_tokens_seen": 138960832, "step": 64340 }, { "epoch": 10.49673735725938, "grad_norm": 0.00556677533313632, "learning_rate": 0.0005438756812068382, "loss": 0.0084, "num_input_tokens_seen": 138971648, "step": 64345 }, { "epoch": 10.497553017944535, "grad_norm": 0.07826634496450424, "learning_rate": 0.0005438047755224696, "loss": 0.0203, "num_input_tokens_seen": 138982432, "step": 64350 }, { "epoch": 10.49836867862969, "grad_norm": 0.017965713515877724, "learning_rate": 0.0005437338689503417, "loss": 0.0064, "num_input_tokens_seen": 138992864, "step": 64355 }, { "epoch": 10.499184339314844, "grad_norm": 0.00357259763404727, "learning_rate": 0.0005436629614918915, "loss": 0.0203, "num_input_tokens_seen": 139004768, "step": 64360 }, { "epoch": 10.5, "grad_norm": 0.0022475633304566145, "learning_rate": 0.0005435920531485559, "loss": 0.0118, "num_input_tokens_seen": 139015680, "step": 64365 }, { "epoch": 10.500815660685156, "grad_norm": 0.18087992072105408, "learning_rate": 0.0005435211439217722, "loss": 0.0253, "num_input_tokens_seen": 139025376, "step": 64370 }, { "epoch": 10.50163132137031, "grad_norm": 0.01190586294978857, "learning_rate": 0.0005434502338129773, "loss": 0.0041, "num_input_tokens_seen": 139036288, "step": 64375 }, { "epoch": 10.502446982055465, "grad_norm": 1.1199398040771484, "learning_rate": 0.0005433793228236081, "loss": 0.1947, "num_input_tokens_seen": 139046752, "step": 64380 }, { "epoch": 10.50326264274062, "grad_norm": 0.07040636241436005, "learning_rate": 0.000543308410955102, "loss": 0.0204, "num_input_tokens_seen": 139057184, "step": 64385 }, { "epoch": 10.504078303425775, "grad_norm": 0.05645016208291054, "learning_rate": 0.0005432374982088961, "loss": 0.021, "num_input_tokens_seen": 139067648, "step": 64390 }, { "epoch": 10.50489396411093, "grad_norm": 0.006974777206778526, "learning_rate": 0.0005431665845864274, "loss": 0.0138, "num_input_tokens_seen": 139078528, "step": 64395 }, { "epoch": 10.505709624796085, "grad_norm": 0.0022757535334676504, "learning_rate": 0.0005430956700891331, "loss": 0.0865, "num_input_tokens_seen": 139090624, "step": 64400 }, { "epoch": 10.50652528548124, "grad_norm": 0.4144793152809143, "learning_rate": 0.0005430247547184504, "loss": 0.066, "num_input_tokens_seen": 139101760, "step": 64405 }, { "epoch": 10.507340946166394, "grad_norm": 0.011025538668036461, "learning_rate": 0.0005429538384758162, "loss": 0.0381, "num_input_tokens_seen": 139112672, "step": 64410 }, { "epoch": 10.50815660685155, "grad_norm": 0.07199998944997787, "learning_rate": 0.0005428829213626683, "loss": 0.0099, "num_input_tokens_seen": 139121440, "step": 64415 }, { "epoch": 10.508972267536706, "grad_norm": 0.0066911992616951466, "learning_rate": 0.0005428120033804433, "loss": 0.0852, "num_input_tokens_seen": 139132512, "step": 64420 }, { "epoch": 10.50978792822186, "grad_norm": 0.007305980194360018, "learning_rate": 0.0005427410845305791, "loss": 0.0751, "num_input_tokens_seen": 139143392, "step": 64425 }, { "epoch": 10.510603588907015, "grad_norm": 0.0012512394459918141, "learning_rate": 0.0005426701648145124, "loss": 0.0556, "num_input_tokens_seen": 139154880, "step": 64430 }, { "epoch": 10.51141924959217, "grad_norm": 0.23672683537006378, "learning_rate": 0.0005425992442336805, "loss": 0.1436, "num_input_tokens_seen": 139164480, "step": 64435 }, { "epoch": 10.512234910277325, "grad_norm": 0.008508339524269104, "learning_rate": 0.0005425283227895212, "loss": 0.1699, "num_input_tokens_seen": 139173856, "step": 64440 }, { "epoch": 10.513050570962479, "grad_norm": 0.1878798007965088, "learning_rate": 0.0005424574004834712, "loss": 0.0442, "num_input_tokens_seen": 139186016, "step": 64445 }, { "epoch": 10.513866231647635, "grad_norm": 0.03218397870659828, "learning_rate": 0.0005423864773169683, "loss": 0.1763, "num_input_tokens_seen": 139197088, "step": 64450 }, { "epoch": 10.51468189233279, "grad_norm": 0.015488283708691597, "learning_rate": 0.0005423155532914497, "loss": 0.0437, "num_input_tokens_seen": 139207264, "step": 64455 }, { "epoch": 10.515497553017944, "grad_norm": 0.011527454480528831, "learning_rate": 0.0005422446284083527, "loss": 0.0341, "num_input_tokens_seen": 139219040, "step": 64460 }, { "epoch": 10.5163132137031, "grad_norm": 0.006106112617999315, "learning_rate": 0.0005421737026691147, "loss": 0.0074, "num_input_tokens_seen": 139230784, "step": 64465 }, { "epoch": 10.517128874388254, "grad_norm": 0.00418028375133872, "learning_rate": 0.0005421027760751731, "loss": 0.0401, "num_input_tokens_seen": 139240512, "step": 64470 }, { "epoch": 10.51794453507341, "grad_norm": 0.04402640089392662, "learning_rate": 0.0005420318486279653, "loss": 0.114, "num_input_tokens_seen": 139250304, "step": 64475 }, { "epoch": 10.518760195758565, "grad_norm": 0.42352381348609924, "learning_rate": 0.0005419609203289288, "loss": 0.1085, "num_input_tokens_seen": 139261280, "step": 64480 }, { "epoch": 10.51957585644372, "grad_norm": 0.25291287899017334, "learning_rate": 0.0005418899911795011, "loss": 0.0252, "num_input_tokens_seen": 139272640, "step": 64485 }, { "epoch": 10.520391517128875, "grad_norm": 0.0042486912570893764, "learning_rate": 0.0005418190611811194, "loss": 0.0061, "num_input_tokens_seen": 139283168, "step": 64490 }, { "epoch": 10.521207177814029, "grad_norm": 0.020534677430987358, "learning_rate": 0.0005417481303352216, "loss": 0.2991, "num_input_tokens_seen": 139293920, "step": 64495 }, { "epoch": 10.522022838499185, "grad_norm": 0.018889861181378365, "learning_rate": 0.0005416771986432448, "loss": 0.0539, "num_input_tokens_seen": 139304416, "step": 64500 }, { "epoch": 10.522838499184338, "grad_norm": 0.29440486431121826, "learning_rate": 0.0005416062661066268, "loss": 0.0387, "num_input_tokens_seen": 139316224, "step": 64505 }, { "epoch": 10.523654159869494, "grad_norm": 0.04716013744473457, "learning_rate": 0.000541535332726805, "loss": 0.0242, "num_input_tokens_seen": 139327072, "step": 64510 }, { "epoch": 10.52446982055465, "grad_norm": 0.007669588550925255, "learning_rate": 0.000541464398505217, "loss": 0.0097, "num_input_tokens_seen": 139337504, "step": 64515 }, { "epoch": 10.525285481239804, "grad_norm": 0.2716355621814728, "learning_rate": 0.0005413934634433003, "loss": 0.1008, "num_input_tokens_seen": 139347616, "step": 64520 }, { "epoch": 10.52610114192496, "grad_norm": 0.18859538435935974, "learning_rate": 0.0005413225275424926, "loss": 0.0371, "num_input_tokens_seen": 139359456, "step": 64525 }, { "epoch": 10.526916802610113, "grad_norm": 0.30474743247032166, "learning_rate": 0.0005412515908042314, "loss": 0.0143, "num_input_tokens_seen": 139370752, "step": 64530 }, { "epoch": 10.52773246329527, "grad_norm": 0.030280984938144684, "learning_rate": 0.0005411806532299544, "loss": 0.0078, "num_input_tokens_seen": 139379584, "step": 64535 }, { "epoch": 10.528548123980425, "grad_norm": 0.2833738923072815, "learning_rate": 0.0005411097148210992, "loss": 0.0516, "num_input_tokens_seen": 139389152, "step": 64540 }, { "epoch": 10.529363784665579, "grad_norm": 0.22054560482501984, "learning_rate": 0.0005410387755791036, "loss": 0.0403, "num_input_tokens_seen": 139400416, "step": 64545 }, { "epoch": 10.530179445350734, "grad_norm": 0.5191470980644226, "learning_rate": 0.0005409678355054051, "loss": 0.0801, "num_input_tokens_seen": 139411456, "step": 64550 }, { "epoch": 10.530995106035888, "grad_norm": 0.004127180203795433, "learning_rate": 0.0005408968946014416, "loss": 0.0043, "num_input_tokens_seen": 139422880, "step": 64555 }, { "epoch": 10.531810766721044, "grad_norm": 0.0998779833316803, "learning_rate": 0.0005408259528686503, "loss": 0.2009, "num_input_tokens_seen": 139433632, "step": 64560 }, { "epoch": 10.5326264274062, "grad_norm": 0.03429444134235382, "learning_rate": 0.0005407550103084695, "loss": 0.0099, "num_input_tokens_seen": 139443520, "step": 64565 }, { "epoch": 10.533442088091354, "grad_norm": 0.0019338505808264017, "learning_rate": 0.0005406840669223367, "loss": 0.012, "num_input_tokens_seen": 139455456, "step": 64570 }, { "epoch": 10.53425774877651, "grad_norm": 0.05488257855176926, "learning_rate": 0.0005406131227116896, "loss": 0.0458, "num_input_tokens_seen": 139465600, "step": 64575 }, { "epoch": 10.535073409461663, "grad_norm": 0.11232556402683258, "learning_rate": 0.000540542177677966, "loss": 0.0465, "num_input_tokens_seen": 139475744, "step": 64580 }, { "epoch": 10.535889070146819, "grad_norm": 0.011859245598316193, "learning_rate": 0.0005404712318226038, "loss": 0.0283, "num_input_tokens_seen": 139485280, "step": 64585 }, { "epoch": 10.536704730831975, "grad_norm": 0.022117752581834793, "learning_rate": 0.0005404002851470409, "loss": 0.0266, "num_input_tokens_seen": 139495872, "step": 64590 }, { "epoch": 10.537520391517129, "grad_norm": 0.01723141223192215, "learning_rate": 0.0005403293376527148, "loss": 0.0125, "num_input_tokens_seen": 139506112, "step": 64595 }, { "epoch": 10.538336052202284, "grad_norm": 0.12899601459503174, "learning_rate": 0.0005402583893410636, "loss": 0.0421, "num_input_tokens_seen": 139516672, "step": 64600 }, { "epoch": 10.539151712887438, "grad_norm": 0.1691344976425171, "learning_rate": 0.0005401874402135249, "loss": 0.0381, "num_input_tokens_seen": 139527072, "step": 64605 }, { "epoch": 10.539967373572594, "grad_norm": 0.17849282920360565, "learning_rate": 0.000540116490271537, "loss": 0.0242, "num_input_tokens_seen": 139538656, "step": 64610 }, { "epoch": 10.540783034257748, "grad_norm": 0.010269011370837688, "learning_rate": 0.0005400455395165373, "loss": 0.1574, "num_input_tokens_seen": 139549568, "step": 64615 }, { "epoch": 10.541598694942904, "grad_norm": 0.39542660117149353, "learning_rate": 0.0005399745879499641, "loss": 0.0575, "num_input_tokens_seen": 139560768, "step": 64620 }, { "epoch": 10.54241435562806, "grad_norm": 0.011781658045947552, "learning_rate": 0.0005399036355732552, "loss": 0.0123, "num_input_tokens_seen": 139571232, "step": 64625 }, { "epoch": 10.543230016313213, "grad_norm": 0.018769023939967155, "learning_rate": 0.0005398326823878482, "loss": 0.0835, "num_input_tokens_seen": 139583200, "step": 64630 }, { "epoch": 10.544045676998369, "grad_norm": 0.04893181473016739, "learning_rate": 0.0005397617283951816, "loss": 0.0129, "num_input_tokens_seen": 139592032, "step": 64635 }, { "epoch": 10.544861337683523, "grad_norm": 0.04125187546014786, "learning_rate": 0.000539690773596693, "loss": 0.026, "num_input_tokens_seen": 139603712, "step": 64640 }, { "epoch": 10.545676998368679, "grad_norm": 0.09487692266702652, "learning_rate": 0.0005396198179938208, "loss": 0.0631, "num_input_tokens_seen": 139614400, "step": 64645 }, { "epoch": 10.546492659053834, "grad_norm": 0.4042510986328125, "learning_rate": 0.0005395488615880024, "loss": 0.0617, "num_input_tokens_seen": 139625984, "step": 64650 }, { "epoch": 10.547308319738988, "grad_norm": 0.011217739433050156, "learning_rate": 0.0005394779043806764, "loss": 0.0264, "num_input_tokens_seen": 139635936, "step": 64655 }, { "epoch": 10.548123980424144, "grad_norm": 0.0025864015333354473, "learning_rate": 0.0005394069463732805, "loss": 0.0236, "num_input_tokens_seen": 139646048, "step": 64660 }, { "epoch": 10.548939641109298, "grad_norm": 0.04871753975749016, "learning_rate": 0.0005393359875672527, "loss": 0.0056, "num_input_tokens_seen": 139656512, "step": 64665 }, { "epoch": 10.549755301794454, "grad_norm": 0.009117928333580494, "learning_rate": 0.0005392650279640314, "loss": 0.0618, "num_input_tokens_seen": 139667552, "step": 64670 }, { "epoch": 10.550570962479608, "grad_norm": 0.00621196161955595, "learning_rate": 0.0005391940675650545, "loss": 0.0182, "num_input_tokens_seen": 139679200, "step": 64675 }, { "epoch": 10.551386623164763, "grad_norm": 0.14022231101989746, "learning_rate": 0.00053912310637176, "loss": 0.017, "num_input_tokens_seen": 139691104, "step": 64680 }, { "epoch": 10.552202283849919, "grad_norm": 0.005030880682170391, "learning_rate": 0.0005390521443855861, "loss": 0.0078, "num_input_tokens_seen": 139702176, "step": 64685 }, { "epoch": 10.553017944535073, "grad_norm": 0.0799722671508789, "learning_rate": 0.0005389811816079711, "loss": 0.0161, "num_input_tokens_seen": 139712736, "step": 64690 }, { "epoch": 10.553833605220229, "grad_norm": 0.009314349852502346, "learning_rate": 0.0005389102180403529, "loss": 0.0155, "num_input_tokens_seen": 139723072, "step": 64695 }, { "epoch": 10.554649265905383, "grad_norm": 0.02171134017407894, "learning_rate": 0.0005388392536841697, "loss": 0.0125, "num_input_tokens_seen": 139733376, "step": 64700 }, { "epoch": 10.555464926590538, "grad_norm": 0.006424791179597378, "learning_rate": 0.00053876828854086, "loss": 0.0155, "num_input_tokens_seen": 139745024, "step": 64705 }, { "epoch": 10.556280587275694, "grad_norm": 0.002661141101270914, "learning_rate": 0.0005386973226118615, "loss": 0.0097, "num_input_tokens_seen": 139755168, "step": 64710 }, { "epoch": 10.557096247960848, "grad_norm": 0.010994684882462025, "learning_rate": 0.0005386263558986127, "loss": 0.0817, "num_input_tokens_seen": 139766144, "step": 64715 }, { "epoch": 10.557911908646004, "grad_norm": 0.001512798946350813, "learning_rate": 0.0005385553884025519, "loss": 0.005, "num_input_tokens_seen": 139777248, "step": 64720 }, { "epoch": 10.558727569331158, "grad_norm": 0.022147612646222115, "learning_rate": 0.000538484420125117, "loss": 0.0091, "num_input_tokens_seen": 139788000, "step": 64725 }, { "epoch": 10.559543230016313, "grad_norm": 0.003416349645704031, "learning_rate": 0.0005384134510677468, "loss": 0.0865, "num_input_tokens_seen": 139799328, "step": 64730 }, { "epoch": 10.560358890701469, "grad_norm": 0.31981605291366577, "learning_rate": 0.0005383424812318791, "loss": 0.1284, "num_input_tokens_seen": 139810144, "step": 64735 }, { "epoch": 10.561174551386623, "grad_norm": 0.03253169730305672, "learning_rate": 0.0005382715106189525, "loss": 0.0047, "num_input_tokens_seen": 139820480, "step": 64740 }, { "epoch": 10.561990212071779, "grad_norm": 0.17456290125846863, "learning_rate": 0.0005382005392304051, "loss": 0.1414, "num_input_tokens_seen": 139831040, "step": 64745 }, { "epoch": 10.562805872756933, "grad_norm": 0.06870071589946747, "learning_rate": 0.0005381295670676752, "loss": 0.0198, "num_input_tokens_seen": 139840800, "step": 64750 }, { "epoch": 10.563621533442088, "grad_norm": 0.015359500423073769, "learning_rate": 0.0005380585941322014, "loss": 0.0808, "num_input_tokens_seen": 139851904, "step": 64755 }, { "epoch": 10.564437194127244, "grad_norm": 0.4003918170928955, "learning_rate": 0.000537987620425422, "loss": 0.0464, "num_input_tokens_seen": 139863200, "step": 64760 }, { "epoch": 10.565252854812398, "grad_norm": 0.3785932660102844, "learning_rate": 0.0005379166459487752, "loss": 0.0747, "num_input_tokens_seen": 139873312, "step": 64765 }, { "epoch": 10.566068515497554, "grad_norm": 0.0021906422916799784, "learning_rate": 0.0005378456707036995, "loss": 0.016, "num_input_tokens_seen": 139885440, "step": 64770 }, { "epoch": 10.566884176182707, "grad_norm": 0.007869427092373371, "learning_rate": 0.0005377746946916332, "loss": 0.0628, "num_input_tokens_seen": 139894336, "step": 64775 }, { "epoch": 10.567699836867863, "grad_norm": 0.0180523581802845, "learning_rate": 0.0005377037179140149, "loss": 0.0089, "num_input_tokens_seen": 139906016, "step": 64780 }, { "epoch": 10.568515497553017, "grad_norm": 0.18199679255485535, "learning_rate": 0.0005376327403722828, "loss": 0.0407, "num_input_tokens_seen": 139917792, "step": 64785 }, { "epoch": 10.569331158238173, "grad_norm": 0.01165593322366476, "learning_rate": 0.0005375617620678756, "loss": 0.0179, "num_input_tokens_seen": 139928608, "step": 64790 }, { "epoch": 10.570146818923329, "grad_norm": 0.013312139548361301, "learning_rate": 0.0005374907830022316, "loss": 0.0053, "num_input_tokens_seen": 139940352, "step": 64795 }, { "epoch": 10.570962479608482, "grad_norm": 0.020462390035390854, "learning_rate": 0.0005374198031767892, "loss": 0.1616, "num_input_tokens_seen": 139950656, "step": 64800 }, { "epoch": 10.571778140293638, "grad_norm": 0.09696569293737411, "learning_rate": 0.0005373488225929871, "loss": 0.0076, "num_input_tokens_seen": 139960864, "step": 64805 }, { "epoch": 10.572593800978792, "grad_norm": 0.03129251301288605, "learning_rate": 0.0005372778412522638, "loss": 0.006, "num_input_tokens_seen": 139971968, "step": 64810 }, { "epoch": 10.573409461663948, "grad_norm": 0.1927904337644577, "learning_rate": 0.0005372068591560577, "loss": 0.0291, "num_input_tokens_seen": 139982816, "step": 64815 }, { "epoch": 10.574225122349104, "grad_norm": 0.012073406018316746, "learning_rate": 0.0005371358763058074, "loss": 0.1344, "num_input_tokens_seen": 139991904, "step": 64820 }, { "epoch": 10.575040783034257, "grad_norm": 0.0031769592314958572, "learning_rate": 0.0005370648927029515, "loss": 0.1062, "num_input_tokens_seen": 140002784, "step": 64825 }, { "epoch": 10.575856443719413, "grad_norm": 0.0018186360830441117, "learning_rate": 0.0005369939083489283, "loss": 0.0101, "num_input_tokens_seen": 140014272, "step": 64830 }, { "epoch": 10.576672104404567, "grad_norm": 0.30345040559768677, "learning_rate": 0.0005369229232451769, "loss": 0.0294, "num_input_tokens_seen": 140025376, "step": 64835 }, { "epoch": 10.577487765089723, "grad_norm": 0.00335653405636549, "learning_rate": 0.0005368519373931355, "loss": 0.0725, "num_input_tokens_seen": 140036832, "step": 64840 }, { "epoch": 10.578303425774878, "grad_norm": 0.006152989808470011, "learning_rate": 0.0005367809507942429, "loss": 0.0029, "num_input_tokens_seen": 140046464, "step": 64845 }, { "epoch": 10.579119086460032, "grad_norm": 0.44906291365623474, "learning_rate": 0.0005367099634499375, "loss": 0.1718, "num_input_tokens_seen": 140056992, "step": 64850 }, { "epoch": 10.579934747145188, "grad_norm": 0.3523012101650238, "learning_rate": 0.0005366389753616583, "loss": 0.2722, "num_input_tokens_seen": 140068320, "step": 64855 }, { "epoch": 10.580750407830342, "grad_norm": 0.014367824420332909, "learning_rate": 0.0005365679865308437, "loss": 0.0082, "num_input_tokens_seen": 140079328, "step": 64860 }, { "epoch": 10.581566068515498, "grad_norm": 0.011161359958350658, "learning_rate": 0.0005364969969589325, "loss": 0.0365, "num_input_tokens_seen": 140089056, "step": 64865 }, { "epoch": 10.582381729200652, "grad_norm": 0.0025692936033010483, "learning_rate": 0.0005364260066473634, "loss": 0.0547, "num_input_tokens_seen": 140100160, "step": 64870 }, { "epoch": 10.583197389885807, "grad_norm": 0.3549707233905792, "learning_rate": 0.000536355015597575, "loss": 0.0397, "num_input_tokens_seen": 140112128, "step": 64875 }, { "epoch": 10.584013050570963, "grad_norm": 0.01046574767678976, "learning_rate": 0.0005362840238110061, "loss": 0.0416, "num_input_tokens_seen": 140123456, "step": 64880 }, { "epoch": 10.584828711256117, "grad_norm": 0.02689875289797783, "learning_rate": 0.0005362130312890955, "loss": 0.0106, "num_input_tokens_seen": 140132320, "step": 64885 }, { "epoch": 10.585644371941273, "grad_norm": 0.2877082824707031, "learning_rate": 0.0005361420380332818, "loss": 0.2631, "num_input_tokens_seen": 140142880, "step": 64890 }, { "epoch": 10.586460032626427, "grad_norm": 0.015195044688880444, "learning_rate": 0.0005360710440450037, "loss": 0.0125, "num_input_tokens_seen": 140152896, "step": 64895 }, { "epoch": 10.587275693311582, "grad_norm": 0.162540003657341, "learning_rate": 0.0005360000493257003, "loss": 0.0314, "num_input_tokens_seen": 140162656, "step": 64900 }, { "epoch": 10.588091353996738, "grad_norm": 0.00751123484224081, "learning_rate": 0.0005359290538768102, "loss": 0.0158, "num_input_tokens_seen": 140173568, "step": 64905 }, { "epoch": 10.588907014681892, "grad_norm": 0.18104144930839539, "learning_rate": 0.0005358580576997723, "loss": 0.0381, "num_input_tokens_seen": 140184192, "step": 64910 }, { "epoch": 10.589722675367048, "grad_norm": 0.089718297123909, "learning_rate": 0.0005357870607960255, "loss": 0.0439, "num_input_tokens_seen": 140195648, "step": 64915 }, { "epoch": 10.590538336052202, "grad_norm": 0.02439286932349205, "learning_rate": 0.0005357160631670083, "loss": 0.0382, "num_input_tokens_seen": 140206784, "step": 64920 }, { "epoch": 10.591353996737357, "grad_norm": 0.3429160416126251, "learning_rate": 0.0005356450648141599, "loss": 0.1792, "num_input_tokens_seen": 140218048, "step": 64925 }, { "epoch": 10.592169657422513, "grad_norm": 0.03793445602059364, "learning_rate": 0.0005355740657389189, "loss": 0.017, "num_input_tokens_seen": 140228256, "step": 64930 }, { "epoch": 10.592985318107667, "grad_norm": 0.012508481740951538, "learning_rate": 0.0005355030659427245, "loss": 0.0076, "num_input_tokens_seen": 140238688, "step": 64935 }, { "epoch": 10.593800978792823, "grad_norm": 0.02273573912680149, "learning_rate": 0.0005354320654270153, "loss": 0.0649, "num_input_tokens_seen": 140249600, "step": 64940 }, { "epoch": 10.594616639477977, "grad_norm": 0.326787531375885, "learning_rate": 0.0005353610641932304, "loss": 0.2481, "num_input_tokens_seen": 140260576, "step": 64945 }, { "epoch": 10.595432300163132, "grad_norm": 0.04980117827653885, "learning_rate": 0.0005352900622428086, "loss": 0.0898, "num_input_tokens_seen": 140271776, "step": 64950 }, { "epoch": 10.596247960848288, "grad_norm": 0.01142844371497631, "learning_rate": 0.0005352190595771889, "loss": 0.0473, "num_input_tokens_seen": 140282528, "step": 64955 }, { "epoch": 10.597063621533442, "grad_norm": 0.0047523933462798595, "learning_rate": 0.0005351480561978103, "loss": 0.0194, "num_input_tokens_seen": 140293568, "step": 64960 }, { "epoch": 10.597879282218598, "grad_norm": 0.1998644769191742, "learning_rate": 0.0005350770521061118, "loss": 0.0443, "num_input_tokens_seen": 140305632, "step": 64965 }, { "epoch": 10.598694942903752, "grad_norm": 0.09209080785512924, "learning_rate": 0.0005350060473035324, "loss": 0.0134, "num_input_tokens_seen": 140316832, "step": 64970 }, { "epoch": 10.599510603588907, "grad_norm": 0.012417695485055447, "learning_rate": 0.000534935041791511, "loss": 0.0066, "num_input_tokens_seen": 140328736, "step": 64975 }, { "epoch": 10.600326264274061, "grad_norm": 0.061468616127967834, "learning_rate": 0.0005348640355714866, "loss": 0.0179, "num_input_tokens_seen": 140339456, "step": 64980 }, { "epoch": 10.601141924959217, "grad_norm": 0.009134767577052116, "learning_rate": 0.0005347930286448984, "loss": 0.0054, "num_input_tokens_seen": 140350720, "step": 64985 }, { "epoch": 10.601957585644373, "grad_norm": 0.0015841845888644457, "learning_rate": 0.0005347220210131853, "loss": 0.1847, "num_input_tokens_seen": 140363072, "step": 64990 }, { "epoch": 10.602773246329527, "grad_norm": 0.29716333746910095, "learning_rate": 0.0005346510126777864, "loss": 0.0169, "num_input_tokens_seen": 140374240, "step": 64995 }, { "epoch": 10.603588907014682, "grad_norm": 0.5121808648109436, "learning_rate": 0.0005345800036401407, "loss": 0.0605, "num_input_tokens_seen": 140385184, "step": 65000 }, { "epoch": 10.604404567699836, "grad_norm": 0.17837762832641602, "learning_rate": 0.0005345089939016874, "loss": 0.1591, "num_input_tokens_seen": 140396000, "step": 65005 }, { "epoch": 10.605220228384992, "grad_norm": 0.04627210274338722, "learning_rate": 0.0005344379834638656, "loss": 0.0646, "num_input_tokens_seen": 140406848, "step": 65010 }, { "epoch": 10.606035889070148, "grad_norm": 0.039753254503011703, "learning_rate": 0.0005343669723281144, "loss": 0.0128, "num_input_tokens_seen": 140418208, "step": 65015 }, { "epoch": 10.606851549755302, "grad_norm": 0.26612791419029236, "learning_rate": 0.0005342959604958728, "loss": 0.0285, "num_input_tokens_seen": 140428864, "step": 65020 }, { "epoch": 10.607667210440457, "grad_norm": 0.13962987065315247, "learning_rate": 0.0005342249479685801, "loss": 0.0204, "num_input_tokens_seen": 140440288, "step": 65025 }, { "epoch": 10.608482871125611, "grad_norm": 0.004963894374668598, "learning_rate": 0.0005341539347476754, "loss": 0.0056, "num_input_tokens_seen": 140451648, "step": 65030 }, { "epoch": 10.609298531810767, "grad_norm": 0.24064743518829346, "learning_rate": 0.0005340829208345979, "loss": 0.2026, "num_input_tokens_seen": 140462336, "step": 65035 }, { "epoch": 10.61011419249592, "grad_norm": 0.16874991357326508, "learning_rate": 0.0005340119062307866, "loss": 0.0166, "num_input_tokens_seen": 140473056, "step": 65040 }, { "epoch": 10.610929853181077, "grad_norm": 0.03800236061215401, "learning_rate": 0.0005339408909376812, "loss": 0.0689, "num_input_tokens_seen": 140482656, "step": 65045 }, { "epoch": 10.611745513866232, "grad_norm": 0.11263815313577652, "learning_rate": 0.0005338698749567203, "loss": 0.0895, "num_input_tokens_seen": 140494272, "step": 65050 }, { "epoch": 10.612561174551386, "grad_norm": 0.012205363251268864, "learning_rate": 0.0005337988582893436, "loss": 0.0099, "num_input_tokens_seen": 140504640, "step": 65055 }, { "epoch": 10.613376835236542, "grad_norm": 0.0024235863238573074, "learning_rate": 0.0005337278409369901, "loss": 0.0182, "num_input_tokens_seen": 140515680, "step": 65060 }, { "epoch": 10.614192495921696, "grad_norm": 0.01602749712765217, "learning_rate": 0.0005336568229010991, "loss": 0.0831, "num_input_tokens_seen": 140526752, "step": 65065 }, { "epoch": 10.615008156606851, "grad_norm": 0.020563099533319473, "learning_rate": 0.0005335858041831099, "loss": 0.033, "num_input_tokens_seen": 140536384, "step": 65070 }, { "epoch": 10.615823817292007, "grad_norm": 0.003615399356931448, "learning_rate": 0.0005335147847844618, "loss": 0.076, "num_input_tokens_seen": 140545440, "step": 65075 }, { "epoch": 10.616639477977161, "grad_norm": 0.0032959782984107733, "learning_rate": 0.000533443764706594, "loss": 0.0034, "num_input_tokens_seen": 140555936, "step": 65080 }, { "epoch": 10.617455138662317, "grad_norm": 0.013945156708359718, "learning_rate": 0.0005333727439509459, "loss": 0.0347, "num_input_tokens_seen": 140566944, "step": 65085 }, { "epoch": 10.61827079934747, "grad_norm": 0.01374234538525343, "learning_rate": 0.0005333017225189569, "loss": 0.0238, "num_input_tokens_seen": 140577568, "step": 65090 }, { "epoch": 10.619086460032626, "grad_norm": 0.007713802624493837, "learning_rate": 0.0005332307004120662, "loss": 0.1414, "num_input_tokens_seen": 140586528, "step": 65095 }, { "epoch": 10.619902120717782, "grad_norm": 0.0017633598763495684, "learning_rate": 0.0005331596776317133, "loss": 0.0061, "num_input_tokens_seen": 140597952, "step": 65100 }, { "epoch": 10.620717781402936, "grad_norm": 0.013678031042218208, "learning_rate": 0.0005330886541793372, "loss": 0.0765, "num_input_tokens_seen": 140608384, "step": 65105 }, { "epoch": 10.621533442088092, "grad_norm": 0.013079334050416946, "learning_rate": 0.0005330176300563778, "loss": 0.0771, "num_input_tokens_seen": 140620000, "step": 65110 }, { "epoch": 10.622349102773246, "grad_norm": 0.0030080180149525404, "learning_rate": 0.0005329466052642741, "loss": 0.0067, "num_input_tokens_seen": 140630496, "step": 65115 }, { "epoch": 10.623164763458401, "grad_norm": 0.24842797219753265, "learning_rate": 0.0005328755798044658, "loss": 0.0944, "num_input_tokens_seen": 140641984, "step": 65120 }, { "epoch": 10.623980424143557, "grad_norm": 0.22249571979045868, "learning_rate": 0.000532804553678392, "loss": 0.0287, "num_input_tokens_seen": 140652928, "step": 65125 }, { "epoch": 10.624796084828711, "grad_norm": 0.004154822789132595, "learning_rate": 0.0005327335268874924, "loss": 0.0064, "num_input_tokens_seen": 140664448, "step": 65130 }, { "epoch": 10.625611745513867, "grad_norm": 0.0061147562228143215, "learning_rate": 0.0005326624994332063, "loss": 0.0985, "num_input_tokens_seen": 140674112, "step": 65135 }, { "epoch": 10.62642740619902, "grad_norm": 0.2392461895942688, "learning_rate": 0.0005325914713169733, "loss": 0.0699, "num_input_tokens_seen": 140684352, "step": 65140 }, { "epoch": 10.627243066884176, "grad_norm": 0.015056677162647247, "learning_rate": 0.0005325204425402327, "loss": 0.146, "num_input_tokens_seen": 140696256, "step": 65145 }, { "epoch": 10.62805872756933, "grad_norm": 0.35335031151771545, "learning_rate": 0.0005324494131044241, "loss": 0.0534, "num_input_tokens_seen": 140707040, "step": 65150 }, { "epoch": 10.628874388254486, "grad_norm": 0.015306448563933372, "learning_rate": 0.000532378383010987, "loss": 0.0072, "num_input_tokens_seen": 140718080, "step": 65155 }, { "epoch": 10.629690048939642, "grad_norm": 0.00962145160883665, "learning_rate": 0.0005323073522613608, "loss": 0.052, "num_input_tokens_seen": 140728128, "step": 65160 }, { "epoch": 10.630505709624796, "grad_norm": 0.018188240006566048, "learning_rate": 0.0005322363208569851, "loss": 0.0331, "num_input_tokens_seen": 140738816, "step": 65165 }, { "epoch": 10.631321370309951, "grad_norm": 0.00877345446497202, "learning_rate": 0.0005321652887992996, "loss": 0.0446, "num_input_tokens_seen": 140749216, "step": 65170 }, { "epoch": 10.632137030995105, "grad_norm": 0.5343616008758545, "learning_rate": 0.0005320942560897436, "loss": 0.0549, "num_input_tokens_seen": 140760160, "step": 65175 }, { "epoch": 10.632952691680261, "grad_norm": 0.0021271174773573875, "learning_rate": 0.0005320232227297569, "loss": 0.1599, "num_input_tokens_seen": 140771200, "step": 65180 }, { "epoch": 10.633768352365417, "grad_norm": 0.02698766253888607, "learning_rate": 0.0005319521887207789, "loss": 0.0241, "num_input_tokens_seen": 140782560, "step": 65185 }, { "epoch": 10.63458401305057, "grad_norm": 0.005686949472874403, "learning_rate": 0.0005318811540642493, "loss": 0.0326, "num_input_tokens_seen": 140794656, "step": 65190 }, { "epoch": 10.635399673735726, "grad_norm": 0.663912832736969, "learning_rate": 0.0005318101187616077, "loss": 0.2698, "num_input_tokens_seen": 140805088, "step": 65195 }, { "epoch": 10.63621533442088, "grad_norm": 0.3262537717819214, "learning_rate": 0.0005317390828142937, "loss": 0.0567, "num_input_tokens_seen": 140814976, "step": 65200 }, { "epoch": 10.637030995106036, "grad_norm": 0.006468473467975855, "learning_rate": 0.0005316680462237468, "loss": 0.0426, "num_input_tokens_seen": 140826208, "step": 65205 }, { "epoch": 10.63784665579119, "grad_norm": 0.006259999703615904, "learning_rate": 0.0005315970089914068, "loss": 0.0134, "num_input_tokens_seen": 140836160, "step": 65210 }, { "epoch": 10.638662316476346, "grad_norm": 0.05188210308551788, "learning_rate": 0.0005315259711187134, "loss": 0.0205, "num_input_tokens_seen": 140847520, "step": 65215 }, { "epoch": 10.639477977161501, "grad_norm": 0.017969651147723198, "learning_rate": 0.0005314549326071061, "loss": 0.0343, "num_input_tokens_seen": 140858688, "step": 65220 }, { "epoch": 10.640293637846655, "grad_norm": 0.1467316597700119, "learning_rate": 0.0005313838934580248, "loss": 0.0695, "num_input_tokens_seen": 140869024, "step": 65225 }, { "epoch": 10.641109298531811, "grad_norm": 0.006599868647754192, "learning_rate": 0.0005313128536729091, "loss": 0.0143, "num_input_tokens_seen": 140880288, "step": 65230 }, { "epoch": 10.641924959216965, "grad_norm": 0.33210447430610657, "learning_rate": 0.0005312418132531985, "loss": 0.0654, "num_input_tokens_seen": 140890496, "step": 65235 }, { "epoch": 10.64274061990212, "grad_norm": 0.0033072608057409525, "learning_rate": 0.0005311707722003332, "loss": 0.0421, "num_input_tokens_seen": 140901728, "step": 65240 }, { "epoch": 10.643556280587276, "grad_norm": 0.03693348169326782, "learning_rate": 0.0005310997305157524, "loss": 0.1053, "num_input_tokens_seen": 140912544, "step": 65245 }, { "epoch": 10.64437194127243, "grad_norm": 0.08483865857124329, "learning_rate": 0.0005310286882008962, "loss": 0.0405, "num_input_tokens_seen": 140922944, "step": 65250 }, { "epoch": 10.645187601957586, "grad_norm": 0.14889277517795563, "learning_rate": 0.0005309576452572043, "loss": 0.0287, "num_input_tokens_seen": 140933216, "step": 65255 }, { "epoch": 10.64600326264274, "grad_norm": 0.006382533814758062, "learning_rate": 0.0005308866016861166, "loss": 0.0158, "num_input_tokens_seen": 140944160, "step": 65260 }, { "epoch": 10.646818923327896, "grad_norm": 0.010671558789908886, "learning_rate": 0.0005308155574890725, "loss": 0.0585, "num_input_tokens_seen": 140956480, "step": 65265 }, { "epoch": 10.647634584013051, "grad_norm": 0.031797025352716446, "learning_rate": 0.000530744512667512, "loss": 0.1394, "num_input_tokens_seen": 140967552, "step": 65270 }, { "epoch": 10.648450244698205, "grad_norm": 0.004792911000549793, "learning_rate": 0.0005306734672228751, "loss": 0.0817, "num_input_tokens_seen": 140978560, "step": 65275 }, { "epoch": 10.649265905383361, "grad_norm": 0.7232376933097839, "learning_rate": 0.0005306024211566014, "loss": 0.0937, "num_input_tokens_seen": 140989728, "step": 65280 }, { "epoch": 10.650081566068515, "grad_norm": 0.11897439509630203, "learning_rate": 0.0005305313744701309, "loss": 0.0567, "num_input_tokens_seen": 141000928, "step": 65285 }, { "epoch": 10.65089722675367, "grad_norm": 0.11357161402702332, "learning_rate": 0.0005304603271649033, "loss": 0.0245, "num_input_tokens_seen": 141011936, "step": 65290 }, { "epoch": 10.651712887438826, "grad_norm": 0.005883332807570696, "learning_rate": 0.0005303892792423585, "loss": 0.0075, "num_input_tokens_seen": 141023456, "step": 65295 }, { "epoch": 10.65252854812398, "grad_norm": 0.2534210979938507, "learning_rate": 0.0005303182307039364, "loss": 0.0201, "num_input_tokens_seen": 141034464, "step": 65300 }, { "epoch": 10.653344208809136, "grad_norm": 0.004826645366847515, "learning_rate": 0.0005302471815510771, "loss": 0.0268, "num_input_tokens_seen": 141045248, "step": 65305 }, { "epoch": 10.65415986949429, "grad_norm": 0.010334780439734459, "learning_rate": 0.00053017613178522, "loss": 0.0064, "num_input_tokens_seen": 141056736, "step": 65310 }, { "epoch": 10.654975530179446, "grad_norm": 0.1845068484544754, "learning_rate": 0.0005301050814078055, "loss": 0.0627, "num_input_tokens_seen": 141066784, "step": 65315 }, { "epoch": 10.655791190864601, "grad_norm": 0.39638128876686096, "learning_rate": 0.0005300340304202734, "loss": 0.0741, "num_input_tokens_seen": 141077344, "step": 65320 }, { "epoch": 10.656606851549755, "grad_norm": 0.04189550504088402, "learning_rate": 0.0005299629788240634, "loss": 0.0603, "num_input_tokens_seen": 141088320, "step": 65325 }, { "epoch": 10.65742251223491, "grad_norm": 0.004338693805038929, "learning_rate": 0.0005298919266206157, "loss": 0.0533, "num_input_tokens_seen": 141099776, "step": 65330 }, { "epoch": 10.658238172920065, "grad_norm": 0.490159809589386, "learning_rate": 0.0005298208738113701, "loss": 0.088, "num_input_tokens_seen": 141110592, "step": 65335 }, { "epoch": 10.65905383360522, "grad_norm": 0.020773818716406822, "learning_rate": 0.0005297498203977668, "loss": 0.058, "num_input_tokens_seen": 141120928, "step": 65340 }, { "epoch": 10.659869494290374, "grad_norm": 0.5976955890655518, "learning_rate": 0.0005296787663812456, "loss": 0.0867, "num_input_tokens_seen": 141132128, "step": 65345 }, { "epoch": 10.66068515497553, "grad_norm": 0.004236851818859577, "learning_rate": 0.0005296077117632464, "loss": 0.0134, "num_input_tokens_seen": 141142944, "step": 65350 }, { "epoch": 10.661500815660686, "grad_norm": 0.0024764598347246647, "learning_rate": 0.0005295366565452094, "loss": 0.0215, "num_input_tokens_seen": 141153984, "step": 65355 }, { "epoch": 10.66231647634584, "grad_norm": 0.28249356150627136, "learning_rate": 0.0005294656007285748, "loss": 0.107, "num_input_tokens_seen": 141163968, "step": 65360 }, { "epoch": 10.663132137030995, "grad_norm": 0.017333753407001495, "learning_rate": 0.0005293945443147821, "loss": 0.0242, "num_input_tokens_seen": 141175040, "step": 65365 }, { "epoch": 10.66394779771615, "grad_norm": 0.2655371427536011, "learning_rate": 0.000529323487305272, "loss": 0.1751, "num_input_tokens_seen": 141184928, "step": 65370 }, { "epoch": 10.664763458401305, "grad_norm": 0.08187251538038254, "learning_rate": 0.0005292524297014842, "loss": 0.0071, "num_input_tokens_seen": 141196768, "step": 65375 }, { "epoch": 10.66557911908646, "grad_norm": 0.016217637807130814, "learning_rate": 0.0005291813715048584, "loss": 0.057, "num_input_tokens_seen": 141207712, "step": 65380 }, { "epoch": 10.666394779771615, "grad_norm": 0.016979215666651726, "learning_rate": 0.0005291103127168355, "loss": 0.0145, "num_input_tokens_seen": 141218080, "step": 65385 }, { "epoch": 10.66721044045677, "grad_norm": 0.006149845663458109, "learning_rate": 0.000529039253338855, "loss": 0.0718, "num_input_tokens_seen": 141229024, "step": 65390 }, { "epoch": 10.668026101141924, "grad_norm": 0.1777970790863037, "learning_rate": 0.0005289681933723573, "loss": 0.0487, "num_input_tokens_seen": 141239072, "step": 65395 }, { "epoch": 10.66884176182708, "grad_norm": 0.38320648670196533, "learning_rate": 0.0005288971328187824, "loss": 0.1186, "num_input_tokens_seen": 141250400, "step": 65400 }, { "epoch": 10.669657422512234, "grad_norm": 0.16135111451148987, "learning_rate": 0.0005288260716795704, "loss": 0.016, "num_input_tokens_seen": 141260192, "step": 65405 }, { "epoch": 10.67047308319739, "grad_norm": 0.00716315396130085, "learning_rate": 0.0005287550099561614, "loss": 0.0078, "num_input_tokens_seen": 141270720, "step": 65410 }, { "epoch": 10.671288743882545, "grad_norm": 0.2340540587902069, "learning_rate": 0.0005286839476499959, "loss": 0.0308, "num_input_tokens_seen": 141282432, "step": 65415 }, { "epoch": 10.6721044045677, "grad_norm": 0.003738554660230875, "learning_rate": 0.0005286128847625136, "loss": 0.0139, "num_input_tokens_seen": 141294240, "step": 65420 }, { "epoch": 10.672920065252855, "grad_norm": 0.2376752495765686, "learning_rate": 0.0005285418212951549, "loss": 0.0881, "num_input_tokens_seen": 141305152, "step": 65425 }, { "epoch": 10.673735725938009, "grad_norm": 0.0017827788833528757, "learning_rate": 0.0005284707572493601, "loss": 0.0143, "num_input_tokens_seen": 141314944, "step": 65430 }, { "epoch": 10.674551386623165, "grad_norm": 0.025138380005955696, "learning_rate": 0.0005283996926265692, "loss": 0.1245, "num_input_tokens_seen": 141326368, "step": 65435 }, { "epoch": 10.67536704730832, "grad_norm": 0.009806608781218529, "learning_rate": 0.0005283286274282226, "loss": 0.0123, "num_input_tokens_seen": 141337760, "step": 65440 }, { "epoch": 10.676182707993474, "grad_norm": 0.05824309587478638, "learning_rate": 0.0005282575616557603, "loss": 0.0242, "num_input_tokens_seen": 141348768, "step": 65445 }, { "epoch": 10.67699836867863, "grad_norm": 0.06231406703591347, "learning_rate": 0.0005281864953106226, "loss": 0.1298, "num_input_tokens_seen": 141360544, "step": 65450 }, { "epoch": 10.677814029363784, "grad_norm": 0.012798627838492393, "learning_rate": 0.0005281154283942501, "loss": 0.0735, "num_input_tokens_seen": 141370944, "step": 65455 }, { "epoch": 10.67862969004894, "grad_norm": 0.02479397878050804, "learning_rate": 0.0005280443609080826, "loss": 0.0367, "num_input_tokens_seen": 141379584, "step": 65460 }, { "epoch": 10.679445350734095, "grad_norm": 0.009278696030378342, "learning_rate": 0.0005279732928535606, "loss": 0.0162, "num_input_tokens_seen": 141390880, "step": 65465 }, { "epoch": 10.68026101141925, "grad_norm": 0.011147456243634224, "learning_rate": 0.0005279022242321242, "loss": 0.0035, "num_input_tokens_seen": 141401312, "step": 65470 }, { "epoch": 10.681076672104405, "grad_norm": 0.003964760806411505, "learning_rate": 0.000527831155045214, "loss": 0.0609, "num_input_tokens_seen": 141411328, "step": 65475 }, { "epoch": 10.681892332789559, "grad_norm": 0.016584768891334534, "learning_rate": 0.00052776008529427, "loss": 0.0091, "num_input_tokens_seen": 141422240, "step": 65480 }, { "epoch": 10.682707993474715, "grad_norm": 0.04356918856501579, "learning_rate": 0.0005276890149807326, "loss": 0.0253, "num_input_tokens_seen": 141433152, "step": 65485 }, { "epoch": 10.68352365415987, "grad_norm": 0.010738197714090347, "learning_rate": 0.0005276179441060423, "loss": 0.0407, "num_input_tokens_seen": 141444992, "step": 65490 }, { "epoch": 10.684339314845024, "grad_norm": 0.010608477517962456, "learning_rate": 0.0005275468726716393, "loss": 0.0188, "num_input_tokens_seen": 141456832, "step": 65495 }, { "epoch": 10.68515497553018, "grad_norm": 0.6359379887580872, "learning_rate": 0.000527475800678964, "loss": 0.0501, "num_input_tokens_seen": 141469280, "step": 65500 }, { "epoch": 10.685970636215334, "grad_norm": 0.0019201745744794607, "learning_rate": 0.0005274047281294569, "loss": 0.0088, "num_input_tokens_seen": 141479520, "step": 65505 }, { "epoch": 10.68678629690049, "grad_norm": 0.1839599311351776, "learning_rate": 0.000527333655024558, "loss": 0.0197, "num_input_tokens_seen": 141490272, "step": 65510 }, { "epoch": 10.687601957585644, "grad_norm": 0.10226281732320786, "learning_rate": 0.0005272625813657079, "loss": 0.0121, "num_input_tokens_seen": 141501280, "step": 65515 }, { "epoch": 10.6884176182708, "grad_norm": 0.001877550152130425, "learning_rate": 0.000527191507154347, "loss": 0.0256, "num_input_tokens_seen": 141512512, "step": 65520 }, { "epoch": 10.689233278955955, "grad_norm": 0.2550641894340515, "learning_rate": 0.0005271204323919158, "loss": 0.0788, "num_input_tokens_seen": 141522912, "step": 65525 }, { "epoch": 10.690048939641109, "grad_norm": 0.09536845982074738, "learning_rate": 0.0005270493570798546, "loss": 0.0136, "num_input_tokens_seen": 141533952, "step": 65530 }, { "epoch": 10.690864600326265, "grad_norm": 0.141608327627182, "learning_rate": 0.000526978281219604, "loss": 0.0671, "num_input_tokens_seen": 141544768, "step": 65535 }, { "epoch": 10.691680261011419, "grad_norm": 0.050538040697574615, "learning_rate": 0.0005269072048126041, "loss": 0.0737, "num_input_tokens_seen": 141556416, "step": 65540 }, { "epoch": 10.692495921696574, "grad_norm": 0.03494250029325485, "learning_rate": 0.0005268361278602957, "loss": 0.0673, "num_input_tokens_seen": 141568032, "step": 65545 }, { "epoch": 10.69331158238173, "grad_norm": 0.014967870898544788, "learning_rate": 0.0005267650503641191, "loss": 0.0549, "num_input_tokens_seen": 141580224, "step": 65550 }, { "epoch": 10.694127243066884, "grad_norm": 0.010322188027203083, "learning_rate": 0.0005266939723255148, "loss": 0.1173, "num_input_tokens_seen": 141591456, "step": 65555 }, { "epoch": 10.69494290375204, "grad_norm": 0.021448874846100807, "learning_rate": 0.0005266228937459233, "loss": 0.0126, "num_input_tokens_seen": 141602368, "step": 65560 }, { "epoch": 10.695758564437194, "grad_norm": 0.00392883038148284, "learning_rate": 0.0005265518146267851, "loss": 0.0375, "num_input_tokens_seen": 141613696, "step": 65565 }, { "epoch": 10.69657422512235, "grad_norm": 0.6401793360710144, "learning_rate": 0.0005264807349695406, "loss": 0.0524, "num_input_tokens_seen": 141624768, "step": 65570 }, { "epoch": 10.697389885807503, "grad_norm": 0.516755223274231, "learning_rate": 0.0005264096547756305, "loss": 0.0913, "num_input_tokens_seen": 141635040, "step": 65575 }, { "epoch": 10.698205546492659, "grad_norm": 0.03410768136382103, "learning_rate": 0.0005263385740464951, "loss": 0.0249, "num_input_tokens_seen": 141644992, "step": 65580 }, { "epoch": 10.699021207177815, "grad_norm": 0.13844673335552216, "learning_rate": 0.0005262674927835752, "loss": 0.1242, "num_input_tokens_seen": 141655424, "step": 65585 }, { "epoch": 10.699836867862969, "grad_norm": 0.0018074375111609697, "learning_rate": 0.0005261964109883111, "loss": 0.0093, "num_input_tokens_seen": 141664224, "step": 65590 }, { "epoch": 10.700652528548124, "grad_norm": 0.19906216859817505, "learning_rate": 0.0005261253286621437, "loss": 0.1212, "num_input_tokens_seen": 141676512, "step": 65595 }, { "epoch": 10.701468189233278, "grad_norm": 0.13830533623695374, "learning_rate": 0.0005260542458065132, "loss": 0.0527, "num_input_tokens_seen": 141687040, "step": 65600 }, { "epoch": 10.702283849918434, "grad_norm": 0.5063537359237671, "learning_rate": 0.0005259831624228605, "loss": 0.0797, "num_input_tokens_seen": 141698208, "step": 65605 }, { "epoch": 10.70309951060359, "grad_norm": 0.06567066162824631, "learning_rate": 0.000525912078512626, "loss": 0.0198, "num_input_tokens_seen": 141710944, "step": 65610 }, { "epoch": 10.703915171288743, "grad_norm": 0.19044040143489838, "learning_rate": 0.0005258409940772504, "loss": 0.0304, "num_input_tokens_seen": 141722272, "step": 65615 }, { "epoch": 10.7047308319739, "grad_norm": 0.06141744181513786, "learning_rate": 0.0005257699091181742, "loss": 0.178, "num_input_tokens_seen": 141733664, "step": 65620 }, { "epoch": 10.705546492659053, "grad_norm": 0.010067290626466274, "learning_rate": 0.0005256988236368382, "loss": 0.0183, "num_input_tokens_seen": 141744160, "step": 65625 }, { "epoch": 10.706362153344209, "grad_norm": 0.014722629450261593, "learning_rate": 0.0005256277376346829, "loss": 0.0323, "num_input_tokens_seen": 141754400, "step": 65630 }, { "epoch": 10.707177814029365, "grad_norm": 0.012095391750335693, "learning_rate": 0.0005255566511131489, "loss": 0.0283, "num_input_tokens_seen": 141765504, "step": 65635 }, { "epoch": 10.707993474714518, "grad_norm": 0.1298554241657257, "learning_rate": 0.000525485564073677, "loss": 0.0264, "num_input_tokens_seen": 141777664, "step": 65640 }, { "epoch": 10.708809135399674, "grad_norm": 0.005714466795325279, "learning_rate": 0.0005254144765177078, "loss": 0.0067, "num_input_tokens_seen": 141788800, "step": 65645 }, { "epoch": 10.709624796084828, "grad_norm": 0.0032331603579223156, "learning_rate": 0.0005253433884466821, "loss": 0.1435, "num_input_tokens_seen": 141799936, "step": 65650 }, { "epoch": 10.710440456769984, "grad_norm": 0.028762778267264366, "learning_rate": 0.0005252722998620403, "loss": 0.0974, "num_input_tokens_seen": 141809952, "step": 65655 }, { "epoch": 10.71125611745514, "grad_norm": 0.012786184437572956, "learning_rate": 0.0005252012107652234, "loss": 0.1218, "num_input_tokens_seen": 141821056, "step": 65660 }, { "epoch": 10.712071778140293, "grad_norm": 0.005127850454300642, "learning_rate": 0.0005251301211576718, "loss": 0.0301, "num_input_tokens_seen": 141832416, "step": 65665 }, { "epoch": 10.71288743882545, "grad_norm": 0.016213873401284218, "learning_rate": 0.0005250590310408266, "loss": 0.0137, "num_input_tokens_seen": 141842528, "step": 65670 }, { "epoch": 10.713703099510603, "grad_norm": 0.07902921736240387, "learning_rate": 0.0005249879404161284, "loss": 0.1831, "num_input_tokens_seen": 141853824, "step": 65675 }, { "epoch": 10.714518760195759, "grad_norm": 0.016699183732271194, "learning_rate": 0.0005249168492850178, "loss": 0.0312, "num_input_tokens_seen": 141864960, "step": 65680 }, { "epoch": 10.715334420880914, "grad_norm": 0.0019363955361768603, "learning_rate": 0.0005248457576489356, "loss": 0.0186, "num_input_tokens_seen": 141874784, "step": 65685 }, { "epoch": 10.716150081566068, "grad_norm": 0.09606332331895828, "learning_rate": 0.0005247746655093228, "loss": 0.1051, "num_input_tokens_seen": 141884640, "step": 65690 }, { "epoch": 10.716965742251224, "grad_norm": 0.015151958912611008, "learning_rate": 0.0005247035728676196, "loss": 0.0694, "num_input_tokens_seen": 141895136, "step": 65695 }, { "epoch": 10.717781402936378, "grad_norm": 0.004619085229933262, "learning_rate": 0.0005246324797252674, "loss": 0.0162, "num_input_tokens_seen": 141905440, "step": 65700 }, { "epoch": 10.718597063621534, "grad_norm": 0.2725040316581726, "learning_rate": 0.0005245613860837068, "loss": 0.0238, "num_input_tokens_seen": 141916160, "step": 65705 }, { "epoch": 10.719412724306688, "grad_norm": 0.00907763559371233, "learning_rate": 0.0005244902919443785, "loss": 0.0594, "num_input_tokens_seen": 141927360, "step": 65710 }, { "epoch": 10.720228384991843, "grad_norm": 0.5919573903083801, "learning_rate": 0.0005244191973087233, "loss": 0.1188, "num_input_tokens_seen": 141937248, "step": 65715 }, { "epoch": 10.721044045676999, "grad_norm": 0.3372504413127899, "learning_rate": 0.0005243481021781821, "loss": 0.0679, "num_input_tokens_seen": 141948992, "step": 65720 }, { "epoch": 10.721859706362153, "grad_norm": 0.2855179011821747, "learning_rate": 0.0005242770065541958, "loss": 0.041, "num_input_tokens_seen": 141958880, "step": 65725 }, { "epoch": 10.722675367047309, "grad_norm": 0.12741783261299133, "learning_rate": 0.0005242059104382052, "loss": 0.0217, "num_input_tokens_seen": 141969152, "step": 65730 }, { "epoch": 10.723491027732463, "grad_norm": 0.00721243629232049, "learning_rate": 0.000524134813831651, "loss": 0.0292, "num_input_tokens_seen": 141980416, "step": 65735 }, { "epoch": 10.724306688417618, "grad_norm": 0.04829755425453186, "learning_rate": 0.0005240637167359743, "loss": 0.0642, "num_input_tokens_seen": 141991072, "step": 65740 }, { "epoch": 10.725122349102774, "grad_norm": 0.0076208519749343395, "learning_rate": 0.0005239926191526157, "loss": 0.0349, "num_input_tokens_seen": 142003168, "step": 65745 }, { "epoch": 10.725938009787928, "grad_norm": 0.17454379796981812, "learning_rate": 0.0005239215210830164, "loss": 0.0365, "num_input_tokens_seen": 142014304, "step": 65750 }, { "epoch": 10.726753670473084, "grad_norm": 0.01813514530658722, "learning_rate": 0.000523850422528617, "loss": 0.0595, "num_input_tokens_seen": 142024480, "step": 65755 }, { "epoch": 10.727569331158238, "grad_norm": 0.37926414608955383, "learning_rate": 0.0005237793234908586, "loss": 0.031, "num_input_tokens_seen": 142035552, "step": 65760 }, { "epoch": 10.728384991843393, "grad_norm": 0.0012836528476327658, "learning_rate": 0.000523708223971182, "loss": 0.0101, "num_input_tokens_seen": 142047456, "step": 65765 }, { "epoch": 10.729200652528547, "grad_norm": 0.004975530784577131, "learning_rate": 0.0005236371239710283, "loss": 0.0055, "num_input_tokens_seen": 142058976, "step": 65770 }, { "epoch": 10.730016313213703, "grad_norm": 0.006139600649476051, "learning_rate": 0.0005235660234918381, "loss": 0.0386, "num_input_tokens_seen": 142069920, "step": 65775 }, { "epoch": 10.730831973898859, "grad_norm": 0.02576206438243389, "learning_rate": 0.0005234949225350526, "loss": 0.0445, "num_input_tokens_seen": 142081152, "step": 65780 }, { "epoch": 10.731647634584013, "grad_norm": 0.005682915449142456, "learning_rate": 0.0005234238211021127, "loss": 0.0903, "num_input_tokens_seen": 142091232, "step": 65785 }, { "epoch": 10.732463295269168, "grad_norm": 0.015125678852200508, "learning_rate": 0.0005233527191944593, "loss": 0.0652, "num_input_tokens_seen": 142100736, "step": 65790 }, { "epoch": 10.733278955954322, "grad_norm": 0.011067863553762436, "learning_rate": 0.0005232816168135336, "loss": 0.0116, "num_input_tokens_seen": 142111232, "step": 65795 }, { "epoch": 10.734094616639478, "grad_norm": 0.29029378294944763, "learning_rate": 0.0005232105139607763, "loss": 0.2617, "num_input_tokens_seen": 142122688, "step": 65800 }, { "epoch": 10.734910277324634, "grad_norm": 0.016473524272441864, "learning_rate": 0.0005231394106376283, "loss": 0.0213, "num_input_tokens_seen": 142133248, "step": 65805 }, { "epoch": 10.735725938009788, "grad_norm": 1.6077208518981934, "learning_rate": 0.000523068306845531, "loss": 0.1057, "num_input_tokens_seen": 142143936, "step": 65810 }, { "epoch": 10.736541598694943, "grad_norm": 0.05260138213634491, "learning_rate": 0.0005229972025859252, "loss": 0.016, "num_input_tokens_seen": 142153952, "step": 65815 }, { "epoch": 10.737357259380097, "grad_norm": 0.013729028403759003, "learning_rate": 0.0005229260978602519, "loss": 0.0038, "num_input_tokens_seen": 142163712, "step": 65820 }, { "epoch": 10.738172920065253, "grad_norm": 0.37588047981262207, "learning_rate": 0.0005228549926699521, "loss": 0.1415, "num_input_tokens_seen": 142173568, "step": 65825 }, { "epoch": 10.738988580750409, "grad_norm": 0.010452077724039555, "learning_rate": 0.0005227838870164669, "loss": 0.3089, "num_input_tokens_seen": 142183040, "step": 65830 }, { "epoch": 10.739804241435563, "grad_norm": 0.00847290363162756, "learning_rate": 0.0005227127809012372, "loss": 0.0174, "num_input_tokens_seen": 142194752, "step": 65835 }, { "epoch": 10.740619902120718, "grad_norm": 0.004005743190646172, "learning_rate": 0.0005226416743257043, "loss": 0.104, "num_input_tokens_seen": 142206144, "step": 65840 }, { "epoch": 10.741435562805872, "grad_norm": 0.010764031670987606, "learning_rate": 0.0005225705672913092, "loss": 0.0163, "num_input_tokens_seen": 142215872, "step": 65845 }, { "epoch": 10.742251223491028, "grad_norm": 0.0068029677495360374, "learning_rate": 0.0005224994597994929, "loss": 0.0413, "num_input_tokens_seen": 142225184, "step": 65850 }, { "epoch": 10.743066884176184, "grad_norm": 0.027975937351584435, "learning_rate": 0.0005224283518516965, "loss": 0.0766, "num_input_tokens_seen": 142235328, "step": 65855 }, { "epoch": 10.743882544861338, "grad_norm": 0.003287712810561061, "learning_rate": 0.000522357243449361, "loss": 0.0151, "num_input_tokens_seen": 142246688, "step": 65860 }, { "epoch": 10.744698205546493, "grad_norm": 0.014056977815926075, "learning_rate": 0.0005222861345939278, "loss": 0.0311, "num_input_tokens_seen": 142258592, "step": 65865 }, { "epoch": 10.745513866231647, "grad_norm": 0.24229301512241364, "learning_rate": 0.0005222150252868375, "loss": 0.0317, "num_input_tokens_seen": 142270016, "step": 65870 }, { "epoch": 10.746329526916803, "grad_norm": 0.005472760181874037, "learning_rate": 0.0005221439155295318, "loss": 0.0125, "num_input_tokens_seen": 142280224, "step": 65875 }, { "epoch": 10.747145187601957, "grad_norm": 0.20398905873298645, "learning_rate": 0.0005220728053234514, "loss": 0.0342, "num_input_tokens_seen": 142291648, "step": 65880 }, { "epoch": 10.747960848287113, "grad_norm": 0.1125456914305687, "learning_rate": 0.0005220016946700378, "loss": 0.0427, "num_input_tokens_seen": 142303392, "step": 65885 }, { "epoch": 10.748776508972268, "grad_norm": 0.004370346665382385, "learning_rate": 0.0005219305835707318, "loss": 0.1624, "num_input_tokens_seen": 142313504, "step": 65890 }, { "epoch": 10.749592169657422, "grad_norm": 0.019533276557922363, "learning_rate": 0.0005218594720269748, "loss": 0.1332, "num_input_tokens_seen": 142323904, "step": 65895 }, { "epoch": 10.750407830342578, "grad_norm": 0.04836461693048477, "learning_rate": 0.0005217883600402076, "loss": 0.011, "num_input_tokens_seen": 142334880, "step": 65900 }, { "epoch": 10.751223491027732, "grad_norm": 0.05884992331266403, "learning_rate": 0.0005217172476118719, "loss": 0.0278, "num_input_tokens_seen": 142344960, "step": 65905 }, { "epoch": 10.752039151712887, "grad_norm": 0.05737880989909172, "learning_rate": 0.0005216461347434084, "loss": 0.0813, "num_input_tokens_seen": 142355808, "step": 65910 }, { "epoch": 10.752854812398043, "grad_norm": 0.003403907176107168, "learning_rate": 0.0005215750214362588, "loss": 0.0151, "num_input_tokens_seen": 142368032, "step": 65915 }, { "epoch": 10.753670473083197, "grad_norm": 0.05276336520910263, "learning_rate": 0.0005215039076918638, "loss": 0.0087, "num_input_tokens_seen": 142378784, "step": 65920 }, { "epoch": 10.754486133768353, "grad_norm": 0.21015602350234985, "learning_rate": 0.0005214327935116651, "loss": 0.0215, "num_input_tokens_seen": 142389600, "step": 65925 }, { "epoch": 10.755301794453507, "grad_norm": 0.19275906682014465, "learning_rate": 0.0005213616788971034, "loss": 0.0239, "num_input_tokens_seen": 142400704, "step": 65930 }, { "epoch": 10.756117455138662, "grad_norm": 0.04935337230563164, "learning_rate": 0.0005212905638496203, "loss": 0.1233, "num_input_tokens_seen": 142411904, "step": 65935 }, { "epoch": 10.756933115823816, "grad_norm": 0.03261714428663254, "learning_rate": 0.0005212194483706569, "loss": 0.0411, "num_input_tokens_seen": 142423872, "step": 65940 }, { "epoch": 10.757748776508972, "grad_norm": 0.004834748338907957, "learning_rate": 0.0005211483324616544, "loss": 0.0058, "num_input_tokens_seen": 142435648, "step": 65945 }, { "epoch": 10.758564437194128, "grad_norm": 0.013836579397320747, "learning_rate": 0.0005210772161240541, "loss": 0.0076, "num_input_tokens_seen": 142447936, "step": 65950 }, { "epoch": 10.759380097879282, "grad_norm": 0.057208843529224396, "learning_rate": 0.0005210060993592973, "loss": 0.0416, "num_input_tokens_seen": 142459136, "step": 65955 }, { "epoch": 10.760195758564437, "grad_norm": 0.003955775871872902, "learning_rate": 0.0005209349821688254, "loss": 0.0474, "num_input_tokens_seen": 142468928, "step": 65960 }, { "epoch": 10.761011419249591, "grad_norm": 0.00470461742952466, "learning_rate": 0.0005208638645540795, "loss": 0.0353, "num_input_tokens_seen": 142478656, "step": 65965 }, { "epoch": 10.761827079934747, "grad_norm": 0.005068403668701649, "learning_rate": 0.0005207927465165007, "loss": 0.0613, "num_input_tokens_seen": 142489184, "step": 65970 }, { "epoch": 10.762642740619903, "grad_norm": 0.24675904214382172, "learning_rate": 0.0005207216280575306, "loss": 0.0938, "num_input_tokens_seen": 142499584, "step": 65975 }, { "epoch": 10.763458401305057, "grad_norm": 0.12622298300266266, "learning_rate": 0.0005206505091786103, "loss": 0.0198, "num_input_tokens_seen": 142509440, "step": 65980 }, { "epoch": 10.764274061990212, "grad_norm": 0.001572229783050716, "learning_rate": 0.0005205793898811814, "loss": 0.0059, "num_input_tokens_seen": 142520512, "step": 65985 }, { "epoch": 10.765089722675366, "grad_norm": 0.04246864467859268, "learning_rate": 0.0005205082701666851, "loss": 0.0388, "num_input_tokens_seen": 142531872, "step": 65990 }, { "epoch": 10.765905383360522, "grad_norm": 0.013875995762646198, "learning_rate": 0.0005204371500365627, "loss": 0.0517, "num_input_tokens_seen": 142542784, "step": 65995 }, { "epoch": 10.766721044045678, "grad_norm": 0.10224605351686478, "learning_rate": 0.0005203660294922554, "loss": 0.1327, "num_input_tokens_seen": 142553408, "step": 66000 }, { "epoch": 10.767536704730832, "grad_norm": 0.10035200417041779, "learning_rate": 0.0005202949085352048, "loss": 0.0202, "num_input_tokens_seen": 142564352, "step": 66005 }, { "epoch": 10.768352365415987, "grad_norm": 0.13176901638507843, "learning_rate": 0.000520223787166852, "loss": 0.1211, "num_input_tokens_seen": 142576704, "step": 66010 }, { "epoch": 10.769168026101141, "grad_norm": 0.004604107700288296, "learning_rate": 0.0005201526653886385, "loss": 0.0651, "num_input_tokens_seen": 142587392, "step": 66015 }, { "epoch": 10.769983686786297, "grad_norm": 0.0311175137758255, "learning_rate": 0.0005200815432020058, "loss": 0.1254, "num_input_tokens_seen": 142597696, "step": 66020 }, { "epoch": 10.770799347471453, "grad_norm": 0.027582794427871704, "learning_rate": 0.0005200104206083951, "loss": 0.0548, "num_input_tokens_seen": 142609152, "step": 66025 }, { "epoch": 10.771615008156607, "grad_norm": 0.011016631498932838, "learning_rate": 0.0005199392976092479, "loss": 0.0127, "num_input_tokens_seen": 142620608, "step": 66030 }, { "epoch": 10.772430668841762, "grad_norm": 0.011967485770583153, "learning_rate": 0.0005198681742060055, "loss": 0.0142, "num_input_tokens_seen": 142630944, "step": 66035 }, { "epoch": 10.773246329526916, "grad_norm": 0.013100696727633476, "learning_rate": 0.0005197970504001091, "loss": 0.0048, "num_input_tokens_seen": 142642112, "step": 66040 }, { "epoch": 10.774061990212072, "grad_norm": 0.0620444230735302, "learning_rate": 0.0005197259261930007, "loss": 0.0705, "num_input_tokens_seen": 142652064, "step": 66045 }, { "epoch": 10.774877650897226, "grad_norm": 0.5616212487220764, "learning_rate": 0.0005196548015861212, "loss": 0.0456, "num_input_tokens_seen": 142663328, "step": 66050 }, { "epoch": 10.775693311582382, "grad_norm": 0.0031712185591459274, "learning_rate": 0.0005195836765809123, "loss": 0.0436, "num_input_tokens_seen": 142674304, "step": 66055 }, { "epoch": 10.776508972267537, "grad_norm": 0.0015294282929971814, "learning_rate": 0.0005195125511788153, "loss": 0.1117, "num_input_tokens_seen": 142684736, "step": 66060 }, { "epoch": 10.777324632952691, "grad_norm": 0.004349547438323498, "learning_rate": 0.0005194414253812718, "loss": 0.0388, "num_input_tokens_seen": 142694880, "step": 66065 }, { "epoch": 10.778140293637847, "grad_norm": 0.29317227005958557, "learning_rate": 0.000519370299189723, "loss": 0.2097, "num_input_tokens_seen": 142705280, "step": 66070 }, { "epoch": 10.778955954323001, "grad_norm": 0.3925904333591461, "learning_rate": 0.0005192991726056107, "loss": 0.1295, "num_input_tokens_seen": 142716928, "step": 66075 }, { "epoch": 10.779771615008157, "grad_norm": 0.020632924512028694, "learning_rate": 0.0005192280456303759, "loss": 0.0299, "num_input_tokens_seen": 142727072, "step": 66080 }, { "epoch": 10.780587275693312, "grad_norm": 0.001718319021165371, "learning_rate": 0.0005191569182654606, "loss": 0.1368, "num_input_tokens_seen": 142738560, "step": 66085 }, { "epoch": 10.781402936378466, "grad_norm": 0.016666101291775703, "learning_rate": 0.000519085790512306, "loss": 0.0515, "num_input_tokens_seen": 142749696, "step": 66090 }, { "epoch": 10.782218597063622, "grad_norm": 0.005761300679296255, "learning_rate": 0.0005190146623723536, "loss": 0.0083, "num_input_tokens_seen": 142761504, "step": 66095 }, { "epoch": 10.783034257748776, "grad_norm": 0.020155632868409157, "learning_rate": 0.000518943533847045, "loss": 0.1114, "num_input_tokens_seen": 142772416, "step": 66100 }, { "epoch": 10.783849918433932, "grad_norm": 0.006656975019723177, "learning_rate": 0.0005188724049378216, "loss": 0.013, "num_input_tokens_seen": 142783392, "step": 66105 }, { "epoch": 10.784665579119086, "grad_norm": 0.005192107055336237, "learning_rate": 0.0005188012756461251, "loss": 0.0154, "num_input_tokens_seen": 142793952, "step": 66110 }, { "epoch": 10.785481239804241, "grad_norm": 0.024940945208072662, "learning_rate": 0.0005187301459733967, "loss": 0.0309, "num_input_tokens_seen": 142805856, "step": 66115 }, { "epoch": 10.786296900489397, "grad_norm": 0.00655796192586422, "learning_rate": 0.0005186590159210783, "loss": 0.0289, "num_input_tokens_seen": 142818112, "step": 66120 }, { "epoch": 10.78711256117455, "grad_norm": 0.24119259417057037, "learning_rate": 0.0005185878854906111, "loss": 0.0967, "num_input_tokens_seen": 142829632, "step": 66125 }, { "epoch": 10.787928221859707, "grad_norm": 0.13890810310840607, "learning_rate": 0.0005185167546834368, "loss": 0.0153, "num_input_tokens_seen": 142840192, "step": 66130 }, { "epoch": 10.78874388254486, "grad_norm": 0.03538502752780914, "learning_rate": 0.0005184456235009972, "loss": 0.0156, "num_input_tokens_seen": 142851264, "step": 66135 }, { "epoch": 10.789559543230016, "grad_norm": 0.02117316424846649, "learning_rate": 0.0005183744919447335, "loss": 0.0379, "num_input_tokens_seen": 142862496, "step": 66140 }, { "epoch": 10.790375203915172, "grad_norm": 0.006447410210967064, "learning_rate": 0.0005183033600160875, "loss": 0.0769, "num_input_tokens_seen": 142872896, "step": 66145 }, { "epoch": 10.791190864600326, "grad_norm": 0.025039151310920715, "learning_rate": 0.0005182322277165005, "loss": 0.022, "num_input_tokens_seen": 142883936, "step": 66150 }, { "epoch": 10.792006525285482, "grad_norm": 0.0019283173605799675, "learning_rate": 0.0005181610950474143, "loss": 0.0595, "num_input_tokens_seen": 142894816, "step": 66155 }, { "epoch": 10.792822185970635, "grad_norm": 0.5425769090652466, "learning_rate": 0.0005180899620102707, "loss": 0.0537, "num_input_tokens_seen": 142906752, "step": 66160 }, { "epoch": 10.793637846655791, "grad_norm": 0.007401785347610712, "learning_rate": 0.000518018828606511, "loss": 0.0908, "num_input_tokens_seen": 142918464, "step": 66165 }, { "epoch": 10.794453507340947, "grad_norm": 0.14382609724998474, "learning_rate": 0.0005179476948375767, "loss": 0.0308, "num_input_tokens_seen": 142930304, "step": 66170 }, { "epoch": 10.7952691680261, "grad_norm": 0.026092106476426125, "learning_rate": 0.0005178765607049098, "loss": 0.1134, "num_input_tokens_seen": 142940320, "step": 66175 }, { "epoch": 10.796084828711257, "grad_norm": 0.49475693702697754, "learning_rate": 0.0005178054262099516, "loss": 0.038, "num_input_tokens_seen": 142950848, "step": 66180 }, { "epoch": 10.79690048939641, "grad_norm": 0.3677700161933899, "learning_rate": 0.000517734291354144, "loss": 0.044, "num_input_tokens_seen": 142962080, "step": 66185 }, { "epoch": 10.797716150081566, "grad_norm": 0.15535861253738403, "learning_rate": 0.0005176631561389283, "loss": 0.0314, "num_input_tokens_seen": 142971648, "step": 66190 }, { "epoch": 10.798531810766722, "grad_norm": 0.015271559357643127, "learning_rate": 0.0005175920205657465, "loss": 0.0056, "num_input_tokens_seen": 142982912, "step": 66195 }, { "epoch": 10.799347471451876, "grad_norm": 0.4133912920951843, "learning_rate": 0.0005175208846360399, "loss": 0.0579, "num_input_tokens_seen": 142992672, "step": 66200 }, { "epoch": 10.800163132137031, "grad_norm": 0.0460764579474926, "learning_rate": 0.0005174497483512506, "loss": 0.0649, "num_input_tokens_seen": 143003104, "step": 66205 }, { "epoch": 10.800978792822185, "grad_norm": 0.004594626370817423, "learning_rate": 0.0005173786117128198, "loss": 0.0756, "num_input_tokens_seen": 143013216, "step": 66210 }, { "epoch": 10.801794453507341, "grad_norm": 0.043709345161914825, "learning_rate": 0.0005173074747221895, "loss": 0.028, "num_input_tokens_seen": 143024192, "step": 66215 }, { "epoch": 10.802610114192497, "grad_norm": 0.3211905062198639, "learning_rate": 0.0005172363373808013, "loss": 0.0268, "num_input_tokens_seen": 143035136, "step": 66220 }, { "epoch": 10.80342577487765, "grad_norm": 0.010561930947005749, "learning_rate": 0.0005171651996900967, "loss": 0.0181, "num_input_tokens_seen": 143045504, "step": 66225 }, { "epoch": 10.804241435562806, "grad_norm": 0.011289691552519798, "learning_rate": 0.0005170940616515175, "loss": 0.0166, "num_input_tokens_seen": 143057024, "step": 66230 }, { "epoch": 10.80505709624796, "grad_norm": 0.010316290892660618, "learning_rate": 0.0005170229232665056, "loss": 0.0544, "num_input_tokens_seen": 143068608, "step": 66235 }, { "epoch": 10.805872756933116, "grad_norm": 0.12447977811098099, "learning_rate": 0.0005169517845365025, "loss": 0.114, "num_input_tokens_seen": 143078976, "step": 66240 }, { "epoch": 10.80668841761827, "grad_norm": 0.05771623179316521, "learning_rate": 0.0005168806454629501, "loss": 0.0135, "num_input_tokens_seen": 143089504, "step": 66245 }, { "epoch": 10.807504078303426, "grad_norm": 0.6013209223747253, "learning_rate": 0.0005168095060472899, "loss": 0.0947, "num_input_tokens_seen": 143099392, "step": 66250 }, { "epoch": 10.808319738988581, "grad_norm": 0.017578154802322388, "learning_rate": 0.0005167383662909638, "loss": 0.0805, "num_input_tokens_seen": 143109856, "step": 66255 }, { "epoch": 10.809135399673735, "grad_norm": 0.012048912234604359, "learning_rate": 0.0005166672261954134, "loss": 0.0691, "num_input_tokens_seen": 143119776, "step": 66260 }, { "epoch": 10.809951060358891, "grad_norm": 0.0018194675212725997, "learning_rate": 0.0005165960857620806, "loss": 0.0093, "num_input_tokens_seen": 143130816, "step": 66265 }, { "epoch": 10.810766721044045, "grad_norm": 0.13204284012317657, "learning_rate": 0.000516524944992407, "loss": 0.0189, "num_input_tokens_seen": 143141408, "step": 66270 }, { "epoch": 10.8115823817292, "grad_norm": 0.02083822339773178, "learning_rate": 0.0005164538038878345, "loss": 0.0296, "num_input_tokens_seen": 143153920, "step": 66275 }, { "epoch": 10.812398042414356, "grad_norm": 0.010216380469501019, "learning_rate": 0.0005163826624498047, "loss": 0.1024, "num_input_tokens_seen": 143164256, "step": 66280 }, { "epoch": 10.81321370309951, "grad_norm": 0.2752525806427002, "learning_rate": 0.0005163115206797596, "loss": 0.1481, "num_input_tokens_seen": 143175072, "step": 66285 }, { "epoch": 10.814029363784666, "grad_norm": 0.14177221059799194, "learning_rate": 0.0005162403785791408, "loss": 0.0325, "num_input_tokens_seen": 143185408, "step": 66290 }, { "epoch": 10.81484502446982, "grad_norm": 0.0835612341761589, "learning_rate": 0.0005161692361493899, "loss": 0.0445, "num_input_tokens_seen": 143196000, "step": 66295 }, { "epoch": 10.815660685154976, "grad_norm": 0.005078664980828762, "learning_rate": 0.0005160980933919491, "loss": 0.0074, "num_input_tokens_seen": 143206336, "step": 66300 }, { "epoch": 10.81647634584013, "grad_norm": 0.01076800748705864, "learning_rate": 0.00051602695030826, "loss": 0.037, "num_input_tokens_seen": 143217952, "step": 66305 }, { "epoch": 10.817292006525285, "grad_norm": 0.35145077109336853, "learning_rate": 0.0005159558068997644, "loss": 0.116, "num_input_tokens_seen": 143229504, "step": 66310 }, { "epoch": 10.818107667210441, "grad_norm": 0.012480063363909721, "learning_rate": 0.0005158846631679041, "loss": 0.0731, "num_input_tokens_seen": 143239136, "step": 66315 }, { "epoch": 10.818923327895595, "grad_norm": 0.0028558603953570127, "learning_rate": 0.0005158135191141211, "loss": 0.062, "num_input_tokens_seen": 143248512, "step": 66320 }, { "epoch": 10.81973898858075, "grad_norm": 0.0026985383592545986, "learning_rate": 0.000515742374739857, "loss": 0.0593, "num_input_tokens_seen": 143259776, "step": 66325 }, { "epoch": 10.820554649265905, "grad_norm": 0.01275864988565445, "learning_rate": 0.0005156712300465537, "loss": 0.039, "num_input_tokens_seen": 143269440, "step": 66330 }, { "epoch": 10.82137030995106, "grad_norm": 0.02906421571969986, "learning_rate": 0.000515600085035653, "loss": 0.0167, "num_input_tokens_seen": 143280384, "step": 66335 }, { "epoch": 10.822185970636216, "grad_norm": 0.020170027390122414, "learning_rate": 0.0005155289397085968, "loss": 0.0326, "num_input_tokens_seen": 143291040, "step": 66340 }, { "epoch": 10.82300163132137, "grad_norm": 0.03715604171156883, "learning_rate": 0.0005154577940668269, "loss": 0.0715, "num_input_tokens_seen": 143303296, "step": 66345 }, { "epoch": 10.823817292006526, "grad_norm": 0.014832494780421257, "learning_rate": 0.0005153866481117852, "loss": 0.0322, "num_input_tokens_seen": 143315072, "step": 66350 }, { "epoch": 10.82463295269168, "grad_norm": 0.004788695368915796, "learning_rate": 0.0005153155018449137, "loss": 0.0079, "num_input_tokens_seen": 143326688, "step": 66355 }, { "epoch": 10.825448613376835, "grad_norm": 0.021626006811857224, "learning_rate": 0.000515244355267654, "loss": 0.0536, "num_input_tokens_seen": 143336800, "step": 66360 }, { "epoch": 10.826264274061991, "grad_norm": 0.09210459142923355, "learning_rate": 0.0005151732083814481, "loss": 0.0128, "num_input_tokens_seen": 143347232, "step": 66365 }, { "epoch": 10.827079934747145, "grad_norm": 0.007327007595449686, "learning_rate": 0.000515102061187738, "loss": 0.0546, "num_input_tokens_seen": 143359104, "step": 66370 }, { "epoch": 10.8278955954323, "grad_norm": 0.07148618251085281, "learning_rate": 0.0005150309136879654, "loss": 0.0249, "num_input_tokens_seen": 143370304, "step": 66375 }, { "epoch": 10.828711256117455, "grad_norm": 0.04153743386268616, "learning_rate": 0.0005149597658835722, "loss": 0.0253, "num_input_tokens_seen": 143381152, "step": 66380 }, { "epoch": 10.82952691680261, "grad_norm": 0.023836899548768997, "learning_rate": 0.0005148886177760005, "loss": 0.0174, "num_input_tokens_seen": 143392672, "step": 66385 }, { "epoch": 10.830342577487766, "grad_norm": 0.06939146667718887, "learning_rate": 0.000514817469366692, "loss": 0.0085, "num_input_tokens_seen": 143403264, "step": 66390 }, { "epoch": 10.83115823817292, "grad_norm": 0.0013916671741753817, "learning_rate": 0.0005147463206570886, "loss": 0.0123, "num_input_tokens_seen": 143414944, "step": 66395 }, { "epoch": 10.831973898858076, "grad_norm": 0.009549994952976704, "learning_rate": 0.0005146751716486324, "loss": 0.0229, "num_input_tokens_seen": 143425216, "step": 66400 }, { "epoch": 10.83278955954323, "grad_norm": 0.054410431534051895, "learning_rate": 0.0005146040223427652, "loss": 0.0585, "num_input_tokens_seen": 143437120, "step": 66405 }, { "epoch": 10.833605220228385, "grad_norm": 0.00718072010204196, "learning_rate": 0.0005145328727409291, "loss": 0.0341, "num_input_tokens_seen": 143448128, "step": 66410 }, { "epoch": 10.83442088091354, "grad_norm": 0.186012402176857, "learning_rate": 0.0005144617228445657, "loss": 0.019, "num_input_tokens_seen": 143458976, "step": 66415 }, { "epoch": 10.835236541598695, "grad_norm": 0.005144505761563778, "learning_rate": 0.0005143905726551172, "loss": 0.1658, "num_input_tokens_seen": 143469376, "step": 66420 }, { "epoch": 10.83605220228385, "grad_norm": 0.01144189853221178, "learning_rate": 0.0005143194221740255, "loss": 0.0033, "num_input_tokens_seen": 143479872, "step": 66425 }, { "epoch": 10.836867862969005, "grad_norm": 0.003008177038282156, "learning_rate": 0.0005142482714027326, "loss": 0.0458, "num_input_tokens_seen": 143490976, "step": 66430 }, { "epoch": 10.83768352365416, "grad_norm": 0.020077014341950417, "learning_rate": 0.0005141771203426803, "loss": 0.0113, "num_input_tokens_seen": 143502496, "step": 66435 }, { "epoch": 10.838499184339314, "grad_norm": 0.26483267545700073, "learning_rate": 0.0005141059689953107, "loss": 0.0711, "num_input_tokens_seen": 143513184, "step": 66440 }, { "epoch": 10.83931484502447, "grad_norm": 0.22102683782577515, "learning_rate": 0.0005140348173620657, "loss": 0.0152, "num_input_tokens_seen": 143523232, "step": 66445 }, { "epoch": 10.840130505709626, "grad_norm": 0.007105762138962746, "learning_rate": 0.0005139636654443874, "loss": 0.032, "num_input_tokens_seen": 143534208, "step": 66450 }, { "epoch": 10.84094616639478, "grad_norm": 0.03749940171837807, "learning_rate": 0.0005138925132437178, "loss": 0.0057, "num_input_tokens_seen": 143545376, "step": 66455 }, { "epoch": 10.841761827079935, "grad_norm": 0.00626655388623476, "learning_rate": 0.0005138213607614985, "loss": 0.0121, "num_input_tokens_seen": 143555968, "step": 66460 }, { "epoch": 10.84257748776509, "grad_norm": 0.004296998027712107, "learning_rate": 0.000513750207999172, "loss": 0.1055, "num_input_tokens_seen": 143566560, "step": 66465 }, { "epoch": 10.843393148450245, "grad_norm": 0.004289441742002964, "learning_rate": 0.0005136790549581801, "loss": 0.064, "num_input_tokens_seen": 143577056, "step": 66470 }, { "epoch": 10.844208809135399, "grad_norm": 0.005175084341317415, "learning_rate": 0.0005136079016399647, "loss": 0.0078, "num_input_tokens_seen": 143588672, "step": 66475 }, { "epoch": 10.845024469820554, "grad_norm": 0.28688469529151917, "learning_rate": 0.000513536748045968, "loss": 0.0346, "num_input_tokens_seen": 143600320, "step": 66480 }, { "epoch": 10.84584013050571, "grad_norm": 0.12599433958530426, "learning_rate": 0.000513465594177632, "loss": 0.1222, "num_input_tokens_seen": 143612608, "step": 66485 }, { "epoch": 10.846655791190864, "grad_norm": 0.31342703104019165, "learning_rate": 0.0005133944400363986, "loss": 0.2291, "num_input_tokens_seen": 143623200, "step": 66490 }, { "epoch": 10.84747145187602, "grad_norm": 0.10795027762651443, "learning_rate": 0.0005133232856237098, "loss": 0.014, "num_input_tokens_seen": 143634560, "step": 66495 }, { "epoch": 10.848287112561174, "grad_norm": 0.3790777027606964, "learning_rate": 0.0005132521309410078, "loss": 0.0711, "num_input_tokens_seen": 143644288, "step": 66500 }, { "epoch": 10.84910277324633, "grad_norm": 0.0046887993812561035, "learning_rate": 0.0005131809759897345, "loss": 0.005, "num_input_tokens_seen": 143654848, "step": 66505 }, { "epoch": 10.849918433931485, "grad_norm": 0.008742697536945343, "learning_rate": 0.000513109820771332, "loss": 0.0074, "num_input_tokens_seen": 143664768, "step": 66510 }, { "epoch": 10.850734094616639, "grad_norm": 0.021899035200476646, "learning_rate": 0.0005130386652872423, "loss": 0.0286, "num_input_tokens_seen": 143675680, "step": 66515 }, { "epoch": 10.851549755301795, "grad_norm": 0.33755260705947876, "learning_rate": 0.0005129675095389076, "loss": 0.0831, "num_input_tokens_seen": 143685280, "step": 66520 }, { "epoch": 10.852365415986949, "grad_norm": 0.002667512744665146, "learning_rate": 0.0005128963535277699, "loss": 0.0268, "num_input_tokens_seen": 143695648, "step": 66525 }, { "epoch": 10.853181076672104, "grad_norm": 0.026985513046383858, "learning_rate": 0.0005128251972552711, "loss": 0.0065, "num_input_tokens_seen": 143706208, "step": 66530 }, { "epoch": 10.85399673735726, "grad_norm": 0.3457069396972656, "learning_rate": 0.0005127540407228535, "loss": 0.2036, "num_input_tokens_seen": 143717408, "step": 66535 }, { "epoch": 10.854812398042414, "grad_norm": 0.003863809397444129, "learning_rate": 0.0005126828839319591, "loss": 0.0185, "num_input_tokens_seen": 143729632, "step": 66540 }, { "epoch": 10.85562805872757, "grad_norm": 0.008951705880463123, "learning_rate": 0.0005126117268840299, "loss": 0.029, "num_input_tokens_seen": 143739968, "step": 66545 }, { "epoch": 10.856443719412724, "grad_norm": 0.0031280622351914644, "learning_rate": 0.000512540569580508, "loss": 0.0555, "num_input_tokens_seen": 143751712, "step": 66550 }, { "epoch": 10.85725938009788, "grad_norm": 0.1753411591053009, "learning_rate": 0.0005124694120228357, "loss": 0.0195, "num_input_tokens_seen": 143762720, "step": 66555 }, { "epoch": 10.858075040783035, "grad_norm": 0.5248215198516846, "learning_rate": 0.0005123982542124549, "loss": 0.2099, "num_input_tokens_seen": 143774304, "step": 66560 }, { "epoch": 10.858890701468189, "grad_norm": 0.012592996470630169, "learning_rate": 0.0005123270961508077, "loss": 0.0103, "num_input_tokens_seen": 143783712, "step": 66565 }, { "epoch": 10.859706362153345, "grad_norm": 0.029887670651078224, "learning_rate": 0.0005122559378393363, "loss": 0.1224, "num_input_tokens_seen": 143794400, "step": 66570 }, { "epoch": 10.860522022838499, "grad_norm": 0.0571708083152771, "learning_rate": 0.0005121847792794828, "loss": 0.0685, "num_input_tokens_seen": 143805728, "step": 66575 }, { "epoch": 10.861337683523654, "grad_norm": 0.006564087700098753, "learning_rate": 0.0005121136204726893, "loss": 0.0293, "num_input_tokens_seen": 143815424, "step": 66580 }, { "epoch": 10.86215334420881, "grad_norm": 0.005001547280699015, "learning_rate": 0.0005120424614203978, "loss": 0.0233, "num_input_tokens_seen": 143826560, "step": 66585 }, { "epoch": 10.862969004893964, "grad_norm": 0.04530732333660126, "learning_rate": 0.0005119713021240507, "loss": 0.0681, "num_input_tokens_seen": 143838016, "step": 66590 }, { "epoch": 10.86378466557912, "grad_norm": 0.002425889251753688, "learning_rate": 0.0005119001425850899, "loss": 0.1426, "num_input_tokens_seen": 143850240, "step": 66595 }, { "epoch": 10.864600326264274, "grad_norm": 0.0060838027857244015, "learning_rate": 0.0005118289828049575, "loss": 0.0393, "num_input_tokens_seen": 143860960, "step": 66600 }, { "epoch": 10.86541598694943, "grad_norm": 0.07112309336662292, "learning_rate": 0.0005117578227850958, "loss": 0.1031, "num_input_tokens_seen": 143871872, "step": 66605 }, { "epoch": 10.866231647634583, "grad_norm": 0.026289258152246475, "learning_rate": 0.000511686662526947, "loss": 0.0053, "num_input_tokens_seen": 143880800, "step": 66610 }, { "epoch": 10.867047308319739, "grad_norm": 0.011879836209118366, "learning_rate": 0.0005116155020319531, "loss": 0.0279, "num_input_tokens_seen": 143892128, "step": 66615 }, { "epoch": 10.867862969004895, "grad_norm": 0.002536615589633584, "learning_rate": 0.0005115443413015563, "loss": 0.0288, "num_input_tokens_seen": 143902304, "step": 66620 }, { "epoch": 10.868678629690049, "grad_norm": 0.2889273762702942, "learning_rate": 0.0005114731803371988, "loss": 0.0921, "num_input_tokens_seen": 143914688, "step": 66625 }, { "epoch": 10.869494290375204, "grad_norm": 0.003246544860303402, "learning_rate": 0.0005114020191403228, "loss": 0.0073, "num_input_tokens_seen": 143925344, "step": 66630 }, { "epoch": 10.870309951060358, "grad_norm": 0.00995062105357647, "learning_rate": 0.0005113308577123705, "loss": 0.0165, "num_input_tokens_seen": 143935776, "step": 66635 }, { "epoch": 10.871125611745514, "grad_norm": 0.26391562819480896, "learning_rate": 0.0005112596960547838, "loss": 0.0205, "num_input_tokens_seen": 143947296, "step": 66640 }, { "epoch": 10.87194127243067, "grad_norm": 0.01041611097753048, "learning_rate": 0.0005111885341690051, "loss": 0.0861, "num_input_tokens_seen": 143958784, "step": 66645 }, { "epoch": 10.872756933115824, "grad_norm": 0.06782546639442444, "learning_rate": 0.0005111173720564767, "loss": 0.0348, "num_input_tokens_seen": 143969824, "step": 66650 }, { "epoch": 10.87357259380098, "grad_norm": 0.03075503371655941, "learning_rate": 0.0005110462097186405, "loss": 0.038, "num_input_tokens_seen": 143981440, "step": 66655 }, { "epoch": 10.874388254486133, "grad_norm": 0.0384233333170414, "learning_rate": 0.0005109750471569388, "loss": 0.2224, "num_input_tokens_seen": 143991840, "step": 66660 }, { "epoch": 10.875203915171289, "grad_norm": 0.38814109563827515, "learning_rate": 0.000510903884372814, "loss": 0.1535, "num_input_tokens_seen": 144003200, "step": 66665 }, { "epoch": 10.876019575856443, "grad_norm": 0.1948164850473404, "learning_rate": 0.0005108327213677081, "loss": 0.0234, "num_input_tokens_seen": 144014656, "step": 66670 }, { "epoch": 10.876835236541599, "grad_norm": 0.05382636934518814, "learning_rate": 0.0005107615581430633, "loss": 0.0065, "num_input_tokens_seen": 144026400, "step": 66675 }, { "epoch": 10.877650897226754, "grad_norm": 0.20784275233745575, "learning_rate": 0.0005106903947003221, "loss": 0.023, "num_input_tokens_seen": 144036480, "step": 66680 }, { "epoch": 10.878466557911908, "grad_norm": 0.4996677339076996, "learning_rate": 0.0005106192310409263, "loss": 0.0446, "num_input_tokens_seen": 144046624, "step": 66685 }, { "epoch": 10.879282218597064, "grad_norm": 0.002807484706863761, "learning_rate": 0.0005105480671663183, "loss": 0.0368, "num_input_tokens_seen": 144056960, "step": 66690 }, { "epoch": 10.880097879282218, "grad_norm": 0.06957192718982697, "learning_rate": 0.0005104769030779404, "loss": 0.0435, "num_input_tokens_seen": 144066592, "step": 66695 }, { "epoch": 10.880913539967374, "grad_norm": 0.009007420390844345, "learning_rate": 0.0005104057387772347, "loss": 0.02, "num_input_tokens_seen": 144077504, "step": 66700 }, { "epoch": 10.88172920065253, "grad_norm": 0.16739769279956818, "learning_rate": 0.0005103345742656437, "loss": 0.0271, "num_input_tokens_seen": 144086112, "step": 66705 }, { "epoch": 10.882544861337683, "grad_norm": 0.004342829342931509, "learning_rate": 0.0005102634095446092, "loss": 0.0516, "num_input_tokens_seen": 144097216, "step": 66710 }, { "epoch": 10.883360522022839, "grad_norm": 0.054401274770498276, "learning_rate": 0.0005101922446155738, "loss": 0.0135, "num_input_tokens_seen": 144108000, "step": 66715 }, { "epoch": 10.884176182707993, "grad_norm": 0.0049821496941149235, "learning_rate": 0.0005101210794799797, "loss": 0.0069, "num_input_tokens_seen": 144119232, "step": 66720 }, { "epoch": 10.884991843393149, "grad_norm": 0.0032072472386062145, "learning_rate": 0.0005100499141392689, "loss": 0.0321, "num_input_tokens_seen": 144129696, "step": 66725 }, { "epoch": 10.885807504078304, "grad_norm": 0.001098168664611876, "learning_rate": 0.0005099787485948839, "loss": 0.0478, "num_input_tokens_seen": 144140576, "step": 66730 }, { "epoch": 10.886623164763458, "grad_norm": 0.045703381299972534, "learning_rate": 0.000509907582848267, "loss": 0.0163, "num_input_tokens_seen": 144151456, "step": 66735 }, { "epoch": 10.887438825448614, "grad_norm": 0.0048604668118059635, "learning_rate": 0.0005098364169008604, "loss": 0.0195, "num_input_tokens_seen": 144161984, "step": 66740 }, { "epoch": 10.888254486133768, "grad_norm": 0.37948596477508545, "learning_rate": 0.0005097652507541062, "loss": 0.099, "num_input_tokens_seen": 144173152, "step": 66745 }, { "epoch": 10.889070146818923, "grad_norm": 0.012583531439304352, "learning_rate": 0.0005096940844094467, "loss": 0.0098, "num_input_tokens_seen": 144184736, "step": 66750 }, { "epoch": 10.88988580750408, "grad_norm": 0.02361808530986309, "learning_rate": 0.0005096229178683244, "loss": 0.0141, "num_input_tokens_seen": 144196064, "step": 66755 }, { "epoch": 10.890701468189233, "grad_norm": 0.15489862859249115, "learning_rate": 0.0005095517511321815, "loss": 0.02, "num_input_tokens_seen": 144207200, "step": 66760 }, { "epoch": 10.891517128874389, "grad_norm": 0.01060162577778101, "learning_rate": 0.0005094805842024603, "loss": 0.0102, "num_input_tokens_seen": 144217664, "step": 66765 }, { "epoch": 10.892332789559543, "grad_norm": 0.0021783667616546154, "learning_rate": 0.000509409417080603, "loss": 0.1081, "num_input_tokens_seen": 144226784, "step": 66770 }, { "epoch": 10.893148450244698, "grad_norm": 0.10572908818721771, "learning_rate": 0.0005093382497680516, "loss": 0.1198, "num_input_tokens_seen": 144237024, "step": 66775 }, { "epoch": 10.893964110929852, "grad_norm": 0.35556432604789734, "learning_rate": 0.000509267082266249, "loss": 0.0452, "num_input_tokens_seen": 144247488, "step": 66780 }, { "epoch": 10.894779771615008, "grad_norm": 0.020045407116413116, "learning_rate": 0.0005091959145766373, "loss": 0.0503, "num_input_tokens_seen": 144258048, "step": 66785 }, { "epoch": 10.895595432300164, "grad_norm": 0.10034667700529099, "learning_rate": 0.0005091247467006588, "loss": 0.0272, "num_input_tokens_seen": 144268832, "step": 66790 }, { "epoch": 10.896411092985318, "grad_norm": 0.005177509505301714, "learning_rate": 0.0005090535786397556, "loss": 0.1209, "num_input_tokens_seen": 144279744, "step": 66795 }, { "epoch": 10.897226753670473, "grad_norm": 0.0051989988423883915, "learning_rate": 0.0005089824103953701, "loss": 0.0217, "num_input_tokens_seen": 144290976, "step": 66800 }, { "epoch": 10.898042414355627, "grad_norm": 0.16461747884750366, "learning_rate": 0.0005089112419689447, "loss": 0.0298, "num_input_tokens_seen": 144301312, "step": 66805 }, { "epoch": 10.898858075040783, "grad_norm": 0.04938230663537979, "learning_rate": 0.0005088400733619217, "loss": 0.0197, "num_input_tokens_seen": 144311904, "step": 66810 }, { "epoch": 10.899673735725939, "grad_norm": 0.004325589165091515, "learning_rate": 0.0005087689045757433, "loss": 0.2009, "num_input_tokens_seen": 144323392, "step": 66815 }, { "epoch": 10.900489396411093, "grad_norm": 0.006989457178860903, "learning_rate": 0.000508697735611852, "loss": 0.0353, "num_input_tokens_seen": 144335232, "step": 66820 }, { "epoch": 10.901305057096248, "grad_norm": 0.021615099161863327, "learning_rate": 0.0005086265664716901, "loss": 0.0172, "num_input_tokens_seen": 144345408, "step": 66825 }, { "epoch": 10.902120717781402, "grad_norm": 0.005931622814387083, "learning_rate": 0.0005085553971566998, "loss": 0.0237, "num_input_tokens_seen": 144356000, "step": 66830 }, { "epoch": 10.902936378466558, "grad_norm": 0.6692498326301575, "learning_rate": 0.0005084842276683236, "loss": 0.0503, "num_input_tokens_seen": 144366048, "step": 66835 }, { "epoch": 10.903752039151712, "grad_norm": 0.003552852664142847, "learning_rate": 0.0005084130580080038, "loss": 0.0162, "num_input_tokens_seen": 144377312, "step": 66840 }, { "epoch": 10.904567699836868, "grad_norm": 0.004602952394634485, "learning_rate": 0.0005083418881771826, "loss": 0.0141, "num_input_tokens_seen": 144387872, "step": 66845 }, { "epoch": 10.905383360522023, "grad_norm": 0.1763039380311966, "learning_rate": 0.0005082707181773025, "loss": 0.0307, "num_input_tokens_seen": 144399232, "step": 66850 }, { "epoch": 10.906199021207177, "grad_norm": 0.11560584604740143, "learning_rate": 0.0005081995480098057, "loss": 0.02, "num_input_tokens_seen": 144410144, "step": 66855 }, { "epoch": 10.907014681892333, "grad_norm": 0.14307036995887756, "learning_rate": 0.0005081283776761348, "loss": 0.0186, "num_input_tokens_seen": 144420672, "step": 66860 }, { "epoch": 10.907830342577487, "grad_norm": 0.45195597410202026, "learning_rate": 0.0005080572071777319, "loss": 0.1616, "num_input_tokens_seen": 144431936, "step": 66865 }, { "epoch": 10.908646003262643, "grad_norm": 0.003012361004948616, "learning_rate": 0.0005079860365160395, "loss": 0.0048, "num_input_tokens_seen": 144442816, "step": 66870 }, { "epoch": 10.909461663947798, "grad_norm": 0.11098600924015045, "learning_rate": 0.0005079148656924999, "loss": 0.0662, "num_input_tokens_seen": 144453408, "step": 66875 }, { "epoch": 10.910277324632952, "grad_norm": 0.20769816637039185, "learning_rate": 0.0005078436947085557, "loss": 0.0218, "num_input_tokens_seen": 144463968, "step": 66880 }, { "epoch": 10.911092985318108, "grad_norm": 0.007285351864993572, "learning_rate": 0.0005077725235656488, "loss": 0.0462, "num_input_tokens_seen": 144474176, "step": 66885 }, { "epoch": 10.911908646003262, "grad_norm": 0.005105094984173775, "learning_rate": 0.000507701352265222, "loss": 0.0245, "num_input_tokens_seen": 144485600, "step": 66890 }, { "epoch": 10.912724306688418, "grad_norm": 0.05201911926269531, "learning_rate": 0.0005076301808087176, "loss": 0.0075, "num_input_tokens_seen": 144496864, "step": 66895 }, { "epoch": 10.913539967373573, "grad_norm": 0.0026850395370274782, "learning_rate": 0.0005075590091975779, "loss": 0.1397, "num_input_tokens_seen": 144506944, "step": 66900 }, { "epoch": 10.914355628058727, "grad_norm": 0.017833666875958443, "learning_rate": 0.0005074878374332452, "loss": 0.0205, "num_input_tokens_seen": 144518144, "step": 66905 }, { "epoch": 10.915171288743883, "grad_norm": 0.01121000200510025, "learning_rate": 0.000507416665517162, "loss": 0.0068, "num_input_tokens_seen": 144527328, "step": 66910 }, { "epoch": 10.915986949429037, "grad_norm": 0.006536161061376333, "learning_rate": 0.0005073454934507708, "loss": 0.006, "num_input_tokens_seen": 144538560, "step": 66915 }, { "epoch": 10.916802610114193, "grad_norm": 0.3094877600669861, "learning_rate": 0.0005072743212355135, "loss": 0.1158, "num_input_tokens_seen": 144550560, "step": 66920 }, { "epoch": 10.917618270799348, "grad_norm": 0.07924286276102066, "learning_rate": 0.0005072031488728331, "loss": 0.0204, "num_input_tokens_seen": 144561472, "step": 66925 }, { "epoch": 10.918433931484502, "grad_norm": 0.47475647926330566, "learning_rate": 0.0005071319763641718, "loss": 0.0448, "num_input_tokens_seen": 144572864, "step": 66930 }, { "epoch": 10.919249592169658, "grad_norm": 0.03268728777766228, "learning_rate": 0.0005070608037109718, "loss": 0.0069, "num_input_tokens_seen": 144582912, "step": 66935 }, { "epoch": 10.920065252854812, "grad_norm": 0.010898868553340435, "learning_rate": 0.0005069896309146758, "loss": 0.0214, "num_input_tokens_seen": 144593664, "step": 66940 }, { "epoch": 10.920880913539968, "grad_norm": 0.024903923273086548, "learning_rate": 0.000506918457976726, "loss": 0.1886, "num_input_tokens_seen": 144604192, "step": 66945 }, { "epoch": 10.921696574225122, "grad_norm": 2.0595877170562744, "learning_rate": 0.0005068472848985647, "loss": 0.0321, "num_input_tokens_seen": 144614336, "step": 66950 }, { "epoch": 10.922512234910277, "grad_norm": 0.003864932106807828, "learning_rate": 0.0005067761116816348, "loss": 0.0096, "num_input_tokens_seen": 144625824, "step": 66955 }, { "epoch": 10.923327895595433, "grad_norm": 0.02564224787056446, "learning_rate": 0.0005067049383273783, "loss": 0.0166, "num_input_tokens_seen": 144636064, "step": 66960 }, { "epoch": 10.924143556280587, "grad_norm": 0.0030069448985159397, "learning_rate": 0.0005066337648372376, "loss": 0.0394, "num_input_tokens_seen": 144646176, "step": 66965 }, { "epoch": 10.924959216965743, "grad_norm": 0.0064015681855380535, "learning_rate": 0.0005065625912126553, "loss": 0.0149, "num_input_tokens_seen": 144655616, "step": 66970 }, { "epoch": 10.925774877650896, "grad_norm": 0.016096873208880424, "learning_rate": 0.0005064914174550737, "loss": 0.0167, "num_input_tokens_seen": 144667072, "step": 66975 }, { "epoch": 10.926590538336052, "grad_norm": 0.018032198771834373, "learning_rate": 0.0005064202435659354, "loss": 0.05, "num_input_tokens_seen": 144676800, "step": 66980 }, { "epoch": 10.927406199021208, "grad_norm": 0.13074368238449097, "learning_rate": 0.0005063490695466827, "loss": 0.0129, "num_input_tokens_seen": 144687616, "step": 66985 }, { "epoch": 10.928221859706362, "grad_norm": 0.006677868310362101, "learning_rate": 0.000506277895398758, "loss": 0.017, "num_input_tokens_seen": 144698464, "step": 66990 }, { "epoch": 10.929037520391518, "grad_norm": 0.07817433029413223, "learning_rate": 0.0005062067211236039, "loss": 0.0938, "num_input_tokens_seen": 144709216, "step": 66995 }, { "epoch": 10.929853181076671, "grad_norm": 0.008835572749376297, "learning_rate": 0.0005061355467226626, "loss": 0.0782, "num_input_tokens_seen": 144720224, "step": 67000 }, { "epoch": 10.930668841761827, "grad_norm": 0.002849903656169772, "learning_rate": 0.0005060643721973766, "loss": 0.0499, "num_input_tokens_seen": 144730592, "step": 67005 }, { "epoch": 10.931484502446983, "grad_norm": 0.0012694394681602716, "learning_rate": 0.0005059931975491886, "loss": 0.1155, "num_input_tokens_seen": 144740960, "step": 67010 }, { "epoch": 10.932300163132137, "grad_norm": 0.00297158001922071, "learning_rate": 0.0005059220227795409, "loss": 0.0047, "num_input_tokens_seen": 144751520, "step": 67015 }, { "epoch": 10.933115823817293, "grad_norm": 0.8607537150382996, "learning_rate": 0.0005058508478898757, "loss": 0.0684, "num_input_tokens_seen": 144761856, "step": 67020 }, { "epoch": 10.933931484502446, "grad_norm": 0.20840585231781006, "learning_rate": 0.0005057796728816358, "loss": 0.016, "num_input_tokens_seen": 144772672, "step": 67025 }, { "epoch": 10.934747145187602, "grad_norm": 0.006408384069800377, "learning_rate": 0.0005057084977562633, "loss": 0.0143, "num_input_tokens_seen": 144784224, "step": 67030 }, { "epoch": 10.935562805872756, "grad_norm": 0.28648972511291504, "learning_rate": 0.0005056373225152009, "loss": 0.0144, "num_input_tokens_seen": 144793216, "step": 67035 }, { "epoch": 10.936378466557912, "grad_norm": 0.018111038953065872, "learning_rate": 0.0005055661471598911, "loss": 0.0031, "num_input_tokens_seen": 144803680, "step": 67040 }, { "epoch": 10.937194127243067, "grad_norm": 0.02693861722946167, "learning_rate": 0.0005054949716917763, "loss": 0.0052, "num_input_tokens_seen": 144814816, "step": 67045 }, { "epoch": 10.938009787928221, "grad_norm": 0.27363863587379456, "learning_rate": 0.0005054237961122989, "loss": 0.0874, "num_input_tokens_seen": 144826176, "step": 67050 }, { "epoch": 10.938825448613377, "grad_norm": 0.015029046684503555, "learning_rate": 0.0005053526204229012, "loss": 0.0694, "num_input_tokens_seen": 144836160, "step": 67055 }, { "epoch": 10.939641109298531, "grad_norm": 0.005386275239288807, "learning_rate": 0.000505281444625026, "loss": 0.0082, "num_input_tokens_seen": 144845088, "step": 67060 }, { "epoch": 10.940456769983687, "grad_norm": 0.015765998512506485, "learning_rate": 0.0005052102687201156, "loss": 0.0738, "num_input_tokens_seen": 144856288, "step": 67065 }, { "epoch": 10.941272430668842, "grad_norm": 0.046367112547159195, "learning_rate": 0.0005051390927096125, "loss": 0.0486, "num_input_tokens_seen": 144866208, "step": 67070 }, { "epoch": 10.942088091353996, "grad_norm": 0.014453914016485214, "learning_rate": 0.0005050679165949592, "loss": 0.0542, "num_input_tokens_seen": 144877344, "step": 67075 }, { "epoch": 10.942903752039152, "grad_norm": 0.02145060896873474, "learning_rate": 0.0005049967403775982, "loss": 0.0326, "num_input_tokens_seen": 144888160, "step": 67080 }, { "epoch": 10.943719412724306, "grad_norm": 0.0056599765084683895, "learning_rate": 0.0005049255640589718, "loss": 0.0566, "num_input_tokens_seen": 144900224, "step": 67085 }, { "epoch": 10.944535073409462, "grad_norm": 0.4373588562011719, "learning_rate": 0.0005048543876405225, "loss": 0.0846, "num_input_tokens_seen": 144909792, "step": 67090 }, { "epoch": 10.945350734094617, "grad_norm": 0.007855315692722797, "learning_rate": 0.000504783211123693, "loss": 0.1838, "num_input_tokens_seen": 144920992, "step": 67095 }, { "epoch": 10.946166394779771, "grad_norm": 0.41129010915756226, "learning_rate": 0.0005047120345099258, "loss": 0.1903, "num_input_tokens_seen": 144930080, "step": 67100 }, { "epoch": 10.946982055464927, "grad_norm": 0.00836831796914339, "learning_rate": 0.0005046408578006631, "loss": 0.0198, "num_input_tokens_seen": 144940320, "step": 67105 }, { "epoch": 10.947797716150081, "grad_norm": 0.04342430830001831, "learning_rate": 0.0005045696809973474, "loss": 0.0137, "num_input_tokens_seen": 144951392, "step": 67110 }, { "epoch": 10.948613376835237, "grad_norm": 0.0020754148717969656, "learning_rate": 0.0005044985041014217, "loss": 0.0124, "num_input_tokens_seen": 144960928, "step": 67115 }, { "epoch": 10.949429037520392, "grad_norm": 0.007274713832885027, "learning_rate": 0.0005044273271143277, "loss": 0.0152, "num_input_tokens_seen": 144973024, "step": 67120 }, { "epoch": 10.950244698205546, "grad_norm": 0.4020930826663971, "learning_rate": 0.0005043561500375085, "loss": 0.0408, "num_input_tokens_seen": 144983776, "step": 67125 }, { "epoch": 10.951060358890702, "grad_norm": 0.42312315106391907, "learning_rate": 0.0005042849728724064, "loss": 0.0615, "num_input_tokens_seen": 144993888, "step": 67130 }, { "epoch": 10.951876019575856, "grad_norm": 0.008536286652088165, "learning_rate": 0.0005042137956204639, "loss": 0.0313, "num_input_tokens_seen": 145004352, "step": 67135 }, { "epoch": 10.952691680261012, "grad_norm": 0.004205097444355488, "learning_rate": 0.0005041426182831233, "loss": 0.0166, "num_input_tokens_seen": 145014592, "step": 67140 }, { "epoch": 10.953507340946166, "grad_norm": 0.001511257141828537, "learning_rate": 0.0005040714408618275, "loss": 0.0205, "num_input_tokens_seen": 145025344, "step": 67145 }, { "epoch": 10.954323001631321, "grad_norm": 0.3182823061943054, "learning_rate": 0.0005040002633580188, "loss": 0.0477, "num_input_tokens_seen": 145037088, "step": 67150 }, { "epoch": 10.955138662316477, "grad_norm": 0.027804870158433914, "learning_rate": 0.0005039290857731395, "loss": 0.006, "num_input_tokens_seen": 145048544, "step": 67155 }, { "epoch": 10.955954323001631, "grad_norm": 0.3454773724079132, "learning_rate": 0.0005038579081086324, "loss": 0.0143, "num_input_tokens_seen": 145059584, "step": 67160 }, { "epoch": 10.956769983686787, "grad_norm": 0.01337494421750307, "learning_rate": 0.0005037867303659399, "loss": 0.0096, "num_input_tokens_seen": 145070880, "step": 67165 }, { "epoch": 10.95758564437194, "grad_norm": 0.01701531931757927, "learning_rate": 0.0005037155525465046, "loss": 0.0746, "num_input_tokens_seen": 145082208, "step": 67170 }, { "epoch": 10.958401305057096, "grad_norm": 0.015620725229382515, "learning_rate": 0.0005036443746517688, "loss": 0.0104, "num_input_tokens_seen": 145093024, "step": 67175 }, { "epoch": 10.959216965742252, "grad_norm": 0.017922695726156235, "learning_rate": 0.0005035731966831752, "loss": 0.0036, "num_input_tokens_seen": 145104064, "step": 67180 }, { "epoch": 10.960032626427406, "grad_norm": 0.17914153635501862, "learning_rate": 0.0005035020186421661, "loss": 0.0944, "num_input_tokens_seen": 145116064, "step": 67185 }, { "epoch": 10.960848287112562, "grad_norm": 0.0031310205813497305, "learning_rate": 0.0005034308405301842, "loss": 0.0992, "num_input_tokens_seen": 145126720, "step": 67190 }, { "epoch": 10.961663947797716, "grad_norm": 0.27558255195617676, "learning_rate": 0.0005033596623486719, "loss": 0.0291, "num_input_tokens_seen": 145137344, "step": 67195 }, { "epoch": 10.962479608482871, "grad_norm": 0.02502385526895523, "learning_rate": 0.0005032884840990719, "loss": 0.0127, "num_input_tokens_seen": 145147744, "step": 67200 }, { "epoch": 10.963295269168025, "grad_norm": 0.15393461287021637, "learning_rate": 0.0005032173057828265, "loss": 0.0902, "num_input_tokens_seen": 145158528, "step": 67205 }, { "epoch": 10.964110929853181, "grad_norm": 0.006907598581165075, "learning_rate": 0.0005031461274013784, "loss": 0.0077, "num_input_tokens_seen": 145168864, "step": 67210 }, { "epoch": 10.964926590538337, "grad_norm": 0.1618753969669342, "learning_rate": 0.0005030749489561701, "loss": 0.0743, "num_input_tokens_seen": 145180064, "step": 67215 }, { "epoch": 10.96574225122349, "grad_norm": 0.4483173191547394, "learning_rate": 0.000503003770448644, "loss": 0.1097, "num_input_tokens_seen": 145191040, "step": 67220 }, { "epoch": 10.966557911908646, "grad_norm": 0.01324823871254921, "learning_rate": 0.0005029325918802426, "loss": 0.0271, "num_input_tokens_seen": 145201376, "step": 67225 }, { "epoch": 10.9673735725938, "grad_norm": 0.0016422256594523787, "learning_rate": 0.0005028614132524085, "loss": 0.0231, "num_input_tokens_seen": 145210880, "step": 67230 }, { "epoch": 10.968189233278956, "grad_norm": 0.008640460669994354, "learning_rate": 0.0005027902345665843, "loss": 0.014, "num_input_tokens_seen": 145221536, "step": 67235 }, { "epoch": 10.969004893964112, "grad_norm": 0.010546923615038395, "learning_rate": 0.0005027190558242124, "loss": 0.0045, "num_input_tokens_seen": 145231424, "step": 67240 }, { "epoch": 10.969820554649266, "grad_norm": 0.004773357417434454, "learning_rate": 0.0005026478770267355, "loss": 0.0331, "num_input_tokens_seen": 145241792, "step": 67245 }, { "epoch": 10.970636215334421, "grad_norm": 0.021990058943629265, "learning_rate": 0.0005025766981755959, "loss": 0.0278, "num_input_tokens_seen": 145252928, "step": 67250 }, { "epoch": 10.971451876019575, "grad_norm": 0.34234485030174255, "learning_rate": 0.0005025055192722363, "loss": 0.0443, "num_input_tokens_seen": 145262656, "step": 67255 }, { "epoch": 10.97226753670473, "grad_norm": 0.02041991800069809, "learning_rate": 0.0005024343403180992, "loss": 0.0067, "num_input_tokens_seen": 145273824, "step": 67260 }, { "epoch": 10.973083197389887, "grad_norm": 0.0017821387154981494, "learning_rate": 0.0005023631613146272, "loss": 0.0246, "num_input_tokens_seen": 145284928, "step": 67265 }, { "epoch": 10.97389885807504, "grad_norm": 0.0038782746996730566, "learning_rate": 0.0005022919822632625, "loss": 0.0664, "num_input_tokens_seen": 145295552, "step": 67270 }, { "epoch": 10.974714518760196, "grad_norm": 0.004829897079616785, "learning_rate": 0.0005022208031654479, "loss": 0.006, "num_input_tokens_seen": 145306336, "step": 67275 }, { "epoch": 10.97553017944535, "grad_norm": 0.21816429495811462, "learning_rate": 0.0005021496240226261, "loss": 0.0352, "num_input_tokens_seen": 145317152, "step": 67280 }, { "epoch": 10.976345840130506, "grad_norm": 0.8397039175033569, "learning_rate": 0.0005020784448362393, "loss": 0.0911, "num_input_tokens_seen": 145327776, "step": 67285 }, { "epoch": 10.977161500815662, "grad_norm": 0.018613159656524658, "learning_rate": 0.0005020072656077302, "loss": 0.1081, "num_input_tokens_seen": 145337984, "step": 67290 }, { "epoch": 10.977977161500815, "grad_norm": 0.01907249540090561, "learning_rate": 0.0005019360863385413, "loss": 0.0057, "num_input_tokens_seen": 145348832, "step": 67295 }, { "epoch": 10.978792822185971, "grad_norm": 0.0018857029499486089, "learning_rate": 0.0005018649070301152, "loss": 0.023, "num_input_tokens_seen": 145360000, "step": 67300 }, { "epoch": 10.979608482871125, "grad_norm": 0.13006794452667236, "learning_rate": 0.0005017937276838943, "loss": 0.0915, "num_input_tokens_seen": 145371008, "step": 67305 }, { "epoch": 10.98042414355628, "grad_norm": 0.02020275965332985, "learning_rate": 0.0005017225483013212, "loss": 0.1884, "num_input_tokens_seen": 145382656, "step": 67310 }, { "epoch": 10.981239804241435, "grad_norm": 0.01665751077234745, "learning_rate": 0.0005016513688838387, "loss": 0.0106, "num_input_tokens_seen": 145394880, "step": 67315 }, { "epoch": 10.98205546492659, "grad_norm": 0.02400999516248703, "learning_rate": 0.0005015801894328889, "loss": 0.0801, "num_input_tokens_seen": 145406464, "step": 67320 }, { "epoch": 10.982871125611746, "grad_norm": 0.023982934653759003, "learning_rate": 0.0005015090099499147, "loss": 0.015, "num_input_tokens_seen": 145416960, "step": 67325 }, { "epoch": 10.9836867862969, "grad_norm": 0.00537499226629734, "learning_rate": 0.0005014378304363584, "loss": 0.029, "num_input_tokens_seen": 145427232, "step": 67330 }, { "epoch": 10.984502446982056, "grad_norm": 0.002083304338157177, "learning_rate": 0.0005013666508936627, "loss": 0.0136, "num_input_tokens_seen": 145438560, "step": 67335 }, { "epoch": 10.98531810766721, "grad_norm": 0.0014621549053117633, "learning_rate": 0.0005012954713232701, "loss": 0.0048, "num_input_tokens_seen": 145449888, "step": 67340 }, { "epoch": 10.986133768352365, "grad_norm": 0.4816358685493469, "learning_rate": 0.0005012242917266232, "loss": 0.09, "num_input_tokens_seen": 145461184, "step": 67345 }, { "epoch": 10.986949429037521, "grad_norm": 0.0055021862499415874, "learning_rate": 0.0005011531121051643, "loss": 0.0575, "num_input_tokens_seen": 145470944, "step": 67350 }, { "epoch": 10.987765089722675, "grad_norm": 0.0026292535476386547, "learning_rate": 0.0005010819324603363, "loss": 0.0114, "num_input_tokens_seen": 145481824, "step": 67355 }, { "epoch": 10.98858075040783, "grad_norm": 0.1441703736782074, "learning_rate": 0.0005010107527935815, "loss": 0.0207, "num_input_tokens_seen": 145492576, "step": 67360 }, { "epoch": 10.989396411092985, "grad_norm": 0.005147392395883799, "learning_rate": 0.0005009395731063424, "loss": 0.0153, "num_input_tokens_seen": 145503936, "step": 67365 }, { "epoch": 10.99021207177814, "grad_norm": 0.004951298236846924, "learning_rate": 0.0005008683934000618, "loss": 0.0056, "num_input_tokens_seen": 145514208, "step": 67370 }, { "epoch": 10.991027732463294, "grad_norm": 0.05322100967168808, "learning_rate": 0.000500797213676182, "loss": 0.0455, "num_input_tokens_seen": 145524064, "step": 67375 }, { "epoch": 10.99184339314845, "grad_norm": 0.04236412048339844, "learning_rate": 0.0005007260339361456, "loss": 0.017, "num_input_tokens_seen": 145534464, "step": 67380 }, { "epoch": 10.992659053833606, "grad_norm": 0.03592411428689957, "learning_rate": 0.0005006548541813953, "loss": 0.2842, "num_input_tokens_seen": 145544672, "step": 67385 }, { "epoch": 10.99347471451876, "grad_norm": 0.3007633090019226, "learning_rate": 0.0005005836744133736, "loss": 0.1761, "num_input_tokens_seen": 145555296, "step": 67390 }, { "epoch": 10.994290375203915, "grad_norm": 0.017643166705965996, "learning_rate": 0.0005005124946335229, "loss": 0.0344, "num_input_tokens_seen": 145565984, "step": 67395 }, { "epoch": 10.99510603588907, "grad_norm": 0.005647346377372742, "learning_rate": 0.0005004413148432859, "loss": 0.0183, "num_input_tokens_seen": 145577472, "step": 67400 }, { "epoch": 10.995921696574225, "grad_norm": 0.04438630864024162, "learning_rate": 0.000500370135044105, "loss": 0.0147, "num_input_tokens_seen": 145588096, "step": 67405 }, { "epoch": 10.99673735725938, "grad_norm": 0.04898401349782944, "learning_rate": 0.000500298955237423, "loss": 0.0896, "num_input_tokens_seen": 145598752, "step": 67410 }, { "epoch": 10.997553017944535, "grad_norm": 0.03524085506796837, "learning_rate": 0.0005002277754246822, "loss": 0.0186, "num_input_tokens_seen": 145608992, "step": 67415 }, { "epoch": 10.99836867862969, "grad_norm": 0.0137935196980834, "learning_rate": 0.0005001565956073252, "loss": 0.0551, "num_input_tokens_seen": 145620512, "step": 67420 }, { "epoch": 10.999184339314844, "grad_norm": 0.19982655346393585, "learning_rate": 0.0005000854157867947, "loss": 0.0408, "num_input_tokens_seen": 145631296, "step": 67425 }, { "epoch": 11.0, "grad_norm": 0.014280433766543865, "learning_rate": 0.0005000142359645331, "loss": 0.2095, "num_input_tokens_seen": 145641920, "step": 67430 }, { "epoch": 11.0, "eval_loss": 0.16289934515953064, "eval_runtime": 103.8025, "eval_samples_per_second": 26.252, "eval_steps_per_second": 6.57, "num_input_tokens_seen": 145641920, "step": 67430 }, { "epoch": 11.000815660685156, "grad_norm": 0.002235093619674444, "learning_rate": 0.0004999430561419831, "loss": 0.0902, "num_input_tokens_seen": 145653984, "step": 67435 }, { "epoch": 11.00163132137031, "grad_norm": 0.165186807513237, "learning_rate": 0.000499871876320587, "loss": 0.0201, "num_input_tokens_seen": 145664832, "step": 67440 }, { "epoch": 11.002446982055465, "grad_norm": 0.03600761294364929, "learning_rate": 0.0004998006965017876, "loss": 0.1174, "num_input_tokens_seen": 145674848, "step": 67445 }, { "epoch": 11.00326264274062, "grad_norm": 0.004147016908973455, "learning_rate": 0.0004997295166870271, "loss": 0.0087, "num_input_tokens_seen": 145685760, "step": 67450 }, { "epoch": 11.004078303425775, "grad_norm": 0.05979970097541809, "learning_rate": 0.0004996583368777484, "loss": 0.0296, "num_input_tokens_seen": 145697088, "step": 67455 }, { "epoch": 11.00489396411093, "grad_norm": 0.01066543161869049, "learning_rate": 0.000499587157075394, "loss": 0.0168, "num_input_tokens_seen": 145708704, "step": 67460 }, { "epoch": 11.005709624796085, "grad_norm": 0.01953071542084217, "learning_rate": 0.0004995159772814063, "loss": 0.1, "num_input_tokens_seen": 145720384, "step": 67465 }, { "epoch": 11.00652528548124, "grad_norm": 0.03782833367586136, "learning_rate": 0.0004994447974972281, "loss": 0.0067, "num_input_tokens_seen": 145732640, "step": 67470 }, { "epoch": 11.007340946166394, "grad_norm": 0.11350507289171219, "learning_rate": 0.0004993736177243016, "loss": 0.0151, "num_input_tokens_seen": 145744448, "step": 67475 }, { "epoch": 11.00815660685155, "grad_norm": 0.23200421035289764, "learning_rate": 0.0004993024379640697, "loss": 0.0287, "num_input_tokens_seen": 145755584, "step": 67480 }, { "epoch": 11.008972267536704, "grad_norm": 0.003650764236226678, "learning_rate": 0.0004992312582179746, "loss": 0.0155, "num_input_tokens_seen": 145766720, "step": 67485 }, { "epoch": 11.00978792822186, "grad_norm": 0.02220398746430874, "learning_rate": 0.0004991600784874593, "loss": 0.0628, "num_input_tokens_seen": 145778368, "step": 67490 }, { "epoch": 11.010603588907015, "grad_norm": 0.131536066532135, "learning_rate": 0.0004990888987739657, "loss": 0.0104, "num_input_tokens_seen": 145790144, "step": 67495 }, { "epoch": 11.01141924959217, "grad_norm": 0.04819709062576294, "learning_rate": 0.0004990177190789371, "loss": 0.1136, "num_input_tokens_seen": 145800352, "step": 67500 }, { "epoch": 11.012234910277325, "grad_norm": 0.054440777748823166, "learning_rate": 0.0004989465394038153, "loss": 0.0212, "num_input_tokens_seen": 145811776, "step": 67505 }, { "epoch": 11.013050570962479, "grad_norm": 0.008946564979851246, "learning_rate": 0.0004988753597500435, "loss": 0.0105, "num_input_tokens_seen": 145821376, "step": 67510 }, { "epoch": 11.013866231647635, "grad_norm": 0.33636924624443054, "learning_rate": 0.0004988041801190638, "loss": 0.1091, "num_input_tokens_seen": 145831328, "step": 67515 }, { "epoch": 11.01468189233279, "grad_norm": 0.04350544139742851, "learning_rate": 0.000498733000512319, "loss": 0.0268, "num_input_tokens_seen": 145842208, "step": 67520 }, { "epoch": 11.015497553017944, "grad_norm": 0.025713231414556503, "learning_rate": 0.0004986618209312515, "loss": 0.1589, "num_input_tokens_seen": 145851776, "step": 67525 }, { "epoch": 11.0163132137031, "grad_norm": 0.0037758410908281803, "learning_rate": 0.000498590641377304, "loss": 0.0148, "num_input_tokens_seen": 145862432, "step": 67530 }, { "epoch": 11.017128874388254, "grad_norm": 0.1802942007780075, "learning_rate": 0.0004985194618519188, "loss": 0.0294, "num_input_tokens_seen": 145873056, "step": 67535 }, { "epoch": 11.01794453507341, "grad_norm": 0.10657081007957458, "learning_rate": 0.0004984482823565386, "loss": 0.0239, "num_input_tokens_seen": 145884384, "step": 67540 }, { "epoch": 11.018760195758565, "grad_norm": 0.02635457180440426, "learning_rate": 0.0004983771028926059, "loss": 0.0267, "num_input_tokens_seen": 145895136, "step": 67545 }, { "epoch": 11.01957585644372, "grad_norm": 0.0033126375637948513, "learning_rate": 0.0004983059234615635, "loss": 0.0207, "num_input_tokens_seen": 145905984, "step": 67550 }, { "epoch": 11.020391517128875, "grad_norm": 0.005782026797533035, "learning_rate": 0.0004982347440648534, "loss": 0.033, "num_input_tokens_seen": 145916832, "step": 67555 }, { "epoch": 11.021207177814029, "grad_norm": 0.016137108206748962, "learning_rate": 0.0004981635647039186, "loss": 0.0217, "num_input_tokens_seen": 145927360, "step": 67560 }, { "epoch": 11.022022838499185, "grad_norm": 0.3549785017967224, "learning_rate": 0.0004980923853802015, "loss": 0.0284, "num_input_tokens_seen": 145936704, "step": 67565 }, { "epoch": 11.022838499184338, "grad_norm": 0.23480737209320068, "learning_rate": 0.0004980212060951447, "loss": 0.1713, "num_input_tokens_seen": 145948096, "step": 67570 }, { "epoch": 11.023654159869494, "grad_norm": 0.4012732207775116, "learning_rate": 0.0004979500268501905, "loss": 0.0394, "num_input_tokens_seen": 145958112, "step": 67575 }, { "epoch": 11.02446982055465, "grad_norm": 0.00960595440119505, "learning_rate": 0.0004978788476467816, "loss": 0.0082, "num_input_tokens_seen": 145969536, "step": 67580 }, { "epoch": 11.025285481239804, "grad_norm": 0.34898385405540466, "learning_rate": 0.0004978076684863607, "loss": 0.1035, "num_input_tokens_seen": 145981056, "step": 67585 }, { "epoch": 11.02610114192496, "grad_norm": 0.34688490629196167, "learning_rate": 0.0004977364893703701, "loss": 0.0577, "num_input_tokens_seen": 145991808, "step": 67590 }, { "epoch": 11.026916802610113, "grad_norm": 0.1314949095249176, "learning_rate": 0.0004976653103002526, "loss": 0.0696, "num_input_tokens_seen": 146003424, "step": 67595 }, { "epoch": 11.02773246329527, "grad_norm": 0.024752607569098473, "learning_rate": 0.0004975941312774502, "loss": 0.1167, "num_input_tokens_seen": 146014304, "step": 67600 }, { "epoch": 11.028548123980425, "grad_norm": 0.012540026567876339, "learning_rate": 0.0004975229523034061, "loss": 0.0097, "num_input_tokens_seen": 146025728, "step": 67605 }, { "epoch": 11.029363784665579, "grad_norm": 0.01599235273897648, "learning_rate": 0.0004974517733795623, "loss": 0.0398, "num_input_tokens_seen": 146037536, "step": 67610 }, { "epoch": 11.030179445350734, "grad_norm": 0.018973039463162422, "learning_rate": 0.0004973805945073617, "loss": 0.0209, "num_input_tokens_seen": 146048544, "step": 67615 }, { "epoch": 11.030995106035888, "grad_norm": 0.016196317970752716, "learning_rate": 0.0004973094156882466, "loss": 0.009, "num_input_tokens_seen": 146058784, "step": 67620 }, { "epoch": 11.031810766721044, "grad_norm": 0.015408056788146496, "learning_rate": 0.0004972382369236596, "loss": 0.0159, "num_input_tokens_seen": 146068416, "step": 67625 }, { "epoch": 11.0326264274062, "grad_norm": 0.20243647694587708, "learning_rate": 0.0004971670582150431, "loss": 0.0211, "num_input_tokens_seen": 146077664, "step": 67630 }, { "epoch": 11.033442088091354, "grad_norm": 1.3160874843597412, "learning_rate": 0.0004970958795638401, "loss": 0.0713, "num_input_tokens_seen": 146088320, "step": 67635 }, { "epoch": 11.03425774877651, "grad_norm": 0.006498668342828751, "learning_rate": 0.0004970247009714924, "loss": 0.0051, "num_input_tokens_seen": 146099424, "step": 67640 }, { "epoch": 11.035073409461663, "grad_norm": 0.00784413330256939, "learning_rate": 0.0004969535224394432, "loss": 0.0024, "num_input_tokens_seen": 146110112, "step": 67645 }, { "epoch": 11.035889070146819, "grad_norm": 0.12007040530443192, "learning_rate": 0.0004968823439691346, "loss": 0.0142, "num_input_tokens_seen": 146120224, "step": 67650 }, { "epoch": 11.036704730831975, "grad_norm": 0.07254232466220856, "learning_rate": 0.0004968111655620093, "loss": 0.0263, "num_input_tokens_seen": 146131328, "step": 67655 }, { "epoch": 11.037520391517129, "grad_norm": 0.027044525370001793, "learning_rate": 0.0004967399872195096, "loss": 0.0067, "num_input_tokens_seen": 146142144, "step": 67660 }, { "epoch": 11.038336052202284, "grad_norm": 0.013522460125386715, "learning_rate": 0.0004966688089430785, "loss": 0.0919, "num_input_tokens_seen": 146153696, "step": 67665 }, { "epoch": 11.039151712887438, "grad_norm": 0.004441166762262583, "learning_rate": 0.000496597630734158, "loss": 0.0076, "num_input_tokens_seen": 146164608, "step": 67670 }, { "epoch": 11.039967373572594, "grad_norm": 0.04167890548706055, "learning_rate": 0.0004965264525941908, "loss": 0.0221, "num_input_tokens_seen": 146175616, "step": 67675 }, { "epoch": 11.040783034257748, "grad_norm": 0.3512006103992462, "learning_rate": 0.0004964552745246196, "loss": 0.0122, "num_input_tokens_seen": 146187104, "step": 67680 }, { "epoch": 11.041598694942904, "grad_norm": 0.007863182574510574, "learning_rate": 0.0004963840965268866, "loss": 0.0075, "num_input_tokens_seen": 146196448, "step": 67685 }, { "epoch": 11.04241435562806, "grad_norm": 0.01281198114156723, "learning_rate": 0.0004963129186024346, "loss": 0.0089, "num_input_tokens_seen": 146206720, "step": 67690 }, { "epoch": 11.043230016313213, "grad_norm": 0.42489489912986755, "learning_rate": 0.0004962417407527059, "loss": 0.1801, "num_input_tokens_seen": 146215488, "step": 67695 }, { "epoch": 11.044045676998369, "grad_norm": 0.02685629017651081, "learning_rate": 0.0004961705629791431, "loss": 0.0676, "num_input_tokens_seen": 146226400, "step": 67700 }, { "epoch": 11.044861337683523, "grad_norm": 0.6352334022521973, "learning_rate": 0.0004960993852831888, "loss": 0.0828, "num_input_tokens_seen": 146237632, "step": 67705 }, { "epoch": 11.045676998368679, "grad_norm": 0.003587668761610985, "learning_rate": 0.0004960282076662853, "loss": 0.0196, "num_input_tokens_seen": 146248128, "step": 67710 }, { "epoch": 11.046492659053834, "grad_norm": 0.006983236409723759, "learning_rate": 0.0004959570301298752, "loss": 0.0135, "num_input_tokens_seen": 146258528, "step": 67715 }, { "epoch": 11.047308319738988, "grad_norm": 0.01367577537894249, "learning_rate": 0.0004958858526754012, "loss": 0.0054, "num_input_tokens_seen": 146268704, "step": 67720 }, { "epoch": 11.048123980424144, "grad_norm": 0.012268884107470512, "learning_rate": 0.0004958146753043053, "loss": 0.0081, "num_input_tokens_seen": 146279136, "step": 67725 }, { "epoch": 11.048939641109298, "grad_norm": 0.045778777450323105, "learning_rate": 0.0004957434980180307, "loss": 0.0213, "num_input_tokens_seen": 146288256, "step": 67730 }, { "epoch": 11.049755301794454, "grad_norm": 0.07384860515594482, "learning_rate": 0.0004956723208180191, "loss": 0.0076, "num_input_tokens_seen": 146298272, "step": 67735 }, { "epoch": 11.05057096247961, "grad_norm": 0.12969577312469482, "learning_rate": 0.0004956011437057138, "loss": 0.0134, "num_input_tokens_seen": 146309600, "step": 67740 }, { "epoch": 11.051386623164763, "grad_norm": 0.013329303823411465, "learning_rate": 0.0004955299666825566, "loss": 0.0342, "num_input_tokens_seen": 146320320, "step": 67745 }, { "epoch": 11.052202283849919, "grad_norm": 0.02987365610897541, "learning_rate": 0.0004954587897499905, "loss": 0.0054, "num_input_tokens_seen": 146331296, "step": 67750 }, { "epoch": 11.053017944535073, "grad_norm": 0.003352835774421692, "learning_rate": 0.0004953876129094576, "loss": 0.0039, "num_input_tokens_seen": 146342016, "step": 67755 }, { "epoch": 11.053833605220229, "grad_norm": 0.008115014061331749, "learning_rate": 0.0004953164361624008, "loss": 0.0199, "num_input_tokens_seen": 146353184, "step": 67760 }, { "epoch": 11.054649265905383, "grad_norm": 0.009867136366665363, "learning_rate": 0.0004952452595102621, "loss": 0.0175, "num_input_tokens_seen": 146364160, "step": 67765 }, { "epoch": 11.055464926590538, "grad_norm": 0.003682214766740799, "learning_rate": 0.0004951740829544846, "loss": 0.019, "num_input_tokens_seen": 146373760, "step": 67770 }, { "epoch": 11.056280587275694, "grad_norm": 0.05358272045850754, "learning_rate": 0.00049510290649651, "loss": 0.007, "num_input_tokens_seen": 146384928, "step": 67775 }, { "epoch": 11.057096247960848, "grad_norm": 0.23653176426887512, "learning_rate": 0.0004950317301377813, "loss": 0.0563, "num_input_tokens_seen": 146396192, "step": 67780 }, { "epoch": 11.057911908646004, "grad_norm": 0.3197796642780304, "learning_rate": 0.0004949605538797412, "loss": 0.1492, "num_input_tokens_seen": 146407680, "step": 67785 }, { "epoch": 11.058727569331158, "grad_norm": 0.31645524501800537, "learning_rate": 0.0004948893777238316, "loss": 0.0944, "num_input_tokens_seen": 146417632, "step": 67790 }, { "epoch": 11.059543230016313, "grad_norm": 0.04331495612859726, "learning_rate": 0.0004948182016714954, "loss": 0.157, "num_input_tokens_seen": 146428736, "step": 67795 }, { "epoch": 11.060358890701469, "grad_norm": 0.007367865182459354, "learning_rate": 0.0004947470257241748, "loss": 0.0024, "num_input_tokens_seen": 146439456, "step": 67800 }, { "epoch": 11.061174551386623, "grad_norm": 0.05738005042076111, "learning_rate": 0.0004946758498833125, "loss": 0.0079, "num_input_tokens_seen": 146450176, "step": 67805 }, { "epoch": 11.061990212071779, "grad_norm": 0.4973194897174835, "learning_rate": 0.0004946046741503507, "loss": 0.0327, "num_input_tokens_seen": 146461376, "step": 67810 }, { "epoch": 11.062805872756933, "grad_norm": 0.009904002770781517, "learning_rate": 0.0004945334985267323, "loss": 0.0211, "num_input_tokens_seen": 146473504, "step": 67815 }, { "epoch": 11.063621533442088, "grad_norm": 0.7115747928619385, "learning_rate": 0.0004944623230138991, "loss": 0.0453, "num_input_tokens_seen": 146484448, "step": 67820 }, { "epoch": 11.064437194127244, "grad_norm": 0.006754287518560886, "learning_rate": 0.0004943911476132943, "loss": 0.0082, "num_input_tokens_seen": 146495200, "step": 67825 }, { "epoch": 11.065252854812398, "grad_norm": 0.07527286559343338, "learning_rate": 0.0004943199723263597, "loss": 0.0103, "num_input_tokens_seen": 146505824, "step": 67830 }, { "epoch": 11.066068515497554, "grad_norm": 0.022538485005497932, "learning_rate": 0.0004942487971545383, "loss": 0.0142, "num_input_tokens_seen": 146515168, "step": 67835 }, { "epoch": 11.066884176182707, "grad_norm": 0.23096723854541779, "learning_rate": 0.0004941776220992722, "loss": 0.2208, "num_input_tokens_seen": 146525056, "step": 67840 }, { "epoch": 11.067699836867863, "grad_norm": 0.017740461975336075, "learning_rate": 0.0004941064471620041, "loss": 0.1202, "num_input_tokens_seen": 146535136, "step": 67845 }, { "epoch": 11.068515497553017, "grad_norm": 0.17247579991817474, "learning_rate": 0.0004940352723441763, "loss": 0.0237, "num_input_tokens_seen": 146545920, "step": 67850 }, { "epoch": 11.069331158238173, "grad_norm": 0.04620293900370598, "learning_rate": 0.0004939640976472311, "loss": 0.0333, "num_input_tokens_seen": 146557120, "step": 67855 }, { "epoch": 11.070146818923329, "grad_norm": 0.015531661920249462, "learning_rate": 0.0004938929230726111, "loss": 0.0095, "num_input_tokens_seen": 146568576, "step": 67860 }, { "epoch": 11.070962479608482, "grad_norm": 0.22680914402008057, "learning_rate": 0.0004938217486217591, "loss": 0.0081, "num_input_tokens_seen": 146579424, "step": 67865 }, { "epoch": 11.071778140293638, "grad_norm": 0.0019467025995254517, "learning_rate": 0.0004937505742961169, "loss": 0.0032, "num_input_tokens_seen": 146590560, "step": 67870 }, { "epoch": 11.072593800978792, "grad_norm": 0.0013207707088440657, "learning_rate": 0.0004936794000971274, "loss": 0.008, "num_input_tokens_seen": 146600768, "step": 67875 }, { "epoch": 11.073409461663948, "grad_norm": 0.0049352445639669895, "learning_rate": 0.0004936082260262328, "loss": 0.0946, "num_input_tokens_seen": 146610880, "step": 67880 }, { "epoch": 11.074225122349104, "grad_norm": 0.14016573131084442, "learning_rate": 0.0004935370520848755, "loss": 0.0111, "num_input_tokens_seen": 146621792, "step": 67885 }, { "epoch": 11.075040783034257, "grad_norm": 0.01653108559548855, "learning_rate": 0.0004934658782744983, "loss": 0.0302, "num_input_tokens_seen": 146632448, "step": 67890 }, { "epoch": 11.075856443719413, "grad_norm": 0.002239247551187873, "learning_rate": 0.0004933947045965431, "loss": 0.0057, "num_input_tokens_seen": 146643456, "step": 67895 }, { "epoch": 11.076672104404567, "grad_norm": 0.013882400467991829, "learning_rate": 0.0004933235310524528, "loss": 0.0566, "num_input_tokens_seen": 146654176, "step": 67900 }, { "epoch": 11.077487765089723, "grad_norm": 0.32492849230766296, "learning_rate": 0.0004932523576436695, "loss": 0.0132, "num_input_tokens_seen": 146665984, "step": 67905 }, { "epoch": 11.078303425774878, "grad_norm": 0.006665470078587532, "learning_rate": 0.0004931811843716358, "loss": 0.116, "num_input_tokens_seen": 146677440, "step": 67910 }, { "epoch": 11.079119086460032, "grad_norm": 0.010101187974214554, "learning_rate": 0.000493110011237794, "loss": 0.0033, "num_input_tokens_seen": 146688416, "step": 67915 }, { "epoch": 11.079934747145188, "grad_norm": 0.00362041755579412, "learning_rate": 0.0004930388382435866, "loss": 0.009, "num_input_tokens_seen": 146699360, "step": 67920 }, { "epoch": 11.080750407830342, "grad_norm": 0.015067537315189838, "learning_rate": 0.0004929676653904558, "loss": 0.009, "num_input_tokens_seen": 146709056, "step": 67925 }, { "epoch": 11.081566068515498, "grad_norm": 0.010858445428311825, "learning_rate": 0.0004928964926798445, "loss": 0.0066, "num_input_tokens_seen": 146718752, "step": 67930 }, { "epoch": 11.082381729200652, "grad_norm": 0.011564402841031551, "learning_rate": 0.0004928253201131945, "loss": 0.0332, "num_input_tokens_seen": 146729920, "step": 67935 }, { "epoch": 11.083197389885807, "grad_norm": 0.0970597043633461, "learning_rate": 0.0004927541476919487, "loss": 0.0534, "num_input_tokens_seen": 146740064, "step": 67940 }, { "epoch": 11.084013050570963, "grad_norm": 0.35658419132232666, "learning_rate": 0.0004926829754175492, "loss": 0.0337, "num_input_tokens_seen": 146750560, "step": 67945 }, { "epoch": 11.084828711256117, "grad_norm": 0.3965001702308655, "learning_rate": 0.0004926118032914385, "loss": 0.1152, "num_input_tokens_seen": 146760960, "step": 67950 }, { "epoch": 11.085644371941273, "grad_norm": 0.02664480172097683, "learning_rate": 0.0004925406313150589, "loss": 0.1071, "num_input_tokens_seen": 146772864, "step": 67955 }, { "epoch": 11.086460032626427, "grad_norm": 0.10795983672142029, "learning_rate": 0.000492469459489853, "loss": 0.0145, "num_input_tokens_seen": 146783968, "step": 67960 }, { "epoch": 11.087275693311582, "grad_norm": 0.7373493909835815, "learning_rate": 0.0004923982878172629, "loss": 0.0582, "num_input_tokens_seen": 146795808, "step": 67965 }, { "epoch": 11.088091353996738, "grad_norm": 0.13917656242847443, "learning_rate": 0.0004923271162987314, "loss": 0.0078, "num_input_tokens_seen": 146807104, "step": 67970 }, { "epoch": 11.088907014681892, "grad_norm": 0.0017775179585441947, "learning_rate": 0.0004922559449357003, "loss": 0.0355, "num_input_tokens_seen": 146817216, "step": 67975 }, { "epoch": 11.089722675367048, "grad_norm": 0.003343122312799096, "learning_rate": 0.0004921847737296125, "loss": 0.0041, "num_input_tokens_seen": 146827776, "step": 67980 }, { "epoch": 11.090538336052202, "grad_norm": 0.3145506978034973, "learning_rate": 0.0004921136026819101, "loss": 0.0563, "num_input_tokens_seen": 146839712, "step": 67985 }, { "epoch": 11.091353996737357, "grad_norm": 0.01094027329236269, "learning_rate": 0.0004920424317940355, "loss": 0.0105, "num_input_tokens_seen": 146850592, "step": 67990 }, { "epoch": 11.092169657422513, "grad_norm": 0.007395964581519365, "learning_rate": 0.0004919712610674312, "loss": 0.0065, "num_input_tokens_seen": 146862528, "step": 67995 }, { "epoch": 11.092985318107667, "grad_norm": 0.038671478629112244, "learning_rate": 0.0004919000905035394, "loss": 0.0621, "num_input_tokens_seen": 146871488, "step": 68000 }, { "epoch": 11.093800978792823, "grad_norm": 0.0066762445494532585, "learning_rate": 0.0004918289201038026, "loss": 0.0707, "num_input_tokens_seen": 146882496, "step": 68005 }, { "epoch": 11.094616639477977, "grad_norm": 0.02582985907793045, "learning_rate": 0.0004917577498696631, "loss": 0.0111, "num_input_tokens_seen": 146893280, "step": 68010 }, { "epoch": 11.095432300163132, "grad_norm": 0.6989096403121948, "learning_rate": 0.0004916865798025634, "loss": 0.0422, "num_input_tokens_seen": 146903008, "step": 68015 }, { "epoch": 11.096247960848286, "grad_norm": 0.14155010879039764, "learning_rate": 0.0004916154099039455, "loss": 0.1259, "num_input_tokens_seen": 146913472, "step": 68020 }, { "epoch": 11.097063621533442, "grad_norm": 0.01753934472799301, "learning_rate": 0.000491544240175252, "loss": 0.0104, "num_input_tokens_seen": 146925440, "step": 68025 }, { "epoch": 11.097879282218598, "grad_norm": 0.01112120971083641, "learning_rate": 0.0004914730706179251, "loss": 0.0393, "num_input_tokens_seen": 146935360, "step": 68030 }, { "epoch": 11.098694942903752, "grad_norm": 0.07331804931163788, "learning_rate": 0.0004914019012334075, "loss": 0.0092, "num_input_tokens_seen": 146945600, "step": 68035 }, { "epoch": 11.099510603588907, "grad_norm": 0.07903977483510971, "learning_rate": 0.000491330732023141, "loss": 0.1622, "num_input_tokens_seen": 146956928, "step": 68040 }, { "epoch": 11.100326264274061, "grad_norm": 0.0027705691754817963, "learning_rate": 0.0004912595629885685, "loss": 0.0208, "num_input_tokens_seen": 146967680, "step": 68045 }, { "epoch": 11.101141924959217, "grad_norm": 0.0015439860289916396, "learning_rate": 0.0004911883941311319, "loss": 0.1458, "num_input_tokens_seen": 146978656, "step": 68050 }, { "epoch": 11.101957585644373, "grad_norm": 0.0027622964698821306, "learning_rate": 0.0004911172254522737, "loss": 0.0245, "num_input_tokens_seen": 146989408, "step": 68055 }, { "epoch": 11.102773246329527, "grad_norm": 0.013207031413912773, "learning_rate": 0.0004910460569534361, "loss": 0.0085, "num_input_tokens_seen": 147000288, "step": 68060 }, { "epoch": 11.103588907014682, "grad_norm": 0.010163257829844952, "learning_rate": 0.0004909748886360617, "loss": 0.0079, "num_input_tokens_seen": 147011008, "step": 68065 }, { "epoch": 11.104404567699836, "grad_norm": 0.005293056834489107, "learning_rate": 0.0004909037205015924, "loss": 0.0085, "num_input_tokens_seen": 147022272, "step": 68070 }, { "epoch": 11.105220228384992, "grad_norm": 0.03544028103351593, "learning_rate": 0.000490832552551471, "loss": 0.0416, "num_input_tokens_seen": 147032864, "step": 68075 }, { "epoch": 11.106035889070148, "grad_norm": 0.01018522959202528, "learning_rate": 0.0004907613847871393, "loss": 0.071, "num_input_tokens_seen": 147042784, "step": 68080 }, { "epoch": 11.106851549755302, "grad_norm": 0.18979839980602264, "learning_rate": 0.00049069021721004, "loss": 0.0272, "num_input_tokens_seen": 147052928, "step": 68085 }, { "epoch": 11.107667210440457, "grad_norm": 0.002638269681483507, "learning_rate": 0.0004906190498216151, "loss": 0.0137, "num_input_tokens_seen": 147063808, "step": 68090 }, { "epoch": 11.108482871125611, "grad_norm": 0.0040941014885902405, "learning_rate": 0.0004905478826233072, "loss": 0.0102, "num_input_tokens_seen": 147074240, "step": 68095 }, { "epoch": 11.109298531810767, "grad_norm": 0.016239026561379433, "learning_rate": 0.0004904767156165585, "loss": 0.0101, "num_input_tokens_seen": 147084736, "step": 68100 }, { "epoch": 11.11011419249592, "grad_norm": 0.251310259103775, "learning_rate": 0.000490405548802811, "loss": 0.1487, "num_input_tokens_seen": 147097216, "step": 68105 }, { "epoch": 11.110929853181077, "grad_norm": 0.01817292720079422, "learning_rate": 0.0004903343821835075, "loss": 0.0043, "num_input_tokens_seen": 147107264, "step": 68110 }, { "epoch": 11.111745513866232, "grad_norm": 0.0064503224566578865, "learning_rate": 0.0004902632157600898, "loss": 0.0141, "num_input_tokens_seen": 147117440, "step": 68115 }, { "epoch": 11.112561174551386, "grad_norm": 0.003892709966748953, "learning_rate": 0.0004901920495340007, "loss": 0.026, "num_input_tokens_seen": 147128960, "step": 68120 }, { "epoch": 11.113376835236542, "grad_norm": 0.013186760246753693, "learning_rate": 0.0004901208835066818, "loss": 0.0471, "num_input_tokens_seen": 147139584, "step": 68125 }, { "epoch": 11.114192495921696, "grad_norm": 0.0026877010241150856, "learning_rate": 0.0004900497176795759, "loss": 0.0016, "num_input_tokens_seen": 147150432, "step": 68130 }, { "epoch": 11.115008156606851, "grad_norm": 0.8579369187355042, "learning_rate": 0.000489978552054125, "loss": 0.1093, "num_input_tokens_seen": 147160928, "step": 68135 }, { "epoch": 11.115823817292007, "grad_norm": 0.0050546894781291485, "learning_rate": 0.0004899073866317717, "loss": 0.11, "num_input_tokens_seen": 147171520, "step": 68140 }, { "epoch": 11.116639477977161, "grad_norm": 0.11194411665201187, "learning_rate": 0.0004898362214139577, "loss": 0.0052, "num_input_tokens_seen": 147182848, "step": 68145 }, { "epoch": 11.117455138662317, "grad_norm": 0.031917814165353775, "learning_rate": 0.0004897650564021257, "loss": 0.013, "num_input_tokens_seen": 147193248, "step": 68150 }, { "epoch": 11.11827079934747, "grad_norm": 0.009770890697836876, "learning_rate": 0.0004896938915977178, "loss": 0.1756, "num_input_tokens_seen": 147203744, "step": 68155 }, { "epoch": 11.119086460032626, "grad_norm": 0.0032027806155383587, "learning_rate": 0.0004896227270021763, "loss": 0.0026, "num_input_tokens_seen": 147215072, "step": 68160 }, { "epoch": 11.119902120717782, "grad_norm": 0.042442020028829575, "learning_rate": 0.0004895515626169433, "loss": 0.0074, "num_input_tokens_seen": 147225696, "step": 68165 }, { "epoch": 11.120717781402936, "grad_norm": 0.06179466471076012, "learning_rate": 0.0004894803984434613, "loss": 0.0186, "num_input_tokens_seen": 147236992, "step": 68170 }, { "epoch": 11.121533442088092, "grad_norm": 0.02509087324142456, "learning_rate": 0.0004894092344831722, "loss": 0.023, "num_input_tokens_seen": 147248416, "step": 68175 }, { "epoch": 11.122349102773246, "grad_norm": 0.02915806882083416, "learning_rate": 0.0004893380707375186, "loss": 0.0308, "num_input_tokens_seen": 147257984, "step": 68180 }, { "epoch": 11.123164763458401, "grad_norm": 0.43293923139572144, "learning_rate": 0.0004892669072079423, "loss": 0.0323, "num_input_tokens_seen": 147268480, "step": 68185 }, { "epoch": 11.123980424143557, "grad_norm": 0.08693939447402954, "learning_rate": 0.000489195743895886, "loss": 0.0516, "num_input_tokens_seen": 147279776, "step": 68190 }, { "epoch": 11.124796084828711, "grad_norm": 0.0011673959670588374, "learning_rate": 0.0004891245808027913, "loss": 0.012, "num_input_tokens_seen": 147290336, "step": 68195 }, { "epoch": 11.125611745513867, "grad_norm": 0.3298425078392029, "learning_rate": 0.0004890534179301009, "loss": 0.0769, "num_input_tokens_seen": 147300864, "step": 68200 }, { "epoch": 11.12642740619902, "grad_norm": 0.3644849956035614, "learning_rate": 0.0004889822552792572, "loss": 0.0312, "num_input_tokens_seen": 147310912, "step": 68205 }, { "epoch": 11.127243066884176, "grad_norm": 0.004404593259096146, "learning_rate": 0.0004889110928517016, "loss": 0.0218, "num_input_tokens_seen": 147321344, "step": 68210 }, { "epoch": 11.12805872756933, "grad_norm": 0.21322403848171234, "learning_rate": 0.0004888399306488771, "loss": 0.0253, "num_input_tokens_seen": 147332448, "step": 68215 }, { "epoch": 11.128874388254486, "grad_norm": 0.002350582042708993, "learning_rate": 0.0004887687686722254, "loss": 0.003, "num_input_tokens_seen": 147344192, "step": 68220 }, { "epoch": 11.129690048939642, "grad_norm": 0.010090678930282593, "learning_rate": 0.000488697606923189, "loss": 0.1477, "num_input_tokens_seen": 147355104, "step": 68225 }, { "epoch": 11.130505709624796, "grad_norm": 0.0023009213618934155, "learning_rate": 0.0004886264454032097, "loss": 0.0098, "num_input_tokens_seen": 147364608, "step": 68230 }, { "epoch": 11.131321370309951, "grad_norm": 0.18410499393939972, "learning_rate": 0.0004885552841137302, "loss": 0.0118, "num_input_tokens_seen": 147375296, "step": 68235 }, { "epoch": 11.132137030995105, "grad_norm": 0.002545412862673402, "learning_rate": 0.0004884841230561922, "loss": 0.0066, "num_input_tokens_seen": 147387296, "step": 68240 }, { "epoch": 11.132952691680261, "grad_norm": 0.014841769821941853, "learning_rate": 0.0004884129622320381, "loss": 0.0104, "num_input_tokens_seen": 147398144, "step": 68245 }, { "epoch": 11.133768352365417, "grad_norm": 0.0036457956302911043, "learning_rate": 0.0004883418016427099, "loss": 0.0114, "num_input_tokens_seen": 147409504, "step": 68250 }, { "epoch": 11.13458401305057, "grad_norm": 0.0025173728354275227, "learning_rate": 0.00048827064128965014, "loss": 0.005, "num_input_tokens_seen": 147420512, "step": 68255 }, { "epoch": 11.135399673735726, "grad_norm": 0.007116433698683977, "learning_rate": 0.00048819948117430047, "loss": 0.0038, "num_input_tokens_seen": 147432672, "step": 68260 }, { "epoch": 11.13621533442088, "grad_norm": 0.0915973037481308, "learning_rate": 0.00048812832129810347, "loss": 0.1583, "num_input_tokens_seen": 147444512, "step": 68265 }, { "epoch": 11.137030995106036, "grad_norm": 0.008468952029943466, "learning_rate": 0.0004880571616625009, "loss": 0.0097, "num_input_tokens_seen": 147455296, "step": 68270 }, { "epoch": 11.137846655791192, "grad_norm": 0.004829864017665386, "learning_rate": 0.00048798600226893535, "loss": 0.0415, "num_input_tokens_seen": 147465120, "step": 68275 }, { "epoch": 11.138662316476346, "grad_norm": 0.005911378655582666, "learning_rate": 0.00048791484311884844, "loss": 0.0123, "num_input_tokens_seen": 147476512, "step": 68280 }, { "epoch": 11.139477977161501, "grad_norm": 0.6165083646774292, "learning_rate": 0.0004878436842136828, "loss": 0.2525, "num_input_tokens_seen": 147487520, "step": 68285 }, { "epoch": 11.140293637846655, "grad_norm": 0.02946476638317108, "learning_rate": 0.0004877725255548801, "loss": 0.1519, "num_input_tokens_seen": 147499584, "step": 68290 }, { "epoch": 11.141109298531811, "grad_norm": 0.010245737619698048, "learning_rate": 0.0004877013671438828, "loss": 0.0123, "num_input_tokens_seen": 147510176, "step": 68295 }, { "epoch": 11.141924959216965, "grad_norm": 0.030240066349506378, "learning_rate": 0.0004876302089821329, "loss": 0.009, "num_input_tokens_seen": 147520352, "step": 68300 }, { "epoch": 11.14274061990212, "grad_norm": 0.004862932022660971, "learning_rate": 0.0004875590510710724, "loss": 0.003, "num_input_tokens_seen": 147530560, "step": 68305 }, { "epoch": 11.143556280587276, "grad_norm": 0.2248290628194809, "learning_rate": 0.00048748789341214373, "loss": 0.0184, "num_input_tokens_seen": 147542016, "step": 68310 }, { "epoch": 11.14437194127243, "grad_norm": 0.08497530966997147, "learning_rate": 0.00048741673600678857, "loss": 0.0922, "num_input_tokens_seen": 147552896, "step": 68315 }, { "epoch": 11.145187601957586, "grad_norm": 0.02630682848393917, "learning_rate": 0.00048734557885644924, "loss": 0.0608, "num_input_tokens_seen": 147563008, "step": 68320 }, { "epoch": 11.14600326264274, "grad_norm": 0.0025665624998509884, "learning_rate": 0.00048727442196256786, "loss": 0.056, "num_input_tokens_seen": 147573568, "step": 68325 }, { "epoch": 11.146818923327896, "grad_norm": 0.008093073032796383, "learning_rate": 0.0004872032653265865, "loss": 0.1188, "num_input_tokens_seen": 147584448, "step": 68330 }, { "epoch": 11.147634584013051, "grad_norm": 0.42789462208747864, "learning_rate": 0.0004871321089499472, "loss": 0.0741, "num_input_tokens_seen": 147595680, "step": 68335 }, { "epoch": 11.148450244698205, "grad_norm": 0.010536248795688152, "learning_rate": 0.00048706095283409194, "loss": 0.006, "num_input_tokens_seen": 147606688, "step": 68340 }, { "epoch": 11.149265905383361, "grad_norm": 0.002236375818029046, "learning_rate": 0.00048698979698046286, "loss": 0.0296, "num_input_tokens_seen": 147617728, "step": 68345 }, { "epoch": 11.150081566068515, "grad_norm": 0.252946674823761, "learning_rate": 0.0004869186413905023, "loss": 0.058, "num_input_tokens_seen": 147627136, "step": 68350 }, { "epoch": 11.15089722675367, "grad_norm": 0.012744572013616562, "learning_rate": 0.00048684748606565175, "loss": 0.0104, "num_input_tokens_seen": 147638368, "step": 68355 }, { "epoch": 11.151712887438826, "grad_norm": 0.17559273540973663, "learning_rate": 0.00048677633100735387, "loss": 0.0119, "num_input_tokens_seen": 147649536, "step": 68360 }, { "epoch": 11.15252854812398, "grad_norm": 0.04494628682732582, "learning_rate": 0.00048670517621705016, "loss": 0.0073, "num_input_tokens_seen": 147660736, "step": 68365 }, { "epoch": 11.153344208809136, "grad_norm": 0.12925973534584045, "learning_rate": 0.0004866340216961832, "loss": 0.0964, "num_input_tokens_seen": 147672704, "step": 68370 }, { "epoch": 11.15415986949429, "grad_norm": 0.03734464943408966, "learning_rate": 0.00048656286744619447, "loss": 0.0714, "num_input_tokens_seen": 147684128, "step": 68375 }, { "epoch": 11.154975530179446, "grad_norm": 0.026860255748033524, "learning_rate": 0.0004864917134685265, "loss": 0.192, "num_input_tokens_seen": 147694368, "step": 68380 }, { "epoch": 11.1557911908646, "grad_norm": 0.049138400703668594, "learning_rate": 0.0004864205597646209, "loss": 0.0124, "num_input_tokens_seen": 147704352, "step": 68385 }, { "epoch": 11.156606851549755, "grad_norm": 0.012718032114207745, "learning_rate": 0.00048634940633592006, "loss": 0.0095, "num_input_tokens_seen": 147716064, "step": 68390 }, { "epoch": 11.15742251223491, "grad_norm": 0.022052332758903503, "learning_rate": 0.00048627825318386567, "loss": 0.1129, "num_input_tokens_seen": 147726720, "step": 68395 }, { "epoch": 11.158238172920065, "grad_norm": 0.007494083605706692, "learning_rate": 0.00048620710030990004, "loss": 0.0105, "num_input_tokens_seen": 147737632, "step": 68400 }, { "epoch": 11.15905383360522, "grad_norm": 0.0026938801165670156, "learning_rate": 0.0004861359477154648, "loss": 0.0068, "num_input_tokens_seen": 147748160, "step": 68405 }, { "epoch": 11.159869494290374, "grad_norm": 0.010102441534399986, "learning_rate": 0.00048606479540200243, "loss": 0.2594, "num_input_tokens_seen": 147758560, "step": 68410 }, { "epoch": 11.16068515497553, "grad_norm": 0.009311516769230366, "learning_rate": 0.00048599364337095443, "loss": 0.0699, "num_input_tokens_seen": 147769568, "step": 68415 }, { "epoch": 11.161500815660686, "grad_norm": 0.015968909487128258, "learning_rate": 0.000485922491623763, "loss": 0.0083, "num_input_tokens_seen": 147780480, "step": 68420 }, { "epoch": 11.16231647634584, "grad_norm": 0.003891808446496725, "learning_rate": 0.0004858513401618704, "loss": 0.009, "num_input_tokens_seen": 147792096, "step": 68425 }, { "epoch": 11.163132137030995, "grad_norm": 0.014763821847736835, "learning_rate": 0.00048578018898671804, "loss": 0.0104, "num_input_tokens_seen": 147802144, "step": 68430 }, { "epoch": 11.16394779771615, "grad_norm": 0.1828288435935974, "learning_rate": 0.0004857090380997484, "loss": 0.0814, "num_input_tokens_seen": 147812480, "step": 68435 }, { "epoch": 11.164763458401305, "grad_norm": 0.23500409722328186, "learning_rate": 0.00048563788750240314, "loss": 0.1066, "num_input_tokens_seen": 147823328, "step": 68440 }, { "epoch": 11.16557911908646, "grad_norm": 0.29729729890823364, "learning_rate": 0.00048556673719612445, "loss": 0.1152, "num_input_tokens_seen": 147832992, "step": 68445 }, { "epoch": 11.166394779771615, "grad_norm": 0.04158762842416763, "learning_rate": 0.00048549558718235386, "loss": 0.0312, "num_input_tokens_seen": 147844160, "step": 68450 }, { "epoch": 11.16721044045677, "grad_norm": 0.023969994857907295, "learning_rate": 0.0004854244374625339, "loss": 0.016, "num_input_tokens_seen": 147855328, "step": 68455 }, { "epoch": 11.168026101141924, "grad_norm": 0.33585116267204285, "learning_rate": 0.00048535328803810595, "loss": 0.1219, "num_input_tokens_seen": 147866240, "step": 68460 }, { "epoch": 11.16884176182708, "grad_norm": 0.02872396446764469, "learning_rate": 0.0004852821389105123, "loss": 0.073, "num_input_tokens_seen": 147877824, "step": 68465 }, { "epoch": 11.169657422512234, "grad_norm": 0.20101076364517212, "learning_rate": 0.00048521099008119484, "loss": 0.0217, "num_input_tokens_seen": 147887200, "step": 68470 }, { "epoch": 11.17047308319739, "grad_norm": 0.23845504224300385, "learning_rate": 0.0004851398415515954, "loss": 0.0247, "num_input_tokens_seen": 147896224, "step": 68475 }, { "epoch": 11.171288743882545, "grad_norm": 0.02124555967748165, "learning_rate": 0.0004850686933231559, "loss": 0.0392, "num_input_tokens_seen": 147906368, "step": 68480 }, { "epoch": 11.1721044045677, "grad_norm": 0.008393766358494759, "learning_rate": 0.00048499754539731827, "loss": 0.0166, "num_input_tokens_seen": 147917952, "step": 68485 }, { "epoch": 11.172920065252855, "grad_norm": 0.02445857785642147, "learning_rate": 0.0004849263977755243, "loss": 0.0246, "num_input_tokens_seen": 147928064, "step": 68490 }, { "epoch": 11.173735725938009, "grad_norm": 0.07245718687772751, "learning_rate": 0.00048485525045921627, "loss": 0.0295, "num_input_tokens_seen": 147938336, "step": 68495 }, { "epoch": 11.174551386623165, "grad_norm": 0.008152371272444725, "learning_rate": 0.00048478410344983554, "loss": 0.0155, "num_input_tokens_seen": 147948480, "step": 68500 }, { "epoch": 11.17536704730832, "grad_norm": 0.18014536798000336, "learning_rate": 0.00048471295674882447, "loss": 0.2172, "num_input_tokens_seen": 147958560, "step": 68505 }, { "epoch": 11.176182707993474, "grad_norm": 0.0194853488355875, "learning_rate": 0.0004846418103576245, "loss": 0.0456, "num_input_tokens_seen": 147969056, "step": 68510 }, { "epoch": 11.17699836867863, "grad_norm": 0.278815358877182, "learning_rate": 0.000484570664277678, "loss": 0.1935, "num_input_tokens_seen": 147980064, "step": 68515 }, { "epoch": 11.177814029363784, "grad_norm": 0.015023061074316502, "learning_rate": 0.00048449951851042627, "loss": 0.0262, "num_input_tokens_seen": 147990752, "step": 68520 }, { "epoch": 11.17862969004894, "grad_norm": 0.12025143951177597, "learning_rate": 0.0004844283730573115, "loss": 0.0308, "num_input_tokens_seen": 148002176, "step": 68525 }, { "epoch": 11.179445350734095, "grad_norm": 0.008281445130705833, "learning_rate": 0.0004843572279197757, "loss": 0.0199, "num_input_tokens_seen": 148011872, "step": 68530 }, { "epoch": 11.18026101141925, "grad_norm": 0.019231772050261497, "learning_rate": 0.0004842860830992604, "loss": 0.0178, "num_input_tokens_seen": 148022592, "step": 68535 }, { "epoch": 11.181076672104405, "grad_norm": 0.0041782851330935955, "learning_rate": 0.00048421493859720767, "loss": 0.0598, "num_input_tokens_seen": 148032832, "step": 68540 }, { "epoch": 11.181892332789559, "grad_norm": 0.09217726439237595, "learning_rate": 0.000484143794415059, "loss": 0.0425, "num_input_tokens_seen": 148042816, "step": 68545 }, { "epoch": 11.182707993474715, "grad_norm": 0.31993457674980164, "learning_rate": 0.00048407265055425673, "loss": 0.1465, "num_input_tokens_seen": 148053056, "step": 68550 }, { "epoch": 11.18352365415987, "grad_norm": 0.02010430581867695, "learning_rate": 0.00048400150701624216, "loss": 0.0309, "num_input_tokens_seen": 148062912, "step": 68555 }, { "epoch": 11.184339314845024, "grad_norm": 0.020727120339870453, "learning_rate": 0.0004839303638024576, "loss": 0.009, "num_input_tokens_seen": 148074176, "step": 68560 }, { "epoch": 11.18515497553018, "grad_norm": 0.016470473259687424, "learning_rate": 0.0004838592209143444, "loss": 0.0048, "num_input_tokens_seen": 148085728, "step": 68565 }, { "epoch": 11.185970636215334, "grad_norm": 0.007106783799827099, "learning_rate": 0.0004837880783533447, "loss": 0.0293, "num_input_tokens_seen": 148095744, "step": 68570 }, { "epoch": 11.18678629690049, "grad_norm": 0.04211915656924248, "learning_rate": 0.00048371693612089996, "loss": 0.0085, "num_input_tokens_seen": 148106592, "step": 68575 }, { "epoch": 11.187601957585644, "grad_norm": 0.026171937584877014, "learning_rate": 0.00048364579421845245, "loss": 0.0794, "num_input_tokens_seen": 148118272, "step": 68580 }, { "epoch": 11.1884176182708, "grad_norm": 0.028126433491706848, "learning_rate": 0.0004835746526474434, "loss": 0.0699, "num_input_tokens_seen": 148129856, "step": 68585 }, { "epoch": 11.189233278955955, "grad_norm": 0.49054139852523804, "learning_rate": 0.00048350351140931505, "loss": 0.0506, "num_input_tokens_seen": 148140544, "step": 68590 }, { "epoch": 11.190048939641109, "grad_norm": 0.4415249526500702, "learning_rate": 0.00048343237050550876, "loss": 0.0405, "num_input_tokens_seen": 148149600, "step": 68595 }, { "epoch": 11.190864600326265, "grad_norm": 0.013207647018134594, "learning_rate": 0.0004833612299374667, "loss": 0.0279, "num_input_tokens_seen": 148161376, "step": 68600 }, { "epoch": 11.191680261011419, "grad_norm": 0.006056750193238258, "learning_rate": 0.0004832900897066303, "loss": 0.0179, "num_input_tokens_seen": 148171968, "step": 68605 }, { "epoch": 11.192495921696574, "grad_norm": 0.42517709732055664, "learning_rate": 0.0004832189498144415, "loss": 0.0969, "num_input_tokens_seen": 148183168, "step": 68610 }, { "epoch": 11.19331158238173, "grad_norm": 0.029658634215593338, "learning_rate": 0.0004831478102623419, "loss": 0.0057, "num_input_tokens_seen": 148194592, "step": 68615 }, { "epoch": 11.194127243066884, "grad_norm": 0.004472116474062204, "learning_rate": 0.0004830766710517733, "loss": 0.0063, "num_input_tokens_seen": 148206880, "step": 68620 }, { "epoch": 11.19494290375204, "grad_norm": 0.004931016359478235, "learning_rate": 0.00048300553218417753, "loss": 0.0322, "num_input_tokens_seen": 148217504, "step": 68625 }, { "epoch": 11.195758564437194, "grad_norm": 1.0115134716033936, "learning_rate": 0.0004829343936609961, "loss": 0.0433, "num_input_tokens_seen": 148228160, "step": 68630 }, { "epoch": 11.19657422512235, "grad_norm": 0.13028618693351746, "learning_rate": 0.00048286325548367083, "loss": 0.0401, "num_input_tokens_seen": 148238816, "step": 68635 }, { "epoch": 11.197389885807505, "grad_norm": 0.42092257738113403, "learning_rate": 0.0004827921176536435, "loss": 0.0875, "num_input_tokens_seen": 148249152, "step": 68640 }, { "epoch": 11.198205546492659, "grad_norm": 0.009416461922228336, "learning_rate": 0.00048272098017235573, "loss": 0.0141, "num_input_tokens_seen": 148261024, "step": 68645 }, { "epoch": 11.199021207177815, "grad_norm": 0.02090616337954998, "learning_rate": 0.0004826498430412492, "loss": 0.017, "num_input_tokens_seen": 148271712, "step": 68650 }, { "epoch": 11.199836867862969, "grad_norm": 0.0011445780983194709, "learning_rate": 0.00048257870626176565, "loss": 0.0089, "num_input_tokens_seen": 148283584, "step": 68655 }, { "epoch": 11.200652528548124, "grad_norm": 0.0027027344331145287, "learning_rate": 0.00048250756983534657, "loss": 0.021, "num_input_tokens_seen": 148293408, "step": 68660 }, { "epoch": 11.201468189233278, "grad_norm": 0.006044385023415089, "learning_rate": 0.000482436433763434, "loss": 0.0261, "num_input_tokens_seen": 148303776, "step": 68665 }, { "epoch": 11.202283849918434, "grad_norm": 0.004509706981480122, "learning_rate": 0.00048236529804746915, "loss": 0.0046, "num_input_tokens_seen": 148315008, "step": 68670 }, { "epoch": 11.20309951060359, "grad_norm": 0.18956010043621063, "learning_rate": 0.0004822941626888941, "loss": 0.0489, "num_input_tokens_seen": 148325344, "step": 68675 }, { "epoch": 11.203915171288743, "grad_norm": 0.012917671352624893, "learning_rate": 0.0004822230276891502, "loss": 0.0532, "num_input_tokens_seen": 148337120, "step": 68680 }, { "epoch": 11.2047308319739, "grad_norm": 0.04024987295269966, "learning_rate": 0.00048215189304967934, "loss": 0.0764, "num_input_tokens_seen": 148348576, "step": 68685 }, { "epoch": 11.205546492659053, "grad_norm": 0.737769365310669, "learning_rate": 0.00048208075877192275, "loss": 0.1234, "num_input_tokens_seen": 148359552, "step": 68690 }, { "epoch": 11.206362153344209, "grad_norm": 0.44505423307418823, "learning_rate": 0.0004820096248573226, "loss": 0.1929, "num_input_tokens_seen": 148369568, "step": 68695 }, { "epoch": 11.207177814029365, "grad_norm": 0.008643914945423603, "learning_rate": 0.00048193849130732, "loss": 0.0077, "num_input_tokens_seen": 148381184, "step": 68700 }, { "epoch": 11.207993474714518, "grad_norm": 0.060006801038980484, "learning_rate": 0.00048186735812335695, "loss": 0.0786, "num_input_tokens_seen": 148390880, "step": 68705 }, { "epoch": 11.208809135399674, "grad_norm": 0.0032924246042966843, "learning_rate": 0.0004817962253068747, "loss": 0.0805, "num_input_tokens_seen": 148402080, "step": 68710 }, { "epoch": 11.209624796084828, "grad_norm": 0.12051805853843689, "learning_rate": 0.0004817250928593153, "loss": 0.0332, "num_input_tokens_seen": 148413440, "step": 68715 }, { "epoch": 11.210440456769984, "grad_norm": 0.020529478788375854, "learning_rate": 0.0004816539607821198, "loss": 0.0171, "num_input_tokens_seen": 148424640, "step": 68720 }, { "epoch": 11.21125611745514, "grad_norm": 0.010123873129487038, "learning_rate": 0.0004815828290767303, "loss": 0.0242, "num_input_tokens_seen": 148434240, "step": 68725 }, { "epoch": 11.212071778140293, "grad_norm": 0.04959236830472946, "learning_rate": 0.00048151169774458797, "loss": 0.0102, "num_input_tokens_seen": 148443488, "step": 68730 }, { "epoch": 11.21288743882545, "grad_norm": 0.467986524105072, "learning_rate": 0.00048144056678713445, "loss": 0.1084, "num_input_tokens_seen": 148453120, "step": 68735 }, { "epoch": 11.213703099510603, "grad_norm": 0.7139317393302917, "learning_rate": 0.00048136943620581164, "loss": 0.0229, "num_input_tokens_seen": 148463712, "step": 68740 }, { "epoch": 11.214518760195759, "grad_norm": 0.10186992585659027, "learning_rate": 0.00048129830600206067, "loss": 0.0082, "num_input_tokens_seen": 148474336, "step": 68745 }, { "epoch": 11.215334420880913, "grad_norm": 0.004726898390799761, "learning_rate": 0.0004812271761773234, "loss": 0.0443, "num_input_tokens_seen": 148485184, "step": 68750 }, { "epoch": 11.216150081566068, "grad_norm": 0.016972597688436508, "learning_rate": 0.00048115604673304105, "loss": 0.035, "num_input_tokens_seen": 148495712, "step": 68755 }, { "epoch": 11.216965742251224, "grad_norm": 0.2422623485326767, "learning_rate": 0.0004810849176706555, "loss": 0.0959, "num_input_tokens_seen": 148507200, "step": 68760 }, { "epoch": 11.217781402936378, "grad_norm": 0.3776349127292633, "learning_rate": 0.00048101378899160786, "loss": 0.1002, "num_input_tokens_seen": 148517216, "step": 68765 }, { "epoch": 11.218597063621534, "grad_norm": 0.016399575397372246, "learning_rate": 0.0004809426606973401, "loss": 0.0085, "num_input_tokens_seen": 148527520, "step": 68770 }, { "epoch": 11.219412724306688, "grad_norm": 0.003865182166919112, "learning_rate": 0.00048087153278929327, "loss": 0.0114, "num_input_tokens_seen": 148537728, "step": 68775 }, { "epoch": 11.220228384991843, "grad_norm": 0.1378757506608963, "learning_rate": 0.0004808004052689093, "loss": 0.0282, "num_input_tokens_seen": 148548736, "step": 68780 }, { "epoch": 11.221044045676999, "grad_norm": 0.004386617336422205, "learning_rate": 0.0004807292781376294, "loss": 0.0657, "num_input_tokens_seen": 148559776, "step": 68785 }, { "epoch": 11.221859706362153, "grad_norm": 0.014153995551168919, "learning_rate": 0.0004806581513968951, "loss": 0.0088, "num_input_tokens_seen": 148571200, "step": 68790 }, { "epoch": 11.222675367047309, "grad_norm": 0.004978655371814966, "learning_rate": 0.00048058702504814795, "loss": 0.0824, "num_input_tokens_seen": 148582528, "step": 68795 }, { "epoch": 11.223491027732463, "grad_norm": 0.2707526683807373, "learning_rate": 0.0004805158990928293, "loss": 0.0324, "num_input_tokens_seen": 148593216, "step": 68800 }, { "epoch": 11.224306688417618, "grad_norm": 0.02647767774760723, "learning_rate": 0.0004804447735323806, "loss": 0.0042, "num_input_tokens_seen": 148605248, "step": 68805 }, { "epoch": 11.225122349102774, "grad_norm": 0.008877074345946312, "learning_rate": 0.0004803736483682436, "loss": 0.1179, "num_input_tokens_seen": 148614976, "step": 68810 }, { "epoch": 11.225938009787928, "grad_norm": 0.005164226982742548, "learning_rate": 0.0004803025236018593, "loss": 0.0199, "num_input_tokens_seen": 148625952, "step": 68815 }, { "epoch": 11.226753670473084, "grad_norm": 0.04235182702541351, "learning_rate": 0.00048023139923466954, "loss": 0.1503, "num_input_tokens_seen": 148636800, "step": 68820 }, { "epoch": 11.227569331158238, "grad_norm": 0.3473689556121826, "learning_rate": 0.00048016027526811536, "loss": 0.0951, "num_input_tokens_seen": 148648000, "step": 68825 }, { "epoch": 11.228384991843393, "grad_norm": 0.5196056365966797, "learning_rate": 0.00048008915170363853, "loss": 0.0642, "num_input_tokens_seen": 148659168, "step": 68830 }, { "epoch": 11.229200652528547, "grad_norm": 0.04722573608160019, "learning_rate": 0.0004800180285426802, "loss": 0.0235, "num_input_tokens_seen": 148670144, "step": 68835 }, { "epoch": 11.230016313213703, "grad_norm": 0.006056048907339573, "learning_rate": 0.00047994690578668175, "loss": 0.012, "num_input_tokens_seen": 148679808, "step": 68840 }, { "epoch": 11.230831973898859, "grad_norm": 0.028124723583459854, "learning_rate": 0.000479875783437085, "loss": 0.0065, "num_input_tokens_seen": 148690720, "step": 68845 }, { "epoch": 11.231647634584013, "grad_norm": 0.03521675989031792, "learning_rate": 0.00047980466149533075, "loss": 0.0056, "num_input_tokens_seen": 148703104, "step": 68850 }, { "epoch": 11.232463295269168, "grad_norm": 0.020668139681220055, "learning_rate": 0.0004797335399628609, "loss": 0.0128, "num_input_tokens_seen": 148714144, "step": 68855 }, { "epoch": 11.233278955954322, "grad_norm": 0.3109743893146515, "learning_rate": 0.0004796624188411163, "loss": 0.0206, "num_input_tokens_seen": 148725952, "step": 68860 }, { "epoch": 11.234094616639478, "grad_norm": 0.002848732518032193, "learning_rate": 0.00047959129813153885, "loss": 0.0111, "num_input_tokens_seen": 148736384, "step": 68865 }, { "epoch": 11.234910277324634, "grad_norm": 0.11283021420240402, "learning_rate": 0.00047952017783556945, "loss": 0.0113, "num_input_tokens_seen": 148746720, "step": 68870 }, { "epoch": 11.235725938009788, "grad_norm": 0.06801166385412216, "learning_rate": 0.00047944905795464977, "loss": 0.0148, "num_input_tokens_seen": 148757888, "step": 68875 }, { "epoch": 11.236541598694943, "grad_norm": 0.34667694568634033, "learning_rate": 0.0004793779384902208, "loss": 0.1412, "num_input_tokens_seen": 148768544, "step": 68880 }, { "epoch": 11.237357259380097, "grad_norm": 0.4156077802181244, "learning_rate": 0.00047930681944372434, "loss": 0.0829, "num_input_tokens_seen": 148780896, "step": 68885 }, { "epoch": 11.238172920065253, "grad_norm": 0.0029288295190781355, "learning_rate": 0.00047923570081660115, "loss": 0.0029, "num_input_tokens_seen": 148792992, "step": 68890 }, { "epoch": 11.238988580750409, "grad_norm": 0.1302565187215805, "learning_rate": 0.0004791645826102931, "loss": 0.0099, "num_input_tokens_seen": 148802560, "step": 68895 }, { "epoch": 11.239804241435563, "grad_norm": 0.0034214449115097523, "learning_rate": 0.000479093464826241, "loss": 0.0178, "num_input_tokens_seen": 148813344, "step": 68900 }, { "epoch": 11.240619902120718, "grad_norm": 0.015604183077812195, "learning_rate": 0.00047902234746588653, "loss": 0.0411, "num_input_tokens_seen": 148824064, "step": 68905 }, { "epoch": 11.241435562805872, "grad_norm": 0.04579491913318634, "learning_rate": 0.0004789512305306706, "loss": 0.0316, "num_input_tokens_seen": 148835232, "step": 68910 }, { "epoch": 11.242251223491028, "grad_norm": 0.0059250290505588055, "learning_rate": 0.0004788801140220349, "loss": 0.0095, "num_input_tokens_seen": 148845504, "step": 68915 }, { "epoch": 11.243066884176184, "grad_norm": 0.013930793851613998, "learning_rate": 0.00047880899794142026, "loss": 0.1052, "num_input_tokens_seen": 148857440, "step": 68920 }, { "epoch": 11.243882544861338, "grad_norm": 0.003167049726471305, "learning_rate": 0.00047873788229026826, "loss": 0.0235, "num_input_tokens_seen": 148868960, "step": 68925 }, { "epoch": 11.244698205546493, "grad_norm": 0.6725971698760986, "learning_rate": 0.0004786667670700201, "loss": 0.0462, "num_input_tokens_seen": 148879168, "step": 68930 }, { "epoch": 11.245513866231647, "grad_norm": 0.552683413028717, "learning_rate": 0.00047859565228211695, "loss": 0.1268, "num_input_tokens_seen": 148890016, "step": 68935 }, { "epoch": 11.246329526916803, "grad_norm": 0.0028921207413077354, "learning_rate": 0.00047852453792799997, "loss": 0.006, "num_input_tokens_seen": 148901344, "step": 68940 }, { "epoch": 11.247145187601957, "grad_norm": 0.008932768367230892, "learning_rate": 0.0004784534240091105, "loss": 0.0332, "num_input_tokens_seen": 148912480, "step": 68945 }, { "epoch": 11.247960848287113, "grad_norm": 0.005634156055748463, "learning_rate": 0.00047838231052688975, "loss": 0.0064, "num_input_tokens_seen": 148922592, "step": 68950 }, { "epoch": 11.248776508972268, "grad_norm": 0.005630916450172663, "learning_rate": 0.0004783111974827789, "loss": 0.0109, "num_input_tokens_seen": 148933824, "step": 68955 }, { "epoch": 11.249592169657422, "grad_norm": 0.09499726444482803, "learning_rate": 0.0004782400848782192, "loss": 0.0164, "num_input_tokens_seen": 148945152, "step": 68960 }, { "epoch": 11.250407830342578, "grad_norm": 0.16316631436347961, "learning_rate": 0.0004781689727146517, "loss": 0.0227, "num_input_tokens_seen": 148957504, "step": 68965 }, { "epoch": 11.251223491027732, "grad_norm": 0.001331451814621687, "learning_rate": 0.0004780978609935178, "loss": 0.0178, "num_input_tokens_seen": 148969312, "step": 68970 }, { "epoch": 11.252039151712887, "grad_norm": 0.042243119329214096, "learning_rate": 0.00047802674971625825, "loss": 0.0096, "num_input_tokens_seen": 148978656, "step": 68975 }, { "epoch": 11.252854812398043, "grad_norm": 0.3264990746974945, "learning_rate": 0.0004779556388843148, "loss": 0.0268, "num_input_tokens_seen": 148989408, "step": 68980 }, { "epoch": 11.253670473083197, "grad_norm": 0.40584519505500793, "learning_rate": 0.0004778845284991281, "loss": 0.0428, "num_input_tokens_seen": 148999360, "step": 68985 }, { "epoch": 11.254486133768353, "grad_norm": 0.0077764419838786125, "learning_rate": 0.00047781341856213965, "loss": 0.0158, "num_input_tokens_seen": 149010048, "step": 68990 }, { "epoch": 11.255301794453507, "grad_norm": 0.0264874417334795, "learning_rate": 0.00047774230907479025, "loss": 0.0027, "num_input_tokens_seen": 149019552, "step": 68995 }, { "epoch": 11.256117455138662, "grad_norm": 0.0019950123969465494, "learning_rate": 0.0004776712000385214, "loss": 0.0248, "num_input_tokens_seen": 149029216, "step": 69000 }, { "epoch": 11.256933115823816, "grad_norm": 0.1232723593711853, "learning_rate": 0.0004776000914547738, "loss": 0.0121, "num_input_tokens_seen": 149041088, "step": 69005 }, { "epoch": 11.257748776508972, "grad_norm": 0.002051304094493389, "learning_rate": 0.00047752898332498894, "loss": 0.0062, "num_input_tokens_seen": 149052224, "step": 69010 }, { "epoch": 11.258564437194128, "grad_norm": 0.0008948579197749496, "learning_rate": 0.00047745787565060756, "loss": 0.0121, "num_input_tokens_seen": 149063264, "step": 69015 }, { "epoch": 11.259380097879282, "grad_norm": 0.011743937619030476, "learning_rate": 0.0004773867684330711, "loss": 0.0215, "num_input_tokens_seen": 149074240, "step": 69020 }, { "epoch": 11.260195758564437, "grad_norm": 0.7066226601600647, "learning_rate": 0.0004773156616738203, "loss": 0.0792, "num_input_tokens_seen": 149085984, "step": 69025 }, { "epoch": 11.261011419249591, "grad_norm": 0.012210289016366005, "learning_rate": 0.00047724455537429656, "loss": 0.1053, "num_input_tokens_seen": 149096736, "step": 69030 }, { "epoch": 11.261827079934747, "grad_norm": 0.001831859932281077, "learning_rate": 0.00047717344953594054, "loss": 0.0876, "num_input_tokens_seen": 149107712, "step": 69035 }, { "epoch": 11.262642740619903, "grad_norm": 0.01406815368682146, "learning_rate": 0.0004771023441601938, "loss": 0.0034, "num_input_tokens_seen": 149118240, "step": 69040 }, { "epoch": 11.263458401305057, "grad_norm": 0.06279000639915466, "learning_rate": 0.0004770312392484968, "loss": 0.0116, "num_input_tokens_seen": 149128896, "step": 69045 }, { "epoch": 11.264274061990212, "grad_norm": 0.007155647035688162, "learning_rate": 0.000476960134802291, "loss": 0.0221, "num_input_tokens_seen": 149140960, "step": 69050 }, { "epoch": 11.265089722675366, "grad_norm": 0.0038323281332850456, "learning_rate": 0.00047688903082301746, "loss": 0.0078, "num_input_tokens_seen": 149152704, "step": 69055 }, { "epoch": 11.265905383360522, "grad_norm": 0.03719216585159302, "learning_rate": 0.00047681792731211684, "loss": 0.0182, "num_input_tokens_seen": 149164192, "step": 69060 }, { "epoch": 11.266721044045678, "grad_norm": 0.015115122310817242, "learning_rate": 0.00047674682427103045, "loss": 0.01, "num_input_tokens_seen": 149175008, "step": 69065 }, { "epoch": 11.267536704730832, "grad_norm": 0.050118640065193176, "learning_rate": 0.00047667572170119905, "loss": 0.0488, "num_input_tokens_seen": 149185440, "step": 69070 }, { "epoch": 11.268352365415987, "grad_norm": 0.003916706424206495, "learning_rate": 0.00047660461960406385, "loss": 0.0159, "num_input_tokens_seen": 149197184, "step": 69075 }, { "epoch": 11.269168026101141, "grad_norm": 0.0028812792152166367, "learning_rate": 0.0004765335179810656, "loss": 0.0035, "num_input_tokens_seen": 149207104, "step": 69080 }, { "epoch": 11.269983686786297, "grad_norm": 0.0012590938713401556, "learning_rate": 0.00047646241683364554, "loss": 0.0129, "num_input_tokens_seen": 149218464, "step": 69085 }, { "epoch": 11.270799347471453, "grad_norm": 0.01192085538059473, "learning_rate": 0.0004763913161632443, "loss": 0.0174, "num_input_tokens_seen": 149227328, "step": 69090 }, { "epoch": 11.271615008156607, "grad_norm": 0.031869806349277496, "learning_rate": 0.00047632021597130304, "loss": 0.0346, "num_input_tokens_seen": 149238752, "step": 69095 }, { "epoch": 11.272430668841762, "grad_norm": 0.3605614900588989, "learning_rate": 0.0004762491162592627, "loss": 0.0333, "num_input_tokens_seen": 149249024, "step": 69100 }, { "epoch": 11.273246329526916, "grad_norm": 0.03341824561357498, "learning_rate": 0.00047617801702856406, "loss": 0.0162, "num_input_tokens_seen": 149259968, "step": 69105 }, { "epoch": 11.274061990212072, "grad_norm": 0.015178644098341465, "learning_rate": 0.00047610691828064815, "loss": 0.0139, "num_input_tokens_seen": 149269632, "step": 69110 }, { "epoch": 11.274877650897226, "grad_norm": 0.00792383961379528, "learning_rate": 0.0004760358200169559, "loss": 0.0043, "num_input_tokens_seen": 149280480, "step": 69115 }, { "epoch": 11.275693311582382, "grad_norm": 0.0019493248546496034, "learning_rate": 0.000475964722238928, "loss": 0.002, "num_input_tokens_seen": 149291616, "step": 69120 }, { "epoch": 11.276508972267537, "grad_norm": 0.0008896426879800856, "learning_rate": 0.00047589362494800574, "loss": 0.0016, "num_input_tokens_seen": 149302368, "step": 69125 }, { "epoch": 11.277324632952691, "grad_norm": 0.01973794586956501, "learning_rate": 0.00047582252814562954, "loss": 0.0232, "num_input_tokens_seen": 149311648, "step": 69130 }, { "epoch": 11.278140293637847, "grad_norm": 0.0590951032936573, "learning_rate": 0.0004757514318332407, "loss": 0.0864, "num_input_tokens_seen": 149322912, "step": 69135 }, { "epoch": 11.278955954323001, "grad_norm": 0.03127824887633324, "learning_rate": 0.0004756803360122796, "loss": 0.0046, "num_input_tokens_seen": 149335168, "step": 69140 }, { "epoch": 11.279771615008157, "grad_norm": 0.06734327971935272, "learning_rate": 0.00047560924068418763, "loss": 0.0141, "num_input_tokens_seen": 149346368, "step": 69145 }, { "epoch": 11.280587275693312, "grad_norm": 0.03294230252504349, "learning_rate": 0.00047553814585040506, "loss": 0.0062, "num_input_tokens_seen": 149356544, "step": 69150 }, { "epoch": 11.281402936378466, "grad_norm": 0.010984640568494797, "learning_rate": 0.00047546705151237323, "loss": 0.007, "num_input_tokens_seen": 149366432, "step": 69155 }, { "epoch": 11.282218597063622, "grad_norm": 0.008774088695645332, "learning_rate": 0.00047539595767153255, "loss": 0.0199, "num_input_tokens_seen": 149377216, "step": 69160 }, { "epoch": 11.283034257748776, "grad_norm": 0.021760782226920128, "learning_rate": 0.00047532486432932394, "loss": 0.0883, "num_input_tokens_seen": 149388256, "step": 69165 }, { "epoch": 11.283849918433932, "grad_norm": 0.0007815745775587857, "learning_rate": 0.00047525377148718845, "loss": 0.014, "num_input_tokens_seen": 149400224, "step": 69170 }, { "epoch": 11.284665579119087, "grad_norm": 0.10448987782001495, "learning_rate": 0.00047518267914656656, "loss": 0.039, "num_input_tokens_seen": 149411808, "step": 69175 }, { "epoch": 11.285481239804241, "grad_norm": 0.04002084583044052, "learning_rate": 0.0004751115873088992, "loss": 0.1977, "num_input_tokens_seen": 149422272, "step": 69180 }, { "epoch": 11.286296900489397, "grad_norm": 0.4018106162548065, "learning_rate": 0.0004750404959756271, "loss": 0.0552, "num_input_tokens_seen": 149433664, "step": 69185 }, { "epoch": 11.28711256117455, "grad_norm": 0.1768367737531662, "learning_rate": 0.0004749694051481911, "loss": 0.0444, "num_input_tokens_seen": 149443936, "step": 69190 }, { "epoch": 11.287928221859707, "grad_norm": 0.532922089099884, "learning_rate": 0.00047489831482803167, "loss": 0.0503, "num_input_tokens_seen": 149453824, "step": 69195 }, { "epoch": 11.28874388254486, "grad_norm": 0.013889400288462639, "learning_rate": 0.00047482722501658993, "loss": 0.0031, "num_input_tokens_seen": 149464992, "step": 69200 }, { "epoch": 11.289559543230016, "grad_norm": 0.020058369264006615, "learning_rate": 0.00047475613571530624, "loss": 0.0058, "num_input_tokens_seen": 149476672, "step": 69205 }, { "epoch": 11.290375203915172, "grad_norm": 0.0016307708574458957, "learning_rate": 0.0004746850469256216, "loss": 0.1704, "num_input_tokens_seen": 149487392, "step": 69210 }, { "epoch": 11.291190864600326, "grad_norm": 0.014769136905670166, "learning_rate": 0.0004746139586489765, "loss": 0.0223, "num_input_tokens_seen": 149498208, "step": 69215 }, { "epoch": 11.292006525285482, "grad_norm": 0.0010451297275722027, "learning_rate": 0.00047454287088681194, "loss": 0.0099, "num_input_tokens_seen": 149509280, "step": 69220 }, { "epoch": 11.292822185970635, "grad_norm": 0.005814549047499895, "learning_rate": 0.0004744717836405681, "loss": 0.0855, "num_input_tokens_seen": 149519808, "step": 69225 }, { "epoch": 11.293637846655791, "grad_norm": 0.041108760982751846, "learning_rate": 0.00047440069691168617, "loss": 0.0058, "num_input_tokens_seen": 149530208, "step": 69230 }, { "epoch": 11.294453507340947, "grad_norm": 0.517116367816925, "learning_rate": 0.0004743296107016065, "loss": 0.0336, "num_input_tokens_seen": 149541728, "step": 69235 }, { "epoch": 11.2952691680261, "grad_norm": 0.3088570833206177, "learning_rate": 0.0004742585250117698, "loss": 0.0157, "num_input_tokens_seen": 149552544, "step": 69240 }, { "epoch": 11.296084828711257, "grad_norm": 1.3200207948684692, "learning_rate": 0.00047418743984361676, "loss": 0.0721, "num_input_tokens_seen": 149563456, "step": 69245 }, { "epoch": 11.29690048939641, "grad_norm": 0.4354603886604309, "learning_rate": 0.0004741163551985881, "loss": 0.0331, "num_input_tokens_seen": 149574336, "step": 69250 }, { "epoch": 11.297716150081566, "grad_norm": 0.0031291439663618803, "learning_rate": 0.00047404527107812423, "loss": 0.002, "num_input_tokens_seen": 149584512, "step": 69255 }, { "epoch": 11.298531810766722, "grad_norm": 0.43325313925743103, "learning_rate": 0.00047397418748366596, "loss": 0.0693, "num_input_tokens_seen": 149594176, "step": 69260 }, { "epoch": 11.299347471451876, "grad_norm": 0.08179045468568802, "learning_rate": 0.0004739031044166536, "loss": 0.0592, "num_input_tokens_seen": 149605216, "step": 69265 }, { "epoch": 11.300163132137031, "grad_norm": 0.00407014973461628, "learning_rate": 0.0004738320218785281, "loss": 0.0163, "num_input_tokens_seen": 149616480, "step": 69270 }, { "epoch": 11.300978792822185, "grad_norm": 0.013852819800376892, "learning_rate": 0.00047376093987072985, "loss": 0.0048, "num_input_tokens_seen": 149627392, "step": 69275 }, { "epoch": 11.301794453507341, "grad_norm": 0.002191155683249235, "learning_rate": 0.00047368985839469946, "loss": 0.0029, "num_input_tokens_seen": 149638144, "step": 69280 }, { "epoch": 11.302610114192497, "grad_norm": 0.059072766453027725, "learning_rate": 0.00047361877745187743, "loss": 0.0103, "num_input_tokens_seen": 149648640, "step": 69285 }, { "epoch": 11.30342577487765, "grad_norm": 0.4255982041358948, "learning_rate": 0.0004735476970437043, "loss": 0.1163, "num_input_tokens_seen": 149659392, "step": 69290 }, { "epoch": 11.304241435562806, "grad_norm": 0.06577350199222565, "learning_rate": 0.0004734766171716208, "loss": 0.02, "num_input_tokens_seen": 149669184, "step": 69295 }, { "epoch": 11.30505709624796, "grad_norm": 0.47753238677978516, "learning_rate": 0.0004734055378370671, "loss": 0.0425, "num_input_tokens_seen": 149681504, "step": 69300 }, { "epoch": 11.305872756933116, "grad_norm": 0.012164420448243618, "learning_rate": 0.00047333445904148414, "loss": 0.0191, "num_input_tokens_seen": 149692192, "step": 69305 }, { "epoch": 11.30668841761827, "grad_norm": 0.0932174026966095, "learning_rate": 0.0004732633807863119, "loss": 0.0097, "num_input_tokens_seen": 149702688, "step": 69310 }, { "epoch": 11.307504078303426, "grad_norm": 0.007502132561057806, "learning_rate": 0.0004731923030729915, "loss": 0.0056, "num_input_tokens_seen": 149714016, "step": 69315 }, { "epoch": 11.308319738988581, "grad_norm": 0.002116349758580327, "learning_rate": 0.0004731212259029628, "loss": 0.006, "num_input_tokens_seen": 149723456, "step": 69320 }, { "epoch": 11.309135399673735, "grad_norm": 0.0006365369190461934, "learning_rate": 0.0004730501492776668, "loss": 0.004, "num_input_tokens_seen": 149734816, "step": 69325 }, { "epoch": 11.309951060358891, "grad_norm": 0.12664510309696198, "learning_rate": 0.00047297907319854347, "loss": 0.0369, "num_input_tokens_seen": 149745440, "step": 69330 }, { "epoch": 11.310766721044045, "grad_norm": 0.13877706229686737, "learning_rate": 0.0004729079976670338, "loss": 0.0101, "num_input_tokens_seen": 149753664, "step": 69335 }, { "epoch": 11.3115823817292, "grad_norm": 0.31556734442710876, "learning_rate": 0.00047283692268457764, "loss": 0.0417, "num_input_tokens_seen": 149765280, "step": 69340 }, { "epoch": 11.312398042414356, "grad_norm": 0.07661747932434082, "learning_rate": 0.0004727658482526159, "loss": 0.0281, "num_input_tokens_seen": 149774944, "step": 69345 }, { "epoch": 11.31321370309951, "grad_norm": 0.0017009270377457142, "learning_rate": 0.00047269477437258863, "loss": 0.0066, "num_input_tokens_seen": 149785664, "step": 69350 }, { "epoch": 11.314029363784666, "grad_norm": 0.03626738116145134, "learning_rate": 0.0004726237010459366, "loss": 0.0107, "num_input_tokens_seen": 149796864, "step": 69355 }, { "epoch": 11.31484502446982, "grad_norm": 0.0531466118991375, "learning_rate": 0.00047255262827409974, "loss": 0.1056, "num_input_tokens_seen": 149807136, "step": 69360 }, { "epoch": 11.315660685154976, "grad_norm": 0.3658745288848877, "learning_rate": 0.00047248155605851896, "loss": 0.0328, "num_input_tokens_seen": 149817888, "step": 69365 }, { "epoch": 11.31647634584013, "grad_norm": 0.5249987840652466, "learning_rate": 0.0004724104844006341, "loss": 0.0633, "num_input_tokens_seen": 149828448, "step": 69370 }, { "epoch": 11.317292006525285, "grad_norm": 0.0011662240140140057, "learning_rate": 0.0004723394133018858, "loss": 0.0368, "num_input_tokens_seen": 149839968, "step": 69375 }, { "epoch": 11.318107667210441, "grad_norm": 0.006454044952988625, "learning_rate": 0.00047226834276371457, "loss": 0.0474, "num_input_tokens_seen": 149850688, "step": 69380 }, { "epoch": 11.318923327895595, "grad_norm": 0.04705966264009476, "learning_rate": 0.00047219727278756033, "loss": 0.0116, "num_input_tokens_seen": 149861152, "step": 69385 }, { "epoch": 11.31973898858075, "grad_norm": 0.3665064871311188, "learning_rate": 0.0004721262033748639, "loss": 0.1215, "num_input_tokens_seen": 149870592, "step": 69390 }, { "epoch": 11.320554649265905, "grad_norm": 0.0022680433467030525, "learning_rate": 0.00047205513452706503, "loss": 0.0041, "num_input_tokens_seen": 149880736, "step": 69395 }, { "epoch": 11.32137030995106, "grad_norm": 0.4094318747520447, "learning_rate": 0.0004719840662456046, "loss": 0.0873, "num_input_tokens_seen": 149890912, "step": 69400 }, { "epoch": 11.322185970636216, "grad_norm": 0.9223697781562805, "learning_rate": 0.0004719129985319223, "loss": 0.0964, "num_input_tokens_seen": 149901024, "step": 69405 }, { "epoch": 11.32300163132137, "grad_norm": 0.004950478672981262, "learning_rate": 0.0004718419313874589, "loss": 0.0312, "num_input_tokens_seen": 149912352, "step": 69410 }, { "epoch": 11.323817292006526, "grad_norm": 0.04885503649711609, "learning_rate": 0.00047177086481365444, "loss": 0.0061, "num_input_tokens_seen": 149923072, "step": 69415 }, { "epoch": 11.32463295269168, "grad_norm": 0.021326279267668724, "learning_rate": 0.00047169979881194927, "loss": 0.0059, "num_input_tokens_seen": 149933248, "step": 69420 }, { "epoch": 11.325448613376835, "grad_norm": 0.18451477587223053, "learning_rate": 0.00047162873338378353, "loss": 0.0136, "num_input_tokens_seen": 149943392, "step": 69425 }, { "epoch": 11.326264274061991, "grad_norm": 0.3547409176826477, "learning_rate": 0.0004715576685305975, "loss": 0.0394, "num_input_tokens_seen": 149954240, "step": 69430 }, { "epoch": 11.327079934747145, "grad_norm": 0.02587219700217247, "learning_rate": 0.0004714866042538313, "loss": 0.0062, "num_input_tokens_seen": 149964352, "step": 69435 }, { "epoch": 11.3278955954323, "grad_norm": 0.11461975425481796, "learning_rate": 0.00047141554055492546, "loss": 0.0079, "num_input_tokens_seen": 149975680, "step": 69440 }, { "epoch": 11.328711256117455, "grad_norm": 0.18348151445388794, "learning_rate": 0.0004713444774353197, "loss": 0.0975, "num_input_tokens_seen": 149987136, "step": 69445 }, { "epoch": 11.32952691680261, "grad_norm": 0.039701469242572784, "learning_rate": 0.0004712734148964547, "loss": 0.0123, "num_input_tokens_seen": 149998624, "step": 69450 }, { "epoch": 11.330342577487766, "grad_norm": 0.00447084940969944, "learning_rate": 0.00047120235293977023, "loss": 0.0045, "num_input_tokens_seen": 150009280, "step": 69455 }, { "epoch": 11.33115823817292, "grad_norm": 0.14754629135131836, "learning_rate": 0.00047113129156670677, "loss": 0.0187, "num_input_tokens_seen": 150019360, "step": 69460 }, { "epoch": 11.331973898858076, "grad_norm": 0.0007346358615905046, "learning_rate": 0.00047106023077870407, "loss": 0.0088, "num_input_tokens_seen": 150030336, "step": 69465 }, { "epoch": 11.33278955954323, "grad_norm": 0.09433241188526154, "learning_rate": 0.00047098917057720275, "loss": 0.0178, "num_input_tokens_seen": 150040288, "step": 69470 }, { "epoch": 11.333605220228385, "grad_norm": 0.149391308426857, "learning_rate": 0.00047091811096364243, "loss": 0.0079, "num_input_tokens_seen": 150051296, "step": 69475 }, { "epoch": 11.33442088091354, "grad_norm": 0.0016867019003257155, "learning_rate": 0.00047084705193946357, "loss": 0.004, "num_input_tokens_seen": 150062656, "step": 69480 }, { "epoch": 11.335236541598695, "grad_norm": 0.006028663367033005, "learning_rate": 0.0004707759935061063, "loss": 0.0098, "num_input_tokens_seen": 150073792, "step": 69485 }, { "epoch": 11.33605220228385, "grad_norm": 0.015563595108687878, "learning_rate": 0.0004707049356650105, "loss": 0.0032, "num_input_tokens_seen": 150085088, "step": 69490 }, { "epoch": 11.336867862969005, "grad_norm": 0.9245015382766724, "learning_rate": 0.0004706338784176165, "loss": 0.0306, "num_input_tokens_seen": 150096640, "step": 69495 }, { "epoch": 11.33768352365416, "grad_norm": 0.1057695597410202, "learning_rate": 0.000470562821765364, "loss": 0.0087, "num_input_tokens_seen": 150106656, "step": 69500 }, { "epoch": 11.338499184339314, "grad_norm": 0.7162270545959473, "learning_rate": 0.0004704917657096934, "loss": 0.0747, "num_input_tokens_seen": 150117056, "step": 69505 }, { "epoch": 11.33931484502447, "grad_norm": 0.08723993599414825, "learning_rate": 0.00047042071025204445, "loss": 0.0063, "num_input_tokens_seen": 150128832, "step": 69510 }, { "epoch": 11.340130505709626, "grad_norm": 0.09651493281126022, "learning_rate": 0.0004703496553938576, "loss": 0.0111, "num_input_tokens_seen": 150139168, "step": 69515 }, { "epoch": 11.34094616639478, "grad_norm": 0.023214256390929222, "learning_rate": 0.00047027860113657235, "loss": 0.1542, "num_input_tokens_seen": 150149344, "step": 69520 }, { "epoch": 11.341761827079935, "grad_norm": 0.01802109181880951, "learning_rate": 0.00047020754748162914, "loss": 0.0138, "num_input_tokens_seen": 150160032, "step": 69525 }, { "epoch": 11.34257748776509, "grad_norm": 0.467803031206131, "learning_rate": 0.0004701364944304675, "loss": 0.0278, "num_input_tokens_seen": 150171264, "step": 69530 }, { "epoch": 11.343393148450245, "grad_norm": 0.018711155280470848, "learning_rate": 0.000470065441984528, "loss": 0.0039, "num_input_tokens_seen": 150181696, "step": 69535 }, { "epoch": 11.3442088091354, "grad_norm": 0.0073052081279456615, "learning_rate": 0.00046999439014525004, "loss": 0.0077, "num_input_tokens_seen": 150192352, "step": 69540 }, { "epoch": 11.345024469820554, "grad_norm": 0.0033433528151363134, "learning_rate": 0.00046992333891407396, "loss": 0.0414, "num_input_tokens_seen": 150203392, "step": 69545 }, { "epoch": 11.34584013050571, "grad_norm": 0.13693156838417053, "learning_rate": 0.00046985228829243955, "loss": 0.0771, "num_input_tokens_seen": 150214144, "step": 69550 }, { "epoch": 11.346655791190864, "grad_norm": 0.15868552029132843, "learning_rate": 0.0004697812382817868, "loss": 0.0192, "num_input_tokens_seen": 150225664, "step": 69555 }, { "epoch": 11.34747145187602, "grad_norm": 0.22203922271728516, "learning_rate": 0.0004697101888835555, "loss": 0.0222, "num_input_tokens_seen": 150237728, "step": 69560 }, { "epoch": 11.348287112561174, "grad_norm": 0.22300738096237183, "learning_rate": 0.0004696391400991857, "loss": 0.0206, "num_input_tokens_seen": 150249056, "step": 69565 }, { "epoch": 11.34910277324633, "grad_norm": 2.7132036685943604, "learning_rate": 0.0004695680919301173, "loss": 0.0948, "num_input_tokens_seen": 150259872, "step": 69570 }, { "epoch": 11.349918433931485, "grad_norm": 0.6747056245803833, "learning_rate": 0.00046949704437779005, "loss": 0.261, "num_input_tokens_seen": 150269312, "step": 69575 }, { "epoch": 11.350734094616639, "grad_norm": 0.04295853152871132, "learning_rate": 0.0004694259974436438, "loss": 0.0082, "num_input_tokens_seen": 150280064, "step": 69580 }, { "epoch": 11.351549755301795, "grad_norm": 0.08870097249746323, "learning_rate": 0.00046935495112911856, "loss": 0.0273, "num_input_tokens_seen": 150290784, "step": 69585 }, { "epoch": 11.352365415986949, "grad_norm": 0.002641193335875869, "learning_rate": 0.0004692839054356542, "loss": 0.0031, "num_input_tokens_seen": 150301248, "step": 69590 }, { "epoch": 11.353181076672104, "grad_norm": 0.015244108624756336, "learning_rate": 0.0004692128603646904, "loss": 0.0053, "num_input_tokens_seen": 150312160, "step": 69595 }, { "epoch": 11.35399673735726, "grad_norm": 0.00676583731546998, "learning_rate": 0.0004691418159176671, "loss": 0.0158, "num_input_tokens_seen": 150322336, "step": 69600 }, { "epoch": 11.354812398042414, "grad_norm": 0.11018446087837219, "learning_rate": 0.00046907077209602387, "loss": 0.0472, "num_input_tokens_seen": 150332800, "step": 69605 }, { "epoch": 11.35562805872757, "grad_norm": 0.013826750218868256, "learning_rate": 0.0004689997289012009, "loss": 0.0206, "num_input_tokens_seen": 150343488, "step": 69610 }, { "epoch": 11.356443719412724, "grad_norm": 0.014815778471529484, "learning_rate": 0.0004689286863346376, "loss": 0.0666, "num_input_tokens_seen": 150355200, "step": 69615 }, { "epoch": 11.35725938009788, "grad_norm": 0.06837964802980423, "learning_rate": 0.00046885764439777406, "loss": 0.2133, "num_input_tokens_seen": 150365632, "step": 69620 }, { "epoch": 11.358075040783035, "grad_norm": 0.08865787088871002, "learning_rate": 0.0004687866030920496, "loss": 0.0067, "num_input_tokens_seen": 150376512, "step": 69625 }, { "epoch": 11.358890701468189, "grad_norm": 0.005196568556129932, "learning_rate": 0.00046871556241890455, "loss": 0.1446, "num_input_tokens_seen": 150385184, "step": 69630 }, { "epoch": 11.359706362153345, "grad_norm": 0.046573054045438766, "learning_rate": 0.000468644522379778, "loss": 0.0273, "num_input_tokens_seen": 150396384, "step": 69635 }, { "epoch": 11.360522022838499, "grad_norm": 0.35547757148742676, "learning_rate": 0.00046857348297611024, "loss": 0.2188, "num_input_tokens_seen": 150407712, "step": 69640 }, { "epoch": 11.361337683523654, "grad_norm": 0.014049242250621319, "learning_rate": 0.0004685024442093405, "loss": 0.0876, "num_input_tokens_seen": 150418016, "step": 69645 }, { "epoch": 11.362153344208808, "grad_norm": 0.03289031237363815, "learning_rate": 0.00046843140608090897, "loss": 0.017, "num_input_tokens_seen": 150427840, "step": 69650 }, { "epoch": 11.362969004893964, "grad_norm": 0.33571213483810425, "learning_rate": 0.0004683603685922547, "loss": 0.0994, "num_input_tokens_seen": 150439040, "step": 69655 }, { "epoch": 11.36378466557912, "grad_norm": 0.01751234009861946, "learning_rate": 0.00046828933174481797, "loss": 0.1699, "num_input_tokens_seen": 150450080, "step": 69660 }, { "epoch": 11.364600326264274, "grad_norm": 0.03534317389130592, "learning_rate": 0.000468218295540038, "loss": 0.0356, "num_input_tokens_seen": 150461728, "step": 69665 }, { "epoch": 11.36541598694943, "grad_norm": 0.037167083472013474, "learning_rate": 0.0004681472599793547, "loss": 0.0182, "num_input_tokens_seen": 150472288, "step": 69670 }, { "epoch": 11.366231647634583, "grad_norm": 0.045636508613824844, "learning_rate": 0.00046807622506420745, "loss": 0.0124, "num_input_tokens_seen": 150482048, "step": 69675 }, { "epoch": 11.367047308319739, "grad_norm": 0.020483041182160378, "learning_rate": 0.00046800519079603616, "loss": 0.004, "num_input_tokens_seen": 150492928, "step": 69680 }, { "epoch": 11.367862969004895, "grad_norm": 0.06380794942378998, "learning_rate": 0.00046793415717628006, "loss": 0.0081, "num_input_tokens_seen": 150504448, "step": 69685 }, { "epoch": 11.368678629690049, "grad_norm": 0.006169379223138094, "learning_rate": 0.000467863124206379, "loss": 0.0785, "num_input_tokens_seen": 150514880, "step": 69690 }, { "epoch": 11.369494290375204, "grad_norm": 0.004469654988497496, "learning_rate": 0.0004677920918877726, "loss": 0.1212, "num_input_tokens_seen": 150525984, "step": 69695 }, { "epoch": 11.370309951060358, "grad_norm": 0.009194393642246723, "learning_rate": 0.0004677210602219002, "loss": 0.0221, "num_input_tokens_seen": 150536352, "step": 69700 }, { "epoch": 11.371125611745514, "grad_norm": 0.014019605703651905, "learning_rate": 0.00046765002921020165, "loss": 0.0168, "num_input_tokens_seen": 150547040, "step": 69705 }, { "epoch": 11.37194127243067, "grad_norm": 0.008166109211742878, "learning_rate": 0.0004675789988541161, "loss": 0.0146, "num_input_tokens_seen": 150557920, "step": 69710 }, { "epoch": 11.372756933115824, "grad_norm": 0.029136566445231438, "learning_rate": 0.0004675079691550833, "loss": 0.0126, "num_input_tokens_seen": 150568832, "step": 69715 }, { "epoch": 11.37357259380098, "grad_norm": 0.0021976104471832514, "learning_rate": 0.0004674369401145428, "loss": 0.0731, "num_input_tokens_seen": 150579232, "step": 69720 }, { "epoch": 11.374388254486133, "grad_norm": 0.0005827890709042549, "learning_rate": 0.000467365911733934, "loss": 0.0138, "num_input_tokens_seen": 150590656, "step": 69725 }, { "epoch": 11.375203915171289, "grad_norm": 0.017050622031092644, "learning_rate": 0.0004672948840146964, "loss": 0.0246, "num_input_tokens_seen": 150600992, "step": 69730 }, { "epoch": 11.376019575856443, "grad_norm": 0.07799620926380157, "learning_rate": 0.0004672238569582695, "loss": 0.0096, "num_input_tokens_seen": 150612512, "step": 69735 }, { "epoch": 11.376835236541599, "grad_norm": 0.008146431297063828, "learning_rate": 0.00046715283056609255, "loss": 0.0073, "num_input_tokens_seen": 150622976, "step": 69740 }, { "epoch": 11.377650897226754, "grad_norm": 0.01783410832285881, "learning_rate": 0.0004670818048396054, "loss": 0.0123, "num_input_tokens_seen": 150633792, "step": 69745 }, { "epoch": 11.378466557911908, "grad_norm": 0.22305737435817719, "learning_rate": 0.00046701077978024695, "loss": 0.0305, "num_input_tokens_seen": 150644864, "step": 69750 }, { "epoch": 11.379282218597064, "grad_norm": 0.012764952145516872, "learning_rate": 0.0004669397553894572, "loss": 0.0759, "num_input_tokens_seen": 150654912, "step": 69755 }, { "epoch": 11.380097879282218, "grad_norm": 0.07222677022218704, "learning_rate": 0.00046686873166867503, "loss": 0.0307, "num_input_tokens_seen": 150665632, "step": 69760 }, { "epoch": 11.380913539967374, "grad_norm": 0.007756201084703207, "learning_rate": 0.00046679770861934026, "loss": 0.0233, "num_input_tokens_seen": 150676576, "step": 69765 }, { "epoch": 11.38172920065253, "grad_norm": 0.001642027753405273, "learning_rate": 0.00046672668624289177, "loss": 0.0241, "num_input_tokens_seen": 150687936, "step": 69770 }, { "epoch": 11.382544861337683, "grad_norm": 0.005967188626527786, "learning_rate": 0.0004666556645407695, "loss": 0.0062, "num_input_tokens_seen": 150699808, "step": 69775 }, { "epoch": 11.383360522022839, "grad_norm": 0.24380135536193848, "learning_rate": 0.00046658464351441214, "loss": 0.0123, "num_input_tokens_seen": 150710304, "step": 69780 }, { "epoch": 11.384176182707993, "grad_norm": 0.04066390171647072, "learning_rate": 0.0004665136231652597, "loss": 0.0122, "num_input_tokens_seen": 150721696, "step": 69785 }, { "epoch": 11.384991843393149, "grad_norm": 0.004151922184973955, "learning_rate": 0.0004664426034947509, "loss": 0.0107, "num_input_tokens_seen": 150732832, "step": 69790 }, { "epoch": 11.385807504078304, "grad_norm": 0.3790360987186432, "learning_rate": 0.00046637158450432557, "loss": 0.1486, "num_input_tokens_seen": 150743168, "step": 69795 }, { "epoch": 11.386623164763458, "grad_norm": 0.4907276928424835, "learning_rate": 0.0004663005661954225, "loss": 0.0318, "num_input_tokens_seen": 150753984, "step": 69800 }, { "epoch": 11.387438825448614, "grad_norm": 0.004496270790696144, "learning_rate": 0.0004662295485694812, "loss": 0.0028, "num_input_tokens_seen": 150763840, "step": 69805 }, { "epoch": 11.388254486133768, "grad_norm": 0.013045874424278736, "learning_rate": 0.00046615853162794115, "loss": 0.0179, "num_input_tokens_seen": 150774016, "step": 69810 }, { "epoch": 11.389070146818923, "grad_norm": 0.12476672232151031, "learning_rate": 0.00046608751537224115, "loss": 0.0283, "num_input_tokens_seen": 150785024, "step": 69815 }, { "epoch": 11.38988580750408, "grad_norm": 0.000895587436389178, "learning_rate": 0.0004660164998038209, "loss": 0.1608, "num_input_tokens_seen": 150795264, "step": 69820 }, { "epoch": 11.390701468189233, "grad_norm": 0.003991547040641308, "learning_rate": 0.0004659454849241192, "loss": 0.0028, "num_input_tokens_seen": 150805440, "step": 69825 }, { "epoch": 11.391517128874389, "grad_norm": 0.001991401193663478, "learning_rate": 0.0004658744707345757, "loss": 0.0102, "num_input_tokens_seen": 150816064, "step": 69830 }, { "epoch": 11.392332789559543, "grad_norm": 0.025201058015227318, "learning_rate": 0.000465803457236629, "loss": 0.0161, "num_input_tokens_seen": 150828000, "step": 69835 }, { "epoch": 11.393148450244698, "grad_norm": 0.002868784824386239, "learning_rate": 0.00046573244443171897, "loss": 0.022, "num_input_tokens_seen": 150838912, "step": 69840 }, { "epoch": 11.393964110929852, "grad_norm": 0.0028491260018199682, "learning_rate": 0.00046566143232128416, "loss": 0.027, "num_input_tokens_seen": 150850976, "step": 69845 }, { "epoch": 11.394779771615008, "grad_norm": 0.003991770092397928, "learning_rate": 0.0004655904209067642, "loss": 0.006, "num_input_tokens_seen": 150861216, "step": 69850 }, { "epoch": 11.395595432300164, "grad_norm": 0.08987481147050858, "learning_rate": 0.0004655194101895978, "loss": 0.0087, "num_input_tokens_seen": 150871808, "step": 69855 }, { "epoch": 11.396411092985318, "grad_norm": 0.12651516497135162, "learning_rate": 0.00046544840017122437, "loss": 0.1577, "num_input_tokens_seen": 150882432, "step": 69860 }, { "epoch": 11.397226753670473, "grad_norm": 0.38407793641090393, "learning_rate": 0.000465377390853083, "loss": 0.0731, "num_input_tokens_seen": 150894016, "step": 69865 }, { "epoch": 11.398042414355627, "grad_norm": 0.5415304899215698, "learning_rate": 0.0004653063822366127, "loss": 0.0491, "num_input_tokens_seen": 150904384, "step": 69870 }, { "epoch": 11.398858075040783, "grad_norm": 0.001830374007113278, "learning_rate": 0.00046523537432325256, "loss": 0.0782, "num_input_tokens_seen": 150915328, "step": 69875 }, { "epoch": 11.399673735725939, "grad_norm": 0.1316104382276535, "learning_rate": 0.00046516436711444166, "loss": 0.0082, "num_input_tokens_seen": 150927264, "step": 69880 }, { "epoch": 11.400489396411093, "grad_norm": 0.005751550663262606, "learning_rate": 0.000465093360611619, "loss": 0.0139, "num_input_tokens_seen": 150938752, "step": 69885 }, { "epoch": 11.401305057096248, "grad_norm": 0.019678879529237747, "learning_rate": 0.00046502235481622387, "loss": 0.005, "num_input_tokens_seen": 150949312, "step": 69890 }, { "epoch": 11.402120717781402, "grad_norm": 0.03236980736255646, "learning_rate": 0.00046495134972969476, "loss": 0.0759, "num_input_tokens_seen": 150959488, "step": 69895 }, { "epoch": 11.402936378466558, "grad_norm": 0.11323463916778564, "learning_rate": 0.00046488034535347133, "loss": 0.0246, "num_input_tokens_seen": 150971264, "step": 69900 }, { "epoch": 11.403752039151712, "grad_norm": 0.011170793324708939, "learning_rate": 0.00046480934168899204, "loss": 0.007, "num_input_tokens_seen": 150982688, "step": 69905 }, { "epoch": 11.404567699836868, "grad_norm": 0.22744742035865784, "learning_rate": 0.0004647383387376961, "loss": 0.0193, "num_input_tokens_seen": 150993504, "step": 69910 }, { "epoch": 11.405383360522023, "grad_norm": 0.005999124143272638, "learning_rate": 0.0004646673365010226, "loss": 0.0046, "num_input_tokens_seen": 151004256, "step": 69915 }, { "epoch": 11.406199021207177, "grad_norm": 0.007814616896212101, "learning_rate": 0.0004645963349804102, "loss": 0.0679, "num_input_tokens_seen": 151014560, "step": 69920 }, { "epoch": 11.407014681892333, "grad_norm": 0.002219694433733821, "learning_rate": 0.0004645253341772982, "loss": 0.15, "num_input_tokens_seen": 151026752, "step": 69925 }, { "epoch": 11.407830342577487, "grad_norm": 0.0035255623515695333, "learning_rate": 0.00046445433409312507, "loss": 0.0028, "num_input_tokens_seen": 151037792, "step": 69930 }, { "epoch": 11.408646003262643, "grad_norm": 0.13510288298130035, "learning_rate": 0.00046438333472933015, "loss": 0.0644, "num_input_tokens_seen": 151048832, "step": 69935 }, { "epoch": 11.409461663947798, "grad_norm": 0.04429556801915169, "learning_rate": 0.0004643123360873519, "loss": 0.043, "num_input_tokens_seen": 151059840, "step": 69940 }, { "epoch": 11.410277324632952, "grad_norm": 0.26778528094291687, "learning_rate": 0.00046424133816862966, "loss": 0.0608, "num_input_tokens_seen": 151071328, "step": 69945 }, { "epoch": 11.411092985318108, "grad_norm": 0.031590599566698074, "learning_rate": 0.00046417034097460193, "loss": 0.0184, "num_input_tokens_seen": 151081920, "step": 69950 }, { "epoch": 11.411908646003262, "grad_norm": 0.41061243414878845, "learning_rate": 0.0004640993445067078, "loss": 0.1952, "num_input_tokens_seen": 151093440, "step": 69955 }, { "epoch": 11.412724306688418, "grad_norm": 0.011785534210503101, "learning_rate": 0.00046402834876638584, "loss": 0.0072, "num_input_tokens_seen": 151104256, "step": 69960 }, { "epoch": 11.413539967373573, "grad_norm": 0.0027609181124716997, "learning_rate": 0.00046395735375507523, "loss": 0.0053, "num_input_tokens_seen": 151113856, "step": 69965 }, { "epoch": 11.414355628058727, "grad_norm": 0.004242262803018093, "learning_rate": 0.0004638863594742144, "loss": 0.0083, "num_input_tokens_seen": 151124928, "step": 69970 }, { "epoch": 11.415171288743883, "grad_norm": 0.0068120453506708145, "learning_rate": 0.00046381536592524244, "loss": 0.0188, "num_input_tokens_seen": 151134304, "step": 69975 }, { "epoch": 11.415986949429037, "grad_norm": 0.43580371141433716, "learning_rate": 0.00046374437310959783, "loss": 0.089, "num_input_tokens_seen": 151145600, "step": 69980 }, { "epoch": 11.416802610114193, "grad_norm": 0.01116594672203064, "learning_rate": 0.0004636733810287197, "loss": 0.0091, "num_input_tokens_seen": 151155872, "step": 69985 }, { "epoch": 11.417618270799348, "grad_norm": 0.00956810638308525, "learning_rate": 0.00046360238968404634, "loss": 0.0033, "num_input_tokens_seen": 151164960, "step": 69990 }, { "epoch": 11.418433931484502, "grad_norm": 0.00572627317160368, "learning_rate": 0.000463531399077017, "loss": 0.0798, "num_input_tokens_seen": 151176480, "step": 69995 }, { "epoch": 11.419249592169658, "grad_norm": 0.9830451011657715, "learning_rate": 0.00046346040920906985, "loss": 0.0623, "num_input_tokens_seen": 151187136, "step": 70000 }, { "epoch": 11.420065252854812, "grad_norm": 0.03204105421900749, "learning_rate": 0.000463389420081644, "loss": 0.015, "num_input_tokens_seen": 151197856, "step": 70005 }, { "epoch": 11.420880913539968, "grad_norm": 0.20063550770282745, "learning_rate": 0.000463318431696178, "loss": 0.0212, "num_input_tokens_seen": 151209280, "step": 70010 }, { "epoch": 11.421696574225122, "grad_norm": 0.0036226080264896154, "learning_rate": 0.00046324744405411034, "loss": 0.0399, "num_input_tokens_seen": 151219392, "step": 70015 }, { "epoch": 11.422512234910277, "grad_norm": 0.005751903634518385, "learning_rate": 0.00046317645715688015, "loss": 0.2715, "num_input_tokens_seen": 151230784, "step": 70020 }, { "epoch": 11.423327895595433, "grad_norm": 0.23385001718997955, "learning_rate": 0.00046310547100592557, "loss": 0.1079, "num_input_tokens_seen": 151241824, "step": 70025 }, { "epoch": 11.424143556280587, "grad_norm": 0.0022833424154669046, "learning_rate": 0.0004630344856026855, "loss": 0.0047, "num_input_tokens_seen": 151253280, "step": 70030 }, { "epoch": 11.424959216965743, "grad_norm": 0.009258701466023922, "learning_rate": 0.0004629635009485984, "loss": 0.0887, "num_input_tokens_seen": 151264864, "step": 70035 }, { "epoch": 11.425774877650896, "grad_norm": 0.06533806771039963, "learning_rate": 0.000462892517045103, "loss": 0.0182, "num_input_tokens_seen": 151275936, "step": 70040 }, { "epoch": 11.426590538336052, "grad_norm": 0.039362918585538864, "learning_rate": 0.0004628215338936378, "loss": 0.0376, "num_input_tokens_seen": 151286368, "step": 70045 }, { "epoch": 11.427406199021208, "grad_norm": 0.20595617592334747, "learning_rate": 0.0004627505514956414, "loss": 0.0135, "num_input_tokens_seen": 151296064, "step": 70050 }, { "epoch": 11.428221859706362, "grad_norm": 0.05006346479058266, "learning_rate": 0.0004626795698525522, "loss": 0.0201, "num_input_tokens_seen": 151306848, "step": 70055 }, { "epoch": 11.429037520391518, "grad_norm": 0.18353790044784546, "learning_rate": 0.00046260858896580916, "loss": 0.1223, "num_input_tokens_seen": 151317088, "step": 70060 }, { "epoch": 11.429853181076671, "grad_norm": 0.1014518141746521, "learning_rate": 0.0004625376088368502, "loss": 0.0353, "num_input_tokens_seen": 151328576, "step": 70065 }, { "epoch": 11.430668841761827, "grad_norm": 0.003118648659437895, "learning_rate": 0.0004624666294671143, "loss": 0.006, "num_input_tokens_seen": 151339424, "step": 70070 }, { "epoch": 11.431484502446983, "grad_norm": 0.07444004714488983, "learning_rate": 0.00046239565085803966, "loss": 0.0096, "num_input_tokens_seen": 151349696, "step": 70075 }, { "epoch": 11.432300163132137, "grad_norm": 0.0086215203627944, "learning_rate": 0.000462324673011065, "loss": 0.0029, "num_input_tokens_seen": 151361728, "step": 70080 }, { "epoch": 11.433115823817293, "grad_norm": 0.005710784811526537, "learning_rate": 0.00046225369592762844, "loss": 0.0456, "num_input_tokens_seen": 151372928, "step": 70085 }, { "epoch": 11.433931484502446, "grad_norm": 0.003733087796717882, "learning_rate": 0.00046218271960916886, "loss": 0.016, "num_input_tokens_seen": 151383072, "step": 70090 }, { "epoch": 11.434747145187602, "grad_norm": 0.3411053419113159, "learning_rate": 0.0004621117440571242, "loss": 0.1267, "num_input_tokens_seen": 151393952, "step": 70095 }, { "epoch": 11.435562805872756, "grad_norm": 0.008186204358935356, "learning_rate": 0.0004620407692729333, "loss": 0.0087, "num_input_tokens_seen": 151405376, "step": 70100 }, { "epoch": 11.436378466557912, "grad_norm": 0.0025740989949554205, "learning_rate": 0.0004619697952580342, "loss": 0.0465, "num_input_tokens_seen": 151415360, "step": 70105 }, { "epoch": 11.437194127243067, "grad_norm": 0.011523899622261524, "learning_rate": 0.00046189882201386564, "loss": 0.0113, "num_input_tokens_seen": 151424960, "step": 70110 }, { "epoch": 11.438009787928221, "grad_norm": 0.05342297628521919, "learning_rate": 0.0004618278495418655, "loss": 0.0082, "num_input_tokens_seen": 151436000, "step": 70115 }, { "epoch": 11.438825448613377, "grad_norm": 0.001507753157056868, "learning_rate": 0.0004617568778434725, "loss": 0.004, "num_input_tokens_seen": 151446944, "step": 70120 }, { "epoch": 11.439641109298531, "grad_norm": 0.0007432901184074581, "learning_rate": 0.0004616859069201251, "loss": 0.0029, "num_input_tokens_seen": 151457888, "step": 70125 }, { "epoch": 11.440456769983687, "grad_norm": 0.0018831411143764853, "learning_rate": 0.0004616149367732612, "loss": 0.0091, "num_input_tokens_seen": 151467904, "step": 70130 }, { "epoch": 11.441272430668842, "grad_norm": 0.0827159658074379, "learning_rate": 0.0004615439674043195, "loss": 0.1162, "num_input_tokens_seen": 151479744, "step": 70135 }, { "epoch": 11.442088091353996, "grad_norm": 0.09129805862903595, "learning_rate": 0.00046147299881473783, "loss": 0.0245, "num_input_tokens_seen": 151489760, "step": 70140 }, { "epoch": 11.442903752039152, "grad_norm": 0.06670738756656647, "learning_rate": 0.0004614020310059549, "loss": 0.0095, "num_input_tokens_seen": 151500640, "step": 70145 }, { "epoch": 11.443719412724306, "grad_norm": 0.036870285868644714, "learning_rate": 0.0004613310639794086, "loss": 0.0096, "num_input_tokens_seen": 151511200, "step": 70150 }, { "epoch": 11.444535073409462, "grad_norm": 0.042107485234737396, "learning_rate": 0.0004612600977365376, "loss": 0.0109, "num_input_tokens_seen": 151521632, "step": 70155 }, { "epoch": 11.445350734094617, "grad_norm": 0.02666345052421093, "learning_rate": 0.0004611891322787796, "loss": 0.0095, "num_input_tokens_seen": 151531904, "step": 70160 }, { "epoch": 11.446166394779771, "grad_norm": 0.022787848487496376, "learning_rate": 0.0004611181676075734, "loss": 0.0058, "num_input_tokens_seen": 151542816, "step": 70165 }, { "epoch": 11.446982055464927, "grad_norm": 0.010411875322461128, "learning_rate": 0.00046104720372435647, "loss": 0.0078, "num_input_tokens_seen": 151553312, "step": 70170 }, { "epoch": 11.447797716150081, "grad_norm": 0.014083120971918106, "learning_rate": 0.0004609762406305676, "loss": 0.006, "num_input_tokens_seen": 151562112, "step": 70175 }, { "epoch": 11.448613376835237, "grad_norm": 0.008514699526131153, "learning_rate": 0.0004609052783276447, "loss": 0.0074, "num_input_tokens_seen": 151571168, "step": 70180 }, { "epoch": 11.449429037520392, "grad_norm": 0.07610338926315308, "learning_rate": 0.0004608343168170259, "loss": 0.0048, "num_input_tokens_seen": 151580352, "step": 70185 }, { "epoch": 11.450244698205546, "grad_norm": 0.01380261592566967, "learning_rate": 0.0004607633561001493, "loss": 0.0047, "num_input_tokens_seen": 151591360, "step": 70190 }, { "epoch": 11.451060358890702, "grad_norm": 0.0032468524295836687, "learning_rate": 0.0004606923961784532, "loss": 0.0021, "num_input_tokens_seen": 151602368, "step": 70195 }, { "epoch": 11.451876019575856, "grad_norm": 0.000461250776425004, "learning_rate": 0.00046062143705337535, "loss": 0.0027, "num_input_tokens_seen": 151613504, "step": 70200 }, { "epoch": 11.452691680261012, "grad_norm": 0.009105556644499302, "learning_rate": 0.00046055047872635424, "loss": 0.0024, "num_input_tokens_seen": 151624160, "step": 70205 }, { "epoch": 11.453507340946166, "grad_norm": 0.005286885425448418, "learning_rate": 0.0004604795211988275, "loss": 0.1602, "num_input_tokens_seen": 151632672, "step": 70210 }, { "epoch": 11.454323001631321, "grad_norm": 0.0044653876684606075, "learning_rate": 0.00046040856447223375, "loss": 0.0645, "num_input_tokens_seen": 151644672, "step": 70215 }, { "epoch": 11.455138662316477, "grad_norm": 0.0038570996839553118, "learning_rate": 0.00046033760854801033, "loss": 0.0166, "num_input_tokens_seen": 151655168, "step": 70220 }, { "epoch": 11.455954323001631, "grad_norm": 0.4873116612434387, "learning_rate": 0.0004602666534275956, "loss": 0.0768, "num_input_tokens_seen": 151666080, "step": 70225 }, { "epoch": 11.456769983686787, "grad_norm": 0.011731144040822983, "learning_rate": 0.0004601956991124278, "loss": 0.0027, "num_input_tokens_seen": 151676864, "step": 70230 }, { "epoch": 11.45758564437194, "grad_norm": 0.1812438666820526, "learning_rate": 0.00046012474560394443, "loss": 0.0688, "num_input_tokens_seen": 151688768, "step": 70235 }, { "epoch": 11.458401305057096, "grad_norm": 0.11268593370914459, "learning_rate": 0.00046005379290358386, "loss": 0.0731, "num_input_tokens_seen": 151699200, "step": 70240 }, { "epoch": 11.459216965742252, "grad_norm": 0.004744227509945631, "learning_rate": 0.00045998284101278367, "loss": 0.0363, "num_input_tokens_seen": 151711168, "step": 70245 }, { "epoch": 11.460032626427406, "grad_norm": 0.016108961775898933, "learning_rate": 0.0004599118899329821, "loss": 0.0137, "num_input_tokens_seen": 151722208, "step": 70250 }, { "epoch": 11.460848287112562, "grad_norm": 0.015514836646616459, "learning_rate": 0.0004598409396656168, "loss": 0.0761, "num_input_tokens_seen": 151732256, "step": 70255 }, { "epoch": 11.461663947797716, "grad_norm": 0.002084217732772231, "learning_rate": 0.000459769990212126, "loss": 0.0355, "num_input_tokens_seen": 151742240, "step": 70260 }, { "epoch": 11.462479608482871, "grad_norm": 0.009067544713616371, "learning_rate": 0.0004596990415739472, "loss": 0.0296, "num_input_tokens_seen": 151752320, "step": 70265 }, { "epoch": 11.463295269168025, "grad_norm": 0.42302384972572327, "learning_rate": 0.0004596280937525186, "loss": 0.0366, "num_input_tokens_seen": 151763072, "step": 70270 }, { "epoch": 11.464110929853181, "grad_norm": 0.0011174281826242805, "learning_rate": 0.00045955714674927775, "loss": 0.0465, "num_input_tokens_seen": 151773088, "step": 70275 }, { "epoch": 11.464926590538337, "grad_norm": 0.05450016260147095, "learning_rate": 0.0004594862005656628, "loss": 0.1505, "num_input_tokens_seen": 151784992, "step": 70280 }, { "epoch": 11.46574225122349, "grad_norm": 0.004575311206281185, "learning_rate": 0.00045941525520311116, "loss": 0.0564, "num_input_tokens_seen": 151796288, "step": 70285 }, { "epoch": 11.466557911908646, "grad_norm": 0.018959159031510353, "learning_rate": 0.0004593443106630611, "loss": 0.0244, "num_input_tokens_seen": 151807904, "step": 70290 }, { "epoch": 11.4673735725938, "grad_norm": 0.0019465818768367171, "learning_rate": 0.00045927336694695, "loss": 0.1312, "num_input_tokens_seen": 151818592, "step": 70295 }, { "epoch": 11.468189233278956, "grad_norm": 0.0028126207180321217, "learning_rate": 0.00045920242405621595, "loss": 0.0328, "num_input_tokens_seen": 151827616, "step": 70300 }, { "epoch": 11.469004893964112, "grad_norm": 0.00244735274463892, "learning_rate": 0.0004591314819922963, "loss": 0.0073, "num_input_tokens_seen": 151838272, "step": 70305 }, { "epoch": 11.469820554649266, "grad_norm": 0.010344883427023888, "learning_rate": 0.0004590605407566292, "loss": 0.0356, "num_input_tokens_seen": 151848896, "step": 70310 }, { "epoch": 11.470636215334421, "grad_norm": 0.07402225583791733, "learning_rate": 0.00045898960035065204, "loss": 0.0283, "num_input_tokens_seen": 151861248, "step": 70315 }, { "epoch": 11.471451876019575, "grad_norm": 0.5629965662956238, "learning_rate": 0.00045891866077580267, "loss": 0.0419, "num_input_tokens_seen": 151872512, "step": 70320 }, { "epoch": 11.47226753670473, "grad_norm": 0.027263466268777847, "learning_rate": 0.0004588477220335188, "loss": 0.0407, "num_input_tokens_seen": 151883744, "step": 70325 }, { "epoch": 11.473083197389887, "grad_norm": 0.4663159251213074, "learning_rate": 0.000458776784125238, "loss": 0.0163, "num_input_tokens_seen": 151894592, "step": 70330 }, { "epoch": 11.47389885807504, "grad_norm": 0.011494866572320461, "learning_rate": 0.0004587058470523981, "loss": 0.1729, "num_input_tokens_seen": 151904224, "step": 70335 }, { "epoch": 11.474714518760196, "grad_norm": 0.002531670266762376, "learning_rate": 0.00045863491081643646, "loss": 0.0069, "num_input_tokens_seen": 151915232, "step": 70340 }, { "epoch": 11.47553017944535, "grad_norm": 0.4217558801174164, "learning_rate": 0.00045856397541879087, "loss": 0.0437, "num_input_tokens_seen": 151926784, "step": 70345 }, { "epoch": 11.476345840130506, "grad_norm": 0.03666359931230545, "learning_rate": 0.0004584930408608989, "loss": 0.0105, "num_input_tokens_seen": 151937184, "step": 70350 }, { "epoch": 11.477161500815662, "grad_norm": 0.08825061470270157, "learning_rate": 0.0004584221071441981, "loss": 0.0575, "num_input_tokens_seen": 151947808, "step": 70355 }, { "epoch": 11.477977161500815, "grad_norm": 0.0489339604973793, "learning_rate": 0.000458351174270126, "loss": 0.0048, "num_input_tokens_seen": 151958656, "step": 70360 }, { "epoch": 11.478792822185971, "grad_norm": 0.08036676794290543, "learning_rate": 0.00045828024224012025, "loss": 0.1161, "num_input_tokens_seen": 151969888, "step": 70365 }, { "epoch": 11.479608482871125, "grad_norm": 0.009987418539822102, "learning_rate": 0.00045820931105561817, "loss": 0.0087, "num_input_tokens_seen": 151980352, "step": 70370 }, { "epoch": 11.48042414355628, "grad_norm": 1.6685467958450317, "learning_rate": 0.0004581383807180577, "loss": 0.0545, "num_input_tokens_seen": 151991616, "step": 70375 }, { "epoch": 11.481239804241435, "grad_norm": 0.09259206056594849, "learning_rate": 0.0004580674512288758, "loss": 0.1426, "num_input_tokens_seen": 152002400, "step": 70380 }, { "epoch": 11.48205546492659, "grad_norm": 0.001271451241336763, "learning_rate": 0.0004579965225895104, "loss": 0.0104, "num_input_tokens_seen": 152013952, "step": 70385 }, { "epoch": 11.482871125611746, "grad_norm": 0.0852578654885292, "learning_rate": 0.00045792559480139854, "loss": 0.0087, "num_input_tokens_seen": 152025248, "step": 70390 }, { "epoch": 11.4836867862969, "grad_norm": 0.03162388131022453, "learning_rate": 0.0004578546678659781, "loss": 0.115, "num_input_tokens_seen": 152037536, "step": 70395 }, { "epoch": 11.484502446982056, "grad_norm": 0.04780033975839615, "learning_rate": 0.00045778374178468605, "loss": 0.0536, "num_input_tokens_seen": 152048480, "step": 70400 }, { "epoch": 11.48531810766721, "grad_norm": 0.0038224325980991125, "learning_rate": 0.0004577128165589603, "loss": 0.0496, "num_input_tokens_seen": 152059808, "step": 70405 }, { "epoch": 11.486133768352365, "grad_norm": 0.031959887593984604, "learning_rate": 0.0004576418921902377, "loss": 0.0168, "num_input_tokens_seen": 152070304, "step": 70410 }, { "epoch": 11.486949429037521, "grad_norm": 0.006775837391614914, "learning_rate": 0.0004575709686799561, "loss": 0.0065, "num_input_tokens_seen": 152081472, "step": 70415 }, { "epoch": 11.487765089722675, "grad_norm": 0.237082839012146, "learning_rate": 0.00045750004602955246, "loss": 0.0241, "num_input_tokens_seen": 152092512, "step": 70420 }, { "epoch": 11.48858075040783, "grad_norm": 0.0024227348621934652, "learning_rate": 0.0004574291242404645, "loss": 0.0125, "num_input_tokens_seen": 152103872, "step": 70425 }, { "epoch": 11.489396411092985, "grad_norm": 0.4212666451931, "learning_rate": 0.00045735820331412914, "loss": 0.0137, "num_input_tokens_seen": 152115648, "step": 70430 }, { "epoch": 11.49021207177814, "grad_norm": 0.01448234636336565, "learning_rate": 0.0004572872832519839, "loss": 0.0084, "num_input_tokens_seen": 152126368, "step": 70435 }, { "epoch": 11.491027732463296, "grad_norm": 0.011855104938149452, "learning_rate": 0.0004572163640554662, "loss": 0.0058, "num_input_tokens_seen": 152137760, "step": 70440 }, { "epoch": 11.49184339314845, "grad_norm": 0.18611255288124084, "learning_rate": 0.00045714544572601296, "loss": 0.047, "num_input_tokens_seen": 152149088, "step": 70445 }, { "epoch": 11.492659053833606, "grad_norm": 0.0021328406874090433, "learning_rate": 0.0004570745282650619, "loss": 0.1562, "num_input_tokens_seen": 152159904, "step": 70450 }, { "epoch": 11.49347471451876, "grad_norm": 0.00876991543918848, "learning_rate": 0.00045700361167404967, "loss": 0.0285, "num_input_tokens_seen": 152171712, "step": 70455 }, { "epoch": 11.494290375203915, "grad_norm": 0.00328267109580338, "learning_rate": 0.0004569326959544141, "loss": 0.0684, "num_input_tokens_seen": 152182912, "step": 70460 }, { "epoch": 11.49510603588907, "grad_norm": 0.003475640434771776, "learning_rate": 0.00045686178110759183, "loss": 0.2062, "num_input_tokens_seen": 152193760, "step": 70465 }, { "epoch": 11.495921696574225, "grad_norm": 0.45626431703567505, "learning_rate": 0.0004567908671350206, "loss": 0.1644, "num_input_tokens_seen": 152204288, "step": 70470 }, { "epoch": 11.49673735725938, "grad_norm": 0.003484656335785985, "learning_rate": 0.00045671995403813686, "loss": 0.0197, "num_input_tokens_seen": 152215328, "step": 70475 }, { "epoch": 11.497553017944535, "grad_norm": 0.016352150589227676, "learning_rate": 0.0004566490418183785, "loss": 0.0053, "num_input_tokens_seen": 152225408, "step": 70480 }, { "epoch": 11.49836867862969, "grad_norm": 0.622386634349823, "learning_rate": 0.00045657813047718203, "loss": 0.0426, "num_input_tokens_seen": 152235904, "step": 70485 }, { "epoch": 11.499184339314844, "grad_norm": 0.12910671532154083, "learning_rate": 0.000456507220015985, "loss": 0.0196, "num_input_tokens_seen": 152247424, "step": 70490 }, { "epoch": 11.5, "grad_norm": 0.5777674317359924, "learning_rate": 0.00045643631043622426, "loss": 0.1849, "num_input_tokens_seen": 152258624, "step": 70495 }, { "epoch": 11.500815660685156, "grad_norm": 0.09449607133865356, "learning_rate": 0.00045636540173933697, "loss": 0.0105, "num_input_tokens_seen": 152269760, "step": 70500 }, { "epoch": 11.50163132137031, "grad_norm": 0.2962408661842346, "learning_rate": 0.0004562944939267602, "loss": 0.168, "num_input_tokens_seen": 152280672, "step": 70505 }, { "epoch": 11.502446982055465, "grad_norm": 0.4580059051513672, "learning_rate": 0.00045622358699993093, "loss": 0.0817, "num_input_tokens_seen": 152291008, "step": 70510 }, { "epoch": 11.50326264274062, "grad_norm": 0.07859183102846146, "learning_rate": 0.00045615268096028613, "loss": 0.0984, "num_input_tokens_seen": 152301920, "step": 70515 }, { "epoch": 11.504078303425775, "grad_norm": 0.019914044067263603, "learning_rate": 0.0004560817758092631, "loss": 0.028, "num_input_tokens_seen": 152312256, "step": 70520 }, { "epoch": 11.50489396411093, "grad_norm": 0.1658468097448349, "learning_rate": 0.00045601087154829834, "loss": 0.0312, "num_input_tokens_seen": 152323104, "step": 70525 }, { "epoch": 11.505709624796085, "grad_norm": 0.003438874613493681, "learning_rate": 0.00045593996817882925, "loss": 0.0346, "num_input_tokens_seen": 152334432, "step": 70530 }, { "epoch": 11.50652528548124, "grad_norm": 0.006378691643476486, "learning_rate": 0.0004558690657022925, "loss": 0.0845, "num_input_tokens_seen": 152344224, "step": 70535 }, { "epoch": 11.507340946166394, "grad_norm": 0.005127818323671818, "learning_rate": 0.0004557981641201252, "loss": 0.0707, "num_input_tokens_seen": 152353824, "step": 70540 }, { "epoch": 11.50815660685155, "grad_norm": 0.010716418735682964, "learning_rate": 0.000455727263433764, "loss": 0.0139, "num_input_tokens_seen": 152364320, "step": 70545 }, { "epoch": 11.508972267536706, "grad_norm": 0.01387725118547678, "learning_rate": 0.000455656363644646, "loss": 0.0109, "num_input_tokens_seen": 152375808, "step": 70550 }, { "epoch": 11.50978792822186, "grad_norm": 0.030756894499063492, "learning_rate": 0.0004555854647542083, "loss": 0.0246, "num_input_tokens_seen": 152386912, "step": 70555 }, { "epoch": 11.510603588907015, "grad_norm": 0.04727554693818092, "learning_rate": 0.00045551456676388725, "loss": 0.0825, "num_input_tokens_seen": 152397984, "step": 70560 }, { "epoch": 11.51141924959217, "grad_norm": 0.011782309971749783, "learning_rate": 0.00045544366967512014, "loss": 0.0468, "num_input_tokens_seen": 152408704, "step": 70565 }, { "epoch": 11.512234910277325, "grad_norm": 0.03985341265797615, "learning_rate": 0.0004553727734893434, "loss": 0.0423, "num_input_tokens_seen": 152417984, "step": 70570 }, { "epoch": 11.513050570962479, "grad_norm": 0.02030654065310955, "learning_rate": 0.0004553018782079942, "loss": 0.0136, "num_input_tokens_seen": 152428640, "step": 70575 }, { "epoch": 11.513866231647635, "grad_norm": 0.0016316096298396587, "learning_rate": 0.00045523098383250894, "loss": 0.1642, "num_input_tokens_seen": 152439328, "step": 70580 }, { "epoch": 11.51468189233279, "grad_norm": 0.6504743099212646, "learning_rate": 0.0004551600903643248, "loss": 0.1711, "num_input_tokens_seen": 152450048, "step": 70585 }, { "epoch": 11.515497553017944, "grad_norm": 0.034713149070739746, "learning_rate": 0.00045508919780487805, "loss": 0.025, "num_input_tokens_seen": 152460320, "step": 70590 }, { "epoch": 11.5163132137031, "grad_norm": 0.004274432547390461, "learning_rate": 0.000455018306155606, "loss": 0.0197, "num_input_tokens_seen": 152471264, "step": 70595 }, { "epoch": 11.517128874388254, "grad_norm": 0.004105722531676292, "learning_rate": 0.0004549474154179447, "loss": 0.0159, "num_input_tokens_seen": 152482016, "step": 70600 }, { "epoch": 11.51794453507341, "grad_norm": 0.10600485652685165, "learning_rate": 0.0004548765255933315, "loss": 0.0833, "num_input_tokens_seen": 152493472, "step": 70605 }, { "epoch": 11.518760195758565, "grad_norm": 0.011000092141330242, "learning_rate": 0.00045480563668320244, "loss": 0.014, "num_input_tokens_seen": 152504704, "step": 70610 }, { "epoch": 11.51957585644372, "grad_norm": 0.17460846900939941, "learning_rate": 0.0004547347486889948, "loss": 0.0521, "num_input_tokens_seen": 152515872, "step": 70615 }, { "epoch": 11.520391517128875, "grad_norm": 0.06345030665397644, "learning_rate": 0.00045466386161214465, "loss": 0.0101, "num_input_tokens_seen": 152528192, "step": 70620 }, { "epoch": 11.521207177814029, "grad_norm": 0.002006505150347948, "learning_rate": 0.00045459297545408906, "loss": 0.0123, "num_input_tokens_seen": 152539264, "step": 70625 }, { "epoch": 11.522022838499185, "grad_norm": 0.3534085750579834, "learning_rate": 0.0004545220902162642, "loss": 0.1996, "num_input_tokens_seen": 152550560, "step": 70630 }, { "epoch": 11.522838499184338, "grad_norm": 0.03179255872964859, "learning_rate": 0.000454451205900107, "loss": 0.0135, "num_input_tokens_seen": 152561088, "step": 70635 }, { "epoch": 11.523654159869494, "grad_norm": 0.006981425452977419, "learning_rate": 0.00045438032250705394, "loss": 0.0704, "num_input_tokens_seen": 152572512, "step": 70640 }, { "epoch": 11.52446982055465, "grad_norm": 0.024807730689644814, "learning_rate": 0.00045430944003854143, "loss": 0.0117, "num_input_tokens_seen": 152583520, "step": 70645 }, { "epoch": 11.525285481239804, "grad_norm": 0.008164377883076668, "learning_rate": 0.00045423855849600615, "loss": 0.0116, "num_input_tokens_seen": 152594880, "step": 70650 }, { "epoch": 11.52610114192496, "grad_norm": 0.4699248969554901, "learning_rate": 0.00045416767788088435, "loss": 0.0348, "num_input_tokens_seen": 152604416, "step": 70655 }, { "epoch": 11.526916802610113, "grad_norm": 0.04431702569127083, "learning_rate": 0.00045409679819461286, "loss": 0.0079, "num_input_tokens_seen": 152615264, "step": 70660 }, { "epoch": 11.52773246329527, "grad_norm": 0.039329253137111664, "learning_rate": 0.000454025919438628, "loss": 0.0114, "num_input_tokens_seen": 152627072, "step": 70665 }, { "epoch": 11.528548123980425, "grad_norm": 0.015786344185471535, "learning_rate": 0.00045395504161436617, "loss": 0.0249, "num_input_tokens_seen": 152636704, "step": 70670 }, { "epoch": 11.529363784665579, "grad_norm": 0.0025255740620195866, "learning_rate": 0.0004538841647232639, "loss": 0.0054, "num_input_tokens_seen": 152647104, "step": 70675 }, { "epoch": 11.530179445350734, "grad_norm": 0.011380949057638645, "learning_rate": 0.0004538132887667574, "loss": 0.0212, "num_input_tokens_seen": 152657280, "step": 70680 }, { "epoch": 11.530995106035888, "grad_norm": 0.018168801441788673, "learning_rate": 0.0004537424137462832, "loss": 0.0158, "num_input_tokens_seen": 152667072, "step": 70685 }, { "epoch": 11.531810766721044, "grad_norm": 0.552568793296814, "learning_rate": 0.0004536715396632779, "loss": 0.1466, "num_input_tokens_seen": 152678336, "step": 70690 }, { "epoch": 11.5326264274062, "grad_norm": 1.1314250230789185, "learning_rate": 0.00045360066651917733, "loss": 0.1199, "num_input_tokens_seen": 152688128, "step": 70695 }, { "epoch": 11.533442088091354, "grad_norm": 0.04501219838857651, "learning_rate": 0.00045352979431541833, "loss": 0.0052, "num_input_tokens_seen": 152699872, "step": 70700 }, { "epoch": 11.53425774877651, "grad_norm": 0.010922752320766449, "learning_rate": 0.0004534589230534368, "loss": 0.0111, "num_input_tokens_seen": 152710880, "step": 70705 }, { "epoch": 11.535073409461663, "grad_norm": 1.1566555500030518, "learning_rate": 0.00045338805273466954, "loss": 0.1121, "num_input_tokens_seen": 152722272, "step": 70710 }, { "epoch": 11.535889070146819, "grad_norm": 0.08548852056264877, "learning_rate": 0.00045331718336055223, "loss": 0.0773, "num_input_tokens_seen": 152733568, "step": 70715 }, { "epoch": 11.536704730831975, "grad_norm": 0.0486895889043808, "learning_rate": 0.0004532463149325216, "loss": 0.1141, "num_input_tokens_seen": 152743488, "step": 70720 }, { "epoch": 11.537520391517129, "grad_norm": 0.0107080964371562, "learning_rate": 0.00045317544745201354, "loss": 0.1, "num_input_tokens_seen": 152754400, "step": 70725 }, { "epoch": 11.538336052202284, "grad_norm": 0.06043444946408272, "learning_rate": 0.00045310458092046464, "loss": 0.035, "num_input_tokens_seen": 152763648, "step": 70730 }, { "epoch": 11.539151712887438, "grad_norm": 0.009252172894775867, "learning_rate": 0.0004530337153393107, "loss": 0.0772, "num_input_tokens_seen": 152774784, "step": 70735 }, { "epoch": 11.539967373572594, "grad_norm": 0.008908047340810299, "learning_rate": 0.00045296285070998835, "loss": 0.0091, "num_input_tokens_seen": 152785024, "step": 70740 }, { "epoch": 11.540783034257748, "grad_norm": 0.0023308051750063896, "learning_rate": 0.0004528919870339332, "loss": 0.019, "num_input_tokens_seen": 152795872, "step": 70745 }, { "epoch": 11.541598694942904, "grad_norm": 0.019828980788588524, "learning_rate": 0.00045282112431258194, "loss": 0.023, "num_input_tokens_seen": 152804640, "step": 70750 }, { "epoch": 11.54241435562806, "grad_norm": 0.047302018851041794, "learning_rate": 0.00045275026254737027, "loss": 0.0124, "num_input_tokens_seen": 152815648, "step": 70755 }, { "epoch": 11.543230016313213, "grad_norm": 0.005254935007542372, "learning_rate": 0.0004526794017397344, "loss": 0.0191, "num_input_tokens_seen": 152828256, "step": 70760 }, { "epoch": 11.544045676998369, "grad_norm": 0.012222270481288433, "learning_rate": 0.0004526085418911108, "loss": 0.0181, "num_input_tokens_seen": 152838336, "step": 70765 }, { "epoch": 11.544861337683523, "grad_norm": 0.0508912019431591, "learning_rate": 0.0004525376830029349, "loss": 0.1886, "num_input_tokens_seen": 152847616, "step": 70770 }, { "epoch": 11.545676998368679, "grad_norm": 0.0036810701712965965, "learning_rate": 0.00045246682507664335, "loss": 0.0209, "num_input_tokens_seen": 152857984, "step": 70775 }, { "epoch": 11.546492659053834, "grad_norm": 0.009106824174523354, "learning_rate": 0.0004523959681136716, "loss": 0.0314, "num_input_tokens_seen": 152869280, "step": 70780 }, { "epoch": 11.547308319738988, "grad_norm": 0.018032781779766083, "learning_rate": 0.00045232511211545625, "loss": 0.007, "num_input_tokens_seen": 152879936, "step": 70785 }, { "epoch": 11.548123980424144, "grad_norm": 0.17428778111934662, "learning_rate": 0.0004522542570834327, "loss": 0.0836, "num_input_tokens_seen": 152891296, "step": 70790 }, { "epoch": 11.548939641109298, "grad_norm": 0.004335889592766762, "learning_rate": 0.0004521834030190375, "loss": 0.0112, "num_input_tokens_seen": 152903776, "step": 70795 }, { "epoch": 11.549755301794454, "grad_norm": 0.020084548741579056, "learning_rate": 0.000452112549923706, "loss": 0.0251, "num_input_tokens_seen": 152915232, "step": 70800 }, { "epoch": 11.550570962479608, "grad_norm": 0.07404981553554535, "learning_rate": 0.00045204169779887454, "loss": 0.0116, "num_input_tokens_seen": 152925632, "step": 70805 }, { "epoch": 11.551386623164763, "grad_norm": 0.0036789614241570234, "learning_rate": 0.0004519708466459789, "loss": 0.0058, "num_input_tokens_seen": 152936672, "step": 70810 }, { "epoch": 11.552202283849919, "grad_norm": 0.004675018601119518, "learning_rate": 0.0004518999964664551, "loss": 0.0746, "num_input_tokens_seen": 152947648, "step": 70815 }, { "epoch": 11.553017944535073, "grad_norm": 0.031063973903656006, "learning_rate": 0.0004518291472617387, "loss": 0.0214, "num_input_tokens_seen": 152959008, "step": 70820 }, { "epoch": 11.553833605220229, "grad_norm": 0.006103998050093651, "learning_rate": 0.00045175829903326594, "loss": 0.0059, "num_input_tokens_seen": 152969504, "step": 70825 }, { "epoch": 11.554649265905383, "grad_norm": 0.001215186552144587, "learning_rate": 0.0004516874517824722, "loss": 0.0038, "num_input_tokens_seen": 152979264, "step": 70830 }, { "epoch": 11.555464926590538, "grad_norm": 0.0012902735034003854, "learning_rate": 0.0004516166055107938, "loss": 0.0212, "num_input_tokens_seen": 152990496, "step": 70835 }, { "epoch": 11.556280587275694, "grad_norm": 0.0009681761148385704, "learning_rate": 0.00045154576021966605, "loss": 0.0044, "num_input_tokens_seen": 153001696, "step": 70840 }, { "epoch": 11.557096247960848, "grad_norm": 0.5672785043716431, "learning_rate": 0.00045147491591052515, "loss": 0.1108, "num_input_tokens_seen": 153011616, "step": 70845 }, { "epoch": 11.557911908646004, "grad_norm": 0.183214470744133, "learning_rate": 0.0004514040725848064, "loss": 0.0455, "num_input_tokens_seen": 153022848, "step": 70850 }, { "epoch": 11.558727569331158, "grad_norm": 0.012468835338950157, "learning_rate": 0.0004513332302439461, "loss": 0.0065, "num_input_tokens_seen": 153033280, "step": 70855 }, { "epoch": 11.559543230016313, "grad_norm": 0.09847768396139145, "learning_rate": 0.00045126238888937927, "loss": 0.0173, "num_input_tokens_seen": 153043584, "step": 70860 }, { "epoch": 11.560358890701469, "grad_norm": 0.004849771969020367, "learning_rate": 0.00045119154852254204, "loss": 0.0074, "num_input_tokens_seen": 153054080, "step": 70865 }, { "epoch": 11.561174551386623, "grad_norm": 0.02971162274479866, "learning_rate": 0.0004511207091448701, "loss": 0.0201, "num_input_tokens_seen": 153064128, "step": 70870 }, { "epoch": 11.561990212071779, "grad_norm": 0.031622979789972305, "learning_rate": 0.0004510498707577989, "loss": 0.0153, "num_input_tokens_seen": 153075328, "step": 70875 }, { "epoch": 11.562805872756933, "grad_norm": 0.07961271703243256, "learning_rate": 0.0004509790333627644, "loss": 0.0087, "num_input_tokens_seen": 153085600, "step": 70880 }, { "epoch": 11.563621533442088, "grad_norm": 0.002815672429278493, "learning_rate": 0.00045090819696120166, "loss": 0.0111, "num_input_tokens_seen": 153095744, "step": 70885 }, { "epoch": 11.564437194127244, "grad_norm": 0.03652092441916466, "learning_rate": 0.0004508373615545469, "loss": 0.0065, "num_input_tokens_seen": 153106624, "step": 70890 }, { "epoch": 11.565252854812398, "grad_norm": 0.007009325083345175, "learning_rate": 0.00045076652714423507, "loss": 0.0029, "num_input_tokens_seen": 153116800, "step": 70895 }, { "epoch": 11.566068515497554, "grad_norm": 0.007585311774164438, "learning_rate": 0.00045069569373170227, "loss": 0.0016, "num_input_tokens_seen": 153127008, "step": 70900 }, { "epoch": 11.566884176182707, "grad_norm": 0.002985976403579116, "learning_rate": 0.0004506248613183836, "loss": 0.0264, "num_input_tokens_seen": 153137376, "step": 70905 }, { "epoch": 11.567699836867863, "grad_norm": 0.016019705682992935, "learning_rate": 0.00045055402990571493, "loss": 0.0053, "num_input_tokens_seen": 153147264, "step": 70910 }, { "epoch": 11.568515497553017, "grad_norm": 0.00407218374311924, "learning_rate": 0.00045048319949513136, "loss": 0.0052, "num_input_tokens_seen": 153158816, "step": 70915 }, { "epoch": 11.569331158238173, "grad_norm": 0.0016611559549346566, "learning_rate": 0.0004504123700880688, "loss": 0.0079, "num_input_tokens_seen": 153169728, "step": 70920 }, { "epoch": 11.570146818923329, "grad_norm": 0.02866864949464798, "learning_rate": 0.00045034154168596224, "loss": 0.006, "num_input_tokens_seen": 153180704, "step": 70925 }, { "epoch": 11.570962479608482, "grad_norm": 0.3860038220882416, "learning_rate": 0.00045027071429024757, "loss": 0.0218, "num_input_tokens_seen": 153192320, "step": 70930 }, { "epoch": 11.571778140293638, "grad_norm": 0.0035648008342832327, "learning_rate": 0.00045019988790235974, "loss": 0.0056, "num_input_tokens_seen": 153203296, "step": 70935 }, { "epoch": 11.572593800978792, "grad_norm": 0.5095841288566589, "learning_rate": 0.0004501290625237345, "loss": 0.0219, "num_input_tokens_seen": 153213952, "step": 70940 }, { "epoch": 11.573409461663948, "grad_norm": 0.004286561626940966, "learning_rate": 0.00045005823815580696, "loss": 0.0044, "num_input_tokens_seen": 153225472, "step": 70945 }, { "epoch": 11.574225122349104, "grad_norm": 0.002824546070769429, "learning_rate": 0.00044998741480001264, "loss": 0.0111, "num_input_tokens_seen": 153235168, "step": 70950 }, { "epoch": 11.575040783034257, "grad_norm": 0.00039686966920271516, "learning_rate": 0.00044991659245778684, "loss": 0.0033, "num_input_tokens_seen": 153246112, "step": 70955 }, { "epoch": 11.575856443719413, "grad_norm": 0.009134351275861263, "learning_rate": 0.00044984577113056477, "loss": 0.018, "num_input_tokens_seen": 153257952, "step": 70960 }, { "epoch": 11.576672104404567, "grad_norm": 0.016628161072731018, "learning_rate": 0.0004497749508197818, "loss": 0.0098, "num_input_tokens_seen": 153268608, "step": 70965 }, { "epoch": 11.577487765089723, "grad_norm": 0.03593922778964043, "learning_rate": 0.00044970413152687304, "loss": 0.1567, "num_input_tokens_seen": 153279168, "step": 70970 }, { "epoch": 11.578303425774878, "grad_norm": 0.37217482924461365, "learning_rate": 0.000449633313253274, "loss": 0.0878, "num_input_tokens_seen": 153289824, "step": 70975 }, { "epoch": 11.579119086460032, "grad_norm": 0.762610912322998, "learning_rate": 0.00044956249600041975, "loss": 0.0469, "num_input_tokens_seen": 153300928, "step": 70980 }, { "epoch": 11.579934747145188, "grad_norm": 0.025967687368392944, "learning_rate": 0.00044949167976974553, "loss": 0.0235, "num_input_tokens_seen": 153311072, "step": 70985 }, { "epoch": 11.580750407830342, "grad_norm": 0.10705938190221786, "learning_rate": 0.00044942086456268643, "loss": 0.0283, "num_input_tokens_seen": 153322400, "step": 70990 }, { "epoch": 11.581566068515498, "grad_norm": 0.48525816202163696, "learning_rate": 0.0004493500503806777, "loss": 0.0323, "num_input_tokens_seen": 153333984, "step": 70995 }, { "epoch": 11.582381729200652, "grad_norm": 0.008772018365561962, "learning_rate": 0.0004492792372251544, "loss": 0.0771, "num_input_tokens_seen": 153344672, "step": 71000 }, { "epoch": 11.583197389885807, "grad_norm": 0.0028414896223694086, "learning_rate": 0.00044920842509755187, "loss": 0.0161, "num_input_tokens_seen": 153354528, "step": 71005 }, { "epoch": 11.584013050570963, "grad_norm": 0.5699886083602905, "learning_rate": 0.0004491376139993048, "loss": 0.0729, "num_input_tokens_seen": 153365536, "step": 71010 }, { "epoch": 11.584828711256117, "grad_norm": 0.0065368469804525375, "learning_rate": 0.0004490668039318488, "loss": 0.0076, "num_input_tokens_seen": 153376704, "step": 71015 }, { "epoch": 11.585644371941273, "grad_norm": 0.004508780315518379, "learning_rate": 0.00044899599489661837, "loss": 0.0053, "num_input_tokens_seen": 153387968, "step": 71020 }, { "epoch": 11.586460032626427, "grad_norm": 0.018700627610087395, "learning_rate": 0.000448925186895049, "loss": 0.1483, "num_input_tokens_seen": 153398688, "step": 71025 }, { "epoch": 11.587275693311582, "grad_norm": 0.003485024208202958, "learning_rate": 0.0004488543799285753, "loss": 0.0029, "num_input_tokens_seen": 153409344, "step": 71030 }, { "epoch": 11.588091353996738, "grad_norm": 0.00151357043068856, "learning_rate": 0.00044878357399863266, "loss": 0.0494, "num_input_tokens_seen": 153419648, "step": 71035 }, { "epoch": 11.588907014681892, "grad_norm": 0.013715144246816635, "learning_rate": 0.0004487127691066558, "loss": 0.0606, "num_input_tokens_seen": 153429472, "step": 71040 }, { "epoch": 11.589722675367048, "grad_norm": 0.005180860869586468, "learning_rate": 0.0004486419652540798, "loss": 0.198, "num_input_tokens_seen": 153440288, "step": 71045 }, { "epoch": 11.590538336052202, "grad_norm": 0.0013805264607071877, "learning_rate": 0.0004485711624423393, "loss": 0.0538, "num_input_tokens_seen": 153450336, "step": 71050 }, { "epoch": 11.591353996737357, "grad_norm": 0.0028821558225899935, "learning_rate": 0.0004485003606728698, "loss": 0.035, "num_input_tokens_seen": 153460864, "step": 71055 }, { "epoch": 11.592169657422513, "grad_norm": 0.003991037607192993, "learning_rate": 0.0004484295599471054, "loss": 0.0029, "num_input_tokens_seen": 153472576, "step": 71060 }, { "epoch": 11.592985318107667, "grad_norm": 0.05960950627923012, "learning_rate": 0.00044835876026648176, "loss": 0.0081, "num_input_tokens_seen": 153483584, "step": 71065 }, { "epoch": 11.593800978792823, "grad_norm": 0.0034852263052016497, "learning_rate": 0.00044828796163243315, "loss": 0.0762, "num_input_tokens_seen": 153495168, "step": 71070 }, { "epoch": 11.594616639477977, "grad_norm": 0.47948548197746277, "learning_rate": 0.0004482171640463945, "loss": 0.028, "num_input_tokens_seen": 153505568, "step": 71075 }, { "epoch": 11.595432300163132, "grad_norm": 0.024540584534406662, "learning_rate": 0.000448146367509801, "loss": 0.0166, "num_input_tokens_seen": 153515488, "step": 71080 }, { "epoch": 11.596247960848288, "grad_norm": 0.20424458384513855, "learning_rate": 0.0004480755720240869, "loss": 0.0543, "num_input_tokens_seen": 153524192, "step": 71085 }, { "epoch": 11.597063621533442, "grad_norm": 0.24791987240314484, "learning_rate": 0.0004480047775906874, "loss": 0.0096, "num_input_tokens_seen": 153535776, "step": 71090 }, { "epoch": 11.597879282218598, "grad_norm": 0.023952824994921684, "learning_rate": 0.0004479339842110368, "loss": 0.0278, "num_input_tokens_seen": 153546432, "step": 71095 }, { "epoch": 11.598694942903752, "grad_norm": 0.0008945376030169427, "learning_rate": 0.0004478631918865704, "loss": 0.0057, "num_input_tokens_seen": 153557920, "step": 71100 }, { "epoch": 11.599510603588907, "grad_norm": 0.0072722178883850574, "learning_rate": 0.00044779240061872225, "loss": 0.071, "num_input_tokens_seen": 153569216, "step": 71105 }, { "epoch": 11.600326264274061, "grad_norm": 0.31958064436912537, "learning_rate": 0.00044772161040892755, "loss": 0.0305, "num_input_tokens_seen": 153579360, "step": 71110 }, { "epoch": 11.601141924959217, "grad_norm": 0.07264435291290283, "learning_rate": 0.00044765082125862053, "loss": 0.0394, "num_input_tokens_seen": 153588768, "step": 71115 }, { "epoch": 11.601957585644373, "grad_norm": 0.005538736004382372, "learning_rate": 0.0004475800331692361, "loss": 0.144, "num_input_tokens_seen": 153600224, "step": 71120 }, { "epoch": 11.602773246329527, "grad_norm": 0.21943026781082153, "learning_rate": 0.0004475092461422089, "loss": 0.0477, "num_input_tokens_seen": 153611584, "step": 71125 }, { "epoch": 11.603588907014682, "grad_norm": 0.06436196714639664, "learning_rate": 0.0004474384601789733, "loss": 0.0255, "num_input_tokens_seen": 153622336, "step": 71130 }, { "epoch": 11.604404567699836, "grad_norm": 0.0007576481439173222, "learning_rate": 0.00044736767528096407, "loss": 0.0411, "num_input_tokens_seen": 153633376, "step": 71135 }, { "epoch": 11.605220228384992, "grad_norm": 0.007207597605884075, "learning_rate": 0.0004472968914496156, "loss": 0.0439, "num_input_tokens_seen": 153644544, "step": 71140 }, { "epoch": 11.606035889070148, "grad_norm": 0.033221352845430374, "learning_rate": 0.00044722610868636243, "loss": 0.0196, "num_input_tokens_seen": 153655872, "step": 71145 }, { "epoch": 11.606851549755302, "grad_norm": 0.008055893704295158, "learning_rate": 0.00044715532699263926, "loss": 0.0873, "num_input_tokens_seen": 153667040, "step": 71150 }, { "epoch": 11.607667210440457, "grad_norm": 0.07398483902215958, "learning_rate": 0.00044708454636988026, "loss": 0.026, "num_input_tokens_seen": 153678496, "step": 71155 }, { "epoch": 11.608482871125611, "grad_norm": 0.008414418436586857, "learning_rate": 0.00044701376681952033, "loss": 0.0084, "num_input_tokens_seen": 153690240, "step": 71160 }, { "epoch": 11.609298531810767, "grad_norm": 0.04958515241742134, "learning_rate": 0.00044694298834299336, "loss": 0.0081, "num_input_tokens_seen": 153700992, "step": 71165 }, { "epoch": 11.61011419249592, "grad_norm": 0.3301873505115509, "learning_rate": 0.00044687221094173425, "loss": 0.0086, "num_input_tokens_seen": 153712224, "step": 71170 }, { "epoch": 11.610929853181077, "grad_norm": 0.21836425364017487, "learning_rate": 0.0004468014346171769, "loss": 0.0336, "num_input_tokens_seen": 153723008, "step": 71175 }, { "epoch": 11.611745513866232, "grad_norm": 0.1655757576227188, "learning_rate": 0.0004467306593707563, "loss": 0.0125, "num_input_tokens_seen": 153733248, "step": 71180 }, { "epoch": 11.612561174551386, "grad_norm": 0.01050649955868721, "learning_rate": 0.00044665988520390624, "loss": 0.0294, "num_input_tokens_seen": 153744416, "step": 71185 }, { "epoch": 11.613376835236542, "grad_norm": 0.016805484890937805, "learning_rate": 0.0004465891121180612, "loss": 0.0067, "num_input_tokens_seen": 153754528, "step": 71190 }, { "epoch": 11.614192495921696, "grad_norm": 0.0019383433973416686, "learning_rate": 0.0004465183401146558, "loss": 0.0027, "num_input_tokens_seen": 153765216, "step": 71195 }, { "epoch": 11.615008156606851, "grad_norm": 0.4953695833683014, "learning_rate": 0.00044644756919512386, "loss": 0.0204, "num_input_tokens_seen": 153776352, "step": 71200 }, { "epoch": 11.615823817292007, "grad_norm": 0.08260602504014969, "learning_rate": 0.00044637679936090013, "loss": 0.0265, "num_input_tokens_seen": 153787040, "step": 71205 }, { "epoch": 11.616639477977161, "grad_norm": 0.008234014734625816, "learning_rate": 0.00044630603061341837, "loss": 0.0062, "num_input_tokens_seen": 153797664, "step": 71210 }, { "epoch": 11.617455138662317, "grad_norm": 0.040098607540130615, "learning_rate": 0.00044623526295411314, "loss": 0.0418, "num_input_tokens_seen": 153808352, "step": 71215 }, { "epoch": 11.61827079934747, "grad_norm": 0.007715737447142601, "learning_rate": 0.00044616449638441836, "loss": 0.0025, "num_input_tokens_seen": 153818400, "step": 71220 }, { "epoch": 11.619086460032626, "grad_norm": 0.002162148244678974, "learning_rate": 0.0004460937309057686, "loss": 0.0161, "num_input_tokens_seen": 153829376, "step": 71225 }, { "epoch": 11.619902120717782, "grad_norm": 0.030692892149090767, "learning_rate": 0.0004460229665195975, "loss": 0.0489, "num_input_tokens_seen": 153840672, "step": 71230 }, { "epoch": 11.620717781402936, "grad_norm": 0.0370308980345726, "learning_rate": 0.0004459522032273397, "loss": 0.0121, "num_input_tokens_seen": 153850976, "step": 71235 }, { "epoch": 11.621533442088092, "grad_norm": 0.00597534142434597, "learning_rate": 0.00044588144103042883, "loss": 0.037, "num_input_tokens_seen": 153862112, "step": 71240 }, { "epoch": 11.622349102773246, "grad_norm": 0.0010464468505233526, "learning_rate": 0.00044581067993029944, "loss": 0.0013, "num_input_tokens_seen": 153872928, "step": 71245 }, { "epoch": 11.623164763458401, "grad_norm": 0.07114594429731369, "learning_rate": 0.0004457399199283852, "loss": 0.0026, "num_input_tokens_seen": 153884320, "step": 71250 }, { "epoch": 11.623980424143557, "grad_norm": 0.01107731182128191, "learning_rate": 0.00044566916102612043, "loss": 0.0933, "num_input_tokens_seen": 153894880, "step": 71255 }, { "epoch": 11.624796084828711, "grad_norm": 0.0015193721046671271, "learning_rate": 0.0004455984032249389, "loss": 0.0929, "num_input_tokens_seen": 153905984, "step": 71260 }, { "epoch": 11.625611745513867, "grad_norm": 0.2539882957935333, "learning_rate": 0.0004455276465262748, "loss": 0.015, "num_input_tokens_seen": 153914624, "step": 71265 }, { "epoch": 11.62642740619902, "grad_norm": 0.008223704993724823, "learning_rate": 0.0004454568909315621, "loss": 0.2522, "num_input_tokens_seen": 153924640, "step": 71270 }, { "epoch": 11.627243066884176, "grad_norm": 0.0044373562559485435, "learning_rate": 0.0004453861364422347, "loss": 0.0143, "num_input_tokens_seen": 153935392, "step": 71275 }, { "epoch": 11.62805872756933, "grad_norm": 0.016537398099899292, "learning_rate": 0.00044531538305972646, "loss": 0.0056, "num_input_tokens_seen": 153945824, "step": 71280 }, { "epoch": 11.628874388254486, "grad_norm": 0.012589816004037857, "learning_rate": 0.0004452446307854714, "loss": 0.1544, "num_input_tokens_seen": 153958336, "step": 71285 }, { "epoch": 11.629690048939642, "grad_norm": 0.0054365224204957485, "learning_rate": 0.00044517387962090323, "loss": 0.0861, "num_input_tokens_seen": 153969728, "step": 71290 }, { "epoch": 11.630505709624796, "grad_norm": 0.004397984594106674, "learning_rate": 0.00044510312956745607, "loss": 0.0168, "num_input_tokens_seen": 153980416, "step": 71295 }, { "epoch": 11.631321370309951, "grad_norm": 0.1747157722711563, "learning_rate": 0.00044503238062656357, "loss": 0.0271, "num_input_tokens_seen": 153990432, "step": 71300 }, { "epoch": 11.632137030995105, "grad_norm": 0.026617346331477165, "learning_rate": 0.0004449616327996597, "loss": 0.0429, "num_input_tokens_seen": 154001024, "step": 71305 }, { "epoch": 11.632952691680261, "grad_norm": 0.00191340537276119, "learning_rate": 0.0004448908860881781, "loss": 0.0846, "num_input_tokens_seen": 154011136, "step": 71310 }, { "epoch": 11.633768352365417, "grad_norm": 0.21351198852062225, "learning_rate": 0.0004448201404935525, "loss": 0.0238, "num_input_tokens_seen": 154021760, "step": 71315 }, { "epoch": 11.63458401305057, "grad_norm": 0.0022384291514754295, "learning_rate": 0.00044474939601721705, "loss": 0.0032, "num_input_tokens_seen": 154033312, "step": 71320 }, { "epoch": 11.635399673735726, "grad_norm": 0.009010802023112774, "learning_rate": 0.00044467865266060487, "loss": 0.012, "num_input_tokens_seen": 154044864, "step": 71325 }, { "epoch": 11.63621533442088, "grad_norm": 0.14343449473381042, "learning_rate": 0.0004446079104251503, "loss": 0.1224, "num_input_tokens_seen": 154055712, "step": 71330 }, { "epoch": 11.637030995106036, "grad_norm": 0.03126922994852066, "learning_rate": 0.0004445371693122863, "loss": 0.2138, "num_input_tokens_seen": 154067200, "step": 71335 }, { "epoch": 11.63784665579119, "grad_norm": 0.004906293470412493, "learning_rate": 0.00044446642932344726, "loss": 0.1311, "num_input_tokens_seen": 154078176, "step": 71340 }, { "epoch": 11.638662316476346, "grad_norm": 0.08446597307920456, "learning_rate": 0.0004443956904600663, "loss": 0.0163, "num_input_tokens_seen": 154088832, "step": 71345 }, { "epoch": 11.639477977161501, "grad_norm": 0.05098491162061691, "learning_rate": 0.00044432495272357734, "loss": 0.0066, "num_input_tokens_seen": 154099808, "step": 71350 }, { "epoch": 11.640293637846655, "grad_norm": 0.006743690464645624, "learning_rate": 0.00044425421611541364, "loss": 0.0077, "num_input_tokens_seen": 154110528, "step": 71355 }, { "epoch": 11.641109298531811, "grad_norm": 0.004304811824113131, "learning_rate": 0.0004441834806370092, "loss": 0.092, "num_input_tokens_seen": 154120800, "step": 71360 }, { "epoch": 11.641924959216965, "grad_norm": 0.5354902148246765, "learning_rate": 0.00044411274628979714, "loss": 0.0588, "num_input_tokens_seen": 154132320, "step": 71365 }, { "epoch": 11.64274061990212, "grad_norm": 0.033445850014686584, "learning_rate": 0.00044404201307521134, "loss": 0.0083, "num_input_tokens_seen": 154143392, "step": 71370 }, { "epoch": 11.643556280587276, "grad_norm": 0.011206441558897495, "learning_rate": 0.00044397128099468497, "loss": 0.0602, "num_input_tokens_seen": 154156224, "step": 71375 }, { "epoch": 11.64437194127243, "grad_norm": 0.03548984229564667, "learning_rate": 0.0004439005500496519, "loss": 0.1289, "num_input_tokens_seen": 154166528, "step": 71380 }, { "epoch": 11.645187601957586, "grad_norm": 0.16109098494052887, "learning_rate": 0.00044382982024154506, "loss": 0.1376, "num_input_tokens_seen": 154177376, "step": 71385 }, { "epoch": 11.64600326264274, "grad_norm": 0.09362545609474182, "learning_rate": 0.0004437590915717984, "loss": 0.0276, "num_input_tokens_seen": 154188800, "step": 71390 }, { "epoch": 11.646818923327896, "grad_norm": 0.008209917694330215, "learning_rate": 0.0004436883640418449, "loss": 0.0742, "num_input_tokens_seen": 154200192, "step": 71395 }, { "epoch": 11.647634584013051, "grad_norm": 0.04360055923461914, "learning_rate": 0.0004436176376531181, "loss": 0.1731, "num_input_tokens_seen": 154212032, "step": 71400 }, { "epoch": 11.648450244698205, "grad_norm": 0.006665311753749847, "learning_rate": 0.00044354691240705167, "loss": 0.0237, "num_input_tokens_seen": 154222880, "step": 71405 }, { "epoch": 11.649265905383361, "grad_norm": 0.1875232309103012, "learning_rate": 0.00044347618830507845, "loss": 0.0155, "num_input_tokens_seen": 154233568, "step": 71410 }, { "epoch": 11.650081566068515, "grad_norm": 0.003251552814617753, "learning_rate": 0.00044340546534863226, "loss": 0.0194, "num_input_tokens_seen": 154244128, "step": 71415 }, { "epoch": 11.65089722675367, "grad_norm": 0.005995164625346661, "learning_rate": 0.00044333474353914576, "loss": 0.0882, "num_input_tokens_seen": 154254240, "step": 71420 }, { "epoch": 11.651712887438826, "grad_norm": 0.005441619548946619, "learning_rate": 0.0004432640228780529, "loss": 0.0085, "num_input_tokens_seen": 154265568, "step": 71425 }, { "epoch": 11.65252854812398, "grad_norm": 0.13086704909801483, "learning_rate": 0.0004431933033667863, "loss": 0.0782, "num_input_tokens_seen": 154275488, "step": 71430 }, { "epoch": 11.653344208809136, "grad_norm": 0.04926230385899544, "learning_rate": 0.0004431225850067796, "loss": 0.0233, "num_input_tokens_seen": 154287136, "step": 71435 }, { "epoch": 11.65415986949429, "grad_norm": 0.050195761024951935, "learning_rate": 0.0004430518677994659, "loss": 0.0262, "num_input_tokens_seen": 154298240, "step": 71440 }, { "epoch": 11.654975530179446, "grad_norm": 0.6612548828125, "learning_rate": 0.0004429811517462783, "loss": 0.0612, "num_input_tokens_seen": 154308672, "step": 71445 }, { "epoch": 11.655791190864601, "grad_norm": 0.015694979578256607, "learning_rate": 0.00044291043684865, "loss": 0.0226, "num_input_tokens_seen": 154318592, "step": 71450 }, { "epoch": 11.656606851549755, "grad_norm": 0.027238953858613968, "learning_rate": 0.0004428397231080141, "loss": 0.0052, "num_input_tokens_seen": 154329280, "step": 71455 }, { "epoch": 11.65742251223491, "grad_norm": 0.00588644715026021, "learning_rate": 0.0004427690105258037, "loss": 0.0532, "num_input_tokens_seen": 154339168, "step": 71460 }, { "epoch": 11.658238172920065, "grad_norm": 0.007147925905883312, "learning_rate": 0.00044269829910345207, "loss": 0.0336, "num_input_tokens_seen": 154349120, "step": 71465 }, { "epoch": 11.65905383360522, "grad_norm": 0.009599827229976654, "learning_rate": 0.00044262758884239185, "loss": 0.0092, "num_input_tokens_seen": 154359424, "step": 71470 }, { "epoch": 11.659869494290374, "grad_norm": 0.008452493697404861, "learning_rate": 0.00044255687974405656, "loss": 0.0363, "num_input_tokens_seen": 154369856, "step": 71475 }, { "epoch": 11.66068515497553, "grad_norm": 0.26467153429985046, "learning_rate": 0.0004424861718098788, "loss": 0.0345, "num_input_tokens_seen": 154381024, "step": 71480 }, { "epoch": 11.661500815660686, "grad_norm": 0.011430994607508183, "learning_rate": 0.00044241546504129186, "loss": 0.0115, "num_input_tokens_seen": 154392960, "step": 71485 }, { "epoch": 11.66231647634584, "grad_norm": 0.048355769366025925, "learning_rate": 0.0004423447594397284, "loss": 0.0076, "num_input_tokens_seen": 154404480, "step": 71490 }, { "epoch": 11.663132137030995, "grad_norm": 0.41922426223754883, "learning_rate": 0.00044227405500662175, "loss": 0.0497, "num_input_tokens_seen": 154415168, "step": 71495 }, { "epoch": 11.66394779771615, "grad_norm": 0.009930071420967579, "learning_rate": 0.00044220335174340443, "loss": 0.0143, "num_input_tokens_seen": 154425920, "step": 71500 }, { "epoch": 11.664763458401305, "grad_norm": 0.03307803347706795, "learning_rate": 0.00044213264965150943, "loss": 0.0133, "num_input_tokens_seen": 154436864, "step": 71505 }, { "epoch": 11.66557911908646, "grad_norm": 0.00203361245803535, "learning_rate": 0.00044206194873237, "loss": 0.0773, "num_input_tokens_seen": 154448416, "step": 71510 }, { "epoch": 11.666394779771615, "grad_norm": 0.06398969888687134, "learning_rate": 0.00044199124898741844, "loss": 0.0295, "num_input_tokens_seen": 154459872, "step": 71515 }, { "epoch": 11.66721044045677, "grad_norm": 0.00550407450646162, "learning_rate": 0.000441920550418088, "loss": 0.0044, "num_input_tokens_seen": 154470304, "step": 71520 }, { "epoch": 11.668026101141924, "grad_norm": 0.023381365463137627, "learning_rate": 0.00044184985302581103, "loss": 0.0054, "num_input_tokens_seen": 154481152, "step": 71525 }, { "epoch": 11.66884176182708, "grad_norm": 0.058864492923021317, "learning_rate": 0.00044177915681202083, "loss": 0.0988, "num_input_tokens_seen": 154491104, "step": 71530 }, { "epoch": 11.669657422512234, "grad_norm": 0.005633897613734007, "learning_rate": 0.00044170846177814965, "loss": 0.0408, "num_input_tokens_seen": 154503136, "step": 71535 }, { "epoch": 11.67047308319739, "grad_norm": 0.03195603936910629, "learning_rate": 0.0004416377679256307, "loss": 0.0082, "num_input_tokens_seen": 154513376, "step": 71540 }, { "epoch": 11.671288743882545, "grad_norm": 0.006400907877832651, "learning_rate": 0.0004415670752558961, "loss": 0.0015, "num_input_tokens_seen": 154524384, "step": 71545 }, { "epoch": 11.6721044045677, "grad_norm": 0.010677113197743893, "learning_rate": 0.0004414963837703791, "loss": 0.0069, "num_input_tokens_seen": 154535136, "step": 71550 }, { "epoch": 11.672920065252855, "grad_norm": 0.007044284604489803, "learning_rate": 0.0004414256934705119, "loss": 0.1687, "num_input_tokens_seen": 154545952, "step": 71555 }, { "epoch": 11.673735725938009, "grad_norm": 0.042897067964076996, "learning_rate": 0.00044135500435772755, "loss": 0.0048, "num_input_tokens_seen": 154556288, "step": 71560 }, { "epoch": 11.674551386623165, "grad_norm": 0.05293981730937958, "learning_rate": 0.0004412843164334582, "loss": 0.0919, "num_input_tokens_seen": 154567360, "step": 71565 }, { "epoch": 11.67536704730832, "grad_norm": 0.2870676815509796, "learning_rate": 0.00044121362969913683, "loss": 0.0427, "num_input_tokens_seen": 154577984, "step": 71570 }, { "epoch": 11.676182707993474, "grad_norm": 0.034628961235284805, "learning_rate": 0.00044114294415619577, "loss": 0.0061, "num_input_tokens_seen": 154589568, "step": 71575 }, { "epoch": 11.67699836867863, "grad_norm": 0.00338340294547379, "learning_rate": 0.00044107225980606765, "loss": 0.0041, "num_input_tokens_seen": 154601664, "step": 71580 }, { "epoch": 11.677814029363784, "grad_norm": 0.49561360478401184, "learning_rate": 0.0004410015766501849, "loss": 0.1177, "num_input_tokens_seen": 154612576, "step": 71585 }, { "epoch": 11.67862969004894, "grad_norm": 0.06097765639424324, "learning_rate": 0.00044093089468998006, "loss": 0.0431, "num_input_tokens_seen": 154624416, "step": 71590 }, { "epoch": 11.679445350734095, "grad_norm": 0.00320567493326962, "learning_rate": 0.0004408602139268856, "loss": 0.0774, "num_input_tokens_seen": 154634944, "step": 71595 }, { "epoch": 11.68026101141925, "grad_norm": 0.014213986694812775, "learning_rate": 0.00044078953436233387, "loss": 0.0144, "num_input_tokens_seen": 154646400, "step": 71600 }, { "epoch": 11.681076672104405, "grad_norm": 0.03145314380526543, "learning_rate": 0.0004407188559977573, "loss": 0.007, "num_input_tokens_seen": 154658112, "step": 71605 }, { "epoch": 11.681892332789559, "grad_norm": 0.06955621391534805, "learning_rate": 0.00044064817883458833, "loss": 0.0498, "num_input_tokens_seen": 154669632, "step": 71610 }, { "epoch": 11.682707993474715, "grad_norm": 0.21142300963401794, "learning_rate": 0.0004405775028742594, "loss": 0.1781, "num_input_tokens_seen": 154680992, "step": 71615 }, { "epoch": 11.68352365415987, "grad_norm": 0.3759807050228119, "learning_rate": 0.00044050682811820277, "loss": 0.167, "num_input_tokens_seen": 154691296, "step": 71620 }, { "epoch": 11.684339314845024, "grad_norm": 0.004578839987516403, "learning_rate": 0.00044043615456785065, "loss": 0.0103, "num_input_tokens_seen": 154702496, "step": 71625 }, { "epoch": 11.68515497553018, "grad_norm": 0.004665186163038015, "learning_rate": 0.00044036548222463535, "loss": 0.0067, "num_input_tokens_seen": 154713056, "step": 71630 }, { "epoch": 11.685970636215334, "grad_norm": 0.007182937115430832, "learning_rate": 0.0004402948110899894, "loss": 0.0233, "num_input_tokens_seen": 154724160, "step": 71635 }, { "epoch": 11.68678629690049, "grad_norm": 0.4717956781387329, "learning_rate": 0.0004402241411653447, "loss": 0.0361, "num_input_tokens_seen": 154735104, "step": 71640 }, { "epoch": 11.687601957585644, "grad_norm": 0.14137259125709534, "learning_rate": 0.00044015347245213377, "loss": 0.0359, "num_input_tokens_seen": 154747104, "step": 71645 }, { "epoch": 11.6884176182708, "grad_norm": 0.0016079474007710814, "learning_rate": 0.00044008280495178844, "loss": 0.0189, "num_input_tokens_seen": 154757184, "step": 71650 }, { "epoch": 11.689233278955955, "grad_norm": 0.7597583532333374, "learning_rate": 0.0004400121386657413, "loss": 0.0709, "num_input_tokens_seen": 154768896, "step": 71655 }, { "epoch": 11.690048939641109, "grad_norm": 0.09265612810850143, "learning_rate": 0.000439941473595424, "loss": 0.0077, "num_input_tokens_seen": 154779808, "step": 71660 }, { "epoch": 11.690864600326265, "grad_norm": 0.009280568920075893, "learning_rate": 0.00043987080974226925, "loss": 0.0353, "num_input_tokens_seen": 154789952, "step": 71665 }, { "epoch": 11.691680261011419, "grad_norm": 0.001288570580072701, "learning_rate": 0.00043980014710770857, "loss": 0.0185, "num_input_tokens_seen": 154799936, "step": 71670 }, { "epoch": 11.692495921696574, "grad_norm": 0.07057865709066391, "learning_rate": 0.00043972948569317446, "loss": 0.077, "num_input_tokens_seen": 154810720, "step": 71675 }, { "epoch": 11.69331158238173, "grad_norm": 0.00797590147703886, "learning_rate": 0.00043965882550009856, "loss": 0.0107, "num_input_tokens_seen": 154821152, "step": 71680 }, { "epoch": 11.694127243066884, "grad_norm": 0.005033727269619703, "learning_rate": 0.0004395881665299134, "loss": 0.0182, "num_input_tokens_seen": 154830944, "step": 71685 }, { "epoch": 11.69494290375204, "grad_norm": 0.26927709579467773, "learning_rate": 0.0004395175087840503, "loss": 0.1215, "num_input_tokens_seen": 154841920, "step": 71690 }, { "epoch": 11.695758564437194, "grad_norm": 0.02626313455402851, "learning_rate": 0.000439446852263942, "loss": 0.0262, "num_input_tokens_seen": 154854432, "step": 71695 }, { "epoch": 11.69657422512235, "grad_norm": 0.05705942586064339, "learning_rate": 0.00043937619697101974, "loss": 0.0067, "num_input_tokens_seen": 154864544, "step": 71700 }, { "epoch": 11.697389885807503, "grad_norm": 0.010143991559743881, "learning_rate": 0.00043930554290671597, "loss": 0.0051, "num_input_tokens_seen": 154874880, "step": 71705 }, { "epoch": 11.698205546492659, "grad_norm": 0.0047828564420342445, "learning_rate": 0.0004392348900724622, "loss": 0.0073, "num_input_tokens_seen": 154886240, "step": 71710 }, { "epoch": 11.699021207177815, "grad_norm": 0.03272762522101402, "learning_rate": 0.00043916423846969047, "loss": 0.0039, "num_input_tokens_seen": 154896192, "step": 71715 }, { "epoch": 11.699836867862969, "grad_norm": 0.33838027715682983, "learning_rate": 0.0004390935880998329, "loss": 0.1627, "num_input_tokens_seen": 154906560, "step": 71720 }, { "epoch": 11.700652528548124, "grad_norm": 0.03905438259243965, "learning_rate": 0.00043902293896432064, "loss": 0.0452, "num_input_tokens_seen": 154917568, "step": 71725 }, { "epoch": 11.701468189233278, "grad_norm": 0.5773392915725708, "learning_rate": 0.0004389522910645862, "loss": 0.0153, "num_input_tokens_seen": 154927232, "step": 71730 }, { "epoch": 11.702283849918434, "grad_norm": 0.009112930856645107, "learning_rate": 0.00043888164440206086, "loss": 0.0061, "num_input_tokens_seen": 154937376, "step": 71735 }, { "epoch": 11.70309951060359, "grad_norm": 0.07352989912033081, "learning_rate": 0.0004388109989781766, "loss": 0.0039, "num_input_tokens_seen": 154947744, "step": 71740 }, { "epoch": 11.703915171288743, "grad_norm": 0.007768069859594107, "learning_rate": 0.000438740354794365, "loss": 0.0372, "num_input_tokens_seen": 154959104, "step": 71745 }, { "epoch": 11.7047308319739, "grad_norm": 0.004225387237966061, "learning_rate": 0.0004386697118520579, "loss": 0.0024, "num_input_tokens_seen": 154971264, "step": 71750 }, { "epoch": 11.705546492659053, "grad_norm": 0.2465088665485382, "learning_rate": 0.00043859907015268685, "loss": 0.1459, "num_input_tokens_seen": 154983008, "step": 71755 }, { "epoch": 11.706362153344209, "grad_norm": 0.00690916832536459, "learning_rate": 0.00043852842969768356, "loss": 0.0083, "num_input_tokens_seen": 154993600, "step": 71760 }, { "epoch": 11.707177814029365, "grad_norm": 0.011656506918370724, "learning_rate": 0.0004384577904884795, "loss": 0.024, "num_input_tokens_seen": 155003776, "step": 71765 }, { "epoch": 11.707993474714518, "grad_norm": 0.1968628615140915, "learning_rate": 0.0004383871525265066, "loss": 0.1653, "num_input_tokens_seen": 155013056, "step": 71770 }, { "epoch": 11.708809135399674, "grad_norm": 0.028053130954504013, "learning_rate": 0.00043831651581319604, "loss": 0.023, "num_input_tokens_seen": 155023072, "step": 71775 }, { "epoch": 11.709624796084828, "grad_norm": 0.07982442528009415, "learning_rate": 0.00043824588034997974, "loss": 0.0625, "num_input_tokens_seen": 155033472, "step": 71780 }, { "epoch": 11.710440456769984, "grad_norm": 0.11559919267892838, "learning_rate": 0.0004381752461382888, "loss": 0.0227, "num_input_tokens_seen": 155044960, "step": 71785 }, { "epoch": 11.71125611745514, "grad_norm": 0.0043896157294511795, "learning_rate": 0.0004381046131795551, "loss": 0.0182, "num_input_tokens_seen": 155056704, "step": 71790 }, { "epoch": 11.712071778140293, "grad_norm": 0.07724365592002869, "learning_rate": 0.0004380339814752098, "loss": 0.0099, "num_input_tokens_seen": 155067584, "step": 71795 }, { "epoch": 11.71288743882545, "grad_norm": 0.013904483988881111, "learning_rate": 0.0004379633510266846, "loss": 0.0106, "num_input_tokens_seen": 155078880, "step": 71800 }, { "epoch": 11.713703099510603, "grad_norm": 0.0046301172114908695, "learning_rate": 0.0004378927218354106, "loss": 0.0298, "num_input_tokens_seen": 155089408, "step": 71805 }, { "epoch": 11.714518760195759, "grad_norm": 0.005695376545190811, "learning_rate": 0.00043782209390281964, "loss": 0.0251, "num_input_tokens_seen": 155098816, "step": 71810 }, { "epoch": 11.715334420880914, "grad_norm": 0.005193586926907301, "learning_rate": 0.00043775146723034253, "loss": 0.015, "num_input_tokens_seen": 155110048, "step": 71815 }, { "epoch": 11.716150081566068, "grad_norm": 0.05635349825024605, "learning_rate": 0.00043768084181941097, "loss": 0.0186, "num_input_tokens_seen": 155120256, "step": 71820 }, { "epoch": 11.716965742251224, "grad_norm": 0.03338460251688957, "learning_rate": 0.00043761021767145644, "loss": 0.033, "num_input_tokens_seen": 155130976, "step": 71825 }, { "epoch": 11.717781402936378, "grad_norm": 0.0054107471369206905, "learning_rate": 0.0004375395947879097, "loss": 0.0038, "num_input_tokens_seen": 155141376, "step": 71830 }, { "epoch": 11.718597063621534, "grad_norm": 0.006177667994052172, "learning_rate": 0.0004374689731702026, "loss": 0.0159, "num_input_tokens_seen": 155151616, "step": 71835 }, { "epoch": 11.719412724306688, "grad_norm": 0.5736343860626221, "learning_rate": 0.0004373983528197659, "loss": 0.1354, "num_input_tokens_seen": 155163232, "step": 71840 }, { "epoch": 11.720228384991843, "grad_norm": 0.006295854225754738, "learning_rate": 0.0004373277337380311, "loss": 0.011, "num_input_tokens_seen": 155173824, "step": 71845 }, { "epoch": 11.721044045676999, "grad_norm": 0.0067205713130533695, "learning_rate": 0.00043725711592642913, "loss": 0.0057, "num_input_tokens_seen": 155183552, "step": 71850 }, { "epoch": 11.721859706362153, "grad_norm": 0.003925715573132038, "learning_rate": 0.0004371864993863915, "loss": 0.1543, "num_input_tokens_seen": 155194048, "step": 71855 }, { "epoch": 11.722675367047309, "grad_norm": 0.05358777940273285, "learning_rate": 0.00043711588411934893, "loss": 0.0682, "num_input_tokens_seen": 155204800, "step": 71860 }, { "epoch": 11.723491027732463, "grad_norm": 0.15012463927268982, "learning_rate": 0.00043704527012673294, "loss": 0.0292, "num_input_tokens_seen": 155215904, "step": 71865 }, { "epoch": 11.724306688417618, "grad_norm": 0.009517773985862732, "learning_rate": 0.00043697465740997424, "loss": 0.0189, "num_input_tokens_seen": 155226464, "step": 71870 }, { "epoch": 11.725122349102774, "grad_norm": 0.2332668900489807, "learning_rate": 0.00043690404597050426, "loss": 0.0632, "num_input_tokens_seen": 155237152, "step": 71875 }, { "epoch": 11.725938009787928, "grad_norm": 0.2832512855529785, "learning_rate": 0.0004368334358097536, "loss": 0.0268, "num_input_tokens_seen": 155248608, "step": 71880 }, { "epoch": 11.726753670473084, "grad_norm": 0.001509108697064221, "learning_rate": 0.00043676282692915367, "loss": 0.1853, "num_input_tokens_seen": 155259744, "step": 71885 }, { "epoch": 11.727569331158238, "grad_norm": 0.0022017841693013906, "learning_rate": 0.0004366922193301352, "loss": 0.0226, "num_input_tokens_seen": 155272000, "step": 71890 }, { "epoch": 11.728384991843393, "grad_norm": 0.0046439943835139275, "learning_rate": 0.00043662161301412925, "loss": 0.055, "num_input_tokens_seen": 155282304, "step": 71895 }, { "epoch": 11.729200652528547, "grad_norm": 0.006962585728615522, "learning_rate": 0.0004365510079825667, "loss": 0.0529, "num_input_tokens_seen": 155291840, "step": 71900 }, { "epoch": 11.730016313213703, "grad_norm": 0.011773492209613323, "learning_rate": 0.00043648040423687845, "loss": 0.006, "num_input_tokens_seen": 155302976, "step": 71905 }, { "epoch": 11.730831973898859, "grad_norm": 0.40067851543426514, "learning_rate": 0.00043640980177849534, "loss": 0.0753, "num_input_tokens_seen": 155313728, "step": 71910 }, { "epoch": 11.731647634584013, "grad_norm": 0.00812000036239624, "learning_rate": 0.00043633920060884843, "loss": 0.0072, "num_input_tokens_seen": 155326080, "step": 71915 }, { "epoch": 11.732463295269168, "grad_norm": 0.006097911857068539, "learning_rate": 0.0004362686007293681, "loss": 0.0566, "num_input_tokens_seen": 155337248, "step": 71920 }, { "epoch": 11.733278955954322, "grad_norm": 0.007870636880397797, "learning_rate": 0.0004361980021414858, "loss": 0.0629, "num_input_tokens_seen": 155348224, "step": 71925 }, { "epoch": 11.734094616639478, "grad_norm": 0.015115322545170784, "learning_rate": 0.00043612740484663155, "loss": 0.0099, "num_input_tokens_seen": 155359744, "step": 71930 }, { "epoch": 11.734910277324634, "grad_norm": 0.007088929880410433, "learning_rate": 0.00043605680884623656, "loss": 0.0338, "num_input_tokens_seen": 155370912, "step": 71935 }, { "epoch": 11.735725938009788, "grad_norm": 0.024514637887477875, "learning_rate": 0.00043598621414173166, "loss": 0.0059, "num_input_tokens_seen": 155382336, "step": 71940 }, { "epoch": 11.736541598694943, "grad_norm": 0.016428545117378235, "learning_rate": 0.0004359156207345471, "loss": 0.0081, "num_input_tokens_seen": 155393152, "step": 71945 }, { "epoch": 11.737357259380097, "grad_norm": 0.6242492198944092, "learning_rate": 0.00043584502862611404, "loss": 0.0421, "num_input_tokens_seen": 155403744, "step": 71950 }, { "epoch": 11.738172920065253, "grad_norm": 0.07482123374938965, "learning_rate": 0.00043577443781786263, "loss": 0.0078, "num_input_tokens_seen": 155415360, "step": 71955 }, { "epoch": 11.738988580750409, "grad_norm": 0.06953166425228119, "learning_rate": 0.0004357038483112239, "loss": 0.0079, "num_input_tokens_seen": 155425632, "step": 71960 }, { "epoch": 11.739804241435563, "grad_norm": 0.0036088728811591864, "learning_rate": 0.00043563326010762803, "loss": 0.0609, "num_input_tokens_seen": 155437952, "step": 71965 }, { "epoch": 11.740619902120718, "grad_norm": 0.08081181347370148, "learning_rate": 0.00043556267320850605, "loss": 0.0097, "num_input_tokens_seen": 155447552, "step": 71970 }, { "epoch": 11.741435562805872, "grad_norm": 0.019740041345357895, "learning_rate": 0.000435492087615288, "loss": 0.0245, "num_input_tokens_seen": 155457216, "step": 71975 }, { "epoch": 11.742251223491028, "grad_norm": 0.021085111424326897, "learning_rate": 0.00043542150332940487, "loss": 0.1118, "num_input_tokens_seen": 155468064, "step": 71980 }, { "epoch": 11.743066884176184, "grad_norm": 0.13764089345932007, "learning_rate": 0.00043535092035228666, "loss": 0.1031, "num_input_tokens_seen": 155477280, "step": 71985 }, { "epoch": 11.743882544861338, "grad_norm": 0.032629575580358505, "learning_rate": 0.00043528033868536433, "loss": 0.0121, "num_input_tokens_seen": 155488672, "step": 71990 }, { "epoch": 11.744698205546493, "grad_norm": 0.007054131478071213, "learning_rate": 0.0004352097583300678, "loss": 0.007, "num_input_tokens_seen": 155499744, "step": 71995 }, { "epoch": 11.745513866231647, "grad_norm": 0.3839552700519562, "learning_rate": 0.0004351391792878279, "loss": 0.1698, "num_input_tokens_seen": 155510112, "step": 72000 }, { "epoch": 11.746329526916803, "grad_norm": 0.03238167613744736, "learning_rate": 0.00043506860156007453, "loss": 0.0255, "num_input_tokens_seen": 155520864, "step": 72005 }, { "epoch": 11.747145187601957, "grad_norm": 0.4877621829509735, "learning_rate": 0.00043499802514823866, "loss": 0.0288, "num_input_tokens_seen": 155532384, "step": 72010 }, { "epoch": 11.747960848287113, "grad_norm": 0.13125985860824585, "learning_rate": 0.00043492745005375, "loss": 0.059, "num_input_tokens_seen": 155544576, "step": 72015 }, { "epoch": 11.748776508972268, "grad_norm": 0.3048825263977051, "learning_rate": 0.00043485687627803935, "loss": 0.1243, "num_input_tokens_seen": 155554432, "step": 72020 }, { "epoch": 11.749592169657422, "grad_norm": 0.0021826811134815216, "learning_rate": 0.00043478630382253646, "loss": 0.0617, "num_input_tokens_seen": 155565024, "step": 72025 }, { "epoch": 11.750407830342578, "grad_norm": 0.020689282566308975, "learning_rate": 0.00043471573268867206, "loss": 0.1112, "num_input_tokens_seen": 155576736, "step": 72030 }, { "epoch": 11.751223491027732, "grad_norm": 0.29919424653053284, "learning_rate": 0.00043464516287787617, "loss": 0.0361, "num_input_tokens_seen": 155587968, "step": 72035 }, { "epoch": 11.752039151712887, "grad_norm": 0.019435329362750053, "learning_rate": 0.0004345745943915788, "loss": 0.0073, "num_input_tokens_seen": 155598208, "step": 72040 }, { "epoch": 11.752854812398043, "grad_norm": 0.08339189738035202, "learning_rate": 0.0004345040272312104, "loss": 0.011, "num_input_tokens_seen": 155610400, "step": 72045 }, { "epoch": 11.753670473083197, "grad_norm": 0.34416478872299194, "learning_rate": 0.00043443346139820086, "loss": 0.0958, "num_input_tokens_seen": 155620800, "step": 72050 }, { "epoch": 11.754486133768353, "grad_norm": 0.017133589833974838, "learning_rate": 0.0004343628968939805, "loss": 0.0529, "num_input_tokens_seen": 155632480, "step": 72055 }, { "epoch": 11.755301794453507, "grad_norm": 0.17699752748012543, "learning_rate": 0.0004342923337199793, "loss": 0.0876, "num_input_tokens_seen": 155643328, "step": 72060 }, { "epoch": 11.756117455138662, "grad_norm": 0.004552872385829687, "learning_rate": 0.0004342217718776273, "loss": 0.0117, "num_input_tokens_seen": 155654048, "step": 72065 }, { "epoch": 11.756933115823816, "grad_norm": 0.33629310131073, "learning_rate": 0.00043415121136835454, "loss": 0.0809, "num_input_tokens_seen": 155663904, "step": 72070 }, { "epoch": 11.757748776508972, "grad_norm": 0.09349898993968964, "learning_rate": 0.00043408065219359106, "loss": 0.1015, "num_input_tokens_seen": 155674816, "step": 72075 }, { "epoch": 11.758564437194128, "grad_norm": 0.005316116847097874, "learning_rate": 0.00043401009435476665, "loss": 0.0045, "num_input_tokens_seen": 155685216, "step": 72080 }, { "epoch": 11.759380097879282, "grad_norm": 0.008520056493580341, "learning_rate": 0.0004339395378533116, "loss": 0.0169, "num_input_tokens_seen": 155695840, "step": 72085 }, { "epoch": 11.760195758564437, "grad_norm": 0.5987900495529175, "learning_rate": 0.00043386898269065537, "loss": 0.0355, "num_input_tokens_seen": 155706656, "step": 72090 }, { "epoch": 11.761011419249591, "grad_norm": 0.4160362780094147, "learning_rate": 0.00043379842886822836, "loss": 0.1533, "num_input_tokens_seen": 155716928, "step": 72095 }, { "epoch": 11.761827079934747, "grad_norm": 0.2085852324962616, "learning_rate": 0.0004337278763874599, "loss": 0.0114, "num_input_tokens_seen": 155727104, "step": 72100 }, { "epoch": 11.762642740619903, "grad_norm": 0.0025748233310878277, "learning_rate": 0.0004336573252497804, "loss": 0.0105, "num_input_tokens_seen": 155738080, "step": 72105 }, { "epoch": 11.763458401305057, "grad_norm": 0.002957735676318407, "learning_rate": 0.00043358677545661913, "loss": 0.0045, "num_input_tokens_seen": 155749824, "step": 72110 }, { "epoch": 11.764274061990212, "grad_norm": 0.07855616509914398, "learning_rate": 0.0004335162270094063, "loss": 0.0181, "num_input_tokens_seen": 155760256, "step": 72115 }, { "epoch": 11.765089722675366, "grad_norm": 0.010629786178469658, "learning_rate": 0.0004334456799095712, "loss": 0.0302, "num_input_tokens_seen": 155771360, "step": 72120 }, { "epoch": 11.765905383360522, "grad_norm": 0.39609336853027344, "learning_rate": 0.00043337513415854414, "loss": 0.0256, "num_input_tokens_seen": 155783392, "step": 72125 }, { "epoch": 11.766721044045678, "grad_norm": 0.3709852397441864, "learning_rate": 0.0004333045897577542, "loss": 0.222, "num_input_tokens_seen": 155793728, "step": 72130 }, { "epoch": 11.767536704730832, "grad_norm": 0.001966248033568263, "learning_rate": 0.00043323404670863165, "loss": 0.004, "num_input_tokens_seen": 155804576, "step": 72135 }, { "epoch": 11.768352365415987, "grad_norm": 0.018825042992830276, "learning_rate": 0.0004331635050126056, "loss": 0.0058, "num_input_tokens_seen": 155815040, "step": 72140 }, { "epoch": 11.769168026101141, "grad_norm": 0.01670904830098152, "learning_rate": 0.0004330929646711059, "loss": 0.1545, "num_input_tokens_seen": 155825664, "step": 72145 }, { "epoch": 11.769983686786297, "grad_norm": 0.04210560396313667, "learning_rate": 0.0004330224256855624, "loss": 0.0745, "num_input_tokens_seen": 155836736, "step": 72150 }, { "epoch": 11.770799347471453, "grad_norm": 0.36961254477500916, "learning_rate": 0.00043295188805740414, "loss": 0.1821, "num_input_tokens_seen": 155847872, "step": 72155 }, { "epoch": 11.771615008156607, "grad_norm": 0.014704558998346329, "learning_rate": 0.0004328813517880612, "loss": 0.0853, "num_input_tokens_seen": 155859904, "step": 72160 }, { "epoch": 11.772430668841762, "grad_norm": 0.04387712478637695, "learning_rate": 0.00043281081687896253, "loss": 0.015, "num_input_tokens_seen": 155870848, "step": 72165 }, { "epoch": 11.773246329526916, "grad_norm": 0.041084855794906616, "learning_rate": 0.0004327402833315381, "loss": 0.0096, "num_input_tokens_seen": 155882336, "step": 72170 }, { "epoch": 11.774061990212072, "grad_norm": 0.04652201011776924, "learning_rate": 0.000432669751147217, "loss": 0.0086, "num_input_tokens_seen": 155894464, "step": 72175 }, { "epoch": 11.774877650897226, "grad_norm": 0.006085763685405254, "learning_rate": 0.000432599220327429, "loss": 0.0269, "num_input_tokens_seen": 155905120, "step": 72180 }, { "epoch": 11.775693311582382, "grad_norm": 0.40596145391464233, "learning_rate": 0.0004325286908736031, "loss": 0.0509, "num_input_tokens_seen": 155915680, "step": 72185 }, { "epoch": 11.776508972267537, "grad_norm": 0.011549671180546284, "learning_rate": 0.0004324581627871691, "loss": 0.0183, "num_input_tokens_seen": 155925856, "step": 72190 }, { "epoch": 11.777324632952691, "grad_norm": 0.39453864097595215, "learning_rate": 0.00043238763606955586, "loss": 0.0818, "num_input_tokens_seen": 155936736, "step": 72195 }, { "epoch": 11.778140293637847, "grad_norm": 0.02689771167933941, "learning_rate": 0.00043231711072219307, "loss": 0.0361, "num_input_tokens_seen": 155947072, "step": 72200 }, { "epoch": 11.778955954323001, "grad_norm": 0.012872003018856049, "learning_rate": 0.0004322465867465099, "loss": 0.0329, "num_input_tokens_seen": 155958656, "step": 72205 }, { "epoch": 11.779771615008157, "grad_norm": 0.891151487827301, "learning_rate": 0.0004321760641439356, "loss": 0.0593, "num_input_tokens_seen": 155968256, "step": 72210 }, { "epoch": 11.780587275693312, "grad_norm": 0.026634545996785164, "learning_rate": 0.00043210554291589937, "loss": 0.0254, "num_input_tokens_seen": 155978272, "step": 72215 }, { "epoch": 11.781402936378466, "grad_norm": 0.36695098876953125, "learning_rate": 0.00043203502306383046, "loss": 0.0309, "num_input_tokens_seen": 155989024, "step": 72220 }, { "epoch": 11.782218597063622, "grad_norm": 0.008795715868473053, "learning_rate": 0.0004319645045891579, "loss": 0.0144, "num_input_tokens_seen": 156000800, "step": 72225 }, { "epoch": 11.783034257748776, "grad_norm": 0.018142348155379295, "learning_rate": 0.0004318939874933113, "loss": 0.0285, "num_input_tokens_seen": 156011904, "step": 72230 }, { "epoch": 11.783849918433932, "grad_norm": 0.004247451666742563, "learning_rate": 0.00043182347177771907, "loss": 0.1222, "num_input_tokens_seen": 156024160, "step": 72235 }, { "epoch": 11.784665579119086, "grad_norm": 0.0237015001475811, "learning_rate": 0.000431752957443811, "loss": 0.0067, "num_input_tokens_seen": 156035488, "step": 72240 }, { "epoch": 11.785481239804241, "grad_norm": 0.06558331102132797, "learning_rate": 0.00043168244449301555, "loss": 0.0623, "num_input_tokens_seen": 156045888, "step": 72245 }, { "epoch": 11.786296900489397, "grad_norm": 0.5224149227142334, "learning_rate": 0.00043161193292676203, "loss": 0.0465, "num_input_tokens_seen": 156057056, "step": 72250 }, { "epoch": 11.78711256117455, "grad_norm": 0.022556733340024948, "learning_rate": 0.00043154142274647966, "loss": 0.0282, "num_input_tokens_seen": 156068032, "step": 72255 }, { "epoch": 11.787928221859707, "grad_norm": 0.009831923991441727, "learning_rate": 0.000431470913953597, "loss": 0.0086, "num_input_tokens_seen": 156078016, "step": 72260 }, { "epoch": 11.78874388254486, "grad_norm": 0.273215115070343, "learning_rate": 0.00043140040654954346, "loss": 0.0367, "num_input_tokens_seen": 156089472, "step": 72265 }, { "epoch": 11.789559543230016, "grad_norm": 0.007073753513395786, "learning_rate": 0.00043132990053574747, "loss": 0.0049, "num_input_tokens_seen": 156101504, "step": 72270 }, { "epoch": 11.790375203915172, "grad_norm": 0.004455335903912783, "learning_rate": 0.0004312593959136383, "loss": 0.0196, "num_input_tokens_seen": 156112192, "step": 72275 }, { "epoch": 11.791190864600326, "grad_norm": 0.004594567697495222, "learning_rate": 0.0004311888926846445, "loss": 0.0364, "num_input_tokens_seen": 156121984, "step": 72280 }, { "epoch": 11.792006525285482, "grad_norm": 0.056093163788318634, "learning_rate": 0.00043111839085019534, "loss": 0.0064, "num_input_tokens_seen": 156132160, "step": 72285 }, { "epoch": 11.792822185970635, "grad_norm": 0.41622522473335266, "learning_rate": 0.0004310478904117191, "loss": 0.0237, "num_input_tokens_seen": 156143200, "step": 72290 }, { "epoch": 11.793637846655791, "grad_norm": 0.0016232366906479, "learning_rate": 0.0004309773913706451, "loss": 0.0161, "num_input_tokens_seen": 156154496, "step": 72295 }, { "epoch": 11.794453507340947, "grad_norm": 0.00741207879036665, "learning_rate": 0.00043090689372840156, "loss": 0.0072, "num_input_tokens_seen": 156165824, "step": 72300 }, { "epoch": 11.7952691680261, "grad_norm": 0.01622309908270836, "learning_rate": 0.0004308363974864178, "loss": 0.0057, "num_input_tokens_seen": 156175520, "step": 72305 }, { "epoch": 11.796084828711257, "grad_norm": 0.6132098436355591, "learning_rate": 0.0004307659026461218, "loss": 0.1409, "num_input_tokens_seen": 156186144, "step": 72310 }, { "epoch": 11.79690048939641, "grad_norm": 0.004296452272683382, "learning_rate": 0.00043069540920894297, "loss": 0.01, "num_input_tokens_seen": 156197728, "step": 72315 }, { "epoch": 11.797716150081566, "grad_norm": 0.26713958382606506, "learning_rate": 0.0004306249171763093, "loss": 0.0242, "num_input_tokens_seen": 156207936, "step": 72320 }, { "epoch": 11.798531810766722, "grad_norm": 0.002047403249889612, "learning_rate": 0.0004305544265496499, "loss": 0.0265, "num_input_tokens_seen": 156219648, "step": 72325 }, { "epoch": 11.799347471451876, "grad_norm": 0.00636103842407465, "learning_rate": 0.000430483937330393, "loss": 0.0037, "num_input_tokens_seen": 156230080, "step": 72330 }, { "epoch": 11.800163132137031, "grad_norm": 0.24282555282115936, "learning_rate": 0.0004304134495199674, "loss": 0.0891, "num_input_tokens_seen": 156240448, "step": 72335 }, { "epoch": 11.800978792822185, "grad_norm": 0.608010470867157, "learning_rate": 0.0004303429631198014, "loss": 0.1334, "num_input_tokens_seen": 156251552, "step": 72340 }, { "epoch": 11.801794453507341, "grad_norm": 0.01011840533465147, "learning_rate": 0.0004302724781313237, "loss": 0.0059, "num_input_tokens_seen": 156262240, "step": 72345 }, { "epoch": 11.802610114192497, "grad_norm": 0.03190528601408005, "learning_rate": 0.0004302019945559627, "loss": 0.1422, "num_input_tokens_seen": 156273792, "step": 72350 }, { "epoch": 11.80342577487765, "grad_norm": 0.3652302026748657, "learning_rate": 0.0004301315123951467, "loss": 0.0394, "num_input_tokens_seen": 156284352, "step": 72355 }, { "epoch": 11.804241435562806, "grad_norm": 0.15005654096603394, "learning_rate": 0.0004300610316503045, "loss": 0.0115, "num_input_tokens_seen": 156294720, "step": 72360 }, { "epoch": 11.80505709624796, "grad_norm": 0.00928251352161169, "learning_rate": 0.00042999055232286387, "loss": 0.1893, "num_input_tokens_seen": 156304256, "step": 72365 }, { "epoch": 11.805872756933116, "grad_norm": 0.08327314257621765, "learning_rate": 0.00042992007441425376, "loss": 0.0146, "num_input_tokens_seen": 156314560, "step": 72370 }, { "epoch": 11.80668841761827, "grad_norm": 1.7067999839782715, "learning_rate": 0.00042984959792590215, "loss": 0.0457, "num_input_tokens_seen": 156325696, "step": 72375 }, { "epoch": 11.807504078303426, "grad_norm": 0.010561229661107063, "learning_rate": 0.00042977912285923747, "loss": 0.0905, "num_input_tokens_seen": 156335584, "step": 72380 }, { "epoch": 11.808319738988581, "grad_norm": 0.007480216212570667, "learning_rate": 0.000429708649215688, "loss": 0.0226, "num_input_tokens_seen": 156345024, "step": 72385 }, { "epoch": 11.809135399673735, "grad_norm": 0.9692756533622742, "learning_rate": 0.00042963817699668183, "loss": 0.0896, "num_input_tokens_seen": 156354976, "step": 72390 }, { "epoch": 11.809951060358891, "grad_norm": 0.14579908549785614, "learning_rate": 0.0004295677062036472, "loss": 0.0503, "num_input_tokens_seen": 156366496, "step": 72395 }, { "epoch": 11.810766721044045, "grad_norm": 0.015927450731396675, "learning_rate": 0.00042949723683801256, "loss": 0.0572, "num_input_tokens_seen": 156377344, "step": 72400 }, { "epoch": 11.8115823817292, "grad_norm": 0.0816449522972107, "learning_rate": 0.0004294267689012057, "loss": 0.1673, "num_input_tokens_seen": 156388704, "step": 72405 }, { "epoch": 11.812398042414356, "grad_norm": 0.26386797428131104, "learning_rate": 0.000429356302394655, "loss": 0.0143, "num_input_tokens_seen": 156399872, "step": 72410 }, { "epoch": 11.81321370309951, "grad_norm": 0.004181982949376106, "learning_rate": 0.00042928583731978833, "loss": 0.0192, "num_input_tokens_seen": 156412800, "step": 72415 }, { "epoch": 11.814029363784666, "grad_norm": 0.4576594829559326, "learning_rate": 0.00042921537367803403, "loss": 0.0298, "num_input_tokens_seen": 156421824, "step": 72420 }, { "epoch": 11.81484502446982, "grad_norm": 0.004218620248138905, "learning_rate": 0.0004291449114708198, "loss": 0.1628, "num_input_tokens_seen": 156432896, "step": 72425 }, { "epoch": 11.815660685154976, "grad_norm": 0.02189936861395836, "learning_rate": 0.000429074450699574, "loss": 0.0228, "num_input_tokens_seen": 156443936, "step": 72430 }, { "epoch": 11.81647634584013, "grad_norm": 0.005173725076019764, "learning_rate": 0.0004290039913657243, "loss": 0.0237, "num_input_tokens_seen": 156455840, "step": 72435 }, { "epoch": 11.817292006525285, "grad_norm": 0.4792866110801697, "learning_rate": 0.00042893353347069887, "loss": 0.0449, "num_input_tokens_seen": 156466400, "step": 72440 }, { "epoch": 11.818107667210441, "grad_norm": 0.0040856278501451015, "learning_rate": 0.0004288630770159254, "loss": 0.0086, "num_input_tokens_seen": 156476096, "step": 72445 }, { "epoch": 11.818923327895595, "grad_norm": 0.1025933250784874, "learning_rate": 0.00042879262200283216, "loss": 0.0369, "num_input_tokens_seen": 156488096, "step": 72450 }, { "epoch": 11.81973898858075, "grad_norm": 0.11138315498828888, "learning_rate": 0.0004287221684328465, "loss": 0.0209, "num_input_tokens_seen": 156499008, "step": 72455 }, { "epoch": 11.820554649265905, "grad_norm": 0.0017173081869259477, "learning_rate": 0.00042865171630739654, "loss": 0.0046, "num_input_tokens_seen": 156508992, "step": 72460 }, { "epoch": 11.82137030995106, "grad_norm": 0.001704095397144556, "learning_rate": 0.0004285812656279102, "loss": 0.0048, "num_input_tokens_seen": 156517664, "step": 72465 }, { "epoch": 11.822185970636216, "grad_norm": 0.010462358593940735, "learning_rate": 0.000428510816395815, "loss": 0.0669, "num_input_tokens_seen": 156528608, "step": 72470 }, { "epoch": 11.82300163132137, "grad_norm": 0.04697816073894501, "learning_rate": 0.00042844036861253897, "loss": 0.0765, "num_input_tokens_seen": 156539776, "step": 72475 }, { "epoch": 11.823817292006526, "grad_norm": 0.003039855509996414, "learning_rate": 0.00042836992227950944, "loss": 0.0023, "num_input_tokens_seen": 156551488, "step": 72480 }, { "epoch": 11.82463295269168, "grad_norm": 0.28987327218055725, "learning_rate": 0.0004282994773981546, "loss": 0.0569, "num_input_tokens_seen": 156561952, "step": 72485 }, { "epoch": 11.825448613376835, "grad_norm": 0.06460442394018173, "learning_rate": 0.00042822903396990146, "loss": 0.0267, "num_input_tokens_seen": 156572448, "step": 72490 }, { "epoch": 11.826264274061991, "grad_norm": 0.006775304209440947, "learning_rate": 0.0004281585919961783, "loss": 0.032, "num_input_tokens_seen": 156581696, "step": 72495 }, { "epoch": 11.827079934747145, "grad_norm": 0.03162192925810814, "learning_rate": 0.00042808815147841214, "loss": 0.0071, "num_input_tokens_seen": 156592224, "step": 72500 }, { "epoch": 11.8278955954323, "grad_norm": 0.1275867521762848, "learning_rate": 0.0004280177124180311, "loss": 0.1319, "num_input_tokens_seen": 156602432, "step": 72505 }, { "epoch": 11.828711256117455, "grad_norm": 0.0015156982699409127, "learning_rate": 0.0004279472748164621, "loss": 0.0439, "num_input_tokens_seen": 156613856, "step": 72510 }, { "epoch": 11.82952691680261, "grad_norm": 0.005000817123800516, "learning_rate": 0.0004278768386751332, "loss": 0.182, "num_input_tokens_seen": 156624256, "step": 72515 }, { "epoch": 11.830342577487766, "grad_norm": 0.003169822273775935, "learning_rate": 0.0004278064039954716, "loss": 0.0317, "num_input_tokens_seen": 156635648, "step": 72520 }, { "epoch": 11.83115823817292, "grad_norm": 0.41648584604263306, "learning_rate": 0.00042773597077890485, "loss": 0.1084, "num_input_tokens_seen": 156645984, "step": 72525 }, { "epoch": 11.831973898858076, "grad_norm": 0.14442989230155945, "learning_rate": 0.0004276655390268603, "loss": 0.0124, "num_input_tokens_seen": 156657056, "step": 72530 }, { "epoch": 11.83278955954323, "grad_norm": 0.009963775984942913, "learning_rate": 0.0004275951087407653, "loss": 0.1158, "num_input_tokens_seen": 156668928, "step": 72535 }, { "epoch": 11.833605220228385, "grad_norm": 0.005215761251747608, "learning_rate": 0.0004275246799220473, "loss": 0.0212, "num_input_tokens_seen": 156679808, "step": 72540 }, { "epoch": 11.83442088091354, "grad_norm": 0.01756799779832363, "learning_rate": 0.0004274542525721338, "loss": 0.0152, "num_input_tokens_seen": 156690400, "step": 72545 }, { "epoch": 11.835236541598695, "grad_norm": 0.010215381160378456, "learning_rate": 0.00042738382669245157, "loss": 0.0113, "num_input_tokens_seen": 156701024, "step": 72550 }, { "epoch": 11.83605220228385, "grad_norm": 0.33329635858535767, "learning_rate": 0.0004273134022844285, "loss": 0.0295, "num_input_tokens_seen": 156712576, "step": 72555 }, { "epoch": 11.836867862969005, "grad_norm": 0.004677685908973217, "learning_rate": 0.00042724297934949136, "loss": 0.0091, "num_input_tokens_seen": 156723552, "step": 72560 }, { "epoch": 11.83768352365416, "grad_norm": 0.014706281013786793, "learning_rate": 0.0004271725578890675, "loss": 0.0045, "num_input_tokens_seen": 156735328, "step": 72565 }, { "epoch": 11.838499184339314, "grad_norm": 0.052408941090106964, "learning_rate": 0.00042710213790458435, "loss": 0.0571, "num_input_tokens_seen": 156746304, "step": 72570 }, { "epoch": 11.83931484502447, "grad_norm": 0.005835378542542458, "learning_rate": 0.00042703171939746865, "loss": 0.0169, "num_input_tokens_seen": 156758208, "step": 72575 }, { "epoch": 11.840130505709626, "grad_norm": 0.018282631412148476, "learning_rate": 0.00042696130236914796, "loss": 0.1527, "num_input_tokens_seen": 156767488, "step": 72580 }, { "epoch": 11.84094616639478, "grad_norm": 0.25564491748809814, "learning_rate": 0.00042689088682104886, "loss": 0.1054, "num_input_tokens_seen": 156778592, "step": 72585 }, { "epoch": 11.841761827079935, "grad_norm": 0.0028037067968398333, "learning_rate": 0.00042682047275459893, "loss": 0.0059, "num_input_tokens_seen": 156789888, "step": 72590 }, { "epoch": 11.84257748776509, "grad_norm": 0.15276266634464264, "learning_rate": 0.00042675006017122477, "loss": 0.045, "num_input_tokens_seen": 156799872, "step": 72595 }, { "epoch": 11.843393148450245, "grad_norm": 0.003195535857230425, "learning_rate": 0.0004266796490723538, "loss": 0.008, "num_input_tokens_seen": 156809632, "step": 72600 }, { "epoch": 11.844208809135399, "grad_norm": 0.009690443985164165, "learning_rate": 0.0004266092394594124, "loss": 0.0188, "num_input_tokens_seen": 156820064, "step": 72605 }, { "epoch": 11.845024469820554, "grad_norm": 0.01867981068789959, "learning_rate": 0.00042653883133382824, "loss": 0.0901, "num_input_tokens_seen": 156831424, "step": 72610 }, { "epoch": 11.84584013050571, "grad_norm": 0.0158432237803936, "learning_rate": 0.00042646842469702754, "loss": 0.0193, "num_input_tokens_seen": 156841664, "step": 72615 }, { "epoch": 11.846655791190864, "grad_norm": 0.01342178788036108, "learning_rate": 0.0004263980195504378, "loss": 0.01, "num_input_tokens_seen": 156853888, "step": 72620 }, { "epoch": 11.84747145187602, "grad_norm": 0.025491604581475258, "learning_rate": 0.0004263276158954853, "loss": 0.012, "num_input_tokens_seen": 156865184, "step": 72625 }, { "epoch": 11.848287112561174, "grad_norm": 0.00676423916593194, "learning_rate": 0.0004262572137335973, "loss": 0.013, "num_input_tokens_seen": 156876736, "step": 72630 }, { "epoch": 11.84910277324633, "grad_norm": 0.021774085238575935, "learning_rate": 0.00042618681306620025, "loss": 0.0122, "num_input_tokens_seen": 156886848, "step": 72635 }, { "epoch": 11.849918433931485, "grad_norm": 0.005161866080015898, "learning_rate": 0.00042611641389472127, "loss": 0.0046, "num_input_tokens_seen": 156898592, "step": 72640 }, { "epoch": 11.850734094616639, "grad_norm": 0.0016783748287707567, "learning_rate": 0.0004260460162205867, "loss": 0.0036, "num_input_tokens_seen": 156909888, "step": 72645 }, { "epoch": 11.851549755301795, "grad_norm": 0.0034606284461915493, "learning_rate": 0.0004259756200452236, "loss": 0.0063, "num_input_tokens_seen": 156920320, "step": 72650 }, { "epoch": 11.852365415986949, "grad_norm": 0.006742651574313641, "learning_rate": 0.00042590522537005825, "loss": 0.0132, "num_input_tokens_seen": 156930368, "step": 72655 }, { "epoch": 11.853181076672104, "grad_norm": 0.3364547789096832, "learning_rate": 0.00042583483219651763, "loss": 0.0294, "num_input_tokens_seen": 156940608, "step": 72660 }, { "epoch": 11.85399673735726, "grad_norm": 0.015008768998086452, "learning_rate": 0.0004257644405260282, "loss": 0.0062, "num_input_tokens_seen": 156951168, "step": 72665 }, { "epoch": 11.854812398042414, "grad_norm": 0.010510805994272232, "learning_rate": 0.0004256940503600166, "loss": 0.0103, "num_input_tokens_seen": 156961920, "step": 72670 }, { "epoch": 11.85562805872757, "grad_norm": 0.015424901619553566, "learning_rate": 0.00042562366169990936, "loss": 0.0085, "num_input_tokens_seen": 156973888, "step": 72675 }, { "epoch": 11.856443719412724, "grad_norm": 0.1747453808784485, "learning_rate": 0.00042555327454713276, "loss": 0.0947, "num_input_tokens_seen": 156984448, "step": 72680 }, { "epoch": 11.85725938009788, "grad_norm": 0.006023809779435396, "learning_rate": 0.0004254828889031137, "loss": 0.0107, "num_input_tokens_seen": 156996864, "step": 72685 }, { "epoch": 11.858075040783035, "grad_norm": 0.007003793492913246, "learning_rate": 0.0004254125047692784, "loss": 0.0661, "num_input_tokens_seen": 157007424, "step": 72690 }, { "epoch": 11.858890701468189, "grad_norm": 0.008758111856877804, "learning_rate": 0.00042534212214705326, "loss": 0.0322, "num_input_tokens_seen": 157018624, "step": 72695 }, { "epoch": 11.859706362153345, "grad_norm": 0.0082249129191041, "learning_rate": 0.0004252717410378648, "loss": 0.05, "num_input_tokens_seen": 157028768, "step": 72700 }, { "epoch": 11.860522022838499, "grad_norm": 0.00721960561349988, "learning_rate": 0.00042520136144313925, "loss": 0.0335, "num_input_tokens_seen": 157041248, "step": 72705 }, { "epoch": 11.861337683523654, "grad_norm": 0.010221786797046661, "learning_rate": 0.0004251309833643029, "loss": 0.0218, "num_input_tokens_seen": 157051616, "step": 72710 }, { "epoch": 11.86215334420881, "grad_norm": 0.015197236090898514, "learning_rate": 0.00042506060680278234, "loss": 0.0374, "num_input_tokens_seen": 157062336, "step": 72715 }, { "epoch": 11.862969004893964, "grad_norm": 0.45400407910346985, "learning_rate": 0.00042499023176000353, "loss": 0.0753, "num_input_tokens_seen": 157072256, "step": 72720 }, { "epoch": 11.86378466557912, "grad_norm": 0.12778514623641968, "learning_rate": 0.000424919858237393, "loss": 0.0286, "num_input_tokens_seen": 157082848, "step": 72725 }, { "epoch": 11.864600326264274, "grad_norm": 0.491372287273407, "learning_rate": 0.00042484948623637656, "loss": 0.0654, "num_input_tokens_seen": 157094080, "step": 72730 }, { "epoch": 11.86541598694943, "grad_norm": 0.003955055959522724, "learning_rate": 0.0004247791157583808, "loss": 0.0077, "num_input_tokens_seen": 157104416, "step": 72735 }, { "epoch": 11.866231647634583, "grad_norm": 0.006338398437947035, "learning_rate": 0.0004247087468048315, "loss": 0.0162, "num_input_tokens_seen": 157114080, "step": 72740 }, { "epoch": 11.867047308319739, "grad_norm": 0.004220007918775082, "learning_rate": 0.00042463837937715515, "loss": 0.0978, "num_input_tokens_seen": 157124192, "step": 72745 }, { "epoch": 11.867862969004895, "grad_norm": 0.0883263647556305, "learning_rate": 0.0004245680134767775, "loss": 0.0224, "num_input_tokens_seen": 157135360, "step": 72750 }, { "epoch": 11.868678629690049, "grad_norm": 0.0914829820394516, "learning_rate": 0.0004244976491051249, "loss": 0.0169, "num_input_tokens_seen": 157146208, "step": 72755 }, { "epoch": 11.869494290375204, "grad_norm": 0.04714475944638252, "learning_rate": 0.00042442728626362306, "loss": 0.08, "num_input_tokens_seen": 157156928, "step": 72760 }, { "epoch": 11.870309951060358, "grad_norm": 0.005293759051710367, "learning_rate": 0.00042435692495369824, "loss": 0.0727, "num_input_tokens_seen": 157167936, "step": 72765 }, { "epoch": 11.871125611745514, "grad_norm": 0.014321415685117245, "learning_rate": 0.0004242865651767762, "loss": 0.1189, "num_input_tokens_seen": 157178272, "step": 72770 }, { "epoch": 11.87194127243067, "grad_norm": 0.0015413248911499977, "learning_rate": 0.0004242162069342831, "loss": 0.0057, "num_input_tokens_seen": 157189408, "step": 72775 }, { "epoch": 11.872756933115824, "grad_norm": 0.012533880770206451, "learning_rate": 0.0004241458502276446, "loss": 0.0094, "num_input_tokens_seen": 157200704, "step": 72780 }, { "epoch": 11.87357259380098, "grad_norm": 0.16592223942279816, "learning_rate": 0.00042407549505828657, "loss": 0.0206, "num_input_tokens_seen": 157213280, "step": 72785 }, { "epoch": 11.874388254486133, "grad_norm": 0.11769827455282211, "learning_rate": 0.0004240051414276352, "loss": 0.0478, "num_input_tokens_seen": 157223680, "step": 72790 }, { "epoch": 11.875203915171289, "grad_norm": 0.017359424382448196, "learning_rate": 0.00042393478933711585, "loss": 0.0186, "num_input_tokens_seen": 157233664, "step": 72795 }, { "epoch": 11.876019575856443, "grad_norm": 0.05703236162662506, "learning_rate": 0.0004238644387881546, "loss": 0.0218, "num_input_tokens_seen": 157244192, "step": 72800 }, { "epoch": 11.876835236541599, "grad_norm": 0.030190356075763702, "learning_rate": 0.000423794089782177, "loss": 0.0064, "num_input_tokens_seen": 157254080, "step": 72805 }, { "epoch": 11.877650897226754, "grad_norm": 0.0018631864804774523, "learning_rate": 0.000423723742320609, "loss": 0.0051, "num_input_tokens_seen": 157265824, "step": 72810 }, { "epoch": 11.878466557911908, "grad_norm": 0.17252780497074127, "learning_rate": 0.00042365339640487596, "loss": 0.0133, "num_input_tokens_seen": 157277280, "step": 72815 }, { "epoch": 11.879282218597064, "grad_norm": 0.08619865775108337, "learning_rate": 0.0004235830520364038, "loss": 0.0658, "num_input_tokens_seen": 157287584, "step": 72820 }, { "epoch": 11.880097879282218, "grad_norm": 0.007091619074344635, "learning_rate": 0.0004235127092166179, "loss": 0.0053, "num_input_tokens_seen": 157298816, "step": 72825 }, { "epoch": 11.880913539967374, "grad_norm": 0.0057108355686068535, "learning_rate": 0.0004234423679469441, "loss": 0.0201, "num_input_tokens_seen": 157310304, "step": 72830 }, { "epoch": 11.88172920065253, "grad_norm": 0.009273702278733253, "learning_rate": 0.0004233720282288078, "loss": 0.0125, "num_input_tokens_seen": 157319840, "step": 72835 }, { "epoch": 11.882544861337683, "grad_norm": 0.7806374430656433, "learning_rate": 0.00042330169006363455, "loss": 0.094, "num_input_tokens_seen": 157329984, "step": 72840 }, { "epoch": 11.883360522022839, "grad_norm": 0.005861148703843355, "learning_rate": 0.0004232313534528499, "loss": 0.0059, "num_input_tokens_seen": 157340768, "step": 72845 }, { "epoch": 11.884176182707993, "grad_norm": 0.009260977618396282, "learning_rate": 0.00042316101839787916, "loss": 0.0863, "num_input_tokens_seen": 157351488, "step": 72850 }, { "epoch": 11.884991843393149, "grad_norm": 0.07549002766609192, "learning_rate": 0.00042309068490014787, "loss": 0.065, "num_input_tokens_seen": 157363392, "step": 72855 }, { "epoch": 11.885807504078304, "grad_norm": 0.5480133295059204, "learning_rate": 0.00042302035296108156, "loss": 0.0349, "num_input_tokens_seen": 157374496, "step": 72860 }, { "epoch": 11.886623164763458, "grad_norm": 0.0028016124852001667, "learning_rate": 0.00042295002258210525, "loss": 0.0144, "num_input_tokens_seen": 157385856, "step": 72865 }, { "epoch": 11.887438825448614, "grad_norm": 0.011609912849962711, "learning_rate": 0.00042287969376464466, "loss": 0.0094, "num_input_tokens_seen": 157396512, "step": 72870 }, { "epoch": 11.888254486133768, "grad_norm": 0.003198280232027173, "learning_rate": 0.0004228093665101247, "loss": 0.0068, "num_input_tokens_seen": 157407328, "step": 72875 }, { "epoch": 11.889070146818923, "grad_norm": 0.005472021643072367, "learning_rate": 0.00042273904081997115, "loss": 0.025, "num_input_tokens_seen": 157417376, "step": 72880 }, { "epoch": 11.88988580750408, "grad_norm": 0.7649688720703125, "learning_rate": 0.0004226687166956087, "loss": 0.0381, "num_input_tokens_seen": 157430624, "step": 72885 }, { "epoch": 11.890701468189233, "grad_norm": 0.004005796741694212, "learning_rate": 0.00042259839413846275, "loss": 0.1093, "num_input_tokens_seen": 157442336, "step": 72890 }, { "epoch": 11.891517128874389, "grad_norm": 0.034606240689754486, "learning_rate": 0.0004225280731499588, "loss": 0.0074, "num_input_tokens_seen": 157452064, "step": 72895 }, { "epoch": 11.892332789559543, "grad_norm": 0.028569230809807777, "learning_rate": 0.00042245775373152153, "loss": 0.0157, "num_input_tokens_seen": 157463264, "step": 72900 }, { "epoch": 11.893148450244698, "grad_norm": 0.018665973097085953, "learning_rate": 0.0004223874358845764, "loss": 0.0219, "num_input_tokens_seen": 157475296, "step": 72905 }, { "epoch": 11.893964110929852, "grad_norm": 0.004474216606467962, "learning_rate": 0.0004223171196105482, "loss": 0.0412, "num_input_tokens_seen": 157486176, "step": 72910 }, { "epoch": 11.894779771615008, "grad_norm": 0.005233396776020527, "learning_rate": 0.0004222468049108623, "loss": 0.0067, "num_input_tokens_seen": 157497952, "step": 72915 }, { "epoch": 11.895595432300164, "grad_norm": 0.006671491544693708, "learning_rate": 0.00042217649178694327, "loss": 0.0047, "num_input_tokens_seen": 157509248, "step": 72920 }, { "epoch": 11.896411092985318, "grad_norm": 0.007623288314789534, "learning_rate": 0.00042210618024021663, "loss": 0.0022, "num_input_tokens_seen": 157519136, "step": 72925 }, { "epoch": 11.897226753670473, "grad_norm": 0.03528051823377609, "learning_rate": 0.00042203587027210684, "loss": 0.0392, "num_input_tokens_seen": 157529824, "step": 72930 }, { "epoch": 11.898042414355627, "grad_norm": 0.5511897802352905, "learning_rate": 0.00042196556188403924, "loss": 0.1273, "num_input_tokens_seen": 157540704, "step": 72935 }, { "epoch": 11.898858075040783, "grad_norm": 0.015145723707973957, "learning_rate": 0.0004218952550774383, "loss": 0.0138, "num_input_tokens_seen": 157552640, "step": 72940 }, { "epoch": 11.899673735725939, "grad_norm": 0.3677273690700531, "learning_rate": 0.00042182494985372937, "loss": 0.0796, "num_input_tokens_seen": 157562816, "step": 72945 }, { "epoch": 11.900489396411093, "grad_norm": 0.03927216678857803, "learning_rate": 0.0004217546462143368, "loss": 0.0473, "num_input_tokens_seen": 157574496, "step": 72950 }, { "epoch": 11.901305057096248, "grad_norm": 0.0031949521508067846, "learning_rate": 0.0004216843441606857, "loss": 0.0693, "num_input_tokens_seen": 157584832, "step": 72955 }, { "epoch": 11.902120717781402, "grad_norm": 0.005437622778117657, "learning_rate": 0.0004216140436942006, "loss": 0.0939, "num_input_tokens_seen": 157595520, "step": 72960 }, { "epoch": 11.902936378466558, "grad_norm": 0.007426468189805746, "learning_rate": 0.0004215437448163065, "loss": 0.0326, "num_input_tokens_seen": 157607520, "step": 72965 }, { "epoch": 11.903752039151712, "grad_norm": 0.008727246895432472, "learning_rate": 0.00042147344752842774, "loss": 0.0091, "num_input_tokens_seen": 157619520, "step": 72970 }, { "epoch": 11.904567699836868, "grad_norm": 0.7979373335838318, "learning_rate": 0.0004214031518319893, "loss": 0.1674, "num_input_tokens_seen": 157630400, "step": 72975 }, { "epoch": 11.905383360522023, "grad_norm": 0.012567078694701195, "learning_rate": 0.0004213328577284157, "loss": 0.1404, "num_input_tokens_seen": 157642080, "step": 72980 }, { "epoch": 11.906199021207177, "grad_norm": 0.07097362726926804, "learning_rate": 0.0004212625652191315, "loss": 0.0049, "num_input_tokens_seen": 157653504, "step": 72985 }, { "epoch": 11.907014681892333, "grad_norm": 0.13373368978500366, "learning_rate": 0.00042119227430556137, "loss": 0.0298, "num_input_tokens_seen": 157665280, "step": 72990 }, { "epoch": 11.907830342577487, "grad_norm": 0.012603395618498325, "learning_rate": 0.0004211219849891296, "loss": 0.0817, "num_input_tokens_seen": 157675136, "step": 72995 }, { "epoch": 11.908646003262643, "grad_norm": 0.0021373082417994738, "learning_rate": 0.00042105169727126094, "loss": 0.1518, "num_input_tokens_seen": 157684544, "step": 73000 }, { "epoch": 11.909461663947798, "grad_norm": 0.016298236325383186, "learning_rate": 0.00042098141115337986, "loss": 0.0065, "num_input_tokens_seen": 157694720, "step": 73005 }, { "epoch": 11.910277324632952, "grad_norm": 0.015862375497817993, "learning_rate": 0.0004209111266369107, "loss": 0.0309, "num_input_tokens_seen": 157705568, "step": 73010 }, { "epoch": 11.911092985318108, "grad_norm": 0.007350914645940065, "learning_rate": 0.0004208408437232779, "loss": 0.0091, "num_input_tokens_seen": 157717248, "step": 73015 }, { "epoch": 11.911908646003262, "grad_norm": 0.5517430305480957, "learning_rate": 0.00042077056241390586, "loss": 0.0344, "num_input_tokens_seen": 157728096, "step": 73020 }, { "epoch": 11.912724306688418, "grad_norm": 0.044113751500844955, "learning_rate": 0.00042070028271021877, "loss": 0.1243, "num_input_tokens_seen": 157738720, "step": 73025 }, { "epoch": 11.913539967373573, "grad_norm": 0.0020097021479159594, "learning_rate": 0.0004206300046136412, "loss": 0.0363, "num_input_tokens_seen": 157748768, "step": 73030 }, { "epoch": 11.914355628058727, "grad_norm": 0.013776198029518127, "learning_rate": 0.00042055972812559707, "loss": 0.0945, "num_input_tokens_seen": 157759392, "step": 73035 }, { "epoch": 11.915171288743883, "grad_norm": 0.0191465113312006, "learning_rate": 0.0004204894532475111, "loss": 0.0793, "num_input_tokens_seen": 157770976, "step": 73040 }, { "epoch": 11.915986949429037, "grad_norm": 0.002885065972805023, "learning_rate": 0.00042041917998080695, "loss": 0.021, "num_input_tokens_seen": 157782080, "step": 73045 }, { "epoch": 11.916802610114193, "grad_norm": 0.006058351136744022, "learning_rate": 0.0004203489083269093, "loss": 0.0599, "num_input_tokens_seen": 157793088, "step": 73050 }, { "epoch": 11.917618270799348, "grad_norm": 0.016492463648319244, "learning_rate": 0.0004202786382872419, "loss": 0.1625, "num_input_tokens_seen": 157805056, "step": 73055 }, { "epoch": 11.918433931484502, "grad_norm": 0.03171005845069885, "learning_rate": 0.00042020836986322917, "loss": 0.0166, "num_input_tokens_seen": 157815072, "step": 73060 }, { "epoch": 11.919249592169658, "grad_norm": 0.0331353098154068, "learning_rate": 0.0004201381030562949, "loss": 0.0128, "num_input_tokens_seen": 157824480, "step": 73065 }, { "epoch": 11.920065252854812, "grad_norm": 0.12538665533065796, "learning_rate": 0.00042006783786786346, "loss": 0.0305, "num_input_tokens_seen": 157836928, "step": 73070 }, { "epoch": 11.920880913539968, "grad_norm": 0.005170703399926424, "learning_rate": 0.0004199975742993585, "loss": 0.0047, "num_input_tokens_seen": 157848704, "step": 73075 }, { "epoch": 11.921696574225122, "grad_norm": 0.04474220797419548, "learning_rate": 0.0004199273123522044, "loss": 0.0444, "num_input_tokens_seen": 157860992, "step": 73080 }, { "epoch": 11.922512234910277, "grad_norm": 0.006513546220958233, "learning_rate": 0.00041985705202782464, "loss": 0.0557, "num_input_tokens_seen": 157872128, "step": 73085 }, { "epoch": 11.923327895595433, "grad_norm": 0.042542729526758194, "learning_rate": 0.00041978679332764366, "loss": 0.0479, "num_input_tokens_seen": 157883776, "step": 73090 }, { "epoch": 11.924143556280587, "grad_norm": 0.007474198471754789, "learning_rate": 0.0004197165362530848, "loss": 0.063, "num_input_tokens_seen": 157893984, "step": 73095 }, { "epoch": 11.924959216965743, "grad_norm": 0.019985618069767952, "learning_rate": 0.00041964628080557224, "loss": 0.0069, "num_input_tokens_seen": 157904960, "step": 73100 }, { "epoch": 11.925774877650896, "grad_norm": 0.002067849040031433, "learning_rate": 0.0004195760269865299, "loss": 0.016, "num_input_tokens_seen": 157914816, "step": 73105 }, { "epoch": 11.926590538336052, "grad_norm": 0.39414486289024353, "learning_rate": 0.0004195057747973812, "loss": 0.2092, "num_input_tokens_seen": 157926080, "step": 73110 }, { "epoch": 11.927406199021208, "grad_norm": 0.0032160452101379633, "learning_rate": 0.0004194355242395503, "loss": 0.0482, "num_input_tokens_seen": 157936992, "step": 73115 }, { "epoch": 11.928221859706362, "grad_norm": 0.4254874289035797, "learning_rate": 0.00041936527531446046, "loss": 0.1429, "num_input_tokens_seen": 157948864, "step": 73120 }, { "epoch": 11.929037520391518, "grad_norm": 0.14057868719100952, "learning_rate": 0.0004192950280235359, "loss": 0.0168, "num_input_tokens_seen": 157958912, "step": 73125 }, { "epoch": 11.929853181076671, "grad_norm": 0.008988683111965656, "learning_rate": 0.0004192247823681997, "loss": 0.0432, "num_input_tokens_seen": 157968704, "step": 73130 }, { "epoch": 11.930668841761827, "grad_norm": 0.018005967140197754, "learning_rate": 0.00041915453834987594, "loss": 0.0148, "num_input_tokens_seen": 157978944, "step": 73135 }, { "epoch": 11.931484502446983, "grad_norm": 0.26343950629234314, "learning_rate": 0.0004190842959699879, "loss": 0.0194, "num_input_tokens_seen": 157989504, "step": 73140 }, { "epoch": 11.932300163132137, "grad_norm": 0.10334479063749313, "learning_rate": 0.0004190140552299593, "loss": 0.0142, "num_input_tokens_seen": 157999904, "step": 73145 }, { "epoch": 11.933115823817293, "grad_norm": 0.18696285784244537, "learning_rate": 0.0004189438161312136, "loss": 0.0162, "num_input_tokens_seen": 158011168, "step": 73150 }, { "epoch": 11.933931484502446, "grad_norm": 0.003918380010873079, "learning_rate": 0.00041887357867517435, "loss": 0.0469, "num_input_tokens_seen": 158022176, "step": 73155 }, { "epoch": 11.934747145187602, "grad_norm": 0.35251501202583313, "learning_rate": 0.0004188033428632649, "loss": 0.0338, "num_input_tokens_seen": 158033408, "step": 73160 }, { "epoch": 11.935562805872756, "grad_norm": 0.0022698971442878246, "learning_rate": 0.00041873310869690875, "loss": 0.0141, "num_input_tokens_seen": 158045056, "step": 73165 }, { "epoch": 11.936378466557912, "grad_norm": 0.0054527875036001205, "learning_rate": 0.00041866287617752906, "loss": 0.0126, "num_input_tokens_seen": 158056352, "step": 73170 }, { "epoch": 11.937194127243067, "grad_norm": 0.002886369824409485, "learning_rate": 0.0004185926453065496, "loss": 0.0076, "num_input_tokens_seen": 158067488, "step": 73175 }, { "epoch": 11.938009787928221, "grad_norm": 0.002251496771350503, "learning_rate": 0.0004185224160853933, "loss": 0.1491, "num_input_tokens_seen": 158079488, "step": 73180 }, { "epoch": 11.938825448613377, "grad_norm": 0.06846843659877777, "learning_rate": 0.00041845218851548375, "loss": 0.0121, "num_input_tokens_seen": 158091232, "step": 73185 }, { "epoch": 11.939641109298531, "grad_norm": 0.0593634694814682, "learning_rate": 0.0004183819625982439, "loss": 0.045, "num_input_tokens_seen": 158102112, "step": 73190 }, { "epoch": 11.940456769983687, "grad_norm": 0.007378603331744671, "learning_rate": 0.0004183117383350973, "loss": 0.0152, "num_input_tokens_seen": 158114112, "step": 73195 }, { "epoch": 11.941272430668842, "grad_norm": 0.005221458151936531, "learning_rate": 0.0004182415157274668, "loss": 0.0204, "num_input_tokens_seen": 158124672, "step": 73200 }, { "epoch": 11.942088091353996, "grad_norm": 0.04406864568591118, "learning_rate": 0.00041817129477677564, "loss": 0.0173, "num_input_tokens_seen": 158134816, "step": 73205 }, { "epoch": 11.942903752039152, "grad_norm": 0.004383188672363758, "learning_rate": 0.0004181010754844472, "loss": 0.0635, "num_input_tokens_seen": 158145376, "step": 73210 }, { "epoch": 11.943719412724306, "grad_norm": 0.00374322896823287, "learning_rate": 0.00041803085785190416, "loss": 0.0327, "num_input_tokens_seen": 158156064, "step": 73215 }, { "epoch": 11.944535073409462, "grad_norm": 0.010824406519532204, "learning_rate": 0.00041796064188057, "loss": 0.0089, "num_input_tokens_seen": 158166176, "step": 73220 }, { "epoch": 11.945350734094617, "grad_norm": 0.01288297027349472, "learning_rate": 0.00041789042757186726, "loss": 0.0273, "num_input_tokens_seen": 158177184, "step": 73225 }, { "epoch": 11.946166394779771, "grad_norm": 0.055671948939561844, "learning_rate": 0.00041782021492721937, "loss": 0.1799, "num_input_tokens_seen": 158188416, "step": 73230 }, { "epoch": 11.946982055464927, "grad_norm": 0.02129376120865345, "learning_rate": 0.00041775000394804896, "loss": 0.0095, "num_input_tokens_seen": 158200256, "step": 73235 }, { "epoch": 11.947797716150081, "grad_norm": 0.054272472858428955, "learning_rate": 0.0004176797946357792, "loss": 0.0122, "num_input_tokens_seen": 158211776, "step": 73240 }, { "epoch": 11.948613376835237, "grad_norm": 0.02800886332988739, "learning_rate": 0.00041760958699183263, "loss": 0.1102, "num_input_tokens_seen": 158222944, "step": 73245 }, { "epoch": 11.949429037520392, "grad_norm": 0.11227209866046906, "learning_rate": 0.0004175393810176325, "loss": 0.0936, "num_input_tokens_seen": 158233600, "step": 73250 }, { "epoch": 11.950244698205546, "grad_norm": 0.013811938464641571, "learning_rate": 0.00041746917671460124, "loss": 0.0141, "num_input_tokens_seen": 158243360, "step": 73255 }, { "epoch": 11.951060358890702, "grad_norm": 0.10094699263572693, "learning_rate": 0.000417398974084162, "loss": 0.0198, "num_input_tokens_seen": 158253600, "step": 73260 }, { "epoch": 11.951876019575856, "grad_norm": 0.05100074037909508, "learning_rate": 0.0004173287731277371, "loss": 0.1161, "num_input_tokens_seen": 158265984, "step": 73265 }, { "epoch": 11.952691680261012, "grad_norm": 0.009566979482769966, "learning_rate": 0.00041725857384674974, "loss": 0.0074, "num_input_tokens_seen": 158277536, "step": 73270 }, { "epoch": 11.953507340946166, "grad_norm": 0.004473550245165825, "learning_rate": 0.0004171883762426221, "loss": 0.0099, "num_input_tokens_seen": 158288576, "step": 73275 }, { "epoch": 11.954323001631321, "grad_norm": 0.004220154602080584, "learning_rate": 0.00041711818031677737, "loss": 0.0092, "num_input_tokens_seen": 158300128, "step": 73280 }, { "epoch": 11.955138662316477, "grad_norm": 0.022628581151366234, "learning_rate": 0.00041704798607063756, "loss": 0.019, "num_input_tokens_seen": 158310400, "step": 73285 }, { "epoch": 11.955954323001631, "grad_norm": 0.0035848692059516907, "learning_rate": 0.0004169777935056257, "loss": 0.0092, "num_input_tokens_seen": 158320448, "step": 73290 }, { "epoch": 11.956769983686787, "grad_norm": 0.13575603067874908, "learning_rate": 0.00041690760262316415, "loss": 0.0417, "num_input_tokens_seen": 158330176, "step": 73295 }, { "epoch": 11.95758564437194, "grad_norm": 0.03912244364619255, "learning_rate": 0.0004168374134246754, "loss": 0.0483, "num_input_tokens_seen": 158340352, "step": 73300 }, { "epoch": 11.958401305057096, "grad_norm": 0.006890024524182081, "learning_rate": 0.000416767225911582, "loss": 0.0818, "num_input_tokens_seen": 158351904, "step": 73305 }, { "epoch": 11.959216965742252, "grad_norm": 0.0030039497651159763, "learning_rate": 0.0004166970400853064, "loss": 0.0169, "num_input_tokens_seen": 158363360, "step": 73310 }, { "epoch": 11.960032626427406, "grad_norm": 0.006594918668270111, "learning_rate": 0.00041662685594727076, "loss": 0.0462, "num_input_tokens_seen": 158374080, "step": 73315 }, { "epoch": 11.960848287112562, "grad_norm": 0.058372244238853455, "learning_rate": 0.0004165566734988979, "loss": 0.0248, "num_input_tokens_seen": 158385792, "step": 73320 }, { "epoch": 11.961663947797716, "grad_norm": 0.06784452497959137, "learning_rate": 0.00041648649274160976, "loss": 0.0104, "num_input_tokens_seen": 158395680, "step": 73325 }, { "epoch": 11.962479608482871, "grad_norm": 0.010779723525047302, "learning_rate": 0.0004164163136768289, "loss": 0.0506, "num_input_tokens_seen": 158407552, "step": 73330 }, { "epoch": 11.963295269168025, "grad_norm": 0.21537868678569794, "learning_rate": 0.0004163461363059774, "loss": 0.0242, "num_input_tokens_seen": 158418304, "step": 73335 }, { "epoch": 11.964110929853181, "grad_norm": 0.004940703511238098, "learning_rate": 0.00041627596063047753, "loss": 0.0049, "num_input_tokens_seen": 158429248, "step": 73340 }, { "epoch": 11.964926590538337, "grad_norm": 0.139415442943573, "learning_rate": 0.00041620578665175166, "loss": 0.0116, "num_input_tokens_seen": 158439840, "step": 73345 }, { "epoch": 11.96574225122349, "grad_norm": 0.47116124629974365, "learning_rate": 0.00041613561437122163, "loss": 0.037, "num_input_tokens_seen": 158449312, "step": 73350 }, { "epoch": 11.966557911908646, "grad_norm": 0.001149240881204605, "learning_rate": 0.0004160654437903101, "loss": 0.0128, "num_input_tokens_seen": 158460800, "step": 73355 }, { "epoch": 11.9673735725938, "grad_norm": 0.018407588824629784, "learning_rate": 0.0004159952749104385, "loss": 0.0127, "num_input_tokens_seen": 158471648, "step": 73360 }, { "epoch": 11.968189233278956, "grad_norm": 0.004824475850909948, "learning_rate": 0.00041592510773302946, "loss": 0.01, "num_input_tokens_seen": 158482944, "step": 73365 }, { "epoch": 11.969004893964112, "grad_norm": 0.4063175320625305, "learning_rate": 0.0004158549422595045, "loss": 0.0741, "num_input_tokens_seen": 158493856, "step": 73370 }, { "epoch": 11.969820554649266, "grad_norm": 0.007951868698000908, "learning_rate": 0.0004157847784912861, "loss": 0.0037, "num_input_tokens_seen": 158503680, "step": 73375 }, { "epoch": 11.970636215334421, "grad_norm": 0.0126974331215024, "learning_rate": 0.0004157146164297959, "loss": 0.0307, "num_input_tokens_seen": 158514336, "step": 73380 }, { "epoch": 11.971451876019575, "grad_norm": 0.02527419850230217, "learning_rate": 0.00041564445607645607, "loss": 0.0166, "num_input_tokens_seen": 158525088, "step": 73385 }, { "epoch": 11.97226753670473, "grad_norm": 0.413861483335495, "learning_rate": 0.0004155742974326881, "loss": 0.1583, "num_input_tokens_seen": 158536832, "step": 73390 }, { "epoch": 11.973083197389887, "grad_norm": 0.009883550927042961, "learning_rate": 0.00041550414049991435, "loss": 0.0024, "num_input_tokens_seen": 158547296, "step": 73395 }, { "epoch": 11.97389885807504, "grad_norm": 0.04352164641022682, "learning_rate": 0.0004154339852795562, "loss": 0.0394, "num_input_tokens_seen": 158558464, "step": 73400 }, { "epoch": 11.974714518760196, "grad_norm": 0.002612270647659898, "learning_rate": 0.0004153638317730358, "loss": 0.0099, "num_input_tokens_seen": 158569888, "step": 73405 }, { "epoch": 11.97553017944535, "grad_norm": 0.011124800890684128, "learning_rate": 0.00041529367998177446, "loss": 0.0074, "num_input_tokens_seen": 158580992, "step": 73410 }, { "epoch": 11.976345840130506, "grad_norm": 0.01622111164033413, "learning_rate": 0.00041522352990719434, "loss": 0.0269, "num_input_tokens_seen": 158593056, "step": 73415 }, { "epoch": 11.977161500815662, "grad_norm": 0.002231568330898881, "learning_rate": 0.0004151533815507168, "loss": 0.0884, "num_input_tokens_seen": 158604128, "step": 73420 }, { "epoch": 11.977977161500815, "grad_norm": 0.1311320662498474, "learning_rate": 0.00041508323491376364, "loss": 0.0209, "num_input_tokens_seen": 158615424, "step": 73425 }, { "epoch": 11.978792822185971, "grad_norm": 0.002909077098593116, "learning_rate": 0.00041501308999775664, "loss": 0.0178, "num_input_tokens_seen": 158627456, "step": 73430 }, { "epoch": 11.979608482871125, "grad_norm": 0.30766910314559937, "learning_rate": 0.00041494294680411695, "loss": 0.0986, "num_input_tokens_seen": 158638016, "step": 73435 }, { "epoch": 11.98042414355628, "grad_norm": 0.37345361709594727, "learning_rate": 0.0004148728053342665, "loss": 0.0145, "num_input_tokens_seen": 158649184, "step": 73440 }, { "epoch": 11.981239804241435, "grad_norm": 0.018977565690875053, "learning_rate": 0.0004148026655896265, "loss": 0.0061, "num_input_tokens_seen": 158661152, "step": 73445 }, { "epoch": 11.98205546492659, "grad_norm": 0.04246421530842781, "learning_rate": 0.0004147325275716188, "loss": 0.0122, "num_input_tokens_seen": 158670976, "step": 73450 }, { "epoch": 11.982871125611746, "grad_norm": 0.0053014010190963745, "learning_rate": 0.00041466239128166435, "loss": 0.0141, "num_input_tokens_seen": 158682080, "step": 73455 }, { "epoch": 11.9836867862969, "grad_norm": 0.019669989123940468, "learning_rate": 0.00041459225672118487, "loss": 0.0445, "num_input_tokens_seen": 158691232, "step": 73460 }, { "epoch": 11.984502446982056, "grad_norm": 0.4572742283344269, "learning_rate": 0.0004145221238916017, "loss": 0.1544, "num_input_tokens_seen": 158702688, "step": 73465 }, { "epoch": 11.98531810766721, "grad_norm": 0.42395129799842834, "learning_rate": 0.0004144519927943361, "loss": 0.1659, "num_input_tokens_seen": 158713408, "step": 73470 }, { "epoch": 11.986133768352365, "grad_norm": 0.004313977435231209, "learning_rate": 0.0004143818634308094, "loss": 0.0113, "num_input_tokens_seen": 158722624, "step": 73475 }, { "epoch": 11.986949429037521, "grad_norm": 0.01042697299271822, "learning_rate": 0.00041431173580244284, "loss": 0.0055, "num_input_tokens_seen": 158732928, "step": 73480 }, { "epoch": 11.987765089722675, "grad_norm": 0.3611927628517151, "learning_rate": 0.0004142416099106576, "loss": 0.053, "num_input_tokens_seen": 158743360, "step": 73485 }, { "epoch": 11.98858075040783, "grad_norm": 0.03010513260960579, "learning_rate": 0.0004141714857568751, "loss": 0.0072, "num_input_tokens_seen": 158753728, "step": 73490 }, { "epoch": 11.989396411092985, "grad_norm": 0.002898262580856681, "learning_rate": 0.0004141013633425161, "loss": 0.0093, "num_input_tokens_seen": 158764768, "step": 73495 }, { "epoch": 11.99021207177814, "grad_norm": 0.0036916485987603664, "learning_rate": 0.0004140312426690022, "loss": 0.0285, "num_input_tokens_seen": 158775072, "step": 73500 }, { "epoch": 11.991027732463294, "grad_norm": 0.06801813840866089, "learning_rate": 0.000413961123737754, "loss": 0.0264, "num_input_tokens_seen": 158786528, "step": 73505 }, { "epoch": 11.99184339314845, "grad_norm": 0.40527936816215515, "learning_rate": 0.00041389100655019295, "loss": 0.0218, "num_input_tokens_seen": 158797664, "step": 73510 }, { "epoch": 11.992659053833606, "grad_norm": 0.005984405521303415, "learning_rate": 0.00041382089110773975, "loss": 0.0065, "num_input_tokens_seen": 158808992, "step": 73515 }, { "epoch": 11.99347471451876, "grad_norm": 0.3398209810256958, "learning_rate": 0.00041375077741181564, "loss": 0.0239, "num_input_tokens_seen": 158819520, "step": 73520 }, { "epoch": 11.994290375203915, "grad_norm": 0.03455796837806702, "learning_rate": 0.0004136806654638413, "loss": 0.0152, "num_input_tokens_seen": 158829600, "step": 73525 }, { "epoch": 11.99510603588907, "grad_norm": 0.423898309469223, "learning_rate": 0.0004136105552652377, "loss": 0.1036, "num_input_tokens_seen": 158841376, "step": 73530 }, { "epoch": 11.995921696574225, "grad_norm": 0.08282370865345001, "learning_rate": 0.0004135404468174261, "loss": 0.0658, "num_input_tokens_seen": 158851264, "step": 73535 }, { "epoch": 11.99673735725938, "grad_norm": 0.01659543439745903, "learning_rate": 0.0004134703401218268, "loss": 0.0133, "num_input_tokens_seen": 158861536, "step": 73540 }, { "epoch": 11.997553017944535, "grad_norm": 0.0014833472669124603, "learning_rate": 0.00041340023517986096, "loss": 0.024, "num_input_tokens_seen": 158872288, "step": 73545 }, { "epoch": 11.99836867862969, "grad_norm": 0.014465752989053726, "learning_rate": 0.00041333013199294907, "loss": 0.0194, "num_input_tokens_seen": 158881920, "step": 73550 }, { "epoch": 11.999184339314844, "grad_norm": 0.032355792820453644, "learning_rate": 0.0004132600305625122, "loss": 0.0053, "num_input_tokens_seen": 158892960, "step": 73555 }, { "epoch": 12.0, "grad_norm": 0.007642882410436869, "learning_rate": 0.0004131899308899706, "loss": 0.0057, "num_input_tokens_seen": 158902432, "step": 73560 }, { "epoch": 12.0, "eval_loss": 0.1854449361562729, "eval_runtime": 103.8442, "eval_samples_per_second": 26.241, "eval_steps_per_second": 6.568, "num_input_tokens_seen": 158902432, "step": 73560 }, { "epoch": 12.000815660685156, "grad_norm": 0.5162748694419861, "learning_rate": 0.00041311983297674545, "loss": 0.0175, "num_input_tokens_seen": 158913760, "step": 73565 }, { "epoch": 12.00163132137031, "grad_norm": 0.05222615599632263, "learning_rate": 0.00041304973682425685, "loss": 0.0077, "num_input_tokens_seen": 158924672, "step": 73570 }, { "epoch": 12.002446982055465, "grad_norm": 0.021363843232393265, "learning_rate": 0.00041297964243392583, "loss": 0.0061, "num_input_tokens_seen": 158935840, "step": 73575 }, { "epoch": 12.00326264274062, "grad_norm": 0.002701932331547141, "learning_rate": 0.0004129095498071726, "loss": 0.0069, "num_input_tokens_seen": 158946688, "step": 73580 }, { "epoch": 12.004078303425775, "grad_norm": 0.013134065084159374, "learning_rate": 0.000412839458945418, "loss": 0.0071, "num_input_tokens_seen": 158956704, "step": 73585 }, { "epoch": 12.00489396411093, "grad_norm": 0.018720904365181923, "learning_rate": 0.0004127693698500821, "loss": 0.0057, "num_input_tokens_seen": 158967648, "step": 73590 }, { "epoch": 12.005709624796085, "grad_norm": 0.014408317394554615, "learning_rate": 0.0004126992825225858, "loss": 0.0891, "num_input_tokens_seen": 158978912, "step": 73595 }, { "epoch": 12.00652528548124, "grad_norm": 0.46795928478240967, "learning_rate": 0.00041262919696434915, "loss": 0.1526, "num_input_tokens_seen": 158990272, "step": 73600 }, { "epoch": 12.007340946166394, "grad_norm": 0.0026603129226714373, "learning_rate": 0.0004125591131767927, "loss": 0.053, "num_input_tokens_seen": 159001440, "step": 73605 }, { "epoch": 12.00815660685155, "grad_norm": 0.0020938925445079803, "learning_rate": 0.00041248903116133674, "loss": 0.003, "num_input_tokens_seen": 159012608, "step": 73610 }, { "epoch": 12.008972267536704, "grad_norm": 0.0015925763873383403, "learning_rate": 0.0004124189509194016, "loss": 0.0042, "num_input_tokens_seen": 159024256, "step": 73615 }, { "epoch": 12.00978792822186, "grad_norm": 0.03945109248161316, "learning_rate": 0.00041234887245240756, "loss": 0.0057, "num_input_tokens_seen": 159035424, "step": 73620 }, { "epoch": 12.010603588907015, "grad_norm": 0.06973441690206528, "learning_rate": 0.00041227879576177475, "loss": 0.0043, "num_input_tokens_seen": 159044448, "step": 73625 }, { "epoch": 12.01141924959217, "grad_norm": 0.05123627558350563, "learning_rate": 0.00041220872084892337, "loss": 0.0322, "num_input_tokens_seen": 159054912, "step": 73630 }, { "epoch": 12.012234910277325, "grad_norm": 0.014868333004415035, "learning_rate": 0.00041213864771527366, "loss": 0.021, "num_input_tokens_seen": 159066912, "step": 73635 }, { "epoch": 12.013050570962479, "grad_norm": 0.29003819823265076, "learning_rate": 0.0004120685763622458, "loss": 0.0768, "num_input_tokens_seen": 159076704, "step": 73640 }, { "epoch": 12.013866231647635, "grad_norm": 0.002655792748555541, "learning_rate": 0.00041199850679125974, "loss": 0.1417, "num_input_tokens_seen": 159087200, "step": 73645 }, { "epoch": 12.01468189233279, "grad_norm": 0.019231455400586128, "learning_rate": 0.0004119284390037356, "loss": 0.0102, "num_input_tokens_seen": 159099200, "step": 73650 }, { "epoch": 12.015497553017944, "grad_norm": 0.0031092215795069933, "learning_rate": 0.00041185837300109326, "loss": 0.1035, "num_input_tokens_seen": 159108800, "step": 73655 }, { "epoch": 12.0163132137031, "grad_norm": 0.08691810816526413, "learning_rate": 0.00041178830878475304, "loss": 0.0125, "num_input_tokens_seen": 159120032, "step": 73660 }, { "epoch": 12.017128874388254, "grad_norm": 0.05167734995484352, "learning_rate": 0.00041171824635613443, "loss": 0.0057, "num_input_tokens_seen": 159131584, "step": 73665 }, { "epoch": 12.01794453507341, "grad_norm": 0.03976357355713844, "learning_rate": 0.00041164818571665774, "loss": 0.1218, "num_input_tokens_seen": 159142752, "step": 73670 }, { "epoch": 12.018760195758565, "grad_norm": 0.0090693524107337, "learning_rate": 0.00041157812686774245, "loss": 0.0507, "num_input_tokens_seen": 159152384, "step": 73675 }, { "epoch": 12.01957585644372, "grad_norm": 0.004124946426600218, "learning_rate": 0.0004115080698108088, "loss": 0.009, "num_input_tokens_seen": 159163360, "step": 73680 }, { "epoch": 12.020391517128875, "grad_norm": 0.003406350966542959, "learning_rate": 0.0004114380145472761, "loss": 0.0528, "num_input_tokens_seen": 159174176, "step": 73685 }, { "epoch": 12.021207177814029, "grad_norm": 0.01586691476404667, "learning_rate": 0.00041136796107856465, "loss": 0.015, "num_input_tokens_seen": 159185088, "step": 73690 }, { "epoch": 12.022022838499185, "grad_norm": 0.005119045730680227, "learning_rate": 0.00041129790940609375, "loss": 0.0294, "num_input_tokens_seen": 159196448, "step": 73695 }, { "epoch": 12.022838499184338, "grad_norm": 0.0177877489477396, "learning_rate": 0.0004112278595312834, "loss": 0.0978, "num_input_tokens_seen": 159207872, "step": 73700 }, { "epoch": 12.023654159869494, "grad_norm": 0.003837467636913061, "learning_rate": 0.00041115781145555286, "loss": 0.0158, "num_input_tokens_seen": 159218304, "step": 73705 }, { "epoch": 12.02446982055465, "grad_norm": 0.006132758688181639, "learning_rate": 0.0004110877651803222, "loss": 0.0286, "num_input_tokens_seen": 159229152, "step": 73710 }, { "epoch": 12.025285481239804, "grad_norm": 0.0033713181037455797, "learning_rate": 0.0004110177207070106, "loss": 0.0034, "num_input_tokens_seen": 159239808, "step": 73715 }, { "epoch": 12.02610114192496, "grad_norm": 0.018628062680363655, "learning_rate": 0.0004109476780370379, "loss": 0.0183, "num_input_tokens_seen": 159250400, "step": 73720 }, { "epoch": 12.026916802610113, "grad_norm": 0.018260814249515533, "learning_rate": 0.00041087763717182336, "loss": 0.0364, "num_input_tokens_seen": 159261024, "step": 73725 }, { "epoch": 12.02773246329527, "grad_norm": 0.009373247623443604, "learning_rate": 0.00041080759811278674, "loss": 0.0082, "num_input_tokens_seen": 159271264, "step": 73730 }, { "epoch": 12.028548123980425, "grad_norm": 0.025325864553451538, "learning_rate": 0.00041073756086134705, "loss": 0.0073, "num_input_tokens_seen": 159282400, "step": 73735 }, { "epoch": 12.029363784665579, "grad_norm": 0.004139396827667952, "learning_rate": 0.00041066752541892395, "loss": 0.0093, "num_input_tokens_seen": 159293248, "step": 73740 }, { "epoch": 12.030179445350734, "grad_norm": 0.331638365983963, "learning_rate": 0.000410597491786937, "loss": 0.0759, "num_input_tokens_seen": 159303040, "step": 73745 }, { "epoch": 12.030995106035888, "grad_norm": 0.02152330055832863, "learning_rate": 0.0004105274599668051, "loss": 0.0321, "num_input_tokens_seen": 159314432, "step": 73750 }, { "epoch": 12.031810766721044, "grad_norm": 0.002201348775997758, "learning_rate": 0.00041045742995994783, "loss": 0.0067, "num_input_tokens_seen": 159325696, "step": 73755 }, { "epoch": 12.0326264274062, "grad_norm": 0.003991606179624796, "learning_rate": 0.0004103874017677842, "loss": 0.0143, "num_input_tokens_seen": 159337664, "step": 73760 }, { "epoch": 12.033442088091354, "grad_norm": 0.05896512418985367, "learning_rate": 0.0004103173753917337, "loss": 0.0149, "num_input_tokens_seen": 159349088, "step": 73765 }, { "epoch": 12.03425774877651, "grad_norm": 0.0014206412015482783, "learning_rate": 0.0004102473508332153, "loss": 0.0039, "num_input_tokens_seen": 159361056, "step": 73770 }, { "epoch": 12.035073409461663, "grad_norm": 0.024218376725912094, "learning_rate": 0.00041017732809364824, "loss": 0.0099, "num_input_tokens_seen": 159371104, "step": 73775 }, { "epoch": 12.035889070146819, "grad_norm": 0.6135803461074829, "learning_rate": 0.00041010730717445156, "loss": 0.0184, "num_input_tokens_seen": 159380960, "step": 73780 }, { "epoch": 12.036704730831975, "grad_norm": 0.014351412653923035, "learning_rate": 0.00041003728807704435, "loss": 0.0046, "num_input_tokens_seen": 159393280, "step": 73785 }, { "epoch": 12.037520391517129, "grad_norm": 0.0027120737358927727, "learning_rate": 0.00040996727080284555, "loss": 0.0208, "num_input_tokens_seen": 159404960, "step": 73790 }, { "epoch": 12.038336052202284, "grad_norm": 0.020590249449014664, "learning_rate": 0.0004098972553532743, "loss": 0.0149, "num_input_tokens_seen": 159416544, "step": 73795 }, { "epoch": 12.039151712887438, "grad_norm": 0.006616574712097645, "learning_rate": 0.00040982724172974926, "loss": 0.0131, "num_input_tokens_seen": 159427072, "step": 73800 }, { "epoch": 12.039967373572594, "grad_norm": 0.00522937485948205, "learning_rate": 0.0004097572299336899, "loss": 0.0061, "num_input_tokens_seen": 159438144, "step": 73805 }, { "epoch": 12.040783034257748, "grad_norm": 0.005134327802807093, "learning_rate": 0.00040968721996651445, "loss": 0.0015, "num_input_tokens_seen": 159447392, "step": 73810 }, { "epoch": 12.041598694942904, "grad_norm": 0.016191143542528152, "learning_rate": 0.00040961721182964235, "loss": 0.0247, "num_input_tokens_seen": 159458144, "step": 73815 }, { "epoch": 12.04241435562806, "grad_norm": 0.007574434857815504, "learning_rate": 0.00040954720552449186, "loss": 0.0024, "num_input_tokens_seen": 159469472, "step": 73820 }, { "epoch": 12.043230016313213, "grad_norm": 0.010849296115338802, "learning_rate": 0.0004094772010524822, "loss": 0.0039, "num_input_tokens_seen": 159480096, "step": 73825 }, { "epoch": 12.044045676998369, "grad_norm": 0.10277484357357025, "learning_rate": 0.0004094071984150317, "loss": 0.1408, "num_input_tokens_seen": 159492000, "step": 73830 }, { "epoch": 12.044861337683523, "grad_norm": 0.0031220330856740475, "learning_rate": 0.0004093371976135595, "loss": 0.0018, "num_input_tokens_seen": 159503392, "step": 73835 }, { "epoch": 12.045676998368679, "grad_norm": 0.03206094354391098, "learning_rate": 0.0004092671986494837, "loss": 0.1296, "num_input_tokens_seen": 159514944, "step": 73840 }, { "epoch": 12.046492659053834, "grad_norm": 0.043139755725860596, "learning_rate": 0.00040919720152422323, "loss": 0.0204, "num_input_tokens_seen": 159526368, "step": 73845 }, { "epoch": 12.047308319738988, "grad_norm": 0.0034898552112281322, "learning_rate": 0.00040912720623919696, "loss": 0.0064, "num_input_tokens_seen": 159538336, "step": 73850 }, { "epoch": 12.048123980424144, "grad_norm": 0.001161689287982881, "learning_rate": 0.00040905721279582284, "loss": 0.0106, "num_input_tokens_seen": 159548096, "step": 73855 }, { "epoch": 12.048939641109298, "grad_norm": 0.0020751608535647392, "learning_rate": 0.00040898722119551994, "loss": 0.0026, "num_input_tokens_seen": 159558592, "step": 73860 }, { "epoch": 12.049755301794454, "grad_norm": 0.002460476942360401, "learning_rate": 0.0004089172314397063, "loss": 0.0038, "num_input_tokens_seen": 159569088, "step": 73865 }, { "epoch": 12.05057096247961, "grad_norm": 0.012570970691740513, "learning_rate": 0.00040884724352980065, "loss": 0.0024, "num_input_tokens_seen": 159579232, "step": 73870 }, { "epoch": 12.051386623164763, "grad_norm": 0.023806337267160416, "learning_rate": 0.00040877725746722097, "loss": 0.0333, "num_input_tokens_seen": 159590688, "step": 73875 }, { "epoch": 12.052202283849919, "grad_norm": 0.004042148124426603, "learning_rate": 0.0004087072732533862, "loss": 0.1251, "num_input_tokens_seen": 159602016, "step": 73880 }, { "epoch": 12.053017944535073, "grad_norm": 0.001157692400738597, "learning_rate": 0.0004086372908897141, "loss": 0.0095, "num_input_tokens_seen": 159611392, "step": 73885 }, { "epoch": 12.053833605220229, "grad_norm": 0.03635965660214424, "learning_rate": 0.0004085673103776234, "loss": 0.0257, "num_input_tokens_seen": 159623008, "step": 73890 }, { "epoch": 12.054649265905383, "grad_norm": 0.0029063147958368063, "learning_rate": 0.000408497331718532, "loss": 0.0145, "num_input_tokens_seen": 159633120, "step": 73895 }, { "epoch": 12.055464926590538, "grad_norm": 0.027852777391672134, "learning_rate": 0.0004084273549138584, "loss": 0.0359, "num_input_tokens_seen": 159642944, "step": 73900 }, { "epoch": 12.056280587275694, "grad_norm": 0.6098665595054626, "learning_rate": 0.0004083573799650204, "loss": 0.1428, "num_input_tokens_seen": 159652064, "step": 73905 }, { "epoch": 12.057096247960848, "grad_norm": 0.002536676125600934, "learning_rate": 0.00040828740687343654, "loss": 0.002, "num_input_tokens_seen": 159661824, "step": 73910 }, { "epoch": 12.057911908646004, "grad_norm": 0.1208946481347084, "learning_rate": 0.0004082174356405247, "loss": 0.0692, "num_input_tokens_seen": 159673024, "step": 73915 }, { "epoch": 12.058727569331158, "grad_norm": 0.002335605677217245, "learning_rate": 0.00040814746626770287, "loss": 0.0039, "num_input_tokens_seen": 159684736, "step": 73920 }, { "epoch": 12.059543230016313, "grad_norm": 0.175007626414299, "learning_rate": 0.0004080774987563893, "loss": 0.0077, "num_input_tokens_seen": 159696128, "step": 73925 }, { "epoch": 12.060358890701469, "grad_norm": 0.019628094509243965, "learning_rate": 0.0004080075331080017, "loss": 0.0152, "num_input_tokens_seen": 159707712, "step": 73930 }, { "epoch": 12.061174551386623, "grad_norm": 0.7227426767349243, "learning_rate": 0.0004079375693239581, "loss": 0.2223, "num_input_tokens_seen": 159717440, "step": 73935 }, { "epoch": 12.061990212071779, "grad_norm": 0.2504555583000183, "learning_rate": 0.0004078676074056766, "loss": 0.021, "num_input_tokens_seen": 159729312, "step": 73940 }, { "epoch": 12.062805872756933, "grad_norm": 0.15913152694702148, "learning_rate": 0.0004077976473545748, "loss": 0.0104, "num_input_tokens_seen": 159739680, "step": 73945 }, { "epoch": 12.063621533442088, "grad_norm": 0.004883287940174341, "learning_rate": 0.0004077276891720707, "loss": 0.0138, "num_input_tokens_seen": 159750784, "step": 73950 }, { "epoch": 12.064437194127244, "grad_norm": 0.004812562372535467, "learning_rate": 0.000407657732859582, "loss": 0.0031, "num_input_tokens_seen": 159760544, "step": 73955 }, { "epoch": 12.065252854812398, "grad_norm": 0.2379300892353058, "learning_rate": 0.00040758777841852647, "loss": 0.1435, "num_input_tokens_seen": 159772416, "step": 73960 }, { "epoch": 12.066068515497554, "grad_norm": 0.220754474401474, "learning_rate": 0.000407517825850322, "loss": 0.0081, "num_input_tokens_seen": 159782560, "step": 73965 }, { "epoch": 12.066884176182707, "grad_norm": 0.004285324830561876, "learning_rate": 0.00040744787515638585, "loss": 0.009, "num_input_tokens_seen": 159792480, "step": 73970 }, { "epoch": 12.067699836867863, "grad_norm": 0.0040510534308850765, "learning_rate": 0.00040737792633813624, "loss": 0.0043, "num_input_tokens_seen": 159803456, "step": 73975 }, { "epoch": 12.068515497553017, "grad_norm": 0.0067472876980900764, "learning_rate": 0.00040730797939699014, "loss": 0.0909, "num_input_tokens_seen": 159813984, "step": 73980 }, { "epoch": 12.069331158238173, "grad_norm": 0.006962975487112999, "learning_rate": 0.00040723803433436573, "loss": 0.0056, "num_input_tokens_seen": 159824608, "step": 73985 }, { "epoch": 12.070146818923329, "grad_norm": 0.10496911406517029, "learning_rate": 0.00040716809115167997, "loss": 0.0192, "num_input_tokens_seen": 159834720, "step": 73990 }, { "epoch": 12.070962479608482, "grad_norm": 0.022543715313076973, "learning_rate": 0.0004070981498503508, "loss": 0.0316, "num_input_tokens_seen": 159846080, "step": 73995 }, { "epoch": 12.071778140293638, "grad_norm": 0.0111811188980937, "learning_rate": 0.0004070282104317953, "loss": 0.0073, "num_input_tokens_seen": 159857792, "step": 74000 }, { "epoch": 12.072593800978792, "grad_norm": 0.023856064304709435, "learning_rate": 0.0004069582728974313, "loss": 0.0073, "num_input_tokens_seen": 159869056, "step": 74005 }, { "epoch": 12.073409461663948, "grad_norm": 0.0023362748324871063, "learning_rate": 0.00040688833724867565, "loss": 0.0029, "num_input_tokens_seen": 159880192, "step": 74010 }, { "epoch": 12.074225122349104, "grad_norm": 0.0023756767623126507, "learning_rate": 0.0004068184034869462, "loss": 0.0058, "num_input_tokens_seen": 159892288, "step": 74015 }, { "epoch": 12.075040783034257, "grad_norm": 0.008905721828341484, "learning_rate": 0.0004067484716136598, "loss": 0.0026, "num_input_tokens_seen": 159903200, "step": 74020 }, { "epoch": 12.075856443719413, "grad_norm": 0.0030674946028739214, "learning_rate": 0.00040667854163023415, "loss": 0.0576, "num_input_tokens_seen": 159913952, "step": 74025 }, { "epoch": 12.076672104404567, "grad_norm": 0.0404362678527832, "learning_rate": 0.000406608613538086, "loss": 0.0075, "num_input_tokens_seen": 159925248, "step": 74030 }, { "epoch": 12.077487765089723, "grad_norm": 0.013325365260243416, "learning_rate": 0.000406538687338633, "loss": 0.07, "num_input_tokens_seen": 159936096, "step": 74035 }, { "epoch": 12.078303425774878, "grad_norm": 0.026394739747047424, "learning_rate": 0.0004064687630332919, "loss": 0.005, "num_input_tokens_seen": 159946592, "step": 74040 }, { "epoch": 12.079119086460032, "grad_norm": 0.16963490843772888, "learning_rate": 0.0004063988406234801, "loss": 0.0147, "num_input_tokens_seen": 159957248, "step": 74045 }, { "epoch": 12.079934747145188, "grad_norm": 0.1959126889705658, "learning_rate": 0.0004063289201106144, "loss": 0.0144, "num_input_tokens_seen": 159968384, "step": 74050 }, { "epoch": 12.080750407830342, "grad_norm": 0.006187156308442354, "learning_rate": 0.000406259001496112, "loss": 0.015, "num_input_tokens_seen": 159977536, "step": 74055 }, { "epoch": 12.081566068515498, "grad_norm": 0.008363723754882812, "learning_rate": 0.00040618908478138986, "loss": 0.0087, "num_input_tokens_seen": 159987904, "step": 74060 }, { "epoch": 12.082381729200652, "grad_norm": 0.025374621152877808, "learning_rate": 0.0004061191699678649, "loss": 0.0057, "num_input_tokens_seen": 159998080, "step": 74065 }, { "epoch": 12.083197389885807, "grad_norm": 0.001546714105643332, "learning_rate": 0.0004060492570569542, "loss": 0.0067, "num_input_tokens_seen": 160009984, "step": 74070 }, { "epoch": 12.084013050570963, "grad_norm": 0.014130688272416592, "learning_rate": 0.0004059793460500742, "loss": 0.0185, "num_input_tokens_seen": 160021728, "step": 74075 }, { "epoch": 12.084828711256117, "grad_norm": 0.3345913589000702, "learning_rate": 0.0004059094369486423, "loss": 0.0118, "num_input_tokens_seen": 160031264, "step": 74080 }, { "epoch": 12.085644371941273, "grad_norm": 0.0023609360214322805, "learning_rate": 0.00040583952975407493, "loss": 0.0036, "num_input_tokens_seen": 160042560, "step": 74085 }, { "epoch": 12.086460032626427, "grad_norm": 0.0316849909722805, "learning_rate": 0.000405769624467789, "loss": 0.0125, "num_input_tokens_seen": 160053856, "step": 74090 }, { "epoch": 12.087275693311582, "grad_norm": 0.0007231601630337536, "learning_rate": 0.0004056997210912011, "loss": 0.0156, "num_input_tokens_seen": 160064800, "step": 74095 }, { "epoch": 12.088091353996738, "grad_norm": 0.014824706129729748, "learning_rate": 0.00040562981962572803, "loss": 0.1165, "num_input_tokens_seen": 160076224, "step": 74100 }, { "epoch": 12.088907014681892, "grad_norm": 0.0029631692450493574, "learning_rate": 0.00040555992007278624, "loss": 0.0088, "num_input_tokens_seen": 160087328, "step": 74105 }, { "epoch": 12.089722675367048, "grad_norm": 0.01084907166659832, "learning_rate": 0.00040549002243379267, "loss": 0.0584, "num_input_tokens_seen": 160097184, "step": 74110 }, { "epoch": 12.090538336052202, "grad_norm": 0.017258066684007645, "learning_rate": 0.00040542012671016355, "loss": 0.0036, "num_input_tokens_seen": 160107392, "step": 74115 }, { "epoch": 12.091353996737357, "grad_norm": 0.022070029750466347, "learning_rate": 0.00040535023290331573, "loss": 0.0028, "num_input_tokens_seen": 160118176, "step": 74120 }, { "epoch": 12.092169657422513, "grad_norm": 0.013220726512372494, "learning_rate": 0.0004052803410146653, "loss": 0.0159, "num_input_tokens_seen": 160129792, "step": 74125 }, { "epoch": 12.092985318107667, "grad_norm": 0.018424084410071373, "learning_rate": 0.0004052104510456291, "loss": 0.0102, "num_input_tokens_seen": 160139712, "step": 74130 }, { "epoch": 12.093800978792823, "grad_norm": 0.0037279201205819845, "learning_rate": 0.00040514056299762314, "loss": 0.1487, "num_input_tokens_seen": 160150880, "step": 74135 }, { "epoch": 12.094616639477977, "grad_norm": 0.00917022954672575, "learning_rate": 0.0004050706768720642, "loss": 0.1348, "num_input_tokens_seen": 160162368, "step": 74140 }, { "epoch": 12.095432300163132, "grad_norm": 0.07259730249643326, "learning_rate": 0.00040500079267036834, "loss": 0.0033, "num_input_tokens_seen": 160172864, "step": 74145 }, { "epoch": 12.096247960848286, "grad_norm": 0.004743486177176237, "learning_rate": 0.000404930910393952, "loss": 0.0112, "num_input_tokens_seen": 160183040, "step": 74150 }, { "epoch": 12.097063621533442, "grad_norm": 0.0031580179929733276, "learning_rate": 0.0004048610300442313, "loss": 0.0051, "num_input_tokens_seen": 160192416, "step": 74155 }, { "epoch": 12.097879282218598, "grad_norm": 0.4201613664627075, "learning_rate": 0.0004047911516226226, "loss": 0.0184, "num_input_tokens_seen": 160202496, "step": 74160 }, { "epoch": 12.098694942903752, "grad_norm": 0.010104143060743809, "learning_rate": 0.0004047212751305418, "loss": 0.0021, "num_input_tokens_seen": 160211904, "step": 74165 }, { "epoch": 12.099510603588907, "grad_norm": 0.021120961755514145, "learning_rate": 0.00040465140056940524, "loss": 0.004, "num_input_tokens_seen": 160221632, "step": 74170 }, { "epoch": 12.100326264274061, "grad_norm": 0.04819793999195099, "learning_rate": 0.00040458152794062925, "loss": 0.0081, "num_input_tokens_seen": 160232864, "step": 74175 }, { "epoch": 12.101141924959217, "grad_norm": 0.01660446636378765, "learning_rate": 0.00040451165724562937, "loss": 0.0046, "num_input_tokens_seen": 160244512, "step": 74180 }, { "epoch": 12.101957585644373, "grad_norm": 0.0020989153999835253, "learning_rate": 0.0004044417884858221, "loss": 0.3075, "num_input_tokens_seen": 160255872, "step": 74185 }, { "epoch": 12.102773246329527, "grad_norm": 0.0209684856235981, "learning_rate": 0.0004043719216626231, "loss": 0.0249, "num_input_tokens_seen": 160266432, "step": 74190 }, { "epoch": 12.103588907014682, "grad_norm": 1.1977218389511108, "learning_rate": 0.00040430205677744857, "loss": 0.0415, "num_input_tokens_seen": 160276928, "step": 74195 }, { "epoch": 12.104404567699836, "grad_norm": 0.004599343985319138, "learning_rate": 0.00040423219383171405, "loss": 0.0048, "num_input_tokens_seen": 160287072, "step": 74200 }, { "epoch": 12.105220228384992, "grad_norm": 0.01282755471765995, "learning_rate": 0.0004041623328268358, "loss": 0.0224, "num_input_tokens_seen": 160298240, "step": 74205 }, { "epoch": 12.106035889070148, "grad_norm": 0.06137290969491005, "learning_rate": 0.0004040924737642293, "loss": 0.0072, "num_input_tokens_seen": 160310016, "step": 74210 }, { "epoch": 12.106851549755302, "grad_norm": 0.004412582144141197, "learning_rate": 0.0004040226166453107, "loss": 0.0158, "num_input_tokens_seen": 160320928, "step": 74215 }, { "epoch": 12.107667210440457, "grad_norm": 0.022301241755485535, "learning_rate": 0.00040395276147149524, "loss": 0.0096, "num_input_tokens_seen": 160331360, "step": 74220 }, { "epoch": 12.108482871125611, "grad_norm": 0.002443633507937193, "learning_rate": 0.000403882908244199, "loss": 0.0052, "num_input_tokens_seen": 160342496, "step": 74225 }, { "epoch": 12.109298531810767, "grad_norm": 0.028277039527893066, "learning_rate": 0.00040381305696483773, "loss": 0.0746, "num_input_tokens_seen": 160352960, "step": 74230 }, { "epoch": 12.11011419249592, "grad_norm": 0.006705199368298054, "learning_rate": 0.00040374320763482673, "loss": 0.0211, "num_input_tokens_seen": 160363936, "step": 74235 }, { "epoch": 12.110929853181077, "grad_norm": 0.24397537112236023, "learning_rate": 0.0004036733602555818, "loss": 0.0758, "num_input_tokens_seen": 160375520, "step": 74240 }, { "epoch": 12.111745513866232, "grad_norm": 0.0006988913519307971, "learning_rate": 0.0004036035148285184, "loss": 0.0076, "num_input_tokens_seen": 160387072, "step": 74245 }, { "epoch": 12.112561174551386, "grad_norm": 0.0005958918482065201, "learning_rate": 0.00040353367135505193, "loss": 0.0036, "num_input_tokens_seen": 160397824, "step": 74250 }, { "epoch": 12.113376835236542, "grad_norm": 0.012600153684616089, "learning_rate": 0.00040346382983659826, "loss": 0.0377, "num_input_tokens_seen": 160409056, "step": 74255 }, { "epoch": 12.114192495921696, "grad_norm": 0.03363886475563049, "learning_rate": 0.0004033939902745723, "loss": 0.0207, "num_input_tokens_seen": 160417984, "step": 74260 }, { "epoch": 12.115008156606851, "grad_norm": 0.027498042210936546, "learning_rate": 0.0004033241526703899, "loss": 0.0037, "num_input_tokens_seen": 160428864, "step": 74265 }, { "epoch": 12.115823817292007, "grad_norm": 0.013976640067994595, "learning_rate": 0.00040325431702546596, "loss": 0.0187, "num_input_tokens_seen": 160439296, "step": 74270 }, { "epoch": 12.116639477977161, "grad_norm": 0.09229818731546402, "learning_rate": 0.000403184483341216, "loss": 0.0075, "num_input_tokens_seen": 160450336, "step": 74275 }, { "epoch": 12.117455138662317, "grad_norm": 0.004774386063218117, "learning_rate": 0.0004031146516190556, "loss": 0.0143, "num_input_tokens_seen": 160461152, "step": 74280 }, { "epoch": 12.11827079934747, "grad_norm": 0.0031991363503038883, "learning_rate": 0.00040304482186039937, "loss": 0.0296, "num_input_tokens_seen": 160472384, "step": 74285 }, { "epoch": 12.119086460032626, "grad_norm": 0.08653301745653152, "learning_rate": 0.0004029749940666631, "loss": 0.008, "num_input_tokens_seen": 160482080, "step": 74290 }, { "epoch": 12.119902120717782, "grad_norm": 0.035763002932071686, "learning_rate": 0.00040290516823926145, "loss": 0.0205, "num_input_tokens_seen": 160492864, "step": 74295 }, { "epoch": 12.120717781402936, "grad_norm": 0.1830175220966339, "learning_rate": 0.0004028353443796099, "loss": 0.0184, "num_input_tokens_seen": 160504064, "step": 74300 }, { "epoch": 12.121533442088092, "grad_norm": 0.0060828630812466145, "learning_rate": 0.00040276552248912317, "loss": 0.0035, "num_input_tokens_seen": 160513888, "step": 74305 }, { "epoch": 12.122349102773246, "grad_norm": 0.013971041887998581, "learning_rate": 0.00040269570256921673, "loss": 0.0127, "num_input_tokens_seen": 160524512, "step": 74310 }, { "epoch": 12.123164763458401, "grad_norm": 0.5153065323829651, "learning_rate": 0.00040262588462130507, "loss": 0.1341, "num_input_tokens_seen": 160535712, "step": 74315 }, { "epoch": 12.123980424143557, "grad_norm": 0.01689624786376953, "learning_rate": 0.0004025560686468036, "loss": 0.0043, "num_input_tokens_seen": 160546432, "step": 74320 }, { "epoch": 12.124796084828711, "grad_norm": 0.058722566813230515, "learning_rate": 0.0004024862546471268, "loss": 0.0054, "num_input_tokens_seen": 160556704, "step": 74325 }, { "epoch": 12.125611745513867, "grad_norm": 0.006426146719604731, "learning_rate": 0.00040241644262368993, "loss": 0.0029, "num_input_tokens_seen": 160568064, "step": 74330 }, { "epoch": 12.12642740619902, "grad_norm": 0.007449703756719828, "learning_rate": 0.00040234663257790747, "loss": 0.0075, "num_input_tokens_seen": 160578496, "step": 74335 }, { "epoch": 12.127243066884176, "grad_norm": 0.03456910327076912, "learning_rate": 0.00040227682451119464, "loss": 0.1314, "num_input_tokens_seen": 160588352, "step": 74340 }, { "epoch": 12.12805872756933, "grad_norm": 0.006443498190492392, "learning_rate": 0.0004022070184249657, "loss": 0.0055, "num_input_tokens_seen": 160599232, "step": 74345 }, { "epoch": 12.128874388254486, "grad_norm": 0.003807036206126213, "learning_rate": 0.0004021372143206358, "loss": 0.0759, "num_input_tokens_seen": 160610400, "step": 74350 }, { "epoch": 12.129690048939642, "grad_norm": 0.3225690722465515, "learning_rate": 0.0004020674121996191, "loss": 0.0325, "num_input_tokens_seen": 160622368, "step": 74355 }, { "epoch": 12.130505709624796, "grad_norm": 0.06799621880054474, "learning_rate": 0.0004019976120633308, "loss": 0.139, "num_input_tokens_seen": 160633344, "step": 74360 }, { "epoch": 12.131321370309951, "grad_norm": 0.009520080871880054, "learning_rate": 0.000401927813913185, "loss": 0.0027, "num_input_tokens_seen": 160643552, "step": 74365 }, { "epoch": 12.132137030995105, "grad_norm": 0.016409458592534065, "learning_rate": 0.0004018580177505966, "loss": 0.0079, "num_input_tokens_seen": 160653280, "step": 74370 }, { "epoch": 12.132952691680261, "grad_norm": 0.0015376622322946787, "learning_rate": 0.00040178822357698, "loss": 0.0117, "num_input_tokens_seen": 160664224, "step": 74375 }, { "epoch": 12.133768352365417, "grad_norm": 0.0044131772592663765, "learning_rate": 0.0004017184313937494, "loss": 0.0278, "num_input_tokens_seen": 160676128, "step": 74380 }, { "epoch": 12.13458401305057, "grad_norm": 0.0254372451454401, "learning_rate": 0.0004016486412023198, "loss": 0.0107, "num_input_tokens_seen": 160686400, "step": 74385 }, { "epoch": 12.135399673735726, "grad_norm": 0.02156521938741207, "learning_rate": 0.000401578853004105, "loss": 0.0034, "num_input_tokens_seen": 160696832, "step": 74390 }, { "epoch": 12.13621533442088, "grad_norm": 0.008907387033104897, "learning_rate": 0.00040150906680051974, "loss": 0.0031, "num_input_tokens_seen": 160707712, "step": 74395 }, { "epoch": 12.137030995106036, "grad_norm": 0.12198542058467865, "learning_rate": 0.00040143928259297817, "loss": 0.0279, "num_input_tokens_seen": 160719104, "step": 74400 }, { "epoch": 12.137846655791192, "grad_norm": 0.0023205087054520845, "learning_rate": 0.00040136950038289457, "loss": 0.0047, "num_input_tokens_seen": 160729920, "step": 74405 }, { "epoch": 12.138662316476346, "grad_norm": 0.008131838403642178, "learning_rate": 0.0004012997201716831, "loss": 0.0156, "num_input_tokens_seen": 160740384, "step": 74410 }, { "epoch": 12.139477977161501, "grad_norm": 0.07084905356168747, "learning_rate": 0.0004012299419607581, "loss": 0.0119, "num_input_tokens_seen": 160751360, "step": 74415 }, { "epoch": 12.140293637846655, "grad_norm": 0.008088597096502781, "learning_rate": 0.00040116016575153344, "loss": 0.0051, "num_input_tokens_seen": 160763488, "step": 74420 }, { "epoch": 12.141109298531811, "grad_norm": 0.012776483781635761, "learning_rate": 0.0004010903915454237, "loss": 0.0038, "num_input_tokens_seen": 160775072, "step": 74425 }, { "epoch": 12.141924959216965, "grad_norm": 0.33567920327186584, "learning_rate": 0.0004010206193438424, "loss": 0.1305, "num_input_tokens_seen": 160785984, "step": 74430 }, { "epoch": 12.14274061990212, "grad_norm": 0.005716219078749418, "learning_rate": 0.0004009508491482041, "loss": 0.0154, "num_input_tokens_seen": 160796576, "step": 74435 }, { "epoch": 12.143556280587276, "grad_norm": 0.005408111959695816, "learning_rate": 0.00040088108095992216, "loss": 0.0671, "num_input_tokens_seen": 160806752, "step": 74440 }, { "epoch": 12.14437194127243, "grad_norm": 0.019407780840992928, "learning_rate": 0.00040081131478041115, "loss": 0.062, "num_input_tokens_seen": 160817408, "step": 74445 }, { "epoch": 12.145187601957586, "grad_norm": 0.004047623835504055, "learning_rate": 0.00040074155061108443, "loss": 0.0075, "num_input_tokens_seen": 160828160, "step": 74450 }, { "epoch": 12.14600326264274, "grad_norm": 0.005578754004091024, "learning_rate": 0.00040067178845335633, "loss": 0.0032, "num_input_tokens_seen": 160839072, "step": 74455 }, { "epoch": 12.146818923327896, "grad_norm": 0.005533120129257441, "learning_rate": 0.0004006020283086402, "loss": 0.0069, "num_input_tokens_seen": 160851200, "step": 74460 }, { "epoch": 12.147634584013051, "grad_norm": 1.1328115463256836, "learning_rate": 0.00040053227017835033, "loss": 0.0612, "num_input_tokens_seen": 160862976, "step": 74465 }, { "epoch": 12.148450244698205, "grad_norm": 0.1987270712852478, "learning_rate": 0.00040046251406389993, "loss": 0.1336, "num_input_tokens_seen": 160874752, "step": 74470 }, { "epoch": 12.149265905383361, "grad_norm": 0.03130248934030533, "learning_rate": 0.0004003927599667032, "loss": 0.0089, "num_input_tokens_seen": 160885312, "step": 74475 }, { "epoch": 12.150081566068515, "grad_norm": 0.02075079269707203, "learning_rate": 0.0004003230078881733, "loss": 0.018, "num_input_tokens_seen": 160896704, "step": 74480 }, { "epoch": 12.15089722675367, "grad_norm": 0.004763288889080286, "learning_rate": 0.0004002532578297241, "loss": 0.0017, "num_input_tokens_seen": 160907680, "step": 74485 }, { "epoch": 12.151712887438826, "grad_norm": 0.012047209776937962, "learning_rate": 0.0004001835097927694, "loss": 0.0045, "num_input_tokens_seen": 160916992, "step": 74490 }, { "epoch": 12.15252854812398, "grad_norm": 0.024075627326965332, "learning_rate": 0.00040011376377872235, "loss": 0.0081, "num_input_tokens_seen": 160926816, "step": 74495 }, { "epoch": 12.153344208809136, "grad_norm": 0.005183520261198282, "learning_rate": 0.0004000440197889967, "loss": 0.1024, "num_input_tokens_seen": 160938112, "step": 74500 }, { "epoch": 12.15415986949429, "grad_norm": 0.031936485320329666, "learning_rate": 0.0003999742778250056, "loss": 0.0055, "num_input_tokens_seen": 160948704, "step": 74505 }, { "epoch": 12.154975530179446, "grad_norm": 0.6701197624206543, "learning_rate": 0.0003999045378881629, "loss": 0.082, "num_input_tokens_seen": 160959840, "step": 74510 }, { "epoch": 12.1557911908646, "grad_norm": 0.01326004695147276, "learning_rate": 0.0003998347999798815, "loss": 0.0083, "num_input_tokens_seen": 160971488, "step": 74515 }, { "epoch": 12.156606851549755, "grad_norm": 0.0024851495400071144, "learning_rate": 0.00039976506410157513, "loss": 0.0033, "num_input_tokens_seen": 160982208, "step": 74520 }, { "epoch": 12.15742251223491, "grad_norm": 0.0026938130613416433, "learning_rate": 0.0003996953302546567, "loss": 0.0171, "num_input_tokens_seen": 160994304, "step": 74525 }, { "epoch": 12.158238172920065, "grad_norm": 0.03997796028852463, "learning_rate": 0.0003996255984405399, "loss": 0.0037, "num_input_tokens_seen": 161004096, "step": 74530 }, { "epoch": 12.15905383360522, "grad_norm": 0.002501038834452629, "learning_rate": 0.00039955586866063735, "loss": 0.0196, "num_input_tokens_seen": 161013920, "step": 74535 }, { "epoch": 12.159869494290374, "grad_norm": 0.0058213709853589535, "learning_rate": 0.0003994861409163628, "loss": 0.0027, "num_input_tokens_seen": 161025728, "step": 74540 }, { "epoch": 12.16068515497553, "grad_norm": 0.04390028864145279, "learning_rate": 0.000399416415209129, "loss": 0.1984, "num_input_tokens_seen": 161037120, "step": 74545 }, { "epoch": 12.161500815660686, "grad_norm": 0.6059911847114563, "learning_rate": 0.0003993466915403492, "loss": 0.0559, "num_input_tokens_seen": 161048128, "step": 74550 }, { "epoch": 12.16231647634584, "grad_norm": 0.002712165005505085, "learning_rate": 0.0003992769699114364, "loss": 0.122, "num_input_tokens_seen": 161059424, "step": 74555 }, { "epoch": 12.163132137030995, "grad_norm": 0.11503525823354721, "learning_rate": 0.0003992072503238035, "loss": 0.0098, "num_input_tokens_seen": 161070848, "step": 74560 }, { "epoch": 12.16394779771615, "grad_norm": 0.005386181641370058, "learning_rate": 0.0003991375327788635, "loss": 0.0185, "num_input_tokens_seen": 161083104, "step": 74565 }, { "epoch": 12.164763458401305, "grad_norm": 0.01078125275671482, "learning_rate": 0.00039906781727802956, "loss": 0.1355, "num_input_tokens_seen": 161094560, "step": 74570 }, { "epoch": 12.16557911908646, "grad_norm": 0.012528739869594574, "learning_rate": 0.0003989981038227141, "loss": 0.0135, "num_input_tokens_seen": 161104256, "step": 74575 }, { "epoch": 12.166394779771615, "grad_norm": 0.0010541232768446207, "learning_rate": 0.0003989283924143304, "loss": 0.0044, "num_input_tokens_seen": 161114176, "step": 74580 }, { "epoch": 12.16721044045677, "grad_norm": 0.017634112387895584, "learning_rate": 0.0003988586830542909, "loss": 0.0076, "num_input_tokens_seen": 161124992, "step": 74585 }, { "epoch": 12.168026101141924, "grad_norm": 0.006476237438619137, "learning_rate": 0.00039878897574400845, "loss": 0.0054, "num_input_tokens_seen": 161135488, "step": 74590 }, { "epoch": 12.16884176182708, "grad_norm": 0.021481206640601158, "learning_rate": 0.00039871927048489605, "loss": 0.0049, "num_input_tokens_seen": 161147648, "step": 74595 }, { "epoch": 12.169657422512234, "grad_norm": 0.006172158755362034, "learning_rate": 0.0003986495672783659, "loss": 0.0068, "num_input_tokens_seen": 161157120, "step": 74600 }, { "epoch": 12.17047308319739, "grad_norm": 0.005506650544703007, "learning_rate": 0.000398579866125831, "loss": 0.061, "num_input_tokens_seen": 161169152, "step": 74605 }, { "epoch": 12.171288743882545, "grad_norm": 0.06159405782818794, "learning_rate": 0.00039851016702870356, "loss": 0.1283, "num_input_tokens_seen": 161179808, "step": 74610 }, { "epoch": 12.1721044045677, "grad_norm": 0.30520564317703247, "learning_rate": 0.0003984404699883966, "loss": 0.0298, "num_input_tokens_seen": 161190784, "step": 74615 }, { "epoch": 12.172920065252855, "grad_norm": 0.015198386274278164, "learning_rate": 0.00039837077500632213, "loss": 0.0062, "num_input_tokens_seen": 161202176, "step": 74620 }, { "epoch": 12.173735725938009, "grad_norm": 0.015425390563905239, "learning_rate": 0.00039830108208389306, "loss": 0.0026, "num_input_tokens_seen": 161213248, "step": 74625 }, { "epoch": 12.174551386623165, "grad_norm": 0.00755777582526207, "learning_rate": 0.00039823139122252126, "loss": 0.0124, "num_input_tokens_seen": 161222752, "step": 74630 }, { "epoch": 12.17536704730832, "grad_norm": 0.004761831369251013, "learning_rate": 0.0003981617024236197, "loss": 0.0026, "num_input_tokens_seen": 161233824, "step": 74635 }, { "epoch": 12.176182707993474, "grad_norm": 0.0022641567047685385, "learning_rate": 0.0003980920156886003, "loss": 0.0071, "num_input_tokens_seen": 161243840, "step": 74640 }, { "epoch": 12.17699836867863, "grad_norm": 0.02729635499417782, "learning_rate": 0.0003980223310188756, "loss": 0.0047, "num_input_tokens_seen": 161254560, "step": 74645 }, { "epoch": 12.177814029363784, "grad_norm": 0.06542062014341354, "learning_rate": 0.00039795264841585755, "loss": 0.0211, "num_input_tokens_seen": 161264960, "step": 74650 }, { "epoch": 12.17862969004894, "grad_norm": 0.027670329436659813, "learning_rate": 0.00039788296788095866, "loss": 0.0032, "num_input_tokens_seen": 161276128, "step": 74655 }, { "epoch": 12.179445350734095, "grad_norm": 0.008770488202571869, "learning_rate": 0.00039781328941559084, "loss": 0.0494, "num_input_tokens_seen": 161288288, "step": 74660 }, { "epoch": 12.18026101141925, "grad_norm": 0.04313148930668831, "learning_rate": 0.0003977436130211666, "loss": 0.0081, "num_input_tokens_seen": 161299232, "step": 74665 }, { "epoch": 12.181076672104405, "grad_norm": 0.001219844096340239, "learning_rate": 0.0003976739386990975, "loss": 0.0134, "num_input_tokens_seen": 161309600, "step": 74670 }, { "epoch": 12.181892332789559, "grad_norm": 0.013389154337346554, "learning_rate": 0.0003976042664507961, "loss": 0.0415, "num_input_tokens_seen": 161320256, "step": 74675 }, { "epoch": 12.182707993474715, "grad_norm": 1.1981724500656128, "learning_rate": 0.0003975345962776738, "loss": 0.0513, "num_input_tokens_seen": 161331840, "step": 74680 }, { "epoch": 12.18352365415987, "grad_norm": 0.006036388222128153, "learning_rate": 0.0003974649281811431, "loss": 0.0065, "num_input_tokens_seen": 161342656, "step": 74685 }, { "epoch": 12.184339314845024, "grad_norm": 0.006011773832142353, "learning_rate": 0.00039739526216261566, "loss": 0.005, "num_input_tokens_seen": 161352768, "step": 74690 }, { "epoch": 12.18515497553018, "grad_norm": 0.0059346966445446014, "learning_rate": 0.00039732559822350336, "loss": 0.1203, "num_input_tokens_seen": 161364000, "step": 74695 }, { "epoch": 12.185970636215334, "grad_norm": 0.3481042981147766, "learning_rate": 0.00039725593636521817, "loss": 0.0506, "num_input_tokens_seen": 161374816, "step": 74700 }, { "epoch": 12.18678629690049, "grad_norm": 0.07168328016996384, "learning_rate": 0.0003971862765891716, "loss": 0.0671, "num_input_tokens_seen": 161385088, "step": 74705 }, { "epoch": 12.187601957585644, "grad_norm": 0.11459054052829742, "learning_rate": 0.00039711661889677577, "loss": 0.0086, "num_input_tokens_seen": 161395136, "step": 74710 }, { "epoch": 12.1884176182708, "grad_norm": 0.026510460302233696, "learning_rate": 0.00039704696328944205, "loss": 0.0036, "num_input_tokens_seen": 161405248, "step": 74715 }, { "epoch": 12.189233278955955, "grad_norm": 0.006999279838055372, "learning_rate": 0.0003969773097685823, "loss": 0.0073, "num_input_tokens_seen": 161414304, "step": 74720 }, { "epoch": 12.190048939641109, "grad_norm": 0.008538886904716492, "learning_rate": 0.000396907658335608, "loss": 0.0039, "num_input_tokens_seen": 161425184, "step": 74725 }, { "epoch": 12.190864600326265, "grad_norm": 0.024406736716628075, "learning_rate": 0.0003968380089919308, "loss": 0.0967, "num_input_tokens_seen": 161436352, "step": 74730 }, { "epoch": 12.191680261011419, "grad_norm": 0.007404988165944815, "learning_rate": 0.0003967683617389621, "loss": 0.0044, "num_input_tokens_seen": 161447424, "step": 74735 }, { "epoch": 12.192495921696574, "grad_norm": 0.024634407833218575, "learning_rate": 0.0003966987165781138, "loss": 0.007, "num_input_tokens_seen": 161458496, "step": 74740 }, { "epoch": 12.19331158238173, "grad_norm": 0.10026438534259796, "learning_rate": 0.00039662907351079675, "loss": 0.1007, "num_input_tokens_seen": 161469152, "step": 74745 }, { "epoch": 12.194127243066884, "grad_norm": 0.016706952825188637, "learning_rate": 0.00039655943253842293, "loss": 0.0027, "num_input_tokens_seen": 161479616, "step": 74750 }, { "epoch": 12.19494290375204, "grad_norm": 0.0008600183646194637, "learning_rate": 0.00039648979366240325, "loss": 0.003, "num_input_tokens_seen": 161491136, "step": 74755 }, { "epoch": 12.195758564437194, "grad_norm": 0.025418315082788467, "learning_rate": 0.00039642015688414936, "loss": 0.0029, "num_input_tokens_seen": 161502144, "step": 74760 }, { "epoch": 12.19657422512235, "grad_norm": 0.002887872513383627, "learning_rate": 0.00039635052220507216, "loss": 0.0019, "num_input_tokens_seen": 161513248, "step": 74765 }, { "epoch": 12.197389885807505, "grad_norm": 0.017393076792359352, "learning_rate": 0.0003962808896265834, "loss": 0.0523, "num_input_tokens_seen": 161523424, "step": 74770 }, { "epoch": 12.198205546492659, "grad_norm": 0.25441282987594604, "learning_rate": 0.0003962112591500937, "loss": 0.0195, "num_input_tokens_seen": 161535200, "step": 74775 }, { "epoch": 12.199021207177815, "grad_norm": 0.18550816178321838, "learning_rate": 0.00039614163077701474, "loss": 0.1819, "num_input_tokens_seen": 161543808, "step": 74780 }, { "epoch": 12.199836867862969, "grad_norm": 0.3364203870296478, "learning_rate": 0.00039607200450875716, "loss": 0.0632, "num_input_tokens_seen": 161554656, "step": 74785 }, { "epoch": 12.200652528548124, "grad_norm": 0.003161477390676737, "learning_rate": 0.0003960023803467325, "loss": 0.0021, "num_input_tokens_seen": 161565920, "step": 74790 }, { "epoch": 12.201468189233278, "grad_norm": 0.007758776657283306, "learning_rate": 0.0003959327582923513, "loss": 0.0041, "num_input_tokens_seen": 161577824, "step": 74795 }, { "epoch": 12.202283849918434, "grad_norm": 0.0124747259542346, "learning_rate": 0.000395863138347025, "loss": 0.0131, "num_input_tokens_seen": 161588352, "step": 74800 }, { "epoch": 12.20309951060359, "grad_norm": 0.019156094640493393, "learning_rate": 0.0003957935205121641, "loss": 0.0227, "num_input_tokens_seen": 161599680, "step": 74805 }, { "epoch": 12.203915171288743, "grad_norm": 0.032875653356313705, "learning_rate": 0.00039572390478917973, "loss": 0.0123, "num_input_tokens_seen": 161610016, "step": 74810 }, { "epoch": 12.2047308319739, "grad_norm": 0.0038414266891777515, "learning_rate": 0.00039565429117948287, "loss": 0.0075, "num_input_tokens_seen": 161621184, "step": 74815 }, { "epoch": 12.205546492659053, "grad_norm": 0.0043778130784630775, "learning_rate": 0.000395584679684484, "loss": 0.1721, "num_input_tokens_seen": 161631872, "step": 74820 }, { "epoch": 12.206362153344209, "grad_norm": 0.006347167305648327, "learning_rate": 0.00039551507030559423, "loss": 0.003, "num_input_tokens_seen": 161642304, "step": 74825 }, { "epoch": 12.207177814029365, "grad_norm": 0.40752550959587097, "learning_rate": 0.0003954454630442239, "loss": 0.1171, "num_input_tokens_seen": 161652672, "step": 74830 }, { "epoch": 12.207993474714518, "grad_norm": 0.0724559798836708, "learning_rate": 0.0003953758579017842, "loss": 0.0265, "num_input_tokens_seen": 161664000, "step": 74835 }, { "epoch": 12.208809135399674, "grad_norm": 0.08504586666822433, "learning_rate": 0.00039530625487968507, "loss": 0.0195, "num_input_tokens_seen": 161675296, "step": 74840 }, { "epoch": 12.209624796084828, "grad_norm": 0.01224282942712307, "learning_rate": 0.00039523665397933784, "loss": 0.0058, "num_input_tokens_seen": 161686880, "step": 74845 }, { "epoch": 12.210440456769984, "grad_norm": 1.089669942855835, "learning_rate": 0.0003951670552021525, "loss": 0.1151, "num_input_tokens_seen": 161697408, "step": 74850 }, { "epoch": 12.21125611745514, "grad_norm": 0.0018654355080798268, "learning_rate": 0.0003950974585495399, "loss": 0.012, "num_input_tokens_seen": 161707616, "step": 74855 }, { "epoch": 12.212071778140293, "grad_norm": 0.053873226046562195, "learning_rate": 0.0003950278640229103, "loss": 0.0099, "num_input_tokens_seen": 161718912, "step": 74860 }, { "epoch": 12.21288743882545, "grad_norm": 0.0093051353469491, "learning_rate": 0.0003949582716236743, "loss": 0.0042, "num_input_tokens_seen": 161729280, "step": 74865 }, { "epoch": 12.213703099510603, "grad_norm": 0.02878117561340332, "learning_rate": 0.0003948886813532421, "loss": 0.1499, "num_input_tokens_seen": 161739456, "step": 74870 }, { "epoch": 12.214518760195759, "grad_norm": 0.004532721359282732, "learning_rate": 0.00039481909321302413, "loss": 0.1098, "num_input_tokens_seen": 161750464, "step": 74875 }, { "epoch": 12.215334420880913, "grad_norm": 0.005164369475096464, "learning_rate": 0.0003947495072044306, "loss": 0.0166, "num_input_tokens_seen": 161761760, "step": 74880 }, { "epoch": 12.216150081566068, "grad_norm": 0.00576377147808671, "learning_rate": 0.00039467992332887196, "loss": 0.0097, "num_input_tokens_seen": 161773344, "step": 74885 }, { "epoch": 12.216965742251224, "grad_norm": 0.024265503510832787, "learning_rate": 0.0003946103415877582, "loss": 0.0151, "num_input_tokens_seen": 161785440, "step": 74890 }, { "epoch": 12.217781402936378, "grad_norm": 0.008004284463822842, "learning_rate": 0.00039454076198249964, "loss": 0.0237, "num_input_tokens_seen": 161795808, "step": 74895 }, { "epoch": 12.218597063621534, "grad_norm": 0.0055056121200323105, "learning_rate": 0.00039447118451450613, "loss": 0.0036, "num_input_tokens_seen": 161806432, "step": 74900 }, { "epoch": 12.219412724306688, "grad_norm": 0.624906599521637, "learning_rate": 0.00039440160918518825, "loss": 0.1011, "num_input_tokens_seen": 161816736, "step": 74905 }, { "epoch": 12.220228384991843, "grad_norm": 0.05831537023186684, "learning_rate": 0.00039433203599595546, "loss": 0.0059, "num_input_tokens_seen": 161826848, "step": 74910 }, { "epoch": 12.221044045676999, "grad_norm": 0.007869354449212551, "learning_rate": 0.00039426246494821793, "loss": 0.0118, "num_input_tokens_seen": 161837120, "step": 74915 }, { "epoch": 12.221859706362153, "grad_norm": 0.05399933084845543, "learning_rate": 0.000394192896043386, "loss": 0.0087, "num_input_tokens_seen": 161848064, "step": 74920 }, { "epoch": 12.222675367047309, "grad_norm": 0.01727372780442238, "learning_rate": 0.000394123329282869, "loss": 0.0141, "num_input_tokens_seen": 161858432, "step": 74925 }, { "epoch": 12.223491027732463, "grad_norm": 0.00785834901034832, "learning_rate": 0.0003940537646680773, "loss": 0.0043, "num_input_tokens_seen": 161869792, "step": 74930 }, { "epoch": 12.224306688417618, "grad_norm": 0.06565750390291214, "learning_rate": 0.0003939842022004202, "loss": 0.0154, "num_input_tokens_seen": 161880384, "step": 74935 }, { "epoch": 12.225122349102774, "grad_norm": 0.005559089593589306, "learning_rate": 0.00039391464188130796, "loss": 0.0386, "num_input_tokens_seen": 161889984, "step": 74940 }, { "epoch": 12.225938009787928, "grad_norm": 0.041153181344270706, "learning_rate": 0.0003938450837121499, "loss": 0.0063, "num_input_tokens_seen": 161900128, "step": 74945 }, { "epoch": 12.226753670473084, "grad_norm": 0.0016776022966951132, "learning_rate": 0.00039377552769435606, "loss": 0.0016, "num_input_tokens_seen": 161910944, "step": 74950 }, { "epoch": 12.227569331158238, "grad_norm": 0.00216303626075387, "learning_rate": 0.0003937059738293357, "loss": 0.1426, "num_input_tokens_seen": 161921600, "step": 74955 }, { "epoch": 12.228384991843393, "grad_norm": 0.003117464715614915, "learning_rate": 0.0003936364221184988, "loss": 0.0097, "num_input_tokens_seen": 161932768, "step": 74960 }, { "epoch": 12.229200652528547, "grad_norm": 0.44894319772720337, "learning_rate": 0.00039356687256325465, "loss": 0.0747, "num_input_tokens_seen": 161944480, "step": 74965 }, { "epoch": 12.230016313213703, "grad_norm": 0.8600792288780212, "learning_rate": 0.0003934973251650129, "loss": 0.0236, "num_input_tokens_seen": 161954048, "step": 74970 }, { "epoch": 12.230831973898859, "grad_norm": 0.05913609266281128, "learning_rate": 0.0003934277799251829, "loss": 0.0307, "num_input_tokens_seen": 161965792, "step": 74975 }, { "epoch": 12.231647634584013, "grad_norm": 0.010049772448837757, "learning_rate": 0.00039335823684517423, "loss": 0.0067, "num_input_tokens_seen": 161977312, "step": 74980 }, { "epoch": 12.232463295269168, "grad_norm": 0.0010888243559747934, "learning_rate": 0.00039328869592639604, "loss": 0.005, "num_input_tokens_seen": 161987680, "step": 74985 }, { "epoch": 12.233278955954322, "grad_norm": 0.0009124244097620249, "learning_rate": 0.00039321915717025797, "loss": 0.0057, "num_input_tokens_seen": 161998784, "step": 74990 }, { "epoch": 12.234094616639478, "grad_norm": 0.01991415210068226, "learning_rate": 0.00039314962057816896, "loss": 0.1298, "num_input_tokens_seen": 162009696, "step": 74995 }, { "epoch": 12.234910277324634, "grad_norm": 0.016737831756472588, "learning_rate": 0.0003930800861515385, "loss": 0.0037, "num_input_tokens_seen": 162020640, "step": 75000 }, { "epoch": 12.235725938009788, "grad_norm": 0.02599795162677765, "learning_rate": 0.00039301055389177577, "loss": 0.0141, "num_input_tokens_seen": 162031776, "step": 75005 }, { "epoch": 12.236541598694943, "grad_norm": 0.0481642484664917, "learning_rate": 0.00039294102380028987, "loss": 0.0057, "num_input_tokens_seen": 162043456, "step": 75010 }, { "epoch": 12.237357259380097, "grad_norm": 0.004535711370408535, "learning_rate": 0.0003928714958784899, "loss": 0.0181, "num_input_tokens_seen": 162055264, "step": 75015 }, { "epoch": 12.238172920065253, "grad_norm": 0.016865408048033714, "learning_rate": 0.00039280197012778493, "loss": 0.0197, "num_input_tokens_seen": 162065920, "step": 75020 }, { "epoch": 12.238988580750409, "grad_norm": 0.020147256553173065, "learning_rate": 0.0003927324465495841, "loss": 0.0058, "num_input_tokens_seen": 162076032, "step": 75025 }, { "epoch": 12.239804241435563, "grad_norm": 0.004990022629499435, "learning_rate": 0.0003926629251452963, "loss": 0.0078, "num_input_tokens_seen": 162087552, "step": 75030 }, { "epoch": 12.240619902120718, "grad_norm": 0.00859206635504961, "learning_rate": 0.0003925934059163306, "loss": 0.0023, "num_input_tokens_seen": 162097760, "step": 75035 }, { "epoch": 12.241435562805872, "grad_norm": 0.0490594208240509, "learning_rate": 0.0003925238888640957, "loss": 0.0184, "num_input_tokens_seen": 162107936, "step": 75040 }, { "epoch": 12.242251223491028, "grad_norm": 0.006913432851433754, "learning_rate": 0.0003924543739900005, "loss": 0.033, "num_input_tokens_seen": 162119264, "step": 75045 }, { "epoch": 12.243066884176184, "grad_norm": 0.3514501750469208, "learning_rate": 0.00039238486129545376, "loss": 0.1685, "num_input_tokens_seen": 162130464, "step": 75050 }, { "epoch": 12.243882544861338, "grad_norm": 0.018597450107336044, "learning_rate": 0.0003923153507818645, "loss": 0.0288, "num_input_tokens_seen": 162140928, "step": 75055 }, { "epoch": 12.244698205546493, "grad_norm": 0.02613472379744053, "learning_rate": 0.00039224584245064114, "loss": 0.0078, "num_input_tokens_seen": 162151936, "step": 75060 }, { "epoch": 12.245513866231647, "grad_norm": 0.02216893993318081, "learning_rate": 0.00039217633630319264, "loss": 0.0027, "num_input_tokens_seen": 162163744, "step": 75065 }, { "epoch": 12.246329526916803, "grad_norm": 0.35483214259147644, "learning_rate": 0.00039210683234092733, "loss": 0.0128, "num_input_tokens_seen": 162174880, "step": 75070 }, { "epoch": 12.247145187601957, "grad_norm": 0.013999617658555508, "learning_rate": 0.000392037330565254, "loss": 0.0023, "num_input_tokens_seen": 162185984, "step": 75075 }, { "epoch": 12.247960848287113, "grad_norm": 0.09692630916833878, "learning_rate": 0.000391967830977581, "loss": 0.0063, "num_input_tokens_seen": 162196000, "step": 75080 }, { "epoch": 12.248776508972268, "grad_norm": 0.0007761928136460483, "learning_rate": 0.0003918983335793173, "loss": 0.1045, "num_input_tokens_seen": 162205312, "step": 75085 }, { "epoch": 12.249592169657422, "grad_norm": 0.013476379215717316, "learning_rate": 0.00039182883837187056, "loss": 0.0123, "num_input_tokens_seen": 162213888, "step": 75090 }, { "epoch": 12.250407830342578, "grad_norm": 0.017461730167269707, "learning_rate": 0.00039175934535665, "loss": 0.0037, "num_input_tokens_seen": 162224960, "step": 75095 }, { "epoch": 12.251223491027732, "grad_norm": 0.1801888644695282, "learning_rate": 0.00039168985453506334, "loss": 0.0109, "num_input_tokens_seen": 162234592, "step": 75100 }, { "epoch": 12.252039151712887, "grad_norm": 0.006768012419342995, "learning_rate": 0.0003916203659085194, "loss": 0.0058, "num_input_tokens_seen": 162245440, "step": 75105 }, { "epoch": 12.252854812398043, "grad_norm": 0.010229643434286118, "learning_rate": 0.00039155087947842607, "loss": 0.0678, "num_input_tokens_seen": 162256320, "step": 75110 }, { "epoch": 12.253670473083197, "grad_norm": 0.003439029911532998, "learning_rate": 0.00039148139524619184, "loss": 0.0025, "num_input_tokens_seen": 162267264, "step": 75115 }, { "epoch": 12.254486133768353, "grad_norm": 0.041233912110328674, "learning_rate": 0.00039141191321322464, "loss": 0.0076, "num_input_tokens_seen": 162278112, "step": 75120 }, { "epoch": 12.255301794453507, "grad_norm": 0.00618229852989316, "learning_rate": 0.00039134243338093285, "loss": 0.0033, "num_input_tokens_seen": 162288480, "step": 75125 }, { "epoch": 12.256117455138662, "grad_norm": 0.003569080028682947, "learning_rate": 0.0003912729557507246, "loss": 0.0086, "num_input_tokens_seen": 162299680, "step": 75130 }, { "epoch": 12.256933115823816, "grad_norm": 0.019696485251188278, "learning_rate": 0.0003912034803240077, "loss": 0.0173, "num_input_tokens_seen": 162309984, "step": 75135 }, { "epoch": 12.257748776508972, "grad_norm": 0.19719895720481873, "learning_rate": 0.0003911340071021905, "loss": 0.0075, "num_input_tokens_seen": 162321152, "step": 75140 }, { "epoch": 12.258564437194128, "grad_norm": 0.07820143550634384, "learning_rate": 0.00039106453608668047, "loss": 0.0567, "num_input_tokens_seen": 162332448, "step": 75145 }, { "epoch": 12.259380097879282, "grad_norm": 0.013048024848103523, "learning_rate": 0.0003909950672788861, "loss": 0.0053, "num_input_tokens_seen": 162343552, "step": 75150 }, { "epoch": 12.260195758564437, "grad_norm": 0.02106913924217224, "learning_rate": 0.0003909256006802147, "loss": 0.0076, "num_input_tokens_seen": 162353280, "step": 75155 }, { "epoch": 12.261011419249591, "grad_norm": 0.00745503231883049, "learning_rate": 0.0003908561362920746, "loss": 0.0496, "num_input_tokens_seen": 162364224, "step": 75160 }, { "epoch": 12.261827079934747, "grad_norm": 0.0053404951468110085, "learning_rate": 0.00039078667411587316, "loss": 0.0024, "num_input_tokens_seen": 162375520, "step": 75165 }, { "epoch": 12.262642740619903, "grad_norm": 0.003612641477957368, "learning_rate": 0.0003907172141530184, "loss": 0.0019, "num_input_tokens_seen": 162386016, "step": 75170 }, { "epoch": 12.263458401305057, "grad_norm": 0.004556257743388414, "learning_rate": 0.00039064775640491796, "loss": 0.0014, "num_input_tokens_seen": 162396576, "step": 75175 }, { "epoch": 12.264274061990212, "grad_norm": 0.4065341651439667, "learning_rate": 0.00039057830087297946, "loss": 0.0141, "num_input_tokens_seen": 162406912, "step": 75180 }, { "epoch": 12.265089722675366, "grad_norm": 0.004324205219745636, "learning_rate": 0.0003905088475586105, "loss": 0.0497, "num_input_tokens_seen": 162418144, "step": 75185 }, { "epoch": 12.265905383360522, "grad_norm": 0.002117524156346917, "learning_rate": 0.0003904393964632186, "loss": 0.003, "num_input_tokens_seen": 162428640, "step": 75190 }, { "epoch": 12.266721044045678, "grad_norm": 0.007448033429682255, "learning_rate": 0.00039036994758821124, "loss": 0.1817, "num_input_tokens_seen": 162440064, "step": 75195 }, { "epoch": 12.267536704730832, "grad_norm": 0.10965090245008469, "learning_rate": 0.00039030050093499623, "loss": 0.0442, "num_input_tokens_seen": 162451744, "step": 75200 }, { "epoch": 12.268352365415987, "grad_norm": 0.48558029532432556, "learning_rate": 0.0003902310565049805, "loss": 0.0091, "num_input_tokens_seen": 162462368, "step": 75205 }, { "epoch": 12.269168026101141, "grad_norm": 0.013588961213827133, "learning_rate": 0.0003901616142995718, "loss": 0.125, "num_input_tokens_seen": 162472128, "step": 75210 }, { "epoch": 12.269983686786297, "grad_norm": 1.1377973556518555, "learning_rate": 0.0003900921743201772, "loss": 0.0954, "num_input_tokens_seen": 162482848, "step": 75215 }, { "epoch": 12.270799347471453, "grad_norm": 0.023800566792488098, "learning_rate": 0.00039002273656820423, "loss": 0.0457, "num_input_tokens_seen": 162493408, "step": 75220 }, { "epoch": 12.271615008156607, "grad_norm": 0.0248698852956295, "learning_rate": 0.0003899533010450599, "loss": 0.0097, "num_input_tokens_seen": 162505184, "step": 75225 }, { "epoch": 12.272430668841762, "grad_norm": 0.06870092451572418, "learning_rate": 0.0003898838677521515, "loss": 0.0128, "num_input_tokens_seen": 162515552, "step": 75230 }, { "epoch": 12.273246329526916, "grad_norm": 0.07277870923280716, "learning_rate": 0.00038981443669088646, "loss": 0.1021, "num_input_tokens_seen": 162526176, "step": 75235 }, { "epoch": 12.274061990212072, "grad_norm": 0.01803704723715782, "learning_rate": 0.0003897450078626714, "loss": 0.0147, "num_input_tokens_seen": 162537600, "step": 75240 }, { "epoch": 12.274877650897226, "grad_norm": 0.0018408960895612836, "learning_rate": 0.0003896755812689138, "loss": 0.0197, "num_input_tokens_seen": 162548096, "step": 75245 }, { "epoch": 12.275693311582382, "grad_norm": 0.0035825977101922035, "learning_rate": 0.0003896061569110203, "loss": 0.0085, "num_input_tokens_seen": 162559872, "step": 75250 }, { "epoch": 12.276508972267537, "grad_norm": 0.032333966344594955, "learning_rate": 0.0003895367347903983, "loss": 0.0207, "num_input_tokens_seen": 162570816, "step": 75255 }, { "epoch": 12.277324632952691, "grad_norm": 0.0008797519840300083, "learning_rate": 0.0003894673149084543, "loss": 0.0035, "num_input_tokens_seen": 162581536, "step": 75260 }, { "epoch": 12.278140293637847, "grad_norm": 0.0028373999521136284, "learning_rate": 0.0003893978972665956, "loss": 0.0742, "num_input_tokens_seen": 162591712, "step": 75265 }, { "epoch": 12.278955954323001, "grad_norm": 0.0014782834332436323, "learning_rate": 0.0003893284818662286, "loss": 0.0037, "num_input_tokens_seen": 162602624, "step": 75270 }, { "epoch": 12.279771615008157, "grad_norm": 0.008053838275372982, "learning_rate": 0.0003892590687087605, "loss": 0.0069, "num_input_tokens_seen": 162614048, "step": 75275 }, { "epoch": 12.280587275693312, "grad_norm": 0.0029203668236732483, "learning_rate": 0.0003891896577955977, "loss": 0.0346, "num_input_tokens_seen": 162624160, "step": 75280 }, { "epoch": 12.281402936378466, "grad_norm": 0.021967828273773193, "learning_rate": 0.0003891202491281472, "loss": 0.06, "num_input_tokens_seen": 162636000, "step": 75285 }, { "epoch": 12.282218597063622, "grad_norm": 0.006306948605924845, "learning_rate": 0.0003890508427078153, "loss": 0.0015, "num_input_tokens_seen": 162646400, "step": 75290 }, { "epoch": 12.283034257748776, "grad_norm": 0.07245063781738281, "learning_rate": 0.0003889814385360091, "loss": 0.0118, "num_input_tokens_seen": 162657344, "step": 75295 }, { "epoch": 12.283849918433932, "grad_norm": 0.0017536969389766455, "learning_rate": 0.0003889120366141347, "loss": 0.1543, "num_input_tokens_seen": 162668544, "step": 75300 }, { "epoch": 12.284665579119087, "grad_norm": 0.25096216797828674, "learning_rate": 0.0003888426369435989, "loss": 0.0068, "num_input_tokens_seen": 162679296, "step": 75305 }, { "epoch": 12.285481239804241, "grad_norm": 0.002455186564475298, "learning_rate": 0.0003887732395258079, "loss": 0.0049, "num_input_tokens_seen": 162689728, "step": 75310 }, { "epoch": 12.286296900489397, "grad_norm": 0.004991905763745308, "learning_rate": 0.0003887038443621684, "loss": 0.008, "num_input_tokens_seen": 162700576, "step": 75315 }, { "epoch": 12.28711256117455, "grad_norm": 0.013448765501379967, "learning_rate": 0.0003886344514540868, "loss": 0.0036, "num_input_tokens_seen": 162711808, "step": 75320 }, { "epoch": 12.287928221859707, "grad_norm": 0.03673629090189934, "learning_rate": 0.0003885650608029692, "loss": 0.0065, "num_input_tokens_seen": 162722208, "step": 75325 }, { "epoch": 12.28874388254486, "grad_norm": 0.2677193582057953, "learning_rate": 0.00038849567241022205, "loss": 0.0226, "num_input_tokens_seen": 162732416, "step": 75330 }, { "epoch": 12.289559543230016, "grad_norm": 0.006491140462458134, "learning_rate": 0.0003884262862772514, "loss": 0.0028, "num_input_tokens_seen": 162744576, "step": 75335 }, { "epoch": 12.290375203915172, "grad_norm": 0.3217860162258148, "learning_rate": 0.0003883569024054638, "loss": 0.1769, "num_input_tokens_seen": 162753600, "step": 75340 }, { "epoch": 12.291190864600326, "grad_norm": 0.019507482647895813, "learning_rate": 0.0003882875207962651, "loss": 0.0056, "num_input_tokens_seen": 162764256, "step": 75345 }, { "epoch": 12.292006525285482, "grad_norm": 0.0013944999082013965, "learning_rate": 0.0003882181414510616, "loss": 0.0074, "num_input_tokens_seen": 162775840, "step": 75350 }, { "epoch": 12.292822185970635, "grad_norm": 0.03465007618069649, "learning_rate": 0.00038814876437125916, "loss": 0.0037, "num_input_tokens_seen": 162786656, "step": 75355 }, { "epoch": 12.293637846655791, "grad_norm": 0.04906386137008667, "learning_rate": 0.000388079389558264, "loss": 0.0085, "num_input_tokens_seen": 162799584, "step": 75360 }, { "epoch": 12.294453507340947, "grad_norm": 0.028646433725953102, "learning_rate": 0.0003880100170134818, "loss": 0.0081, "num_input_tokens_seen": 162809984, "step": 75365 }, { "epoch": 12.2952691680261, "grad_norm": 0.028914660215377808, "learning_rate": 0.00038794064673831896, "loss": 0.0114, "num_input_tokens_seen": 162820256, "step": 75370 }, { "epoch": 12.296084828711257, "grad_norm": 0.016150979325175285, "learning_rate": 0.0003878712787341809, "loss": 0.0354, "num_input_tokens_seen": 162830880, "step": 75375 }, { "epoch": 12.29690048939641, "grad_norm": 0.0038916615303605795, "learning_rate": 0.0003878019130024737, "loss": 0.0096, "num_input_tokens_seen": 162840896, "step": 75380 }, { "epoch": 12.297716150081566, "grad_norm": 0.011551225557923317, "learning_rate": 0.000387732549544603, "loss": 0.1076, "num_input_tokens_seen": 162852000, "step": 75385 }, { "epoch": 12.298531810766722, "grad_norm": 0.017128009349107742, "learning_rate": 0.0003876631883619747, "loss": 0.0223, "num_input_tokens_seen": 162862368, "step": 75390 }, { "epoch": 12.299347471451876, "grad_norm": 0.01165260374546051, "learning_rate": 0.0003875938294559942, "loss": 0.003, "num_input_tokens_seen": 162873504, "step": 75395 }, { "epoch": 12.300163132137031, "grad_norm": 0.01163018774241209, "learning_rate": 0.0003875244728280676, "loss": 0.2225, "num_input_tokens_seen": 162884992, "step": 75400 }, { "epoch": 12.300978792822185, "grad_norm": 0.019337153062224388, "learning_rate": 0.00038745511847960003, "loss": 0.1481, "num_input_tokens_seen": 162895488, "step": 75405 }, { "epoch": 12.301794453507341, "grad_norm": 0.0666937604546547, "learning_rate": 0.0003873857664119974, "loss": 0.0548, "num_input_tokens_seen": 162905888, "step": 75410 }, { "epoch": 12.302610114192497, "grad_norm": 0.06270725280046463, "learning_rate": 0.00038731641662666493, "loss": 0.0347, "num_input_tokens_seen": 162917344, "step": 75415 }, { "epoch": 12.30342577487765, "grad_norm": 0.000730838452000171, "learning_rate": 0.00038724706912500847, "loss": 0.0063, "num_input_tokens_seen": 162927616, "step": 75420 }, { "epoch": 12.304241435562806, "grad_norm": 0.052667297422885895, "learning_rate": 0.0003871777239084329, "loss": 0.0157, "num_input_tokens_seen": 162937856, "step": 75425 }, { "epoch": 12.30505709624796, "grad_norm": 0.02632969245314598, "learning_rate": 0.00038710838097834414, "loss": 0.0766, "num_input_tokens_seen": 162949632, "step": 75430 }, { "epoch": 12.305872756933116, "grad_norm": 0.005952873267233372, "learning_rate": 0.000387039040336147, "loss": 0.0048, "num_input_tokens_seen": 162959552, "step": 75435 }, { "epoch": 12.30668841761827, "grad_norm": 0.022846754640340805, "learning_rate": 0.0003869697019832473, "loss": 0.1431, "num_input_tokens_seen": 162970496, "step": 75440 }, { "epoch": 12.307504078303426, "grad_norm": 0.22744394838809967, "learning_rate": 0.0003869003659210497, "loss": 0.1829, "num_input_tokens_seen": 162980160, "step": 75445 }, { "epoch": 12.308319738988581, "grad_norm": 0.05033477395772934, "learning_rate": 0.00038683103215095965, "loss": 0.1123, "num_input_tokens_seen": 162990496, "step": 75450 }, { "epoch": 12.309135399673735, "grad_norm": 0.007061969488859177, "learning_rate": 0.00038676170067438256, "loss": 0.011, "num_input_tokens_seen": 163000832, "step": 75455 }, { "epoch": 12.309951060358891, "grad_norm": 0.022670293226838112, "learning_rate": 0.00038669237149272303, "loss": 0.0088, "num_input_tokens_seen": 163012416, "step": 75460 }, { "epoch": 12.310766721044045, "grad_norm": 0.0011929698521271348, "learning_rate": 0.0003866230446073865, "loss": 0.01, "num_input_tokens_seen": 163022912, "step": 75465 }, { "epoch": 12.3115823817292, "grad_norm": 0.006316805724054575, "learning_rate": 0.0003865537200197776, "loss": 0.0059, "num_input_tokens_seen": 163033280, "step": 75470 }, { "epoch": 12.312398042414356, "grad_norm": 0.45057278871536255, "learning_rate": 0.0003864843977313017, "loss": 0.0742, "num_input_tokens_seen": 163043808, "step": 75475 }, { "epoch": 12.31321370309951, "grad_norm": 0.010832220315933228, "learning_rate": 0.0003864150777433634, "loss": 0.0587, "num_input_tokens_seen": 163054752, "step": 75480 }, { "epoch": 12.314029363784666, "grad_norm": 0.10409935563802719, "learning_rate": 0.0003863457600573676, "loss": 0.1449, "num_input_tokens_seen": 163065152, "step": 75485 }, { "epoch": 12.31484502446982, "grad_norm": 0.014816675335168839, "learning_rate": 0.00038627644467471915, "loss": 0.0093, "num_input_tokens_seen": 163076992, "step": 75490 }, { "epoch": 12.315660685154976, "grad_norm": 0.003097902750596404, "learning_rate": 0.00038620713159682286, "loss": 0.0294, "num_input_tokens_seen": 163088704, "step": 75495 }, { "epoch": 12.31647634584013, "grad_norm": 0.032903462648391724, "learning_rate": 0.0003861378208250834, "loss": 0.0136, "num_input_tokens_seen": 163099168, "step": 75500 }, { "epoch": 12.317292006525285, "grad_norm": 0.01986708678305149, "learning_rate": 0.00038606851236090543, "loss": 0.0251, "num_input_tokens_seen": 163108800, "step": 75505 }, { "epoch": 12.318107667210441, "grad_norm": 0.029241114854812622, "learning_rate": 0.00038599920620569357, "loss": 0.0785, "num_input_tokens_seen": 163119456, "step": 75510 }, { "epoch": 12.318923327895595, "grad_norm": 0.0034615658223628998, "learning_rate": 0.00038592990236085257, "loss": 0.0089, "num_input_tokens_seen": 163130656, "step": 75515 }, { "epoch": 12.31973898858075, "grad_norm": 0.020488232374191284, "learning_rate": 0.0003858606008277866, "loss": 0.041, "num_input_tokens_seen": 163141056, "step": 75520 }, { "epoch": 12.320554649265905, "grad_norm": 0.04791320860385895, "learning_rate": 0.0003857913016079005, "loss": 0.1059, "num_input_tokens_seen": 163151328, "step": 75525 }, { "epoch": 12.32137030995106, "grad_norm": 0.014877380803227425, "learning_rate": 0.0003857220047025984, "loss": 0.0387, "num_input_tokens_seen": 163161952, "step": 75530 }, { "epoch": 12.322185970636216, "grad_norm": 0.016949398443102837, "learning_rate": 0.00038565271011328507, "loss": 0.0205, "num_input_tokens_seen": 163173184, "step": 75535 }, { "epoch": 12.32300163132137, "grad_norm": 0.022877110168337822, "learning_rate": 0.00038558341784136437, "loss": 0.1064, "num_input_tokens_seen": 163185472, "step": 75540 }, { "epoch": 12.323817292006526, "grad_norm": 0.3788914680480957, "learning_rate": 0.00038551412788824106, "loss": 0.0798, "num_input_tokens_seen": 163195936, "step": 75545 }, { "epoch": 12.32463295269168, "grad_norm": 0.005041462369263172, "learning_rate": 0.0003854448402553191, "loss": 0.0708, "num_input_tokens_seen": 163205152, "step": 75550 }, { "epoch": 12.325448613376835, "grad_norm": 0.007434530183672905, "learning_rate": 0.0003853755549440026, "loss": 0.0199, "num_input_tokens_seen": 163214848, "step": 75555 }, { "epoch": 12.326264274061991, "grad_norm": 0.19846367835998535, "learning_rate": 0.0003853062719556962, "loss": 0.0238, "num_input_tokens_seen": 163225888, "step": 75560 }, { "epoch": 12.327079934747145, "grad_norm": 0.1723729521036148, "learning_rate": 0.0003852369912918035, "loss": 0.0675, "num_input_tokens_seen": 163237664, "step": 75565 }, { "epoch": 12.3278955954323, "grad_norm": 0.014083434827625751, "learning_rate": 0.00038516771295372894, "loss": 0.0091, "num_input_tokens_seen": 163248896, "step": 75570 }, { "epoch": 12.328711256117455, "grad_norm": 0.011963681317865849, "learning_rate": 0.00038509843694287615, "loss": 0.0126, "num_input_tokens_seen": 163258784, "step": 75575 }, { "epoch": 12.32952691680261, "grad_norm": 0.042714718729257584, "learning_rate": 0.0003850291632606495, "loss": 0.0186, "num_input_tokens_seen": 163269536, "step": 75580 }, { "epoch": 12.330342577487766, "grad_norm": 0.008637349121272564, "learning_rate": 0.00038495989190845246, "loss": 0.0394, "num_input_tokens_seen": 163280736, "step": 75585 }, { "epoch": 12.33115823817292, "grad_norm": 0.0011679278686642647, "learning_rate": 0.00038489062288768944, "loss": 0.003, "num_input_tokens_seen": 163291040, "step": 75590 }, { "epoch": 12.331973898858076, "grad_norm": 0.018200945109128952, "learning_rate": 0.00038482135619976373, "loss": 0.0067, "num_input_tokens_seen": 163302464, "step": 75595 }, { "epoch": 12.33278955954323, "grad_norm": 0.010142846964299679, "learning_rate": 0.0003847520918460795, "loss": 0.0091, "num_input_tokens_seen": 163312832, "step": 75600 }, { "epoch": 12.333605220228385, "grad_norm": 0.01162709854543209, "learning_rate": 0.00038468282982804023, "loss": 0.0056, "num_input_tokens_seen": 163323552, "step": 75605 }, { "epoch": 12.33442088091354, "grad_norm": 0.004280074033886194, "learning_rate": 0.00038461357014704986, "loss": 0.0029, "num_input_tokens_seen": 163333824, "step": 75610 }, { "epoch": 12.335236541598695, "grad_norm": 0.0024590755347162485, "learning_rate": 0.00038454431280451163, "loss": 0.009, "num_input_tokens_seen": 163345152, "step": 75615 }, { "epoch": 12.33605220228385, "grad_norm": 0.019506709650158882, "learning_rate": 0.00038447505780182963, "loss": 0.0101, "num_input_tokens_seen": 163356288, "step": 75620 }, { "epoch": 12.336867862969005, "grad_norm": 0.03595130145549774, "learning_rate": 0.0003844058051404069, "loss": 0.0354, "num_input_tokens_seen": 163367488, "step": 75625 }, { "epoch": 12.33768352365416, "grad_norm": 0.011195999570190907, "learning_rate": 0.00038433655482164727, "loss": 0.0184, "num_input_tokens_seen": 163379328, "step": 75630 }, { "epoch": 12.338499184339314, "grad_norm": 0.20951534807682037, "learning_rate": 0.0003842673068469541, "loss": 0.1006, "num_input_tokens_seen": 163390656, "step": 75635 }, { "epoch": 12.33931484502447, "grad_norm": 0.008074449375271797, "learning_rate": 0.0003841980612177308, "loss": 0.004, "num_input_tokens_seen": 163401568, "step": 75640 }, { "epoch": 12.340130505709626, "grad_norm": 0.007740038447082043, "learning_rate": 0.00038412881793538063, "loss": 0.0179, "num_input_tokens_seen": 163413088, "step": 75645 }, { "epoch": 12.34094616639478, "grad_norm": 0.022545767948031425, "learning_rate": 0.000384059577001307, "loss": 0.0115, "num_input_tokens_seen": 163425024, "step": 75650 }, { "epoch": 12.341761827079935, "grad_norm": 0.012054262682795525, "learning_rate": 0.000383990338416913, "loss": 0.0167, "num_input_tokens_seen": 163436736, "step": 75655 }, { "epoch": 12.34257748776509, "grad_norm": 0.02720271609723568, "learning_rate": 0.00038392110218360203, "loss": 0.025, "num_input_tokens_seen": 163446464, "step": 75660 }, { "epoch": 12.343393148450245, "grad_norm": 0.0017249691300094128, "learning_rate": 0.0003838518683027772, "loss": 0.0115, "num_input_tokens_seen": 163457312, "step": 75665 }, { "epoch": 12.3442088091354, "grad_norm": 0.008015700615942478, "learning_rate": 0.0003837826367758417, "loss": 0.0092, "num_input_tokens_seen": 163467296, "step": 75670 }, { "epoch": 12.345024469820554, "grad_norm": 0.004186101723462343, "learning_rate": 0.0003837134076041984, "loss": 0.0031, "num_input_tokens_seen": 163477216, "step": 75675 }, { "epoch": 12.34584013050571, "grad_norm": 0.0532694011926651, "learning_rate": 0.00038364418078925037, "loss": 0.0089, "num_input_tokens_seen": 163487008, "step": 75680 }, { "epoch": 12.346655791190864, "grad_norm": 0.004246499389410019, "learning_rate": 0.0003835749563324008, "loss": 0.0027, "num_input_tokens_seen": 163498624, "step": 75685 }, { "epoch": 12.34747145187602, "grad_norm": 0.005101019516587257, "learning_rate": 0.0003835057342350522, "loss": 0.0017, "num_input_tokens_seen": 163508832, "step": 75690 }, { "epoch": 12.348287112561174, "grad_norm": 0.003990354016423225, "learning_rate": 0.0003834365144986079, "loss": 0.1461, "num_input_tokens_seen": 163520032, "step": 75695 }, { "epoch": 12.34910277324633, "grad_norm": 0.015845347195863724, "learning_rate": 0.00038336729712447034, "loss": 0.0331, "num_input_tokens_seen": 163530368, "step": 75700 }, { "epoch": 12.349918433931485, "grad_norm": 0.001217706361785531, "learning_rate": 0.0003832980821140426, "loss": 0.0098, "num_input_tokens_seen": 163541280, "step": 75705 }, { "epoch": 12.350734094616639, "grad_norm": 0.006318389903753996, "learning_rate": 0.00038322886946872716, "loss": 0.0794, "num_input_tokens_seen": 163551584, "step": 75710 }, { "epoch": 12.351549755301795, "grad_norm": 0.13416573405265808, "learning_rate": 0.000383159659189927, "loss": 0.1529, "num_input_tokens_seen": 163562592, "step": 75715 }, { "epoch": 12.352365415986949, "grad_norm": 0.23575806617736816, "learning_rate": 0.0003830904512790443, "loss": 0.1086, "num_input_tokens_seen": 163573728, "step": 75720 }, { "epoch": 12.353181076672104, "grad_norm": 0.026259060949087143, "learning_rate": 0.0003830212457374821, "loss": 0.0059, "num_input_tokens_seen": 163585216, "step": 75725 }, { "epoch": 12.35399673735726, "grad_norm": 0.019766176119446754, "learning_rate": 0.00038295204256664264, "loss": 0.0041, "num_input_tokens_seen": 163596288, "step": 75730 }, { "epoch": 12.354812398042414, "grad_norm": 0.020990602672100067, "learning_rate": 0.00038288284176792866, "loss": 0.0167, "num_input_tokens_seen": 163608000, "step": 75735 }, { "epoch": 12.35562805872757, "grad_norm": 0.28732380270957947, "learning_rate": 0.0003828136433427423, "loss": 0.1368, "num_input_tokens_seen": 163618976, "step": 75740 }, { "epoch": 12.356443719412724, "grad_norm": 0.011000544764101505, "learning_rate": 0.00038274444729248633, "loss": 0.01, "num_input_tokens_seen": 163628320, "step": 75745 }, { "epoch": 12.35725938009788, "grad_norm": 0.002288726856932044, "learning_rate": 0.00038267525361856264, "loss": 0.0062, "num_input_tokens_seen": 163638336, "step": 75750 }, { "epoch": 12.358075040783035, "grad_norm": 0.03208902105689049, "learning_rate": 0.000382606062322374, "loss": 0.0044, "num_input_tokens_seen": 163649376, "step": 75755 }, { "epoch": 12.358890701468189, "grad_norm": 0.019938675686717033, "learning_rate": 0.00038253687340532224, "loss": 0.0064, "num_input_tokens_seen": 163660416, "step": 75760 }, { "epoch": 12.359706362153345, "grad_norm": 0.030354809015989304, "learning_rate": 0.0003824676868688097, "loss": 0.0428, "num_input_tokens_seen": 163670720, "step": 75765 }, { "epoch": 12.360522022838499, "grad_norm": 0.020712848752737045, "learning_rate": 0.0003823985027142389, "loss": 0.0043, "num_input_tokens_seen": 163681728, "step": 75770 }, { "epoch": 12.361337683523654, "grad_norm": 0.012714402750134468, "learning_rate": 0.0003823293209430113, "loss": 0.0044, "num_input_tokens_seen": 163691584, "step": 75775 }, { "epoch": 12.362153344208808, "grad_norm": 0.003048856742680073, "learning_rate": 0.00038226014155652956, "loss": 0.0054, "num_input_tokens_seen": 163702688, "step": 75780 }, { "epoch": 12.362969004893964, "grad_norm": 0.049370184540748596, "learning_rate": 0.0003821909645561952, "loss": 0.0042, "num_input_tokens_seen": 163713472, "step": 75785 }, { "epoch": 12.36378466557912, "grad_norm": 0.043685123324394226, "learning_rate": 0.0003821217899434106, "loss": 0.0054, "num_input_tokens_seen": 163725760, "step": 75790 }, { "epoch": 12.364600326264274, "grad_norm": 0.021820900961756706, "learning_rate": 0.0003820526177195772, "loss": 0.0047, "num_input_tokens_seen": 163736928, "step": 75795 }, { "epoch": 12.36541598694943, "grad_norm": 0.10135416686534882, "learning_rate": 0.00038198344788609737, "loss": 0.0081, "num_input_tokens_seen": 163747392, "step": 75800 }, { "epoch": 12.366231647634583, "grad_norm": 0.02166566252708435, "learning_rate": 0.0003819142804443726, "loss": 0.0131, "num_input_tokens_seen": 163758144, "step": 75805 }, { "epoch": 12.367047308319739, "grad_norm": 0.011187167838215828, "learning_rate": 0.0003818451153958047, "loss": 0.0423, "num_input_tokens_seen": 163770528, "step": 75810 }, { "epoch": 12.367862969004895, "grad_norm": 0.00776352034881711, "learning_rate": 0.0003817759527417955, "loss": 0.0035, "num_input_tokens_seen": 163782112, "step": 75815 }, { "epoch": 12.368678629690049, "grad_norm": 0.00991200003772974, "learning_rate": 0.00038170679248374653, "loss": 0.0044, "num_input_tokens_seen": 163792736, "step": 75820 }, { "epoch": 12.369494290375204, "grad_norm": 0.2220042645931244, "learning_rate": 0.00038163763462305944, "loss": 0.0094, "num_input_tokens_seen": 163804320, "step": 75825 }, { "epoch": 12.370309951060358, "grad_norm": 0.052491020411252975, "learning_rate": 0.000381568479161136, "loss": 0.0061, "num_input_tokens_seen": 163814272, "step": 75830 }, { "epoch": 12.371125611745514, "grad_norm": 0.18457885086536407, "learning_rate": 0.00038149932609937736, "loss": 0.0268, "num_input_tokens_seen": 163825664, "step": 75835 }, { "epoch": 12.37194127243067, "grad_norm": 0.007164886686950922, "learning_rate": 0.00038143017543918546, "loss": 0.0203, "num_input_tokens_seen": 163835680, "step": 75840 }, { "epoch": 12.372756933115824, "grad_norm": 0.01174076460301876, "learning_rate": 0.0003813610271819612, "loss": 0.0053, "num_input_tokens_seen": 163845536, "step": 75845 }, { "epoch": 12.37357259380098, "grad_norm": 0.002305036410689354, "learning_rate": 0.00038129188132910645, "loss": 0.1235, "num_input_tokens_seen": 163855968, "step": 75850 }, { "epoch": 12.374388254486133, "grad_norm": 0.36928537487983704, "learning_rate": 0.00038122273788202216, "loss": 0.0149, "num_input_tokens_seen": 163866496, "step": 75855 }, { "epoch": 12.375203915171289, "grad_norm": 0.00295365322381258, "learning_rate": 0.00038115359684210993, "loss": 0.0068, "num_input_tokens_seen": 163877184, "step": 75860 }, { "epoch": 12.376019575856443, "grad_norm": 0.4139918386936188, "learning_rate": 0.00038108445821077066, "loss": 0.0076, "num_input_tokens_seen": 163887200, "step": 75865 }, { "epoch": 12.376835236541599, "grad_norm": 0.033299028873443604, "learning_rate": 0.00038101532198940563, "loss": 0.0251, "num_input_tokens_seen": 163898208, "step": 75870 }, { "epoch": 12.377650897226754, "grad_norm": 0.024843864142894745, "learning_rate": 0.0003809461881794163, "loss": 0.0106, "num_input_tokens_seen": 163908512, "step": 75875 }, { "epoch": 12.378466557911908, "grad_norm": 1.026667594909668, "learning_rate": 0.0003808770567822033, "loss": 0.0927, "num_input_tokens_seen": 163918976, "step": 75880 }, { "epoch": 12.379282218597064, "grad_norm": 0.01017333846539259, "learning_rate": 0.000380807927799168, "loss": 0.2334, "num_input_tokens_seen": 163928544, "step": 75885 }, { "epoch": 12.380097879282218, "grad_norm": 0.009438030421733856, "learning_rate": 0.0003807388012317111, "loss": 0.1196, "num_input_tokens_seen": 163939072, "step": 75890 }, { "epoch": 12.380913539967374, "grad_norm": 0.03808213025331497, "learning_rate": 0.0003806696770812339, "loss": 0.0146, "num_input_tokens_seen": 163949216, "step": 75895 }, { "epoch": 12.38172920065253, "grad_norm": 0.01764621213078499, "learning_rate": 0.00038060055534913683, "loss": 0.0278, "num_input_tokens_seen": 163959744, "step": 75900 }, { "epoch": 12.382544861337683, "grad_norm": 0.0389665849506855, "learning_rate": 0.0003805314360368212, "loss": 0.2302, "num_input_tokens_seen": 163970144, "step": 75905 }, { "epoch": 12.383360522022839, "grad_norm": 0.09590235352516174, "learning_rate": 0.0003804623191456874, "loss": 0.0173, "num_input_tokens_seen": 163980832, "step": 75910 }, { "epoch": 12.384176182707993, "grad_norm": 0.028626440092921257, "learning_rate": 0.00038039320467713654, "loss": 0.0128, "num_input_tokens_seen": 163992032, "step": 75915 }, { "epoch": 12.384991843393149, "grad_norm": 0.055960919708013535, "learning_rate": 0.0003803240926325689, "loss": 0.0064, "num_input_tokens_seen": 164002368, "step": 75920 }, { "epoch": 12.385807504078304, "grad_norm": 0.5099681615829468, "learning_rate": 0.00038025498301338554, "loss": 0.0107, "num_input_tokens_seen": 164013472, "step": 75925 }, { "epoch": 12.386623164763458, "grad_norm": 0.004505421034991741, "learning_rate": 0.00038018587582098665, "loss": 0.0025, "num_input_tokens_seen": 164024224, "step": 75930 }, { "epoch": 12.387438825448614, "grad_norm": 0.035571370273828506, "learning_rate": 0.0003801167710567731, "loss": 0.0085, "num_input_tokens_seen": 164034880, "step": 75935 }, { "epoch": 12.388254486133768, "grad_norm": 0.003777115372940898, "learning_rate": 0.00038004766872214526, "loss": 0.0035, "num_input_tokens_seen": 164045632, "step": 75940 }, { "epoch": 12.389070146818923, "grad_norm": 0.011398572474718094, "learning_rate": 0.0003799785688185036, "loss": 0.0615, "num_input_tokens_seen": 164055648, "step": 75945 }, { "epoch": 12.38988580750408, "grad_norm": 0.005105683580040932, "learning_rate": 0.00037990947134724845, "loss": 0.151, "num_input_tokens_seen": 164066560, "step": 75950 }, { "epoch": 12.390701468189233, "grad_norm": 0.07168906182050705, "learning_rate": 0.00037984037630978026, "loss": 0.0169, "num_input_tokens_seen": 164076992, "step": 75955 }, { "epoch": 12.391517128874389, "grad_norm": 0.20706669986248016, "learning_rate": 0.00037977128370749916, "loss": 0.0277, "num_input_tokens_seen": 164086912, "step": 75960 }, { "epoch": 12.392332789559543, "grad_norm": 0.38154295086860657, "learning_rate": 0.00037970219354180573, "loss": 0.0972, "num_input_tokens_seen": 164097504, "step": 75965 }, { "epoch": 12.393148450244698, "grad_norm": 0.04678434878587723, "learning_rate": 0.0003796331058140997, "loss": 0.0166, "num_input_tokens_seen": 164107968, "step": 75970 }, { "epoch": 12.393964110929852, "grad_norm": 0.0053862021304667, "learning_rate": 0.00037956402052578164, "loss": 0.0181, "num_input_tokens_seen": 164118080, "step": 75975 }, { "epoch": 12.394779771615008, "grad_norm": 0.0014402979286387563, "learning_rate": 0.0003794949376782515, "loss": 0.11, "num_input_tokens_seen": 164129152, "step": 75980 }, { "epoch": 12.395595432300164, "grad_norm": 0.003660842776298523, "learning_rate": 0.00037942585727290926, "loss": 0.0028, "num_input_tokens_seen": 164139488, "step": 75985 }, { "epoch": 12.396411092985318, "grad_norm": 0.00224318471737206, "learning_rate": 0.000379356779311155, "loss": 0.005, "num_input_tokens_seen": 164151104, "step": 75990 }, { "epoch": 12.397226753670473, "grad_norm": 0.08662576228380203, "learning_rate": 0.0003792877037943886, "loss": 0.0567, "num_input_tokens_seen": 164161696, "step": 75995 }, { "epoch": 12.398042414355627, "grad_norm": 0.026156943291425705, "learning_rate": 0.0003792186307240102, "loss": 0.0047, "num_input_tokens_seen": 164172320, "step": 76000 }, { "epoch": 12.398858075040783, "grad_norm": 0.4226818382740021, "learning_rate": 0.0003791495601014192, "loss": 0.0428, "num_input_tokens_seen": 164183104, "step": 76005 }, { "epoch": 12.399673735725939, "grad_norm": 0.014250795356929302, "learning_rate": 0.00037908049192801596, "loss": 0.0906, "num_input_tokens_seen": 164195008, "step": 76010 }, { "epoch": 12.400489396411093, "grad_norm": 0.3014688193798065, "learning_rate": 0.00037901142620519967, "loss": 0.0257, "num_input_tokens_seen": 164206208, "step": 76015 }, { "epoch": 12.401305057096248, "grad_norm": 0.008320405147969723, "learning_rate": 0.00037894236293437055, "loss": 0.0072, "num_input_tokens_seen": 164216832, "step": 76020 }, { "epoch": 12.402120717781402, "grad_norm": 0.06271061301231384, "learning_rate": 0.00037887330211692783, "loss": 0.0122, "num_input_tokens_seen": 164227936, "step": 76025 }, { "epoch": 12.402936378466558, "grad_norm": 0.1204233169555664, "learning_rate": 0.00037880424375427154, "loss": 0.0122, "num_input_tokens_seen": 164238688, "step": 76030 }, { "epoch": 12.403752039151712, "grad_norm": 0.008676442317664623, "learning_rate": 0.00037873518784780074, "loss": 0.0555, "num_input_tokens_seen": 164249408, "step": 76035 }, { "epoch": 12.404567699836868, "grad_norm": 0.019311709329485893, "learning_rate": 0.0003786661343989154, "loss": 0.0161, "num_input_tokens_seen": 164260736, "step": 76040 }, { "epoch": 12.405383360522023, "grad_norm": 0.0016197053482756019, "learning_rate": 0.00037859708340901455, "loss": 0.075, "num_input_tokens_seen": 164270144, "step": 76045 }, { "epoch": 12.406199021207177, "grad_norm": 0.0075719174928963184, "learning_rate": 0.00037852803487949804, "loss": 0.0167, "num_input_tokens_seen": 164279936, "step": 76050 }, { "epoch": 12.407014681892333, "grad_norm": 0.016589025035500526, "learning_rate": 0.0003784589888117648, "loss": 0.0467, "num_input_tokens_seen": 164292704, "step": 76055 }, { "epoch": 12.407830342577487, "grad_norm": 0.4371892213821411, "learning_rate": 0.0003783899452072146, "loss": 0.009, "num_input_tokens_seen": 164303904, "step": 76060 }, { "epoch": 12.408646003262643, "grad_norm": 0.0021243118681013584, "learning_rate": 0.00037832090406724617, "loss": 0.0039, "num_input_tokens_seen": 164316000, "step": 76065 }, { "epoch": 12.409461663947798, "grad_norm": 0.0714460015296936, "learning_rate": 0.0003782518653932592, "loss": 0.0351, "num_input_tokens_seen": 164326848, "step": 76070 }, { "epoch": 12.410277324632952, "grad_norm": 0.2750934660434723, "learning_rate": 0.00037818282918665236, "loss": 0.0179, "num_input_tokens_seen": 164337792, "step": 76075 }, { "epoch": 12.411092985318108, "grad_norm": 0.01962362602353096, "learning_rate": 0.0003781137954488251, "loss": 0.0042, "num_input_tokens_seen": 164349856, "step": 76080 }, { "epoch": 12.411908646003262, "grad_norm": 0.03165091201663017, "learning_rate": 0.0003780447641811766, "loss": 0.0208, "num_input_tokens_seen": 164360672, "step": 76085 }, { "epoch": 12.412724306688418, "grad_norm": 0.005478391423821449, "learning_rate": 0.0003779757353851054, "loss": 0.0128, "num_input_tokens_seen": 164371392, "step": 76090 }, { "epoch": 12.413539967373573, "grad_norm": 0.0063373674638569355, "learning_rate": 0.000377906709062011, "loss": 0.0862, "num_input_tokens_seen": 164381664, "step": 76095 }, { "epoch": 12.414355628058727, "grad_norm": 0.027820097282528877, "learning_rate": 0.00037783768521329177, "loss": 0.0071, "num_input_tokens_seen": 164393184, "step": 76100 }, { "epoch": 12.415171288743883, "grad_norm": 0.002753217238932848, "learning_rate": 0.0003777686638403469, "loss": 0.0465, "num_input_tokens_seen": 164402048, "step": 76105 }, { "epoch": 12.415986949429037, "grad_norm": 1.1631052494049072, "learning_rate": 0.0003776996449445752, "loss": 0.0841, "num_input_tokens_seen": 164411904, "step": 76110 }, { "epoch": 12.416802610114193, "grad_norm": 0.0034380650613456964, "learning_rate": 0.0003776306285273753, "loss": 0.0017, "num_input_tokens_seen": 164421920, "step": 76115 }, { "epoch": 12.417618270799348, "grad_norm": 0.04066552594304085, "learning_rate": 0.0003775616145901459, "loss": 0.0109, "num_input_tokens_seen": 164432576, "step": 76120 }, { "epoch": 12.418433931484502, "grad_norm": 0.0008301659254357219, "learning_rate": 0.0003774926031342858, "loss": 0.0066, "num_input_tokens_seen": 164443936, "step": 76125 }, { "epoch": 12.419249592169658, "grad_norm": 0.002140910131856799, "learning_rate": 0.0003774235941611934, "loss": 0.017, "num_input_tokens_seen": 164456416, "step": 76130 }, { "epoch": 12.420065252854812, "grad_norm": 0.05225667729973793, "learning_rate": 0.0003773545876722675, "loss": 0.0427, "num_input_tokens_seen": 164467776, "step": 76135 }, { "epoch": 12.420880913539968, "grad_norm": 0.004565094597637653, "learning_rate": 0.00037728558366890633, "loss": 0.0737, "num_input_tokens_seen": 164478912, "step": 76140 }, { "epoch": 12.421696574225122, "grad_norm": 0.013736764900386333, "learning_rate": 0.00037721658215250864, "loss": 0.0062, "num_input_tokens_seen": 164489280, "step": 76145 }, { "epoch": 12.422512234910277, "grad_norm": 0.0084912134334445, "learning_rate": 0.00037714758312447247, "loss": 0.0194, "num_input_tokens_seen": 164498144, "step": 76150 }, { "epoch": 12.423327895595433, "grad_norm": 0.0022617534268647432, "learning_rate": 0.0003770785865861966, "loss": 0.089, "num_input_tokens_seen": 164510048, "step": 76155 }, { "epoch": 12.424143556280587, "grad_norm": 0.02164643630385399, "learning_rate": 0.0003770095925390789, "loss": 0.0114, "num_input_tokens_seen": 164521472, "step": 76160 }, { "epoch": 12.424959216965743, "grad_norm": 0.00475015165284276, "learning_rate": 0.000376940600984518, "loss": 0.0049, "num_input_tokens_seen": 164532320, "step": 76165 }, { "epoch": 12.425774877650896, "grad_norm": 0.007536349352449179, "learning_rate": 0.0003768716119239118, "loss": 0.0071, "num_input_tokens_seen": 164543296, "step": 76170 }, { "epoch": 12.426590538336052, "grad_norm": 0.030248427763581276, "learning_rate": 0.0003768026253586587, "loss": 0.005, "num_input_tokens_seen": 164554720, "step": 76175 }, { "epoch": 12.427406199021208, "grad_norm": 0.07713694125413895, "learning_rate": 0.00037673364129015653, "loss": 0.0364, "num_input_tokens_seen": 164566432, "step": 76180 }, { "epoch": 12.428221859706362, "grad_norm": 0.11699513345956802, "learning_rate": 0.0003766646597198037, "loss": 0.0136, "num_input_tokens_seen": 164576672, "step": 76185 }, { "epoch": 12.429037520391518, "grad_norm": 0.02320980280637741, "learning_rate": 0.0003765956806489978, "loss": 0.0027, "num_input_tokens_seen": 164588544, "step": 76190 }, { "epoch": 12.429853181076671, "grad_norm": 0.010927310213446617, "learning_rate": 0.00037652670407913697, "loss": 0.019, "num_input_tokens_seen": 164599136, "step": 76195 }, { "epoch": 12.430668841761827, "grad_norm": 0.011162595823407173, "learning_rate": 0.00037645773001161937, "loss": 0.0913, "num_input_tokens_seen": 164608672, "step": 76200 }, { "epoch": 12.431484502446983, "grad_norm": 0.008266448974609375, "learning_rate": 0.0003763887584478423, "loss": 0.0099, "num_input_tokens_seen": 164619168, "step": 76205 }, { "epoch": 12.432300163132137, "grad_norm": 0.002109188586473465, "learning_rate": 0.00037631978938920414, "loss": 0.0707, "num_input_tokens_seen": 164630016, "step": 76210 }, { "epoch": 12.433115823817293, "grad_norm": 0.007610084023326635, "learning_rate": 0.0003762508228371021, "loss": 0.0051, "num_input_tokens_seen": 164641152, "step": 76215 }, { "epoch": 12.433931484502446, "grad_norm": 0.002785197226330638, "learning_rate": 0.0003761818587929344, "loss": 0.0041, "num_input_tokens_seen": 164652672, "step": 76220 }, { "epoch": 12.434747145187602, "grad_norm": 0.1258106827735901, "learning_rate": 0.0003761128972580981, "loss": 0.0801, "num_input_tokens_seen": 164662848, "step": 76225 }, { "epoch": 12.435562805872756, "grad_norm": 0.40725257992744446, "learning_rate": 0.00037604393823399137, "loss": 0.0898, "num_input_tokens_seen": 164674208, "step": 76230 }, { "epoch": 12.436378466557912, "grad_norm": 0.017194107174873352, "learning_rate": 0.00037597498172201125, "loss": 0.0443, "num_input_tokens_seen": 164686272, "step": 76235 }, { "epoch": 12.437194127243067, "grad_norm": 0.00854388065636158, "learning_rate": 0.0003759060277235556, "loss": 0.0677, "num_input_tokens_seen": 164697536, "step": 76240 }, { "epoch": 12.438009787928221, "grad_norm": 0.001409175805747509, "learning_rate": 0.00037583707624002163, "loss": 0.0644, "num_input_tokens_seen": 164708512, "step": 76245 }, { "epoch": 12.438825448613377, "grad_norm": 0.020490366965532303, "learning_rate": 0.00037576812727280683, "loss": 0.0046, "num_input_tokens_seen": 164718688, "step": 76250 }, { "epoch": 12.439641109298531, "grad_norm": 0.017789531499147415, "learning_rate": 0.0003756991808233086, "loss": 0.0046, "num_input_tokens_seen": 164729344, "step": 76255 }, { "epoch": 12.440456769983687, "grad_norm": 0.0018462835578247905, "learning_rate": 0.0003756302368929241, "loss": 0.1103, "num_input_tokens_seen": 164740288, "step": 76260 }, { "epoch": 12.441272430668842, "grad_norm": 0.007640472613275051, "learning_rate": 0.00037556129548305074, "loss": 0.0041, "num_input_tokens_seen": 164751520, "step": 76265 }, { "epoch": 12.442088091353996, "grad_norm": 0.002510238206014037, "learning_rate": 0.0003754923565950855, "loss": 0.0034, "num_input_tokens_seen": 164762336, "step": 76270 }, { "epoch": 12.442903752039152, "grad_norm": 0.0017224326729774475, "learning_rate": 0.0003754234202304255, "loss": 0.013, "num_input_tokens_seen": 164773888, "step": 76275 }, { "epoch": 12.443719412724306, "grad_norm": 0.0037370871286839247, "learning_rate": 0.00037535448639046816, "loss": 0.0064, "num_input_tokens_seen": 164785408, "step": 76280 }, { "epoch": 12.444535073409462, "grad_norm": 0.001963126938790083, "learning_rate": 0.00037528555507661, "loss": 0.0034, "num_input_tokens_seen": 164797376, "step": 76285 }, { "epoch": 12.445350734094617, "grad_norm": 0.0748407244682312, "learning_rate": 0.00037521662629024855, "loss": 0.0147, "num_input_tokens_seen": 164808640, "step": 76290 }, { "epoch": 12.446166394779771, "grad_norm": 0.0008259370806626976, "learning_rate": 0.00037514770003278027, "loss": 0.0489, "num_input_tokens_seen": 164820672, "step": 76295 }, { "epoch": 12.446982055464927, "grad_norm": 0.008207214064896107, "learning_rate": 0.00037507877630560215, "loss": 0.0158, "num_input_tokens_seen": 164830912, "step": 76300 }, { "epoch": 12.447797716150081, "grad_norm": 0.13782595098018646, "learning_rate": 0.00037500985511011145, "loss": 0.0231, "num_input_tokens_seen": 164841216, "step": 76305 }, { "epoch": 12.448613376835237, "grad_norm": 0.06296905130147934, "learning_rate": 0.00037494093644770425, "loss": 0.0214, "num_input_tokens_seen": 164852160, "step": 76310 }, { "epoch": 12.449429037520392, "grad_norm": 0.5310968160629272, "learning_rate": 0.000374872020319778, "loss": 0.0279, "num_input_tokens_seen": 164862848, "step": 76315 }, { "epoch": 12.450244698205546, "grad_norm": 0.009087975136935711, "learning_rate": 0.0003748031067277286, "loss": 0.034, "num_input_tokens_seen": 164872896, "step": 76320 }, { "epoch": 12.451060358890702, "grad_norm": 0.040168534964323044, "learning_rate": 0.00037473419567295337, "loss": 0.0878, "num_input_tokens_seen": 164882816, "step": 76325 }, { "epoch": 12.451876019575856, "grad_norm": 0.03108314424753189, "learning_rate": 0.0003746652871568483, "loss": 0.0094, "num_input_tokens_seen": 164892800, "step": 76330 }, { "epoch": 12.452691680261012, "grad_norm": 0.01985093019902706, "learning_rate": 0.0003745963811808105, "loss": 0.02, "num_input_tokens_seen": 164903648, "step": 76335 }, { "epoch": 12.453507340946166, "grad_norm": 0.019592365249991417, "learning_rate": 0.00037452747774623584, "loss": 0.0049, "num_input_tokens_seen": 164914048, "step": 76340 }, { "epoch": 12.454323001631321, "grad_norm": 0.015187375247478485, "learning_rate": 0.0003744585768545212, "loss": 0.1063, "num_input_tokens_seen": 164924928, "step": 76345 }, { "epoch": 12.455138662316477, "grad_norm": 0.009520821273326874, "learning_rate": 0.00037438967850706264, "loss": 0.0033, "num_input_tokens_seen": 164935680, "step": 76350 }, { "epoch": 12.455954323001631, "grad_norm": 0.12158270925283432, "learning_rate": 0.0003743207827052567, "loss": 0.0308, "num_input_tokens_seen": 164946944, "step": 76355 }, { "epoch": 12.456769983686787, "grad_norm": 0.003837285563349724, "learning_rate": 0.0003742518894504994, "loss": 0.0466, "num_input_tokens_seen": 164958336, "step": 76360 }, { "epoch": 12.45758564437194, "grad_norm": 0.06163405254483223, "learning_rate": 0.00037418299874418726, "loss": 0.0038, "num_input_tokens_seen": 164971104, "step": 76365 }, { "epoch": 12.458401305057096, "grad_norm": 0.003957700449973345, "learning_rate": 0.00037411411058771606, "loss": 0.0231, "num_input_tokens_seen": 164982208, "step": 76370 }, { "epoch": 12.459216965742252, "grad_norm": 0.019950520247220993, "learning_rate": 0.00037404522498248234, "loss": 0.0125, "num_input_tokens_seen": 164994144, "step": 76375 }, { "epoch": 12.460032626427406, "grad_norm": 0.014288338832557201, "learning_rate": 0.0003739763419298817, "loss": 0.0124, "num_input_tokens_seen": 165004864, "step": 76380 }, { "epoch": 12.460848287112562, "grad_norm": 0.012740354984998703, "learning_rate": 0.0003739074614313105, "loss": 0.0069, "num_input_tokens_seen": 165016544, "step": 76385 }, { "epoch": 12.461663947797716, "grad_norm": 0.0955539122223854, "learning_rate": 0.00037383858348816445, "loss": 0.0247, "num_input_tokens_seen": 165027232, "step": 76390 }, { "epoch": 12.462479608482871, "grad_norm": 0.5261601209640503, "learning_rate": 0.0003737697081018396, "loss": 0.2179, "num_input_tokens_seen": 165038272, "step": 76395 }, { "epoch": 12.463295269168025, "grad_norm": 0.3856097459793091, "learning_rate": 0.0003737008352737318, "loss": 0.0523, "num_input_tokens_seen": 165047968, "step": 76400 }, { "epoch": 12.464110929853181, "grad_norm": 0.0045360904186964035, "learning_rate": 0.0003736319650052366, "loss": 0.0119, "num_input_tokens_seen": 165059872, "step": 76405 }, { "epoch": 12.464926590538337, "grad_norm": 0.004846095573157072, "learning_rate": 0.0003735630972977502, "loss": 0.0064, "num_input_tokens_seen": 165070912, "step": 76410 }, { "epoch": 12.46574225122349, "grad_norm": 0.0615304559469223, "learning_rate": 0.00037349423215266784, "loss": 0.0149, "num_input_tokens_seen": 165082208, "step": 76415 }, { "epoch": 12.466557911908646, "grad_norm": 0.41746899485588074, "learning_rate": 0.0003734253695713854, "loss": 0.0213, "num_input_tokens_seen": 165092096, "step": 76420 }, { "epoch": 12.4673735725938, "grad_norm": 0.03316102921962738, "learning_rate": 0.0003733565095552985, "loss": 0.0053, "num_input_tokens_seen": 165101472, "step": 76425 }, { "epoch": 12.468189233278956, "grad_norm": 0.12689688801765442, "learning_rate": 0.0003732876521058025, "loss": 0.0144, "num_input_tokens_seen": 165111168, "step": 76430 }, { "epoch": 12.469004893964112, "grad_norm": 0.012960254214704037, "learning_rate": 0.000373218797224293, "loss": 0.0599, "num_input_tokens_seen": 165121792, "step": 76435 }, { "epoch": 12.469820554649266, "grad_norm": 0.008622650988399982, "learning_rate": 0.00037314994491216547, "loss": 0.0103, "num_input_tokens_seen": 165132160, "step": 76440 }, { "epoch": 12.470636215334421, "grad_norm": 0.0005445160204544663, "learning_rate": 0.00037308109517081506, "loss": 0.01, "num_input_tokens_seen": 165143872, "step": 76445 }, { "epoch": 12.471451876019575, "grad_norm": 0.0248736385256052, "learning_rate": 0.0003730122480016375, "loss": 0.0042, "num_input_tokens_seen": 165154624, "step": 76450 }, { "epoch": 12.47226753670473, "grad_norm": 0.5134662985801697, "learning_rate": 0.00037294340340602764, "loss": 0.1234, "num_input_tokens_seen": 165164704, "step": 76455 }, { "epoch": 12.473083197389887, "grad_norm": 0.005300603806972504, "learning_rate": 0.0003728745613853811, "loss": 0.0756, "num_input_tokens_seen": 165176192, "step": 76460 }, { "epoch": 12.47389885807504, "grad_norm": 0.09585113823413849, "learning_rate": 0.00037280572194109255, "loss": 0.0272, "num_input_tokens_seen": 165186784, "step": 76465 }, { "epoch": 12.474714518760196, "grad_norm": 0.020219076424837112, "learning_rate": 0.00037273688507455773, "loss": 0.0254, "num_input_tokens_seen": 165198496, "step": 76470 }, { "epoch": 12.47553017944535, "grad_norm": 0.11191736906766891, "learning_rate": 0.00037266805078717106, "loss": 0.0172, "num_input_tokens_seen": 165209216, "step": 76475 }, { "epoch": 12.476345840130506, "grad_norm": 0.009172594174742699, "learning_rate": 0.00037259921908032814, "loss": 0.0428, "num_input_tokens_seen": 165220736, "step": 76480 }, { "epoch": 12.477161500815662, "grad_norm": 0.003471218980848789, "learning_rate": 0.0003725303899554234, "loss": 0.0092, "num_input_tokens_seen": 165231104, "step": 76485 }, { "epoch": 12.477977161500815, "grad_norm": 0.3565479815006256, "learning_rate": 0.00037246156341385234, "loss": 0.0655, "num_input_tokens_seen": 165241536, "step": 76490 }, { "epoch": 12.478792822185971, "grad_norm": 0.003832635236904025, "learning_rate": 0.0003723927394570092, "loss": 0.1338, "num_input_tokens_seen": 165253024, "step": 76495 }, { "epoch": 12.479608482871125, "grad_norm": 0.02578004077076912, "learning_rate": 0.0003723239180862893, "loss": 0.1179, "num_input_tokens_seen": 165263616, "step": 76500 }, { "epoch": 12.48042414355628, "grad_norm": 0.010445257648825645, "learning_rate": 0.00037225509930308696, "loss": 0.0054, "num_input_tokens_seen": 165275168, "step": 76505 }, { "epoch": 12.481239804241435, "grad_norm": 0.02623414248228073, "learning_rate": 0.0003721862831087971, "loss": 0.0221, "num_input_tokens_seen": 165286208, "step": 76510 }, { "epoch": 12.48205546492659, "grad_norm": 0.003427008166909218, "learning_rate": 0.0003721174695048145, "loss": 0.0111, "num_input_tokens_seen": 165296960, "step": 76515 }, { "epoch": 12.482871125611746, "grad_norm": 0.036691464483737946, "learning_rate": 0.0003720486584925335, "loss": 0.0072, "num_input_tokens_seen": 165308608, "step": 76520 }, { "epoch": 12.4836867862969, "grad_norm": 0.02001815289258957, "learning_rate": 0.0003719798500733489, "loss": 0.0421, "num_input_tokens_seen": 165318816, "step": 76525 }, { "epoch": 12.484502446982056, "grad_norm": 0.5764277577400208, "learning_rate": 0.00037191104424865487, "loss": 0.1406, "num_input_tokens_seen": 165330144, "step": 76530 }, { "epoch": 12.48531810766721, "grad_norm": 0.11190090328454971, "learning_rate": 0.0003718422410198462, "loss": 0.0179, "num_input_tokens_seen": 165340800, "step": 76535 }, { "epoch": 12.486133768352365, "grad_norm": 0.013130726292729378, "learning_rate": 0.0003717734403883169, "loss": 0.0164, "num_input_tokens_seen": 165351776, "step": 76540 }, { "epoch": 12.486949429037521, "grad_norm": 1.2218176126480103, "learning_rate": 0.0003717046423554617, "loss": 0.0147, "num_input_tokens_seen": 165363008, "step": 76545 }, { "epoch": 12.487765089722675, "grad_norm": 0.017160970717668533, "learning_rate": 0.0003716358469226745, "loss": 0.0056, "num_input_tokens_seen": 165373888, "step": 76550 }, { "epoch": 12.48858075040783, "grad_norm": 0.06486137211322784, "learning_rate": 0.0003715670540913499, "loss": 0.01, "num_input_tokens_seen": 165384928, "step": 76555 }, { "epoch": 12.489396411092985, "grad_norm": 0.2934146523475647, "learning_rate": 0.0003714982638628817, "loss": 0.1709, "num_input_tokens_seen": 165395616, "step": 76560 }, { "epoch": 12.49021207177814, "grad_norm": 0.0009802387794479728, "learning_rate": 0.00037142947623866417, "loss": 0.0393, "num_input_tokens_seen": 165405568, "step": 76565 }, { "epoch": 12.491027732463296, "grad_norm": 0.28663870692253113, "learning_rate": 0.0003713606912200915, "loss": 0.084, "num_input_tokens_seen": 165417216, "step": 76570 }, { "epoch": 12.49184339314845, "grad_norm": 0.0640694722533226, "learning_rate": 0.00037129190880855764, "loss": 0.0058, "num_input_tokens_seen": 165428608, "step": 76575 }, { "epoch": 12.492659053833606, "grad_norm": 0.0026230604853481054, "learning_rate": 0.00037122312900545644, "loss": 0.0175, "num_input_tokens_seen": 165440576, "step": 76580 }, { "epoch": 12.49347471451876, "grad_norm": 0.0017055338248610497, "learning_rate": 0.000371154351812182, "loss": 0.0408, "num_input_tokens_seen": 165450976, "step": 76585 }, { "epoch": 12.494290375203915, "grad_norm": 0.14844626188278198, "learning_rate": 0.0003710855772301279, "loss": 0.0093, "num_input_tokens_seen": 165463104, "step": 76590 }, { "epoch": 12.49510603588907, "grad_norm": 0.0018314715707674623, "learning_rate": 0.00037101680526068837, "loss": 0.0151, "num_input_tokens_seen": 165473376, "step": 76595 }, { "epoch": 12.495921696574225, "grad_norm": 0.008383872918784618, "learning_rate": 0.0003709480359052566, "loss": 0.0945, "num_input_tokens_seen": 165483840, "step": 76600 }, { "epoch": 12.49673735725938, "grad_norm": 0.00485193869099021, "learning_rate": 0.0003708792691652269, "loss": 0.0051, "num_input_tokens_seen": 165494944, "step": 76605 }, { "epoch": 12.497553017944535, "grad_norm": 0.3109101355075836, "learning_rate": 0.00037081050504199245, "loss": 0.1019, "num_input_tokens_seen": 165505792, "step": 76610 }, { "epoch": 12.49836867862969, "grad_norm": 0.02356518618762493, "learning_rate": 0.0003707417435369469, "loss": 0.0127, "num_input_tokens_seen": 165515808, "step": 76615 }, { "epoch": 12.499184339314844, "grad_norm": 0.00426015630364418, "learning_rate": 0.00037067298465148416, "loss": 0.0076, "num_input_tokens_seen": 165525920, "step": 76620 }, { "epoch": 12.5, "grad_norm": 0.005503225605934858, "learning_rate": 0.00037060422838699716, "loss": 0.018, "num_input_tokens_seen": 165536352, "step": 76625 }, { "epoch": 12.500815660685156, "grad_norm": 0.010103323496878147, "learning_rate": 0.0003705354747448799, "loss": 0.0278, "num_input_tokens_seen": 165546240, "step": 76630 }, { "epoch": 12.50163132137031, "grad_norm": 0.02620244398713112, "learning_rate": 0.00037046672372652523, "loss": 0.0066, "num_input_tokens_seen": 165556096, "step": 76635 }, { "epoch": 12.502446982055465, "grad_norm": 0.02562333457171917, "learning_rate": 0.00037039797533332697, "loss": 0.0064, "num_input_tokens_seen": 165567552, "step": 76640 }, { "epoch": 12.50326264274062, "grad_norm": 0.010979540646076202, "learning_rate": 0.000370329229566678, "loss": 0.0037, "num_input_tokens_seen": 165579008, "step": 76645 }, { "epoch": 12.504078303425775, "grad_norm": 0.08632088452577591, "learning_rate": 0.0003702604864279718, "loss": 0.1055, "num_input_tokens_seen": 165589216, "step": 76650 }, { "epoch": 12.50489396411093, "grad_norm": 0.006022381596267223, "learning_rate": 0.00037019174591860127, "loss": 0.0139, "num_input_tokens_seen": 165601056, "step": 76655 }, { "epoch": 12.505709624796085, "grad_norm": 0.0006824670126661658, "learning_rate": 0.0003701230080399599, "loss": 0.0111, "num_input_tokens_seen": 165610336, "step": 76660 }, { "epoch": 12.50652528548124, "grad_norm": 0.13738100230693817, "learning_rate": 0.00037005427279344027, "loss": 0.0101, "num_input_tokens_seen": 165620800, "step": 76665 }, { "epoch": 12.507340946166394, "grad_norm": 0.0023835247848182917, "learning_rate": 0.0003699855401804359, "loss": 0.0068, "num_input_tokens_seen": 165631488, "step": 76670 }, { "epoch": 12.50815660685155, "grad_norm": 0.013245565816760063, "learning_rate": 0.0003699168102023393, "loss": 0.0028, "num_input_tokens_seen": 165641760, "step": 76675 }, { "epoch": 12.508972267536706, "grad_norm": 0.00538500864058733, "learning_rate": 0.0003698480828605437, "loss": 0.0095, "num_input_tokens_seen": 165652256, "step": 76680 }, { "epoch": 12.50978792822186, "grad_norm": 0.02130158618092537, "learning_rate": 0.0003697793581564417, "loss": 0.0321, "num_input_tokens_seen": 165663712, "step": 76685 }, { "epoch": 12.510603588907015, "grad_norm": 0.010575438849627972, "learning_rate": 0.00036971063609142637, "loss": 0.0047, "num_input_tokens_seen": 165674432, "step": 76690 }, { "epoch": 12.51141924959217, "grad_norm": 0.007054158952087164, "learning_rate": 0.00036964191666689005, "loss": 0.0054, "num_input_tokens_seen": 165685280, "step": 76695 }, { "epoch": 12.512234910277325, "grad_norm": 0.008702908642590046, "learning_rate": 0.00036957319988422586, "loss": 0.1665, "num_input_tokens_seen": 165694368, "step": 76700 }, { "epoch": 12.513050570962479, "grad_norm": 0.008559376932680607, "learning_rate": 0.0003695044857448261, "loss": 0.0048, "num_input_tokens_seen": 165704768, "step": 76705 }, { "epoch": 12.513866231647635, "grad_norm": 0.006825583055615425, "learning_rate": 0.0003694357742500835, "loss": 0.0079, "num_input_tokens_seen": 165715296, "step": 76710 }, { "epoch": 12.51468189233279, "grad_norm": 0.41185325384140015, "learning_rate": 0.00036936706540139063, "loss": 0.1374, "num_input_tokens_seen": 165725376, "step": 76715 }, { "epoch": 12.515497553017944, "grad_norm": 0.02468119002878666, "learning_rate": 0.0003692983592001398, "loss": 0.0171, "num_input_tokens_seen": 165735648, "step": 76720 }, { "epoch": 12.5163132137031, "grad_norm": 0.10309385508298874, "learning_rate": 0.0003692296556477237, "loss": 0.0533, "num_input_tokens_seen": 165746464, "step": 76725 }, { "epoch": 12.517128874388254, "grad_norm": 0.5680201053619385, "learning_rate": 0.0003691609547455343, "loss": 0.0943, "num_input_tokens_seen": 165757216, "step": 76730 }, { "epoch": 12.51794453507341, "grad_norm": 0.007687503471970558, "learning_rate": 0.0003690922564949643, "loss": 0.0071, "num_input_tokens_seen": 165769440, "step": 76735 }, { "epoch": 12.518760195758565, "grad_norm": 0.765103816986084, "learning_rate": 0.0003690235608974057, "loss": 0.1081, "num_input_tokens_seen": 165781568, "step": 76740 }, { "epoch": 12.51957585644372, "grad_norm": 0.17138712108135223, "learning_rate": 0.0003689548679542508, "loss": 0.0124, "num_input_tokens_seen": 165790880, "step": 76745 }, { "epoch": 12.520391517128875, "grad_norm": 0.007275717798620462, "learning_rate": 0.0003688861776668918, "loss": 0.0085, "num_input_tokens_seen": 165802336, "step": 76750 }, { "epoch": 12.521207177814029, "grad_norm": 0.004311291500926018, "learning_rate": 0.0003688174900367207, "loss": 0.0608, "num_input_tokens_seen": 165812512, "step": 76755 }, { "epoch": 12.522022838499185, "grad_norm": 0.02970127761363983, "learning_rate": 0.00036874880506512954, "loss": 0.0069, "num_input_tokens_seen": 165823136, "step": 76760 }, { "epoch": 12.522838499184338, "grad_norm": 0.016825757920742035, "learning_rate": 0.0003686801227535105, "loss": 0.0056, "num_input_tokens_seen": 165833568, "step": 76765 }, { "epoch": 12.523654159869494, "grad_norm": 0.0012513466645032167, "learning_rate": 0.00036861144310325523, "loss": 0.0039, "num_input_tokens_seen": 165843136, "step": 76770 }, { "epoch": 12.52446982055465, "grad_norm": 0.47699278593063354, "learning_rate": 0.0003685427661157559, "loss": 0.0367, "num_input_tokens_seen": 165853920, "step": 76775 }, { "epoch": 12.525285481239804, "grad_norm": 0.0014302321942523122, "learning_rate": 0.00036847409179240396, "loss": 0.0086, "num_input_tokens_seen": 165865216, "step": 76780 }, { "epoch": 12.52610114192496, "grad_norm": 0.09856893867254257, "learning_rate": 0.00036840542013459154, "loss": 0.0106, "num_input_tokens_seen": 165875712, "step": 76785 }, { "epoch": 12.526916802610113, "grad_norm": 0.03922778740525246, "learning_rate": 0.00036833675114371014, "loss": 0.0032, "num_input_tokens_seen": 165887072, "step": 76790 }, { "epoch": 12.52773246329527, "grad_norm": 0.74526447057724, "learning_rate": 0.00036826808482115167, "loss": 0.0451, "num_input_tokens_seen": 165897792, "step": 76795 }, { "epoch": 12.528548123980425, "grad_norm": 0.020173760131001472, "learning_rate": 0.00036819942116830736, "loss": 0.1448, "num_input_tokens_seen": 165908160, "step": 76800 }, { "epoch": 12.529363784665579, "grad_norm": 0.6142863631248474, "learning_rate": 0.0003681307601865692, "loss": 0.2382, "num_input_tokens_seen": 165919008, "step": 76805 }, { "epoch": 12.530179445350734, "grad_norm": 0.02296280302107334, "learning_rate": 0.00036806210187732824, "loss": 0.0068, "num_input_tokens_seen": 165930880, "step": 76810 }, { "epoch": 12.530995106035888, "grad_norm": 0.01190591137856245, "learning_rate": 0.00036799344624197637, "loss": 0.0398, "num_input_tokens_seen": 165942432, "step": 76815 }, { "epoch": 12.531810766721044, "grad_norm": 0.004319501109421253, "learning_rate": 0.00036792479328190457, "loss": 0.0023, "num_input_tokens_seen": 165954304, "step": 76820 }, { "epoch": 12.5326264274062, "grad_norm": 0.05026920139789581, "learning_rate": 0.0003678561429985044, "loss": 0.0073, "num_input_tokens_seen": 165965024, "step": 76825 }, { "epoch": 12.533442088091354, "grad_norm": 0.0011591935763135552, "learning_rate": 0.00036778749539316736, "loss": 0.0626, "num_input_tokens_seen": 165976096, "step": 76830 }, { "epoch": 12.53425774877651, "grad_norm": 0.006536852102726698, "learning_rate": 0.00036771885046728417, "loss": 0.0171, "num_input_tokens_seen": 165987232, "step": 76835 }, { "epoch": 12.535073409461663, "grad_norm": 0.04312235489487648, "learning_rate": 0.00036765020822224654, "loss": 0.0191, "num_input_tokens_seen": 165996800, "step": 76840 }, { "epoch": 12.535889070146819, "grad_norm": 0.01564362645149231, "learning_rate": 0.0003675815686594451, "loss": 0.0077, "num_input_tokens_seen": 166007424, "step": 76845 }, { "epoch": 12.536704730831975, "grad_norm": 0.02771487832069397, "learning_rate": 0.00036751293178027144, "loss": 0.0185, "num_input_tokens_seen": 166018144, "step": 76850 }, { "epoch": 12.537520391517129, "grad_norm": 0.015305979177355766, "learning_rate": 0.000367444297586116, "loss": 0.003, "num_input_tokens_seen": 166028992, "step": 76855 }, { "epoch": 12.538336052202284, "grad_norm": 0.4608621299266815, "learning_rate": 0.0003673756660783703, "loss": 0.1386, "num_input_tokens_seen": 166039968, "step": 76860 }, { "epoch": 12.539151712887438, "grad_norm": 0.016641966998577118, "learning_rate": 0.00036730703725842474, "loss": 0.0034, "num_input_tokens_seen": 166051552, "step": 76865 }, { "epoch": 12.539967373572594, "grad_norm": 0.004178878851234913, "learning_rate": 0.0003672384111276705, "loss": 0.0035, "num_input_tokens_seen": 166062144, "step": 76870 }, { "epoch": 12.540783034257748, "grad_norm": 0.004638664424419403, "learning_rate": 0.0003671697876874982, "loss": 0.1137, "num_input_tokens_seen": 166073632, "step": 76875 }, { "epoch": 12.541598694942904, "grad_norm": 0.03444049507379532, "learning_rate": 0.00036710116693929875, "loss": 0.0083, "num_input_tokens_seen": 166083552, "step": 76880 }, { "epoch": 12.54241435562806, "grad_norm": 0.03458646312355995, "learning_rate": 0.0003670325488844627, "loss": 0.0075, "num_input_tokens_seen": 166093376, "step": 76885 }, { "epoch": 12.543230016313213, "grad_norm": 0.008939512073993683, "learning_rate": 0.00036696393352438083, "loss": 0.0846, "num_input_tokens_seen": 166104800, "step": 76890 }, { "epoch": 12.544045676998369, "grad_norm": 0.030445056036114693, "learning_rate": 0.0003668953208604435, "loss": 0.0431, "num_input_tokens_seen": 166115968, "step": 76895 }, { "epoch": 12.544861337683523, "grad_norm": 0.015219416469335556, "learning_rate": 0.0003668267108940414, "loss": 0.0087, "num_input_tokens_seen": 166125152, "step": 76900 }, { "epoch": 12.545676998368679, "grad_norm": 0.05321419611573219, "learning_rate": 0.00036675810362656486, "loss": 0.0073, "num_input_tokens_seen": 166136192, "step": 76905 }, { "epoch": 12.546492659053834, "grad_norm": 0.008124981075525284, "learning_rate": 0.00036668949905940455, "loss": 0.0045, "num_input_tokens_seen": 166144512, "step": 76910 }, { "epoch": 12.547308319738988, "grad_norm": 0.03787637874484062, "learning_rate": 0.0003666208971939505, "loss": 0.1269, "num_input_tokens_seen": 166155168, "step": 76915 }, { "epoch": 12.548123980424144, "grad_norm": 0.09556692838668823, "learning_rate": 0.0003665522980315933, "loss": 0.0059, "num_input_tokens_seen": 166164864, "step": 76920 }, { "epoch": 12.548939641109298, "grad_norm": 0.008689331822097301, "learning_rate": 0.0003664837015737229, "loss": 0.0278, "num_input_tokens_seen": 166174272, "step": 76925 }, { "epoch": 12.549755301794454, "grad_norm": 0.01637548767030239, "learning_rate": 0.00036641510782172993, "loss": 0.0044, "num_input_tokens_seen": 166185536, "step": 76930 }, { "epoch": 12.550570962479608, "grad_norm": 0.011396531015634537, "learning_rate": 0.0003663465167770039, "loss": 0.1109, "num_input_tokens_seen": 166194944, "step": 76935 }, { "epoch": 12.551386623164763, "grad_norm": 0.005017245654016733, "learning_rate": 0.00036627792844093544, "loss": 0.0065, "num_input_tokens_seen": 166207168, "step": 76940 }, { "epoch": 12.552202283849919, "grad_norm": 0.0026462902314960957, "learning_rate": 0.0003662093428149145, "loss": 0.0174, "num_input_tokens_seen": 166217792, "step": 76945 }, { "epoch": 12.553017944535073, "grad_norm": 0.048384591937065125, "learning_rate": 0.0003661407599003308, "loss": 0.0765, "num_input_tokens_seen": 166227616, "step": 76950 }, { "epoch": 12.553833605220229, "grad_norm": 0.011623039841651917, "learning_rate": 0.0003660721796985746, "loss": 0.0046, "num_input_tokens_seen": 166237504, "step": 76955 }, { "epoch": 12.554649265905383, "grad_norm": 0.0399308018386364, "learning_rate": 0.0003660036022110353, "loss": 0.0044, "num_input_tokens_seen": 166248480, "step": 76960 }, { "epoch": 12.555464926590538, "grad_norm": 0.09246581792831421, "learning_rate": 0.00036593502743910336, "loss": 0.011, "num_input_tokens_seen": 166259904, "step": 76965 }, { "epoch": 12.556280587275694, "grad_norm": 0.005269172601401806, "learning_rate": 0.00036586645538416783, "loss": 0.0056, "num_input_tokens_seen": 166271872, "step": 76970 }, { "epoch": 12.557096247960848, "grad_norm": 0.015488158911466599, "learning_rate": 0.00036579788604761896, "loss": 0.0139, "num_input_tokens_seen": 166283136, "step": 76975 }, { "epoch": 12.557911908646004, "grad_norm": 0.6963528394699097, "learning_rate": 0.000365729319430846, "loss": 0.033, "num_input_tokens_seen": 166293632, "step": 76980 }, { "epoch": 12.558727569331158, "grad_norm": 0.051269035786390305, "learning_rate": 0.00036566075553523894, "loss": 0.0782, "num_input_tokens_seen": 166304992, "step": 76985 }, { "epoch": 12.559543230016313, "grad_norm": 0.01623843051493168, "learning_rate": 0.0003655921943621868, "loss": 0.0256, "num_input_tokens_seen": 166314880, "step": 76990 }, { "epoch": 12.560358890701469, "grad_norm": 0.003099554916843772, "learning_rate": 0.0003655236359130796, "loss": 0.1376, "num_input_tokens_seen": 166325536, "step": 76995 }, { "epoch": 12.561174551386623, "grad_norm": 0.27658185362815857, "learning_rate": 0.0003654550801893063, "loss": 0.015, "num_input_tokens_seen": 166336096, "step": 77000 }, { "epoch": 12.561990212071779, "grad_norm": 0.012075589969754219, "learning_rate": 0.00036538652719225674, "loss": 0.0092, "num_input_tokens_seen": 166345984, "step": 77005 }, { "epoch": 12.562805872756933, "grad_norm": 0.016627484932541847, "learning_rate": 0.0003653179769233197, "loss": 0.0353, "num_input_tokens_seen": 166357152, "step": 77010 }, { "epoch": 12.563621533442088, "grad_norm": 0.009096194058656693, "learning_rate": 0.00036524942938388495, "loss": 0.0075, "num_input_tokens_seen": 166368736, "step": 77015 }, { "epoch": 12.564437194127244, "grad_norm": 0.0031699403189122677, "learning_rate": 0.00036518088457534125, "loss": 0.0148, "num_input_tokens_seen": 166379264, "step": 77020 }, { "epoch": 12.565252854812398, "grad_norm": 0.020030856132507324, "learning_rate": 0.0003651123424990781, "loss": 0.0806, "num_input_tokens_seen": 166391072, "step": 77025 }, { "epoch": 12.566068515497554, "grad_norm": 0.022214405238628387, "learning_rate": 0.00036504380315648447, "loss": 0.0081, "num_input_tokens_seen": 166402304, "step": 77030 }, { "epoch": 12.566884176182707, "grad_norm": 0.07444703578948975, "learning_rate": 0.0003649752665489492, "loss": 0.0088, "num_input_tokens_seen": 166412736, "step": 77035 }, { "epoch": 12.567699836867863, "grad_norm": 0.013522377237677574, "learning_rate": 0.00036490673267786154, "loss": 0.007, "num_input_tokens_seen": 166423936, "step": 77040 }, { "epoch": 12.568515497553017, "grad_norm": 0.00476167444139719, "learning_rate": 0.0003648382015446103, "loss": 0.0456, "num_input_tokens_seen": 166433472, "step": 77045 }, { "epoch": 12.569331158238173, "grad_norm": 0.009685852564871311, "learning_rate": 0.0003647696731505844, "loss": 0.0379, "num_input_tokens_seen": 166443328, "step": 77050 }, { "epoch": 12.570146818923329, "grad_norm": 0.009335564449429512, "learning_rate": 0.00036470114749717267, "loss": 0.0074, "num_input_tokens_seen": 166453792, "step": 77055 }, { "epoch": 12.570962479608482, "grad_norm": 0.022270025685429573, "learning_rate": 0.00036463262458576374, "loss": 0.014, "num_input_tokens_seen": 166464896, "step": 77060 }, { "epoch": 12.571778140293638, "grad_norm": 0.24031361937522888, "learning_rate": 0.0003645641044177465, "loss": 0.0119, "num_input_tokens_seen": 166475904, "step": 77065 }, { "epoch": 12.572593800978792, "grad_norm": 0.0068599446676671505, "learning_rate": 0.00036449558699450937, "loss": 0.0112, "num_input_tokens_seen": 166486880, "step": 77070 }, { "epoch": 12.573409461663948, "grad_norm": 0.5030407309532166, "learning_rate": 0.0003644270723174411, "loss": 0.1388, "num_input_tokens_seen": 166498944, "step": 77075 }, { "epoch": 12.574225122349104, "grad_norm": 0.5042504668235779, "learning_rate": 0.0003643585603879303, "loss": 0.137, "num_input_tokens_seen": 166510112, "step": 77080 }, { "epoch": 12.575040783034257, "grad_norm": 0.020460493862628937, "learning_rate": 0.0003642900512073652, "loss": 0.0093, "num_input_tokens_seen": 166520736, "step": 77085 }, { "epoch": 12.575856443719413, "grad_norm": 0.12157510221004486, "learning_rate": 0.00036422154477713456, "loss": 0.0101, "num_input_tokens_seen": 166532192, "step": 77090 }, { "epoch": 12.576672104404567, "grad_norm": 0.009250016883015633, "learning_rate": 0.00036415304109862633, "loss": 0.0459, "num_input_tokens_seen": 166542688, "step": 77095 }, { "epoch": 12.577487765089723, "grad_norm": 0.004898452199995518, "learning_rate": 0.0003640845401732293, "loss": 0.016, "num_input_tokens_seen": 166553824, "step": 77100 }, { "epoch": 12.578303425774878, "grad_norm": 0.08203618228435516, "learning_rate": 0.0003640160420023313, "loss": 0.0757, "num_input_tokens_seen": 166564512, "step": 77105 }, { "epoch": 12.579119086460032, "grad_norm": 0.015118278563022614, "learning_rate": 0.00036394754658732086, "loss": 0.0056, "num_input_tokens_seen": 166575616, "step": 77110 }, { "epoch": 12.579934747145188, "grad_norm": 0.010293328203260899, "learning_rate": 0.00036387905392958574, "loss": 0.0167, "num_input_tokens_seen": 166586848, "step": 77115 }, { "epoch": 12.580750407830342, "grad_norm": 0.03571222350001335, "learning_rate": 0.0003638105640305146, "loss": 0.0101, "num_input_tokens_seen": 166597568, "step": 77120 }, { "epoch": 12.581566068515498, "grad_norm": 0.011700263246893883, "learning_rate": 0.00036374207689149487, "loss": 0.0066, "num_input_tokens_seen": 166608448, "step": 77125 }, { "epoch": 12.582381729200652, "grad_norm": 0.1275133490562439, "learning_rate": 0.00036367359251391506, "loss": 0.0099, "num_input_tokens_seen": 166618912, "step": 77130 }, { "epoch": 12.583197389885807, "grad_norm": 0.10448265820741653, "learning_rate": 0.0003636051108991626, "loss": 0.0071, "num_input_tokens_seen": 166628960, "step": 77135 }, { "epoch": 12.584013050570963, "grad_norm": 0.5431002378463745, "learning_rate": 0.0003635366320486258, "loss": 0.0921, "num_input_tokens_seen": 166638496, "step": 77140 }, { "epoch": 12.584828711256117, "grad_norm": 0.0023790623527020216, "learning_rate": 0.0003634681559636921, "loss": 0.0262, "num_input_tokens_seen": 166650464, "step": 77145 }, { "epoch": 12.585644371941273, "grad_norm": 0.04713069275021553, "learning_rate": 0.0003633996826457494, "loss": 0.0044, "num_input_tokens_seen": 166660384, "step": 77150 }, { "epoch": 12.586460032626427, "grad_norm": 0.14566659927368164, "learning_rate": 0.0003633312120961856, "loss": 0.0214, "num_input_tokens_seen": 166670848, "step": 77155 }, { "epoch": 12.587275693311582, "grad_norm": 0.005130293779075146, "learning_rate": 0.000363262744316388, "loss": 0.014, "num_input_tokens_seen": 166681216, "step": 77160 }, { "epoch": 12.588091353996738, "grad_norm": 0.006175138521939516, "learning_rate": 0.00036319427930774453, "loss": 0.042, "num_input_tokens_seen": 166691680, "step": 77165 }, { "epoch": 12.588907014681892, "grad_norm": 0.004177759867161512, "learning_rate": 0.0003631258170716423, "loss": 0.0052, "num_input_tokens_seen": 166701024, "step": 77170 }, { "epoch": 12.589722675367048, "grad_norm": 0.0710231363773346, "learning_rate": 0.0003630573576094693, "loss": 0.0449, "num_input_tokens_seen": 166711296, "step": 77175 }, { "epoch": 12.590538336052202, "grad_norm": 0.1315511018037796, "learning_rate": 0.0003629889009226124, "loss": 0.0168, "num_input_tokens_seen": 166722368, "step": 77180 }, { "epoch": 12.591353996737357, "grad_norm": 0.6337395310401917, "learning_rate": 0.0003629204470124595, "loss": 0.0962, "num_input_tokens_seen": 166733696, "step": 77185 }, { "epoch": 12.592169657422513, "grad_norm": 0.0053985025733709335, "learning_rate": 0.00036285199588039743, "loss": 0.0026, "num_input_tokens_seen": 166743456, "step": 77190 }, { "epoch": 12.592985318107667, "grad_norm": 0.011733738705515862, "learning_rate": 0.0003627835475278137, "loss": 0.0389, "num_input_tokens_seen": 166754528, "step": 77195 }, { "epoch": 12.593800978792823, "grad_norm": 0.438748836517334, "learning_rate": 0.0003627151019560955, "loss": 0.0514, "num_input_tokens_seen": 166766912, "step": 77200 }, { "epoch": 12.594616639477977, "grad_norm": 0.4337851405143738, "learning_rate": 0.00036264665916662986, "loss": 0.0634, "num_input_tokens_seen": 166778464, "step": 77205 }, { "epoch": 12.595432300163132, "grad_norm": 0.15691335499286652, "learning_rate": 0.000362578219160804, "loss": 0.0409, "num_input_tokens_seen": 166789760, "step": 77210 }, { "epoch": 12.596247960848288, "grad_norm": 0.0065701864659786224, "learning_rate": 0.0003625097819400048, "loss": 0.0162, "num_input_tokens_seen": 166799648, "step": 77215 }, { "epoch": 12.597063621533442, "grad_norm": 0.010911048389971256, "learning_rate": 0.0003624413475056192, "loss": 0.0109, "num_input_tokens_seen": 166810304, "step": 77220 }, { "epoch": 12.597879282218598, "grad_norm": 0.011135376058518887, "learning_rate": 0.00036237291585903436, "loss": 0.0046, "num_input_tokens_seen": 166820608, "step": 77225 }, { "epoch": 12.598694942903752, "grad_norm": 0.019748615100979805, "learning_rate": 0.0003623044870016368, "loss": 0.0054, "num_input_tokens_seen": 166831616, "step": 77230 }, { "epoch": 12.599510603588907, "grad_norm": 0.009777350351214409, "learning_rate": 0.0003622360609348138, "loss": 0.0934, "num_input_tokens_seen": 166842240, "step": 77235 }, { "epoch": 12.600326264274061, "grad_norm": 0.0011331519344821572, "learning_rate": 0.0003621676376599514, "loss": 0.0041, "num_input_tokens_seen": 166853984, "step": 77240 }, { "epoch": 12.601141924959217, "grad_norm": 0.005403982475399971, "learning_rate": 0.00036209921717843697, "loss": 0.0959, "num_input_tokens_seen": 166863712, "step": 77245 }, { "epoch": 12.601957585644373, "grad_norm": 0.11000215262174606, "learning_rate": 0.00036203079949165664, "loss": 0.0111, "num_input_tokens_seen": 166874592, "step": 77250 }, { "epoch": 12.602773246329527, "grad_norm": 0.009174146689474583, "learning_rate": 0.00036196238460099717, "loss": 0.005, "num_input_tokens_seen": 166883136, "step": 77255 }, { "epoch": 12.603588907014682, "grad_norm": 0.02310585230588913, "learning_rate": 0.0003618939725078453, "loss": 0.0731, "num_input_tokens_seen": 166893600, "step": 77260 }, { "epoch": 12.604404567699836, "grad_norm": 0.005659396760165691, "learning_rate": 0.0003618255632135871, "loss": 0.0124, "num_input_tokens_seen": 166906016, "step": 77265 }, { "epoch": 12.605220228384992, "grad_norm": 0.011715354397892952, "learning_rate": 0.00036175715671960934, "loss": 0.0704, "num_input_tokens_seen": 166917216, "step": 77270 }, { "epoch": 12.606035889070148, "grad_norm": 0.003530156798660755, "learning_rate": 0.000361688753027298, "loss": 0.0026, "num_input_tokens_seen": 166927584, "step": 77275 }, { "epoch": 12.606851549755302, "grad_norm": 0.010420121252536774, "learning_rate": 0.0003616203521380397, "loss": 0.005, "num_input_tokens_seen": 166937824, "step": 77280 }, { "epoch": 12.607667210440457, "grad_norm": 0.004818596411496401, "learning_rate": 0.00036155195405322026, "loss": 0.0489, "num_input_tokens_seen": 166948480, "step": 77285 }, { "epoch": 12.608482871125611, "grad_norm": 0.010222864337265491, "learning_rate": 0.0003614835587742264, "loss": 0.0506, "num_input_tokens_seen": 166958848, "step": 77290 }, { "epoch": 12.609298531810767, "grad_norm": 0.5348839163780212, "learning_rate": 0.0003614151663024436, "loss": 0.0773, "num_input_tokens_seen": 166969344, "step": 77295 }, { "epoch": 12.61011419249592, "grad_norm": 0.04770367220044136, "learning_rate": 0.0003613467766392586, "loss": 0.0079, "num_input_tokens_seen": 166980832, "step": 77300 }, { "epoch": 12.610929853181077, "grad_norm": 0.009626589715480804, "learning_rate": 0.00036127838978605687, "loss": 0.0305, "num_input_tokens_seen": 166989920, "step": 77305 }, { "epoch": 12.611745513866232, "grad_norm": 0.021754087880253792, "learning_rate": 0.0003612100057442247, "loss": 0.0289, "num_input_tokens_seen": 167000320, "step": 77310 }, { "epoch": 12.612561174551386, "grad_norm": 0.009066320955753326, "learning_rate": 0.00036114162451514765, "loss": 0.0025, "num_input_tokens_seen": 167011584, "step": 77315 }, { "epoch": 12.613376835236542, "grad_norm": 0.0031363347079604864, "learning_rate": 0.000361073246100212, "loss": 0.0947, "num_input_tokens_seen": 167022784, "step": 77320 }, { "epoch": 12.614192495921696, "grad_norm": 0.10713813453912735, "learning_rate": 0.0003610048705008029, "loss": 0.0119, "num_input_tokens_seen": 167033952, "step": 77325 }, { "epoch": 12.615008156606851, "grad_norm": 0.03944196179509163, "learning_rate": 0.00036093649771830674, "loss": 0.0402, "num_input_tokens_seen": 167044768, "step": 77330 }, { "epoch": 12.615823817292007, "grad_norm": 0.001964397495612502, "learning_rate": 0.0003608681277541086, "loss": 0.0072, "num_input_tokens_seen": 167055776, "step": 77335 }, { "epoch": 12.616639477977161, "grad_norm": 0.11983584612607956, "learning_rate": 0.00036079976060959454, "loss": 0.0115, "num_input_tokens_seen": 167067328, "step": 77340 }, { "epoch": 12.617455138662317, "grad_norm": 0.0034255923237651587, "learning_rate": 0.0003607313962861499, "loss": 0.0671, "num_input_tokens_seen": 167078432, "step": 77345 }, { "epoch": 12.61827079934747, "grad_norm": 0.007270899601280689, "learning_rate": 0.00036066303478516016, "loss": 0.0279, "num_input_tokens_seen": 167089472, "step": 77350 }, { "epoch": 12.619086460032626, "grad_norm": 0.002617582445964217, "learning_rate": 0.0003605946761080108, "loss": 0.0071, "num_input_tokens_seen": 167100768, "step": 77355 }, { "epoch": 12.619902120717782, "grad_norm": 0.4317784905433655, "learning_rate": 0.000360526320256087, "loss": 0.0725, "num_input_tokens_seen": 167112416, "step": 77360 }, { "epoch": 12.620717781402936, "grad_norm": 0.04355085641145706, "learning_rate": 0.0003604579672307744, "loss": 0.0157, "num_input_tokens_seen": 167123296, "step": 77365 }, { "epoch": 12.621533442088092, "grad_norm": 0.05431177094578743, "learning_rate": 0.00036038961703345815, "loss": 0.0818, "num_input_tokens_seen": 167133184, "step": 77370 }, { "epoch": 12.622349102773246, "grad_norm": 0.010911340825259686, "learning_rate": 0.00036032126966552335, "loss": 0.1086, "num_input_tokens_seen": 167143744, "step": 77375 }, { "epoch": 12.623164763458401, "grad_norm": 0.04832053557038307, "learning_rate": 0.0003602529251283553, "loss": 0.0061, "num_input_tokens_seen": 167154688, "step": 77380 }, { "epoch": 12.623980424143557, "grad_norm": 0.005749577656388283, "learning_rate": 0.000360184583423339, "loss": 0.0085, "num_input_tokens_seen": 167166368, "step": 77385 }, { "epoch": 12.624796084828711, "grad_norm": 0.3927803337574005, "learning_rate": 0.0003601162445518593, "loss": 0.0297, "num_input_tokens_seen": 167178080, "step": 77390 }, { "epoch": 12.625611745513867, "grad_norm": 0.0049598063342273235, "learning_rate": 0.0003600479085153017, "loss": 0.0896, "num_input_tokens_seen": 167189216, "step": 77395 }, { "epoch": 12.62642740619902, "grad_norm": 0.042065005749464035, "learning_rate": 0.00035997957531505045, "loss": 0.0098, "num_input_tokens_seen": 167200320, "step": 77400 }, { "epoch": 12.627243066884176, "grad_norm": 0.001665329560637474, "learning_rate": 0.00035991124495249094, "loss": 0.0057, "num_input_tokens_seen": 167210912, "step": 77405 }, { "epoch": 12.62805872756933, "grad_norm": 0.0032141683623194695, "learning_rate": 0.0003598429174290076, "loss": 0.0129, "num_input_tokens_seen": 167223296, "step": 77410 }, { "epoch": 12.628874388254486, "grad_norm": 0.02148437313735485, "learning_rate": 0.0003597745927459856, "loss": 0.0345, "num_input_tokens_seen": 167235232, "step": 77415 }, { "epoch": 12.629690048939642, "grad_norm": 0.08603756129741669, "learning_rate": 0.00035970627090480906, "loss": 0.0382, "num_input_tokens_seen": 167244928, "step": 77420 }, { "epoch": 12.630505709624796, "grad_norm": 0.03180314227938652, "learning_rate": 0.0003596379519068632, "loss": 0.0403, "num_input_tokens_seen": 167254464, "step": 77425 }, { "epoch": 12.631321370309951, "grad_norm": 0.3329426944255829, "learning_rate": 0.000359569635753532, "loss": 0.0766, "num_input_tokens_seen": 167265984, "step": 77430 }, { "epoch": 12.632137030995105, "grad_norm": 0.0028075268492102623, "learning_rate": 0.00035950132244620057, "loss": 0.003, "num_input_tokens_seen": 167275968, "step": 77435 }, { "epoch": 12.632952691680261, "grad_norm": 0.011716500855982304, "learning_rate": 0.0003594330119862529, "loss": 0.0101, "num_input_tokens_seen": 167286016, "step": 77440 }, { "epoch": 12.633768352365417, "grad_norm": 0.40379977226257324, "learning_rate": 0.00035936470437507366, "loss": 0.0451, "num_input_tokens_seen": 167296448, "step": 77445 }, { "epoch": 12.63458401305057, "grad_norm": 0.0061897290870547295, "learning_rate": 0.000359296399614047, "loss": 0.0198, "num_input_tokens_seen": 167306240, "step": 77450 }, { "epoch": 12.635399673735726, "grad_norm": 0.0006506768404506147, "learning_rate": 0.00035922809770455745, "loss": 0.016, "num_input_tokens_seen": 167317088, "step": 77455 }, { "epoch": 12.63621533442088, "grad_norm": 0.5576114058494568, "learning_rate": 0.00035915979864798884, "loss": 0.0658, "num_input_tokens_seen": 167328160, "step": 77460 }, { "epoch": 12.637030995106036, "grad_norm": 0.44090718030929565, "learning_rate": 0.0003590915024457256, "loss": 0.1749, "num_input_tokens_seen": 167338496, "step": 77465 }, { "epoch": 12.63784665579119, "grad_norm": 0.0012273893225938082, "learning_rate": 0.0003590232090991521, "loss": 0.0547, "num_input_tokens_seen": 167348544, "step": 77470 }, { "epoch": 12.638662316476346, "grad_norm": 0.04741557314991951, "learning_rate": 0.0003589549186096518, "loss": 0.0062, "num_input_tokens_seen": 167357568, "step": 77475 }, { "epoch": 12.639477977161501, "grad_norm": 0.025142524391412735, "learning_rate": 0.0003588866309786093, "loss": 0.0201, "num_input_tokens_seen": 167367520, "step": 77480 }, { "epoch": 12.640293637846655, "grad_norm": 1.77473783493042, "learning_rate": 0.00035881834620740796, "loss": 0.1292, "num_input_tokens_seen": 167377952, "step": 77485 }, { "epoch": 12.641109298531811, "grad_norm": 0.009403154253959656, "learning_rate": 0.0003587500642974322, "loss": 0.0039, "num_input_tokens_seen": 167389024, "step": 77490 }, { "epoch": 12.641924959216965, "grad_norm": 0.04819444566965103, "learning_rate": 0.0003586817852500653, "loss": 0.0079, "num_input_tokens_seen": 167399744, "step": 77495 }, { "epoch": 12.64274061990212, "grad_norm": 0.006756368558853865, "learning_rate": 0.00035861350906669156, "loss": 0.003, "num_input_tokens_seen": 167410976, "step": 77500 }, { "epoch": 12.643556280587276, "grad_norm": 0.04680448770523071, "learning_rate": 0.00035854523574869416, "loss": 0.02, "num_input_tokens_seen": 167423072, "step": 77505 }, { "epoch": 12.64437194127243, "grad_norm": 0.0036935280077159405, "learning_rate": 0.00035847696529745714, "loss": 0.0657, "num_input_tokens_seen": 167433056, "step": 77510 }, { "epoch": 12.645187601957586, "grad_norm": 0.04204836115241051, "learning_rate": 0.000358408697714364, "loss": 0.0172, "num_input_tokens_seen": 167444832, "step": 77515 }, { "epoch": 12.64600326264274, "grad_norm": 0.003103738185018301, "learning_rate": 0.0003583404330007981, "loss": 0.0087, "num_input_tokens_seen": 167456032, "step": 77520 }, { "epoch": 12.646818923327896, "grad_norm": 0.31443697214126587, "learning_rate": 0.00035827217115814313, "loss": 0.059, "num_input_tokens_seen": 167465856, "step": 77525 }, { "epoch": 12.647634584013051, "grad_norm": 0.6064665913581848, "learning_rate": 0.0003582039121877824, "loss": 0.046, "num_input_tokens_seen": 167477600, "step": 77530 }, { "epoch": 12.648450244698205, "grad_norm": 0.007075733970850706, "learning_rate": 0.0003581356560910992, "loss": 0.0107, "num_input_tokens_seen": 167488640, "step": 77535 }, { "epoch": 12.649265905383361, "grad_norm": 0.5475460886955261, "learning_rate": 0.00035806740286947704, "loss": 0.039, "num_input_tokens_seen": 167498816, "step": 77540 }, { "epoch": 12.650081566068515, "grad_norm": 0.003381013870239258, "learning_rate": 0.0003579991525242988, "loss": 0.0022, "num_input_tokens_seen": 167510080, "step": 77545 }, { "epoch": 12.65089722675367, "grad_norm": 0.050899162888526917, "learning_rate": 0.0003579309050569481, "loss": 0.0374, "num_input_tokens_seen": 167520576, "step": 77550 }, { "epoch": 12.651712887438826, "grad_norm": 0.02303638495504856, "learning_rate": 0.00035786266046880765, "loss": 0.0506, "num_input_tokens_seen": 167532160, "step": 77555 }, { "epoch": 12.65252854812398, "grad_norm": 0.006254466250538826, "learning_rate": 0.0003577944187612609, "loss": 0.1495, "num_input_tokens_seen": 167542816, "step": 77560 }, { "epoch": 12.653344208809136, "grad_norm": 0.00909637100994587, "learning_rate": 0.0003577261799356905, "loss": 0.0073, "num_input_tokens_seen": 167553184, "step": 77565 }, { "epoch": 12.65415986949429, "grad_norm": 0.002966930391266942, "learning_rate": 0.0003576579439934796, "loss": 0.0031, "num_input_tokens_seen": 167564672, "step": 77570 }, { "epoch": 12.654975530179446, "grad_norm": 0.0037646342534571886, "learning_rate": 0.000357589710936011, "loss": 0.1096, "num_input_tokens_seen": 167576128, "step": 77575 }, { "epoch": 12.655791190864601, "grad_norm": 0.001021684962324798, "learning_rate": 0.0003575214807646675, "loss": 0.2004, "num_input_tokens_seen": 167586752, "step": 77580 }, { "epoch": 12.656606851549755, "grad_norm": 0.008952321484684944, "learning_rate": 0.0003574532534808321, "loss": 0.0051, "num_input_tokens_seen": 167598048, "step": 77585 }, { "epoch": 12.65742251223491, "grad_norm": 0.04184262827038765, "learning_rate": 0.00035738502908588723, "loss": 0.0467, "num_input_tokens_seen": 167608832, "step": 77590 }, { "epoch": 12.658238172920065, "grad_norm": 0.02576330676674843, "learning_rate": 0.0003573168075812158, "loss": 0.0055, "num_input_tokens_seen": 167618880, "step": 77595 }, { "epoch": 12.65905383360522, "grad_norm": 0.04942226782441139, "learning_rate": 0.0003572485889682001, "loss": 0.0174, "num_input_tokens_seen": 167628928, "step": 77600 }, { "epoch": 12.659869494290374, "grad_norm": 0.2924436330795288, "learning_rate": 0.00035718037324822304, "loss": 0.027, "num_input_tokens_seen": 167639296, "step": 77605 }, { "epoch": 12.66068515497553, "grad_norm": 0.01681629940867424, "learning_rate": 0.0003571121604226667, "loss": 0.0147, "num_input_tokens_seen": 167648320, "step": 77610 }, { "epoch": 12.661500815660686, "grad_norm": 0.024903155863285065, "learning_rate": 0.0003570439504929139, "loss": 0.0567, "num_input_tokens_seen": 167658464, "step": 77615 }, { "epoch": 12.66231647634584, "grad_norm": 0.027922337874770164, "learning_rate": 0.00035697574346034655, "loss": 0.0291, "num_input_tokens_seen": 167669696, "step": 77620 }, { "epoch": 12.663132137030995, "grad_norm": 0.010534364730119705, "learning_rate": 0.0003569075393263475, "loss": 0.0069, "num_input_tokens_seen": 167680960, "step": 77625 }, { "epoch": 12.66394779771615, "grad_norm": 0.0009661827934905887, "learning_rate": 0.0003568393380922984, "loss": 0.0019, "num_input_tokens_seen": 167691520, "step": 77630 }, { "epoch": 12.664763458401305, "grad_norm": 0.008585717529058456, "learning_rate": 0.0003567711397595819, "loss": 0.0564, "num_input_tokens_seen": 167703456, "step": 77635 }, { "epoch": 12.66557911908646, "grad_norm": 0.0037884414196014404, "learning_rate": 0.00035670294432957984, "loss": 0.0206, "num_input_tokens_seen": 167713664, "step": 77640 }, { "epoch": 12.666394779771615, "grad_norm": 0.05994252860546112, "learning_rate": 0.00035663475180367453, "loss": 0.0627, "num_input_tokens_seen": 167723456, "step": 77645 }, { "epoch": 12.66721044045677, "grad_norm": 0.9670343399047852, "learning_rate": 0.00035656656218324765, "loss": 0.0979, "num_input_tokens_seen": 167734144, "step": 77650 }, { "epoch": 12.668026101141924, "grad_norm": 0.0011733782012015581, "learning_rate": 0.0003564983754696815, "loss": 0.0062, "num_input_tokens_seen": 167743584, "step": 77655 }, { "epoch": 12.66884176182708, "grad_norm": 0.0013517803745344281, "learning_rate": 0.00035643019166435775, "loss": 0.0083, "num_input_tokens_seen": 167753440, "step": 77660 }, { "epoch": 12.669657422512234, "grad_norm": 0.03769034519791603, "learning_rate": 0.00035636201076865836, "loss": 0.1392, "num_input_tokens_seen": 167765216, "step": 77665 }, { "epoch": 12.67047308319739, "grad_norm": 0.0010428469395264983, "learning_rate": 0.000356293832783965, "loss": 0.0037, "num_input_tokens_seen": 167776256, "step": 77670 }, { "epoch": 12.671288743882545, "grad_norm": 0.193328395485878, "learning_rate": 0.0003562256577116595, "loss": 0.0556, "num_input_tokens_seen": 167786432, "step": 77675 }, { "epoch": 12.6721044045677, "grad_norm": 0.0028232985641807318, "learning_rate": 0.0003561574855531232, "loss": 0.0014, "num_input_tokens_seen": 167796256, "step": 77680 }, { "epoch": 12.672920065252855, "grad_norm": 0.12443973869085312, "learning_rate": 0.00035608931630973814, "loss": 0.0076, "num_input_tokens_seen": 167806752, "step": 77685 }, { "epoch": 12.673735725938009, "grad_norm": 0.07046890258789062, "learning_rate": 0.0003560211499828856, "loss": 0.0945, "num_input_tokens_seen": 167818336, "step": 77690 }, { "epoch": 12.674551386623165, "grad_norm": 0.011465424671769142, "learning_rate": 0.00035595298657394714, "loss": 0.0076, "num_input_tokens_seen": 167829312, "step": 77695 }, { "epoch": 12.67536704730832, "grad_norm": 0.010278213769197464, "learning_rate": 0.0003558848260843041, "loss": 0.0059, "num_input_tokens_seen": 167839744, "step": 77700 }, { "epoch": 12.676182707993474, "grad_norm": 0.007338542491197586, "learning_rate": 0.00035581666851533777, "loss": 0.0155, "num_input_tokens_seen": 167849696, "step": 77705 }, { "epoch": 12.67699836867863, "grad_norm": 0.003327986691147089, "learning_rate": 0.0003557485138684299, "loss": 0.0147, "num_input_tokens_seen": 167860800, "step": 77710 }, { "epoch": 12.677814029363784, "grad_norm": 0.025251364335417747, "learning_rate": 0.00035568036214496103, "loss": 0.0088, "num_input_tokens_seen": 167870560, "step": 77715 }, { "epoch": 12.67862969004894, "grad_norm": 0.34143656492233276, "learning_rate": 0.000355612213346313, "loss": 0.1471, "num_input_tokens_seen": 167881600, "step": 77720 }, { "epoch": 12.679445350734095, "grad_norm": 0.03525270149111748, "learning_rate": 0.00035554406747386635, "loss": 0.0164, "num_input_tokens_seen": 167892064, "step": 77725 }, { "epoch": 12.68026101141925, "grad_norm": 0.05492442101240158, "learning_rate": 0.0003554759245290027, "loss": 0.0066, "num_input_tokens_seen": 167902848, "step": 77730 }, { "epoch": 12.681076672104405, "grad_norm": 0.3540668785572052, "learning_rate": 0.0003554077845131025, "loss": 0.0098, "num_input_tokens_seen": 167914272, "step": 77735 }, { "epoch": 12.681892332789559, "grad_norm": 0.044319842010736465, "learning_rate": 0.0003553396474275473, "loss": 0.0057, "num_input_tokens_seen": 167925696, "step": 77740 }, { "epoch": 12.682707993474715, "grad_norm": 0.052253205329179764, "learning_rate": 0.00035527151327371736, "loss": 0.0575, "num_input_tokens_seen": 167936160, "step": 77745 }, { "epoch": 12.68352365415987, "grad_norm": 0.005088796839118004, "learning_rate": 0.00035520338205299407, "loss": 0.0105, "num_input_tokens_seen": 167946112, "step": 77750 }, { "epoch": 12.684339314845024, "grad_norm": 0.024718090891838074, "learning_rate": 0.0003551352537667577, "loss": 0.0045, "num_input_tokens_seen": 167955328, "step": 77755 }, { "epoch": 12.68515497553018, "grad_norm": 0.03583148866891861, "learning_rate": 0.0003550671284163894, "loss": 0.0063, "num_input_tokens_seen": 167966688, "step": 77760 }, { "epoch": 12.685970636215334, "grad_norm": 0.02463572286069393, "learning_rate": 0.00035499900600326933, "loss": 0.0075, "num_input_tokens_seen": 167977984, "step": 77765 }, { "epoch": 12.68678629690049, "grad_norm": 0.006799501832574606, "learning_rate": 0.00035493088652877866, "loss": 0.0064, "num_input_tokens_seen": 167987680, "step": 77770 }, { "epoch": 12.687601957585644, "grad_norm": 0.022389927878975868, "learning_rate": 0.00035486276999429733, "loss": 0.0193, "num_input_tokens_seen": 167997984, "step": 77775 }, { "epoch": 12.6884176182708, "grad_norm": 0.0026521605905145407, "learning_rate": 0.00035479465640120636, "loss": 0.0131, "num_input_tokens_seen": 168007648, "step": 77780 }, { "epoch": 12.689233278955955, "grad_norm": 0.008616507053375244, "learning_rate": 0.0003547265457508856, "loss": 0.0114, "num_input_tokens_seen": 168019168, "step": 77785 }, { "epoch": 12.690048939641109, "grad_norm": 0.003639386035501957, "learning_rate": 0.0003546584380447157, "loss": 0.0025, "num_input_tokens_seen": 168030816, "step": 77790 }, { "epoch": 12.690864600326265, "grad_norm": 0.051106810569763184, "learning_rate": 0.0003545903332840772, "loss": 0.0131, "num_input_tokens_seen": 168041952, "step": 77795 }, { "epoch": 12.691680261011419, "grad_norm": 0.06359019875526428, "learning_rate": 0.0003545222314703498, "loss": 0.0036, "num_input_tokens_seen": 168052544, "step": 77800 }, { "epoch": 12.692495921696574, "grad_norm": 0.016655128449201584, "learning_rate": 0.0003544541326049141, "loss": 0.1497, "num_input_tokens_seen": 168063648, "step": 77805 }, { "epoch": 12.69331158238173, "grad_norm": 0.17468668520450592, "learning_rate": 0.0003543860366891499, "loss": 0.0094, "num_input_tokens_seen": 168075520, "step": 77810 }, { "epoch": 12.694127243066884, "grad_norm": 0.0019930857233703136, "learning_rate": 0.0003543179437244376, "loss": 0.0045, "num_input_tokens_seen": 168085888, "step": 77815 }, { "epoch": 12.69494290375204, "grad_norm": 0.007549921050667763, "learning_rate": 0.0003542498537121567, "loss": 0.0125, "num_input_tokens_seen": 168096064, "step": 77820 }, { "epoch": 12.695758564437194, "grad_norm": 0.17789390683174133, "learning_rate": 0.0003541817666536876, "loss": 0.0103, "num_input_tokens_seen": 168106624, "step": 77825 }, { "epoch": 12.69657422512235, "grad_norm": 0.029698913916945457, "learning_rate": 0.00035411368255040994, "loss": 0.1498, "num_input_tokens_seen": 168120064, "step": 77830 }, { "epoch": 12.697389885807503, "grad_norm": 0.19213663041591644, "learning_rate": 0.0003540456014037036, "loss": 0.0545, "num_input_tokens_seen": 168130624, "step": 77835 }, { "epoch": 12.698205546492659, "grad_norm": 0.0012613199651241302, "learning_rate": 0.00035397752321494826, "loss": 0.0064, "num_input_tokens_seen": 168141344, "step": 77840 }, { "epoch": 12.699021207177815, "grad_norm": 0.0015342944534495473, "learning_rate": 0.0003539094479855237, "loss": 0.0181, "num_input_tokens_seen": 168152192, "step": 77845 }, { "epoch": 12.699836867862969, "grad_norm": 0.4590141475200653, "learning_rate": 0.00035384137571680936, "loss": 0.2501, "num_input_tokens_seen": 168162496, "step": 77850 }, { "epoch": 12.700652528548124, "grad_norm": 0.009294010698795319, "learning_rate": 0.0003537733064101852, "loss": 0.0372, "num_input_tokens_seen": 168173984, "step": 77855 }, { "epoch": 12.701468189233278, "grad_norm": 0.008999227546155453, "learning_rate": 0.0003537052400670303, "loss": 0.1625, "num_input_tokens_seen": 168183328, "step": 77860 }, { "epoch": 12.702283849918434, "grad_norm": 0.28203514218330383, "learning_rate": 0.00035363717668872443, "loss": 0.0765, "num_input_tokens_seen": 168195680, "step": 77865 }, { "epoch": 12.70309951060359, "grad_norm": 0.0068134767934679985, "learning_rate": 0.00035356911627664665, "loss": 0.0082, "num_input_tokens_seen": 168206304, "step": 77870 }, { "epoch": 12.703915171288743, "grad_norm": 0.01844414509832859, "learning_rate": 0.00035350105883217675, "loss": 0.0139, "num_input_tokens_seen": 168217248, "step": 77875 }, { "epoch": 12.7047308319739, "grad_norm": 0.0019640587270259857, "learning_rate": 0.00035343300435669356, "loss": 0.0108, "num_input_tokens_seen": 168227744, "step": 77880 }, { "epoch": 12.705546492659053, "grad_norm": 0.3847804069519043, "learning_rate": 0.0003533649528515766, "loss": 0.0297, "num_input_tokens_seen": 168238304, "step": 77885 }, { "epoch": 12.706362153344209, "grad_norm": 0.005796543322503567, "learning_rate": 0.0003532969043182047, "loss": 0.0055, "num_input_tokens_seen": 168250688, "step": 77890 }, { "epoch": 12.707177814029365, "grad_norm": 0.014010935090482235, "learning_rate": 0.0003532288587579572, "loss": 0.0135, "num_input_tokens_seen": 168260960, "step": 77895 }, { "epoch": 12.707993474714518, "grad_norm": 0.008485809899866581, "learning_rate": 0.0003531608161722132, "loss": 0.0027, "num_input_tokens_seen": 168270656, "step": 77900 }, { "epoch": 12.708809135399674, "grad_norm": 0.05919577181339264, "learning_rate": 0.00035309277656235137, "loss": 0.0103, "num_input_tokens_seen": 168281632, "step": 77905 }, { "epoch": 12.709624796084828, "grad_norm": 0.03997613489627838, "learning_rate": 0.000353024739929751, "loss": 0.0545, "num_input_tokens_seen": 168293216, "step": 77910 }, { "epoch": 12.710440456769984, "grad_norm": 0.007604612503200769, "learning_rate": 0.0003529567062757905, "loss": 0.0073, "num_input_tokens_seen": 168304032, "step": 77915 }, { "epoch": 12.71125611745514, "grad_norm": 0.010337037965655327, "learning_rate": 0.0003528886756018491, "loss": 0.0211, "num_input_tokens_seen": 168314624, "step": 77920 }, { "epoch": 12.712071778140293, "grad_norm": 0.0031499990727752447, "learning_rate": 0.0003528206479093051, "loss": 0.02, "num_input_tokens_seen": 168324320, "step": 77925 }, { "epoch": 12.71288743882545, "grad_norm": 0.014163109473884106, "learning_rate": 0.0003527526231995376, "loss": 0.0178, "num_input_tokens_seen": 168336128, "step": 77930 }, { "epoch": 12.713703099510603, "grad_norm": 0.020187662914395332, "learning_rate": 0.0003526846014739248, "loss": 0.0122, "num_input_tokens_seen": 168344992, "step": 77935 }, { "epoch": 12.714518760195759, "grad_norm": 0.04705316200852394, "learning_rate": 0.00035261658273384554, "loss": 0.0071, "num_input_tokens_seen": 168356128, "step": 77940 }, { "epoch": 12.715334420880914, "grad_norm": 0.0028371878433972597, "learning_rate": 0.00035254856698067806, "loss": 0.0061, "num_input_tokens_seen": 168366784, "step": 77945 }, { "epoch": 12.716150081566068, "grad_norm": 0.003474497003480792, "learning_rate": 0.00035248055421580114, "loss": 0.0051, "num_input_tokens_seen": 168379552, "step": 77950 }, { "epoch": 12.716965742251224, "grad_norm": 0.004116491414606571, "learning_rate": 0.0003524125444405928, "loss": 0.1206, "num_input_tokens_seen": 168390592, "step": 77955 }, { "epoch": 12.717781402936378, "grad_norm": 0.0031350203789770603, "learning_rate": 0.00035234453765643146, "loss": 0.0111, "num_input_tokens_seen": 168401600, "step": 77960 }, { "epoch": 12.718597063621534, "grad_norm": 0.41057583689689636, "learning_rate": 0.0003522765338646954, "loss": 0.1253, "num_input_tokens_seen": 168412448, "step": 77965 }, { "epoch": 12.719412724306688, "grad_norm": 0.42899951338768005, "learning_rate": 0.00035220853306676284, "loss": 0.0739, "num_input_tokens_seen": 168423328, "step": 77970 }, { "epoch": 12.720228384991843, "grad_norm": 0.7427116632461548, "learning_rate": 0.0003521405352640118, "loss": 0.0192, "num_input_tokens_seen": 168434272, "step": 77975 }, { "epoch": 12.721044045676999, "grad_norm": 0.059706129133701324, "learning_rate": 0.00035207254045782036, "loss": 0.0624, "num_input_tokens_seen": 168445664, "step": 77980 }, { "epoch": 12.721859706362153, "grad_norm": 0.0043745641596615314, "learning_rate": 0.00035200454864956653, "loss": 0.0217, "num_input_tokens_seen": 168456928, "step": 77985 }, { "epoch": 12.722675367047309, "grad_norm": 0.055647846311330795, "learning_rate": 0.00035193655984062835, "loss": 0.1082, "num_input_tokens_seen": 168467904, "step": 77990 }, { "epoch": 12.723491027732463, "grad_norm": 0.023597707971930504, "learning_rate": 0.0003518685740323835, "loss": 0.0056, "num_input_tokens_seen": 168479104, "step": 77995 }, { "epoch": 12.724306688417618, "grad_norm": 1.1642639636993408, "learning_rate": 0.00035180059122621, "loss": 0.0656, "num_input_tokens_seen": 168490016, "step": 78000 }, { "epoch": 12.725122349102774, "grad_norm": 0.025501245632767677, "learning_rate": 0.0003517326114234855, "loss": 0.0024, "num_input_tokens_seen": 168501088, "step": 78005 }, { "epoch": 12.725938009787928, "grad_norm": 0.3803076446056366, "learning_rate": 0.0003516646346255877, "loss": 0.1273, "num_input_tokens_seen": 168511392, "step": 78010 }, { "epoch": 12.726753670473084, "grad_norm": 0.00820028968155384, "learning_rate": 0.00035159666083389436, "loss": 0.0344, "num_input_tokens_seen": 168523328, "step": 78015 }, { "epoch": 12.727569331158238, "grad_norm": 0.0002793922321870923, "learning_rate": 0.00035152869004978276, "loss": 0.0373, "num_input_tokens_seen": 168535040, "step": 78020 }, { "epoch": 12.728384991843393, "grad_norm": 0.0012598390458151698, "learning_rate": 0.0003514607222746309, "loss": 0.0196, "num_input_tokens_seen": 168545760, "step": 78025 }, { "epoch": 12.729200652528547, "grad_norm": 0.5319569110870361, "learning_rate": 0.0003513927575098156, "loss": 0.0419, "num_input_tokens_seen": 168556032, "step": 78030 }, { "epoch": 12.730016313213703, "grad_norm": 0.01204077061265707, "learning_rate": 0.0003513247957567149, "loss": 0.0406, "num_input_tokens_seen": 168567552, "step": 78035 }, { "epoch": 12.730831973898859, "grad_norm": 0.021115312352776527, "learning_rate": 0.0003512568370167055, "loss": 0.0037, "num_input_tokens_seen": 168579904, "step": 78040 }, { "epoch": 12.731647634584013, "grad_norm": 0.012967637740075588, "learning_rate": 0.0003511888812911653, "loss": 0.008, "num_input_tokens_seen": 168590560, "step": 78045 }, { "epoch": 12.732463295269168, "grad_norm": 0.003758589504286647, "learning_rate": 0.00035112092858147106, "loss": 0.012, "num_input_tokens_seen": 168601504, "step": 78050 }, { "epoch": 12.733278955954322, "grad_norm": 0.09647537022829056, "learning_rate": 0.0003510529788890001, "loss": 0.0249, "num_input_tokens_seen": 168612672, "step": 78055 }, { "epoch": 12.734094616639478, "grad_norm": 0.020022863522171974, "learning_rate": 0.0003509850322151294, "loss": 0.0072, "num_input_tokens_seen": 168623968, "step": 78060 }, { "epoch": 12.734910277324634, "grad_norm": 0.0031233022455126047, "learning_rate": 0.0003509170885612362, "loss": 0.0143, "num_input_tokens_seen": 168635712, "step": 78065 }, { "epoch": 12.735725938009788, "grad_norm": 0.0440434105694294, "learning_rate": 0.00035084914792869715, "loss": 0.0723, "num_input_tokens_seen": 168646176, "step": 78070 }, { "epoch": 12.736541598694943, "grad_norm": 0.004339613951742649, "learning_rate": 0.0003507812103188895, "loss": 0.0337, "num_input_tokens_seen": 168657408, "step": 78075 }, { "epoch": 12.737357259380097, "grad_norm": 0.4139990508556366, "learning_rate": 0.0003507132757331898, "loss": 0.0269, "num_input_tokens_seen": 168668672, "step": 78080 }, { "epoch": 12.738172920065253, "grad_norm": 0.21979013085365295, "learning_rate": 0.00035064534417297513, "loss": 0.0137, "num_input_tokens_seen": 168679648, "step": 78085 }, { "epoch": 12.738988580750409, "grad_norm": 0.03787407651543617, "learning_rate": 0.00035057741563962176, "loss": 0.0078, "num_input_tokens_seen": 168692480, "step": 78090 }, { "epoch": 12.739804241435563, "grad_norm": 0.01006343774497509, "learning_rate": 0.00035050949013450686, "loss": 0.0379, "num_input_tokens_seen": 168702048, "step": 78095 }, { "epoch": 12.740619902120718, "grad_norm": 0.006184085737913847, "learning_rate": 0.0003504415676590066, "loss": 0.0029, "num_input_tokens_seen": 168713216, "step": 78100 }, { "epoch": 12.741435562805872, "grad_norm": 1.6347105503082275, "learning_rate": 0.00035037364821449766, "loss": 0.2852, "num_input_tokens_seen": 168723712, "step": 78105 }, { "epoch": 12.742251223491028, "grad_norm": 0.00397314690053463, "learning_rate": 0.0003503057318023568, "loss": 0.0059, "num_input_tokens_seen": 168734752, "step": 78110 }, { "epoch": 12.743066884176184, "grad_norm": 0.002033184515312314, "learning_rate": 0.00035023781842395994, "loss": 0.0047, "num_input_tokens_seen": 168744480, "step": 78115 }, { "epoch": 12.743882544861338, "grad_norm": 0.02932196483016014, "learning_rate": 0.0003501699080806839, "loss": 0.0091, "num_input_tokens_seen": 168754624, "step": 78120 }, { "epoch": 12.744698205546493, "grad_norm": 0.5784959197044373, "learning_rate": 0.0003501020007739045, "loss": 0.1011, "num_input_tokens_seen": 168765920, "step": 78125 }, { "epoch": 12.745513866231647, "grad_norm": 0.3741350769996643, "learning_rate": 0.0003500340965049984, "loss": 0.1566, "num_input_tokens_seen": 168776000, "step": 78130 }, { "epoch": 12.746329526916803, "grad_norm": 0.002900507999584079, "learning_rate": 0.00034996619527534153, "loss": 0.0061, "num_input_tokens_seen": 168787200, "step": 78135 }, { "epoch": 12.747145187601957, "grad_norm": 0.3536345064640045, "learning_rate": 0.00034989829708631005, "loss": 0.0596, "num_input_tokens_seen": 168798464, "step": 78140 }, { "epoch": 12.747960848287113, "grad_norm": 0.008484461344778538, "learning_rate": 0.00034983040193927996, "loss": 0.0407, "num_input_tokens_seen": 168809536, "step": 78145 }, { "epoch": 12.748776508972268, "grad_norm": 0.006402932107448578, "learning_rate": 0.0003497625098356273, "loss": 0.0148, "num_input_tokens_seen": 168820512, "step": 78150 }, { "epoch": 12.749592169657422, "grad_norm": 0.11143842339515686, "learning_rate": 0.00034969462077672793, "loss": 0.0108, "num_input_tokens_seen": 168830752, "step": 78155 }, { "epoch": 12.750407830342578, "grad_norm": 0.00579224806278944, "learning_rate": 0.0003496267347639579, "loss": 0.0696, "num_input_tokens_seen": 168841920, "step": 78160 }, { "epoch": 12.751223491027732, "grad_norm": 0.002410769695416093, "learning_rate": 0.00034955885179869265, "loss": 0.0027, "num_input_tokens_seen": 168852896, "step": 78165 }, { "epoch": 12.752039151712887, "grad_norm": 0.6489723920822144, "learning_rate": 0.0003494909718823083, "loss": 0.0587, "num_input_tokens_seen": 168864736, "step": 78170 }, { "epoch": 12.752854812398043, "grad_norm": 0.0014829429564997554, "learning_rate": 0.00034942309501618016, "loss": 0.0093, "num_input_tokens_seen": 168876256, "step": 78175 }, { "epoch": 12.753670473083197, "grad_norm": 0.0063831862062215805, "learning_rate": 0.00034935522120168417, "loss": 0.0058, "num_input_tokens_seen": 168887456, "step": 78180 }, { "epoch": 12.754486133768353, "grad_norm": 0.587399423122406, "learning_rate": 0.0003492873504401956, "loss": 0.0277, "num_input_tokens_seen": 168899552, "step": 78185 }, { "epoch": 12.755301794453507, "grad_norm": 0.08072449266910553, "learning_rate": 0.0003492194827330902, "loss": 0.0194, "num_input_tokens_seen": 168910368, "step": 78190 }, { "epoch": 12.756117455138662, "grad_norm": 0.0015662991208955646, "learning_rate": 0.00034915161808174314, "loss": 0.0179, "num_input_tokens_seen": 168921728, "step": 78195 }, { "epoch": 12.756933115823816, "grad_norm": 0.08228830248117447, "learning_rate": 0.0003490837564875301, "loss": 0.0053, "num_input_tokens_seen": 168933568, "step": 78200 }, { "epoch": 12.757748776508972, "grad_norm": 0.002885582856833935, "learning_rate": 0.0003490158979518259, "loss": 0.0176, "num_input_tokens_seen": 168944384, "step": 78205 }, { "epoch": 12.758564437194128, "grad_norm": 0.5468868017196655, "learning_rate": 0.00034894804247600613, "loss": 0.0556, "num_input_tokens_seen": 168955584, "step": 78210 }, { "epoch": 12.759380097879282, "grad_norm": 0.004578125663101673, "learning_rate": 0.0003488801900614461, "loss": 0.0158, "num_input_tokens_seen": 168966016, "step": 78215 }, { "epoch": 12.760195758564437, "grad_norm": 0.17387300729751587, "learning_rate": 0.0003488123407095205, "loss": 0.0282, "num_input_tokens_seen": 168976832, "step": 78220 }, { "epoch": 12.761011419249591, "grad_norm": 0.03517686203122139, "learning_rate": 0.00034874449442160485, "loss": 0.0159, "num_input_tokens_seen": 168987936, "step": 78225 }, { "epoch": 12.761827079934747, "grad_norm": 0.006768247578293085, "learning_rate": 0.00034867665119907363, "loss": 0.0183, "num_input_tokens_seen": 168999584, "step": 78230 }, { "epoch": 12.762642740619903, "grad_norm": 0.09498634934425354, "learning_rate": 0.0003486088110433023, "loss": 0.1167, "num_input_tokens_seen": 169010272, "step": 78235 }, { "epoch": 12.763458401305057, "grad_norm": 0.01517215184867382, "learning_rate": 0.0003485409739556653, "loss": 0.0036, "num_input_tokens_seen": 169020768, "step": 78240 }, { "epoch": 12.764274061990212, "grad_norm": 0.016436690464615822, "learning_rate": 0.0003484731399375377, "loss": 0.1903, "num_input_tokens_seen": 169029472, "step": 78245 }, { "epoch": 12.765089722675366, "grad_norm": 0.009139187633991241, "learning_rate": 0.00034840530899029405, "loss": 0.1342, "num_input_tokens_seen": 169039520, "step": 78250 }, { "epoch": 12.765905383360522, "grad_norm": 0.0008956545498222113, "learning_rate": 0.00034833748111530926, "loss": 0.0154, "num_input_tokens_seen": 169049440, "step": 78255 }, { "epoch": 12.766721044045678, "grad_norm": 0.02780218981206417, "learning_rate": 0.00034826965631395767, "loss": 0.0034, "num_input_tokens_seen": 169061312, "step": 78260 }, { "epoch": 12.767536704730832, "grad_norm": 0.005282025318592787, "learning_rate": 0.0003482018345876141, "loss": 0.0081, "num_input_tokens_seen": 169071776, "step": 78265 }, { "epoch": 12.768352365415987, "grad_norm": 0.01068632211536169, "learning_rate": 0.0003481340159376528, "loss": 0.0079, "num_input_tokens_seen": 169082528, "step": 78270 }, { "epoch": 12.769168026101141, "grad_norm": 0.002080516656860709, "learning_rate": 0.0003480662003654483, "loss": 0.0355, "num_input_tokens_seen": 169093440, "step": 78275 }, { "epoch": 12.769983686786297, "grad_norm": 0.021333087235689163, "learning_rate": 0.00034799838787237514, "loss": 0.0067, "num_input_tokens_seen": 169103840, "step": 78280 }, { "epoch": 12.770799347471453, "grad_norm": 0.02037815749645233, "learning_rate": 0.00034793057845980744, "loss": 0.009, "num_input_tokens_seen": 169113760, "step": 78285 }, { "epoch": 12.771615008156607, "grad_norm": 0.006305212154984474, "learning_rate": 0.00034786277212911943, "loss": 0.0026, "num_input_tokens_seen": 169123552, "step": 78290 }, { "epoch": 12.772430668841762, "grad_norm": 0.006527638528496027, "learning_rate": 0.0003477949688816854, "loss": 0.007, "num_input_tokens_seen": 169133664, "step": 78295 }, { "epoch": 12.773246329526916, "grad_norm": 0.008840296417474747, "learning_rate": 0.00034772716871887924, "loss": 0.0116, "num_input_tokens_seen": 169145440, "step": 78300 }, { "epoch": 12.774061990212072, "grad_norm": 0.48614218831062317, "learning_rate": 0.0003476593716420754, "loss": 0.025, "num_input_tokens_seen": 169156480, "step": 78305 }, { "epoch": 12.774877650897226, "grad_norm": 0.029282858595252037, "learning_rate": 0.00034759157765264746, "loss": 0.0047, "num_input_tokens_seen": 169167744, "step": 78310 }, { "epoch": 12.775693311582382, "grad_norm": 0.27233338356018066, "learning_rate": 0.00034752378675196975, "loss": 0.0169, "num_input_tokens_seen": 169178464, "step": 78315 }, { "epoch": 12.776508972267537, "grad_norm": 0.005301279481500387, "learning_rate": 0.0003474559989414158, "loss": 0.0074, "num_input_tokens_seen": 169189248, "step": 78320 }, { "epoch": 12.777324632952691, "grad_norm": 0.04829799011349678, "learning_rate": 0.00034738821422235943, "loss": 0.0076, "num_input_tokens_seen": 169201024, "step": 78325 }, { "epoch": 12.778140293637847, "grad_norm": 0.030659900978207588, "learning_rate": 0.00034732043259617473, "loss": 0.0049, "num_input_tokens_seen": 169212480, "step": 78330 }, { "epoch": 12.778955954323001, "grad_norm": 0.006221574265509844, "learning_rate": 0.000347252654064235, "loss": 0.0549, "num_input_tokens_seen": 169224128, "step": 78335 }, { "epoch": 12.779771615008157, "grad_norm": 0.01066858321428299, "learning_rate": 0.00034718487862791413, "loss": 0.0024, "num_input_tokens_seen": 169234592, "step": 78340 }, { "epoch": 12.780587275693312, "grad_norm": 0.556633472442627, "learning_rate": 0.0003471171062885854, "loss": 0.0951, "num_input_tokens_seen": 169245728, "step": 78345 }, { "epoch": 12.781402936378466, "grad_norm": 0.031409382820129395, "learning_rate": 0.00034704933704762266, "loss": 0.0668, "num_input_tokens_seen": 169256160, "step": 78350 }, { "epoch": 12.782218597063622, "grad_norm": 0.007934520952403545, "learning_rate": 0.00034698157090639893, "loss": 0.0025, "num_input_tokens_seen": 169267232, "step": 78355 }, { "epoch": 12.783034257748776, "grad_norm": 0.18476316332817078, "learning_rate": 0.000346913807866288, "loss": 0.0094, "num_input_tokens_seen": 169277120, "step": 78360 }, { "epoch": 12.783849918433932, "grad_norm": 0.020248549059033394, "learning_rate": 0.00034684604792866277, "loss": 0.0063, "num_input_tokens_seen": 169288096, "step": 78365 }, { "epoch": 12.784665579119086, "grad_norm": 0.01171860285103321, "learning_rate": 0.00034677829109489684, "loss": 0.0058, "num_input_tokens_seen": 169299744, "step": 78370 }, { "epoch": 12.785481239804241, "grad_norm": 0.002188315847888589, "learning_rate": 0.00034671053736636307, "loss": 0.0073, "num_input_tokens_seen": 169310528, "step": 78375 }, { "epoch": 12.786296900489397, "grad_norm": 0.05189083144068718, "learning_rate": 0.0003466427867444348, "loss": 0.0166, "num_input_tokens_seen": 169321440, "step": 78380 }, { "epoch": 12.78711256117455, "grad_norm": 0.004644290544092655, "learning_rate": 0.00034657503923048497, "loss": 0.1892, "num_input_tokens_seen": 169332640, "step": 78385 }, { "epoch": 12.787928221859707, "grad_norm": 0.02340223640203476, "learning_rate": 0.00034650729482588665, "loss": 0.0228, "num_input_tokens_seen": 169344576, "step": 78390 }, { "epoch": 12.78874388254486, "grad_norm": 0.12741638720035553, "learning_rate": 0.0003464395535320126, "loss": 0.0293, "num_input_tokens_seen": 169356288, "step": 78395 }, { "epoch": 12.789559543230016, "grad_norm": 0.4657769203186035, "learning_rate": 0.000346371815350236, "loss": 0.0757, "num_input_tokens_seen": 169367520, "step": 78400 }, { "epoch": 12.790375203915172, "grad_norm": 0.011172788217663765, "learning_rate": 0.0003463040802819292, "loss": 0.0059, "num_input_tokens_seen": 169378208, "step": 78405 }, { "epoch": 12.791190864600326, "grad_norm": 0.0089213652536273, "learning_rate": 0.0003462363483284654, "loss": 0.0036, "num_input_tokens_seen": 169387968, "step": 78410 }, { "epoch": 12.792006525285482, "grad_norm": 0.019134066998958588, "learning_rate": 0.0003461686194912169, "loss": 0.0101, "num_input_tokens_seen": 169397952, "step": 78415 }, { "epoch": 12.792822185970635, "grad_norm": 0.0643213540315628, "learning_rate": 0.00034610089377155656, "loss": 0.141, "num_input_tokens_seen": 169410176, "step": 78420 }, { "epoch": 12.793637846655791, "grad_norm": 0.004243266768753529, "learning_rate": 0.0003460331711708569, "loss": 0.004, "num_input_tokens_seen": 169421408, "step": 78425 }, { "epoch": 12.794453507340947, "grad_norm": 0.008267040364444256, "learning_rate": 0.00034596545169049013, "loss": 0.0035, "num_input_tokens_seen": 169432032, "step": 78430 }, { "epoch": 12.7952691680261, "grad_norm": 0.00525688799098134, "learning_rate": 0.00034589773533182924, "loss": 0.0098, "num_input_tokens_seen": 169441888, "step": 78435 }, { "epoch": 12.796084828711257, "grad_norm": 0.04394717514514923, "learning_rate": 0.00034583002209624594, "loss": 0.1423, "num_input_tokens_seen": 169452064, "step": 78440 }, { "epoch": 12.79690048939641, "grad_norm": 0.02494252845644951, "learning_rate": 0.0003457623119851129, "loss": 0.0046, "num_input_tokens_seen": 169464576, "step": 78445 }, { "epoch": 12.797716150081566, "grad_norm": 0.14180971682071686, "learning_rate": 0.00034569460499980233, "loss": 0.0109, "num_input_tokens_seen": 169474464, "step": 78450 }, { "epoch": 12.798531810766722, "grad_norm": 0.008769070729613304, "learning_rate": 0.00034562690114168626, "loss": 0.0253, "num_input_tokens_seen": 169484992, "step": 78455 }, { "epoch": 12.799347471451876, "grad_norm": 0.0028862846083939075, "learning_rate": 0.000345559200412137, "loss": 0.0063, "num_input_tokens_seen": 169495328, "step": 78460 }, { "epoch": 12.800163132137031, "grad_norm": 0.006227980833500624, "learning_rate": 0.00034549150281252633, "loss": 0.0137, "num_input_tokens_seen": 169507008, "step": 78465 }, { "epoch": 12.800978792822185, "grad_norm": 0.505739688873291, "learning_rate": 0.00034542380834422633, "loss": 0.0376, "num_input_tokens_seen": 169518144, "step": 78470 }, { "epoch": 12.801794453507341, "grad_norm": 0.2012212574481964, "learning_rate": 0.00034535611700860913, "loss": 0.105, "num_input_tokens_seen": 169529536, "step": 78475 }, { "epoch": 12.802610114192497, "grad_norm": 0.015639374032616615, "learning_rate": 0.00034528842880704626, "loss": 0.0097, "num_input_tokens_seen": 169540384, "step": 78480 }, { "epoch": 12.80342577487765, "grad_norm": 0.034782614558935165, "learning_rate": 0.0003452207437409097, "loss": 0.0168, "num_input_tokens_seen": 169550816, "step": 78485 }, { "epoch": 12.804241435562806, "grad_norm": 0.010439387522637844, "learning_rate": 0.00034515306181157106, "loss": 0.06, "num_input_tokens_seen": 169562560, "step": 78490 }, { "epoch": 12.80505709624796, "grad_norm": 0.0014605855103582144, "learning_rate": 0.00034508538302040225, "loss": 0.0254, "num_input_tokens_seen": 169573312, "step": 78495 }, { "epoch": 12.805872756933116, "grad_norm": 0.2424784004688263, "learning_rate": 0.00034501770736877443, "loss": 0.0935, "num_input_tokens_seen": 169584032, "step": 78500 }, { "epoch": 12.80668841761827, "grad_norm": 0.00401564035564661, "learning_rate": 0.0003449500348580596, "loss": 0.0177, "num_input_tokens_seen": 169594368, "step": 78505 }, { "epoch": 12.807504078303426, "grad_norm": 0.009832642041146755, "learning_rate": 0.0003448823654896288, "loss": 0.0599, "num_input_tokens_seen": 169605248, "step": 78510 }, { "epoch": 12.808319738988581, "grad_norm": 0.29828646779060364, "learning_rate": 0.00034481469926485385, "loss": 0.0193, "num_input_tokens_seen": 169615904, "step": 78515 }, { "epoch": 12.809135399673735, "grad_norm": 0.027520187199115753, "learning_rate": 0.00034474703618510565, "loss": 0.0323, "num_input_tokens_seen": 169626592, "step": 78520 }, { "epoch": 12.809951060358891, "grad_norm": 0.08540055900812149, "learning_rate": 0.00034467937625175596, "loss": 0.1196, "num_input_tokens_seen": 169637504, "step": 78525 }, { "epoch": 12.810766721044045, "grad_norm": 0.0028646751306951046, "learning_rate": 0.00034461171946617553, "loss": 0.0752, "num_input_tokens_seen": 169647232, "step": 78530 }, { "epoch": 12.8115823817292, "grad_norm": 0.0070752971805632114, "learning_rate": 0.0003445440658297357, "loss": 0.0048, "num_input_tokens_seen": 169657824, "step": 78535 }, { "epoch": 12.812398042414356, "grad_norm": 0.007390094920992851, "learning_rate": 0.0003444764153438079, "loss": 0.0079, "num_input_tokens_seen": 169667776, "step": 78540 }, { "epoch": 12.81321370309951, "grad_norm": 0.1049143373966217, "learning_rate": 0.0003444087680097625, "loss": 0.0481, "num_input_tokens_seen": 169678784, "step": 78545 }, { "epoch": 12.814029363784666, "grad_norm": 0.008307898417115211, "learning_rate": 0.00034434112382897107, "loss": 0.036, "num_input_tokens_seen": 169688960, "step": 78550 }, { "epoch": 12.81484502446982, "grad_norm": 0.0053911637514829636, "learning_rate": 0.000344273482802804, "loss": 0.0137, "num_input_tokens_seen": 169698208, "step": 78555 }, { "epoch": 12.815660685154976, "grad_norm": 0.012019234709441662, "learning_rate": 0.00034420584493263264, "loss": 0.0108, "num_input_tokens_seen": 169709056, "step": 78560 }, { "epoch": 12.81647634584013, "grad_norm": 0.5703594088554382, "learning_rate": 0.0003441382102198272, "loss": 0.0911, "num_input_tokens_seen": 169719936, "step": 78565 }, { "epoch": 12.817292006525285, "grad_norm": 0.01869955286383629, "learning_rate": 0.0003440705786657588, "loss": 0.0259, "num_input_tokens_seen": 169730816, "step": 78570 }, { "epoch": 12.818107667210441, "grad_norm": 0.16946589946746826, "learning_rate": 0.00034400295027179776, "loss": 0.0073, "num_input_tokens_seen": 169741696, "step": 78575 }, { "epoch": 12.818923327895595, "grad_norm": 0.5616081953048706, "learning_rate": 0.00034393532503931514, "loss": 0.0469, "num_input_tokens_seen": 169752512, "step": 78580 }, { "epoch": 12.81973898858075, "grad_norm": 0.0031734046060591936, "learning_rate": 0.0003438677029696808, "loss": 0.01, "num_input_tokens_seen": 169760992, "step": 78585 }, { "epoch": 12.820554649265905, "grad_norm": 0.06362012028694153, "learning_rate": 0.0003438000840642657, "loss": 0.0378, "num_input_tokens_seen": 169771424, "step": 78590 }, { "epoch": 12.82137030995106, "grad_norm": 0.02829126827418804, "learning_rate": 0.00034373246832444007, "loss": 0.0224, "num_input_tokens_seen": 169781344, "step": 78595 }, { "epoch": 12.822185970636216, "grad_norm": 2.228576183319092, "learning_rate": 0.00034366485575157413, "loss": 0.1215, "num_input_tokens_seen": 169791488, "step": 78600 }, { "epoch": 12.82300163132137, "grad_norm": 0.0022986563853919506, "learning_rate": 0.00034359724634703827, "loss": 0.0039, "num_input_tokens_seen": 169803136, "step": 78605 }, { "epoch": 12.823817292006526, "grad_norm": 0.007648915518075228, "learning_rate": 0.0003435296401122027, "loss": 0.0078, "num_input_tokens_seen": 169813664, "step": 78610 }, { "epoch": 12.82463295269168, "grad_norm": 0.015873296186327934, "learning_rate": 0.0003434620370484372, "loss": 0.0045, "num_input_tokens_seen": 169824288, "step": 78615 }, { "epoch": 12.825448613376835, "grad_norm": 0.001540105091407895, "learning_rate": 0.0003433944371571124, "loss": 0.0136, "num_input_tokens_seen": 169833728, "step": 78620 }, { "epoch": 12.826264274061991, "grad_norm": 1.0538294315338135, "learning_rate": 0.00034332684043959777, "loss": 0.2605, "num_input_tokens_seen": 169843936, "step": 78625 }, { "epoch": 12.827079934747145, "grad_norm": 0.002735902788117528, "learning_rate": 0.00034325924689726376, "loss": 0.012, "num_input_tokens_seen": 169854688, "step": 78630 }, { "epoch": 12.8278955954323, "grad_norm": 0.2805021107196808, "learning_rate": 0.00034319165653147964, "loss": 0.0268, "num_input_tokens_seen": 169863680, "step": 78635 }, { "epoch": 12.828711256117455, "grad_norm": 0.023348089307546616, "learning_rate": 0.00034312406934361553, "loss": 0.0082, "num_input_tokens_seen": 169874176, "step": 78640 }, { "epoch": 12.82952691680261, "grad_norm": 0.03763202577829361, "learning_rate": 0.0003430564853350414, "loss": 0.0118, "num_input_tokens_seen": 169885632, "step": 78645 }, { "epoch": 12.830342577487766, "grad_norm": 0.005634233821183443, "learning_rate": 0.0003429889045071265, "loss": 0.0463, "num_input_tokens_seen": 169895552, "step": 78650 }, { "epoch": 12.83115823817292, "grad_norm": 0.023782063275575638, "learning_rate": 0.0003429213268612408, "loss": 0.1291, "num_input_tokens_seen": 169905728, "step": 78655 }, { "epoch": 12.831973898858076, "grad_norm": 0.05306378751993179, "learning_rate": 0.0003428537523987535, "loss": 0.0052, "num_input_tokens_seen": 169914304, "step": 78660 }, { "epoch": 12.83278955954323, "grad_norm": 0.0029371960554271936, "learning_rate": 0.0003427861811210345, "loss": 0.0051, "num_input_tokens_seen": 169924800, "step": 78665 }, { "epoch": 12.833605220228385, "grad_norm": 0.006226977799087763, "learning_rate": 0.0003427186130294527, "loss": 0.0046, "num_input_tokens_seen": 169935712, "step": 78670 }, { "epoch": 12.83442088091354, "grad_norm": 0.31288060545921326, "learning_rate": 0.00034265104812537805, "loss": 0.0823, "num_input_tokens_seen": 169947296, "step": 78675 }, { "epoch": 12.835236541598695, "grad_norm": 0.0038242738228291273, "learning_rate": 0.0003425834864101792, "loss": 0.0058, "num_input_tokens_seen": 169957504, "step": 78680 }, { "epoch": 12.83605220228385, "grad_norm": 0.031265001744031906, "learning_rate": 0.000342515927885226, "loss": 0.0828, "num_input_tokens_seen": 169968896, "step": 78685 }, { "epoch": 12.836867862969005, "grad_norm": 0.014412354677915573, "learning_rate": 0.000342448372551887, "loss": 0.0143, "num_input_tokens_seen": 169979680, "step": 78690 }, { "epoch": 12.83768352365416, "grad_norm": 0.010973574593663216, "learning_rate": 0.0003423808204115318, "loss": 0.0045, "num_input_tokens_seen": 169989952, "step": 78695 }, { "epoch": 12.838499184339314, "grad_norm": 0.015177428722381592, "learning_rate": 0.00034231327146552916, "loss": 0.0109, "num_input_tokens_seen": 170000896, "step": 78700 }, { "epoch": 12.83931484502447, "grad_norm": 0.02052193135023117, "learning_rate": 0.00034224572571524823, "loss": 0.0538, "num_input_tokens_seen": 170011616, "step": 78705 }, { "epoch": 12.840130505709626, "grad_norm": 0.04011628404259682, "learning_rate": 0.00034217818316205757, "loss": 0.009, "num_input_tokens_seen": 170023296, "step": 78710 }, { "epoch": 12.84094616639478, "grad_norm": 0.003024019068107009, "learning_rate": 0.0003421106438073265, "loss": 0.0082, "num_input_tokens_seen": 170032384, "step": 78715 }, { "epoch": 12.841761827079935, "grad_norm": 0.10613281279802322, "learning_rate": 0.0003420431076524233, "loss": 0.0085, "num_input_tokens_seen": 170044352, "step": 78720 }, { "epoch": 12.84257748776509, "grad_norm": 0.0019029824761673808, "learning_rate": 0.0003419755746987171, "loss": 0.0025, "num_input_tokens_seen": 170055136, "step": 78725 }, { "epoch": 12.843393148450245, "grad_norm": 0.013800938613712788, "learning_rate": 0.0003419080449475761, "loss": 0.0047, "num_input_tokens_seen": 170064672, "step": 78730 }, { "epoch": 12.844208809135399, "grad_norm": 0.006535480264574289, "learning_rate": 0.0003418405184003693, "loss": 0.0067, "num_input_tokens_seen": 170075488, "step": 78735 }, { "epoch": 12.845024469820554, "grad_norm": 0.35378557443618774, "learning_rate": 0.000341772995058465, "loss": 0.0358, "num_input_tokens_seen": 170084736, "step": 78740 }, { "epoch": 12.84584013050571, "grad_norm": 0.004953265190124512, "learning_rate": 0.0003417054749232316, "loss": 0.1255, "num_input_tokens_seen": 170094784, "step": 78745 }, { "epoch": 12.846655791190864, "grad_norm": 0.006483436096459627, "learning_rate": 0.0003416379579960377, "loss": 0.0064, "num_input_tokens_seen": 170104640, "step": 78750 }, { "epoch": 12.84747145187602, "grad_norm": 0.01915556751191616, "learning_rate": 0.00034157044427825137, "loss": 0.0042, "num_input_tokens_seen": 170115744, "step": 78755 }, { "epoch": 12.848287112561174, "grad_norm": 0.00910657923668623, "learning_rate": 0.000341502933771241, "loss": 0.0356, "num_input_tokens_seen": 170127008, "step": 78760 }, { "epoch": 12.84910277324633, "grad_norm": 0.01944858767092228, "learning_rate": 0.00034143542647637474, "loss": 0.0171, "num_input_tokens_seen": 170137248, "step": 78765 }, { "epoch": 12.849918433931485, "grad_norm": 0.002999127609655261, "learning_rate": 0.00034136792239502074, "loss": 0.0088, "num_input_tokens_seen": 170147904, "step": 78770 }, { "epoch": 12.850734094616639, "grad_norm": 0.7215339541435242, "learning_rate": 0.000341300421528547, "loss": 0.1081, "num_input_tokens_seen": 170158912, "step": 78775 }, { "epoch": 12.851549755301795, "grad_norm": 0.004867668263614178, "learning_rate": 0.0003412329238783216, "loss": 0.0065, "num_input_tokens_seen": 170169760, "step": 78780 }, { "epoch": 12.852365415986949, "grad_norm": 0.009730189107358456, "learning_rate": 0.00034116542944571227, "loss": 0.0279, "num_input_tokens_seen": 170180448, "step": 78785 }, { "epoch": 12.853181076672104, "grad_norm": 0.0032304124906659126, "learning_rate": 0.00034109793823208724, "loss": 0.0051, "num_input_tokens_seen": 170191136, "step": 78790 }, { "epoch": 12.85399673735726, "grad_norm": 0.010908522643148899, "learning_rate": 0.0003410304502388139, "loss": 0.0062, "num_input_tokens_seen": 170201376, "step": 78795 }, { "epoch": 12.854812398042414, "grad_norm": 0.0023245131596922874, "learning_rate": 0.0003409629654672602, "loss": 0.0141, "num_input_tokens_seen": 170212928, "step": 78800 }, { "epoch": 12.85562805872757, "grad_norm": 0.02462649531662464, "learning_rate": 0.0003408954839187938, "loss": 0.0148, "num_input_tokens_seen": 170224032, "step": 78805 }, { "epoch": 12.856443719412724, "grad_norm": 0.004026814829558134, "learning_rate": 0.0003408280055947823, "loss": 0.0066, "num_input_tokens_seen": 170234784, "step": 78810 }, { "epoch": 12.85725938009788, "grad_norm": 0.001957811415195465, "learning_rate": 0.00034076053049659295, "loss": 0.0067, "num_input_tokens_seen": 170247328, "step": 78815 }, { "epoch": 12.858075040783035, "grad_norm": 0.00296620256267488, "learning_rate": 0.00034069305862559373, "loss": 0.0027, "num_input_tokens_seen": 170258720, "step": 78820 }, { "epoch": 12.858890701468189, "grad_norm": 0.03529384359717369, "learning_rate": 0.00034062558998315163, "loss": 0.063, "num_input_tokens_seen": 170269376, "step": 78825 }, { "epoch": 12.859706362153345, "grad_norm": 0.09343760460615158, "learning_rate": 0.0003405581245706342, "loss": 0.0072, "num_input_tokens_seen": 170280672, "step": 78830 }, { "epoch": 12.860522022838499, "grad_norm": 0.011827799491584301, "learning_rate": 0.0003404906623894085, "loss": 0.0095, "num_input_tokens_seen": 170291488, "step": 78835 }, { "epoch": 12.861337683523654, "grad_norm": 0.0021250757854431868, "learning_rate": 0.0003404232034408421, "loss": 0.0062, "num_input_tokens_seen": 170303264, "step": 78840 }, { "epoch": 12.86215334420881, "grad_norm": 0.5916451215744019, "learning_rate": 0.00034035574772630175, "loss": 0.0772, "num_input_tokens_seen": 170313280, "step": 78845 }, { "epoch": 12.862969004893964, "grad_norm": 0.019389253109693527, "learning_rate": 0.00034028829524715464, "loss": 0.015, "num_input_tokens_seen": 170324544, "step": 78850 }, { "epoch": 12.86378466557912, "grad_norm": 0.07211606204509735, "learning_rate": 0.000340220846004768, "loss": 0.0096, "num_input_tokens_seen": 170336672, "step": 78855 }, { "epoch": 12.864600326264274, "grad_norm": 0.0109171811491251, "learning_rate": 0.00034015340000050846, "loss": 0.0024, "num_input_tokens_seen": 170347680, "step": 78860 }, { "epoch": 12.86541598694943, "grad_norm": 0.0226032342761755, "learning_rate": 0.00034008595723574326, "loss": 0.0035, "num_input_tokens_seen": 170359136, "step": 78865 }, { "epoch": 12.866231647634583, "grad_norm": 0.016140323132276535, "learning_rate": 0.00034001851771183877, "loss": 0.0688, "num_input_tokens_seen": 170369440, "step": 78870 }, { "epoch": 12.867047308319739, "grad_norm": 0.07779782265424728, "learning_rate": 0.00033995108143016216, "loss": 0.0144, "num_input_tokens_seen": 170380864, "step": 78875 }, { "epoch": 12.867862969004895, "grad_norm": 0.06042582169175148, "learning_rate": 0.0003398836483920798, "loss": 0.0052, "num_input_tokens_seen": 170392704, "step": 78880 }, { "epoch": 12.868678629690049, "grad_norm": 0.020869217813014984, "learning_rate": 0.0003398162185989586, "loss": 0.0058, "num_input_tokens_seen": 170403552, "step": 78885 }, { "epoch": 12.869494290375204, "grad_norm": 0.010013763792812824, "learning_rate": 0.0003397487920521647, "loss": 0.013, "num_input_tokens_seen": 170414336, "step": 78890 }, { "epoch": 12.870309951060358, "grad_norm": 0.0005221384926699102, "learning_rate": 0.00033968136875306496, "loss": 0.0119, "num_input_tokens_seen": 170426400, "step": 78895 }, { "epoch": 12.871125611745514, "grad_norm": 0.0225661713629961, "learning_rate": 0.0003396139487030256, "loss": 0.1702, "num_input_tokens_seen": 170437472, "step": 78900 }, { "epoch": 12.87194127243067, "grad_norm": 0.00248327711597085, "learning_rate": 0.00033954653190341306, "loss": 0.1597, "num_input_tokens_seen": 170448416, "step": 78905 }, { "epoch": 12.872756933115824, "grad_norm": 0.019737066701054573, "learning_rate": 0.0003394791183555936, "loss": 0.0068, "num_input_tokens_seen": 170460256, "step": 78910 }, { "epoch": 12.87357259380098, "grad_norm": 0.10288142412900925, "learning_rate": 0.0003394117080609335, "loss": 0.0048, "num_input_tokens_seen": 170471776, "step": 78915 }, { "epoch": 12.874388254486133, "grad_norm": 0.0019228693563491106, "learning_rate": 0.0003393443010207988, "loss": 0.0038, "num_input_tokens_seen": 170483328, "step": 78920 }, { "epoch": 12.875203915171289, "grad_norm": 0.010113107040524483, "learning_rate": 0.0003392768972365556, "loss": 0.0585, "num_input_tokens_seen": 170494432, "step": 78925 }, { "epoch": 12.876019575856443, "grad_norm": 0.037867337465286255, "learning_rate": 0.00033920949670956994, "loss": 0.0172, "num_input_tokens_seen": 170504864, "step": 78930 }, { "epoch": 12.876835236541599, "grad_norm": 0.015708623453974724, "learning_rate": 0.000339142099441208, "loss": 0.0058, "num_input_tokens_seen": 170515136, "step": 78935 }, { "epoch": 12.877650897226754, "grad_norm": 0.0006929939845576882, "learning_rate": 0.0003390747054328353, "loss": 0.0064, "num_input_tokens_seen": 170525824, "step": 78940 }, { "epoch": 12.878466557911908, "grad_norm": 0.3186556398868561, "learning_rate": 0.00033900731468581804, "loss": 0.2179, "num_input_tokens_seen": 170536928, "step": 78945 }, { "epoch": 12.879282218597064, "grad_norm": 0.0015595832373946905, "learning_rate": 0.0003389399272015215, "loss": 0.023, "num_input_tokens_seen": 170547520, "step": 78950 }, { "epoch": 12.880097879282218, "grad_norm": 0.007515274453908205, "learning_rate": 0.0003388725429813117, "loss": 0.002, "num_input_tokens_seen": 170558464, "step": 78955 }, { "epoch": 12.880913539967374, "grad_norm": 0.030450142920017242, "learning_rate": 0.0003388051620265544, "loss": 0.0093, "num_input_tokens_seen": 170568768, "step": 78960 }, { "epoch": 12.88172920065253, "grad_norm": 0.32018008828163147, "learning_rate": 0.0003387377843386148, "loss": 0.1654, "num_input_tokens_seen": 170579648, "step": 78965 }, { "epoch": 12.882544861337683, "grad_norm": 0.018319450318813324, "learning_rate": 0.00033867040991885885, "loss": 0.0129, "num_input_tokens_seen": 170589440, "step": 78970 }, { "epoch": 12.883360522022839, "grad_norm": 0.00849565677344799, "learning_rate": 0.0003386030387686514, "loss": 0.0034, "num_input_tokens_seen": 170600384, "step": 78975 }, { "epoch": 12.884176182707993, "grad_norm": 0.0026667932979762554, "learning_rate": 0.0003385356708893584, "loss": 0.0022, "num_input_tokens_seen": 170611584, "step": 78980 }, { "epoch": 12.884991843393149, "grad_norm": 0.0011457474902272224, "learning_rate": 0.0003384683062823446, "loss": 0.0027, "num_input_tokens_seen": 170623328, "step": 78985 }, { "epoch": 12.885807504078304, "grad_norm": 0.0027485296595841646, "learning_rate": 0.00033840094494897566, "loss": 0.0037, "num_input_tokens_seen": 170633760, "step": 78990 }, { "epoch": 12.886623164763458, "grad_norm": 0.13075962662696838, "learning_rate": 0.0003383335868906164, "loss": 0.0112, "num_input_tokens_seen": 170644704, "step": 78995 }, { "epoch": 12.887438825448614, "grad_norm": 0.004995839670300484, "learning_rate": 0.0003382662321086324, "loss": 0.0041, "num_input_tokens_seen": 170655040, "step": 79000 }, { "epoch": 12.888254486133768, "grad_norm": 0.10305944830179214, "learning_rate": 0.0003381988806043881, "loss": 0.0128, "num_input_tokens_seen": 170664352, "step": 79005 }, { "epoch": 12.889070146818923, "grad_norm": 0.0039926618337631226, "learning_rate": 0.0003381315323792489, "loss": 0.1244, "num_input_tokens_seen": 170675552, "step": 79010 }, { "epoch": 12.88988580750408, "grad_norm": 0.1336335837841034, "learning_rate": 0.00033806418743457937, "loss": 0.0076, "num_input_tokens_seen": 170686336, "step": 79015 }, { "epoch": 12.890701468189233, "grad_norm": 0.0027748846914619207, "learning_rate": 0.0003379968457717447, "loss": 0.0058, "num_input_tokens_seen": 170697408, "step": 79020 }, { "epoch": 12.891517128874389, "grad_norm": 0.01572308875620365, "learning_rate": 0.00033792950739210934, "loss": 0.0087, "num_input_tokens_seen": 170707872, "step": 79025 }, { "epoch": 12.892332789559543, "grad_norm": 0.006033416371792555, "learning_rate": 0.0003378621722970382, "loss": 0.0043, "num_input_tokens_seen": 170717792, "step": 79030 }, { "epoch": 12.893148450244698, "grad_norm": 0.0038843636866658926, "learning_rate": 0.00033779484048789574, "loss": 0.1461, "num_input_tokens_seen": 170728672, "step": 79035 }, { "epoch": 12.893964110929852, "grad_norm": 0.008453777059912682, "learning_rate": 0.0003377275119660467, "loss": 0.0211, "num_input_tokens_seen": 170739840, "step": 79040 }, { "epoch": 12.894779771615008, "grad_norm": 0.0564873032271862, "learning_rate": 0.00033766018673285535, "loss": 0.0061, "num_input_tokens_seen": 170750720, "step": 79045 }, { "epoch": 12.895595432300164, "grad_norm": 0.010843920521438122, "learning_rate": 0.0003375928647896863, "loss": 0.0262, "num_input_tokens_seen": 170761024, "step": 79050 }, { "epoch": 12.896411092985318, "grad_norm": 0.00172845006454736, "learning_rate": 0.000337525546137904, "loss": 0.0081, "num_input_tokens_seen": 170771872, "step": 79055 }, { "epoch": 12.897226753670473, "grad_norm": 0.40799251198768616, "learning_rate": 0.0003374582307788725, "loss": 0.1212, "num_input_tokens_seen": 170783264, "step": 79060 }, { "epoch": 12.898042414355627, "grad_norm": 0.005817765835672617, "learning_rate": 0.0003373909187139562, "loss": 0.0031, "num_input_tokens_seen": 170794752, "step": 79065 }, { "epoch": 12.898858075040783, "grad_norm": 0.0027624014765024185, "learning_rate": 0.0003373236099445191, "loss": 0.0182, "num_input_tokens_seen": 170805696, "step": 79070 }, { "epoch": 12.899673735725939, "grad_norm": 0.010239914059638977, "learning_rate": 0.00033725630447192556, "loss": 0.0041, "num_input_tokens_seen": 170816480, "step": 79075 }, { "epoch": 12.900489396411093, "grad_norm": 0.36265629529953003, "learning_rate": 0.0003371890022975394, "loss": 0.1057, "num_input_tokens_seen": 170826528, "step": 79080 }, { "epoch": 12.901305057096248, "grad_norm": 0.01303062029182911, "learning_rate": 0.0003371217034227247, "loss": 0.0304, "num_input_tokens_seen": 170838048, "step": 79085 }, { "epoch": 12.902120717781402, "grad_norm": 0.053241170942783356, "learning_rate": 0.0003370544078488453, "loss": 0.0172, "num_input_tokens_seen": 170849472, "step": 79090 }, { "epoch": 12.902936378466558, "grad_norm": 0.010403000749647617, "learning_rate": 0.000336987115577265, "loss": 0.0309, "num_input_tokens_seen": 170861472, "step": 79095 }, { "epoch": 12.903752039151712, "grad_norm": 0.0021436321549117565, "learning_rate": 0.0003369198266093475, "loss": 0.0087, "num_input_tokens_seen": 170873632, "step": 79100 }, { "epoch": 12.904567699836868, "grad_norm": 0.007782533764839172, "learning_rate": 0.00033685254094645685, "loss": 0.1225, "num_input_tokens_seen": 170883840, "step": 79105 }, { "epoch": 12.905383360522023, "grad_norm": 0.0011054445058107376, "learning_rate": 0.0003367852585899562, "loss": 0.0048, "num_input_tokens_seen": 170892480, "step": 79110 }, { "epoch": 12.906199021207177, "grad_norm": 0.041748058050870895, "learning_rate": 0.00033671797954120953, "loss": 0.0063, "num_input_tokens_seen": 170903040, "step": 79115 }, { "epoch": 12.907014681892333, "grad_norm": 0.7993329763412476, "learning_rate": 0.0003366507038015799, "loss": 0.0212, "num_input_tokens_seen": 170913440, "step": 79120 }, { "epoch": 12.907830342577487, "grad_norm": 0.07743581384420395, "learning_rate": 0.0003365834313724312, "loss": 0.022, "num_input_tokens_seen": 170923712, "step": 79125 }, { "epoch": 12.908646003262643, "grad_norm": 0.0008200727752409875, "learning_rate": 0.00033651616225512636, "loss": 0.0165, "num_input_tokens_seen": 170934720, "step": 79130 }, { "epoch": 12.909461663947798, "grad_norm": 0.010752071626484394, "learning_rate": 0.0003364488964510292, "loss": 0.0223, "num_input_tokens_seen": 170945568, "step": 79135 }, { "epoch": 12.910277324632952, "grad_norm": 0.5752093195915222, "learning_rate": 0.00033638163396150234, "loss": 0.1113, "num_input_tokens_seen": 170955616, "step": 79140 }, { "epoch": 12.911092985318108, "grad_norm": 0.0010481280041858554, "learning_rate": 0.0003363143747879094, "loss": 0.1485, "num_input_tokens_seen": 170967200, "step": 79145 }, { "epoch": 12.911908646003262, "grad_norm": 1.013627529144287, "learning_rate": 0.00033624711893161317, "loss": 0.1638, "num_input_tokens_seen": 170977952, "step": 79150 }, { "epoch": 12.912724306688418, "grad_norm": 0.012935176491737366, "learning_rate": 0.000336179866393977, "loss": 0.0035, "num_input_tokens_seen": 170989856, "step": 79155 }, { "epoch": 12.913539967373573, "grad_norm": 0.008194522932171822, "learning_rate": 0.0003361126171763634, "loss": 0.0104, "num_input_tokens_seen": 171000160, "step": 79160 }, { "epoch": 12.914355628058727, "grad_norm": 0.0054146721959114075, "learning_rate": 0.0003360453712801358, "loss": 0.0031, "num_input_tokens_seen": 171011488, "step": 79165 }, { "epoch": 12.915171288743883, "grad_norm": 0.0015244726091623306, "learning_rate": 0.00033597812870665657, "loss": 0.0111, "num_input_tokens_seen": 171022688, "step": 79170 }, { "epoch": 12.915986949429037, "grad_norm": 0.032646216452121735, "learning_rate": 0.00033591088945728856, "loss": 0.0043, "num_input_tokens_seen": 171033728, "step": 79175 }, { "epoch": 12.916802610114193, "grad_norm": 0.003069676924496889, "learning_rate": 0.0003358436535333947, "loss": 0.004, "num_input_tokens_seen": 171045152, "step": 79180 }, { "epoch": 12.917618270799348, "grad_norm": 0.011279561556875706, "learning_rate": 0.0003357764209363373, "loss": 0.0035, "num_input_tokens_seen": 171055872, "step": 79185 }, { "epoch": 12.918433931484502, "grad_norm": 0.008914372883737087, "learning_rate": 0.00033570919166747926, "loss": 0.0038, "num_input_tokens_seen": 171066624, "step": 79190 }, { "epoch": 12.919249592169658, "grad_norm": 0.17829741537570953, "learning_rate": 0.0003356419657281827, "loss": 0.0136, "num_input_tokens_seen": 171077632, "step": 79195 }, { "epoch": 12.920065252854812, "grad_norm": 0.02999373897910118, "learning_rate": 0.0003355747431198104, "loss": 0.0856, "num_input_tokens_seen": 171088640, "step": 79200 }, { "epoch": 12.920880913539968, "grad_norm": 0.6304906010627747, "learning_rate": 0.0003355075238437243, "loss": 0.0576, "num_input_tokens_seen": 171099200, "step": 79205 }, { "epoch": 12.921696574225122, "grad_norm": 0.7816330790519714, "learning_rate": 0.0003354403079012871, "loss": 0.098, "num_input_tokens_seen": 171109728, "step": 79210 }, { "epoch": 12.922512234910277, "grad_norm": 0.4523005187511444, "learning_rate": 0.0003353730952938606, "loss": 0.0258, "num_input_tokens_seen": 171120320, "step": 79215 }, { "epoch": 12.923327895595433, "grad_norm": 0.0062120272777974606, "learning_rate": 0.0003353058860228073, "loss": 0.0028, "num_input_tokens_seen": 171131328, "step": 79220 }, { "epoch": 12.924143556280587, "grad_norm": 0.23003165423870087, "learning_rate": 0.0003352386800894891, "loss": 0.0433, "num_input_tokens_seen": 171141984, "step": 79225 }, { "epoch": 12.924959216965743, "grad_norm": 0.036548204720020294, "learning_rate": 0.0003351714774952681, "loss": 0.0078, "num_input_tokens_seen": 171153152, "step": 79230 }, { "epoch": 12.925774877650896, "grad_norm": 0.1406838595867157, "learning_rate": 0.00033510427824150625, "loss": 0.0133, "num_input_tokens_seen": 171164288, "step": 79235 }, { "epoch": 12.926590538336052, "grad_norm": 0.013899928890168667, "learning_rate": 0.0003350370823295653, "loss": 0.0089, "num_input_tokens_seen": 171174944, "step": 79240 }, { "epoch": 12.927406199021208, "grad_norm": 0.011088810861110687, "learning_rate": 0.0003349698897608071, "loss": 0.0179, "num_input_tokens_seen": 171184864, "step": 79245 }, { "epoch": 12.928221859706362, "grad_norm": 0.010716955177485943, "learning_rate": 0.00033490270053659367, "loss": 0.0034, "num_input_tokens_seen": 171195840, "step": 79250 }, { "epoch": 12.929037520391518, "grad_norm": 0.133016899228096, "learning_rate": 0.0003348355146582862, "loss": 0.0078, "num_input_tokens_seen": 171207104, "step": 79255 }, { "epoch": 12.929853181076671, "grad_norm": 0.013618758879601955, "learning_rate": 0.00033476833212724676, "loss": 0.0029, "num_input_tokens_seen": 171217792, "step": 79260 }, { "epoch": 12.930668841761827, "grad_norm": 0.005291712004691362, "learning_rate": 0.0003347011529448365, "loss": 0.0031, "num_input_tokens_seen": 171229408, "step": 79265 }, { "epoch": 12.931484502446983, "grad_norm": 0.0027662403881549835, "learning_rate": 0.00033463397711241727, "loss": 0.0076, "num_input_tokens_seen": 171238784, "step": 79270 }, { "epoch": 12.932300163132137, "grad_norm": 0.004883588757365942, "learning_rate": 0.00033456680463135006, "loss": 0.0758, "num_input_tokens_seen": 171249344, "step": 79275 }, { "epoch": 12.933115823817293, "grad_norm": 0.0021018683910369873, "learning_rate": 0.00033449963550299646, "loss": 0.0072, "num_input_tokens_seen": 171259392, "step": 79280 }, { "epoch": 12.933931484502446, "grad_norm": 0.12410213053226471, "learning_rate": 0.00033443246972871785, "loss": 0.0069, "num_input_tokens_seen": 171269888, "step": 79285 }, { "epoch": 12.934747145187602, "grad_norm": 0.04729204624891281, "learning_rate": 0.000334365307309875, "loss": 0.0045, "num_input_tokens_seen": 171281696, "step": 79290 }, { "epoch": 12.935562805872756, "grad_norm": 0.13005883991718292, "learning_rate": 0.00033429814824782967, "loss": 0.0113, "num_input_tokens_seen": 171291232, "step": 79295 }, { "epoch": 12.936378466557912, "grad_norm": 0.0018234510207548738, "learning_rate": 0.0003342309925439423, "loss": 0.1334, "num_input_tokens_seen": 171300864, "step": 79300 }, { "epoch": 12.937194127243067, "grad_norm": 0.007303939666599035, "learning_rate": 0.0003341638401995744, "loss": 0.0206, "num_input_tokens_seen": 171311264, "step": 79305 }, { "epoch": 12.938009787928221, "grad_norm": 0.23786698281764984, "learning_rate": 0.0003340966912160864, "loss": 0.02, "num_input_tokens_seen": 171322080, "step": 79310 }, { "epoch": 12.938825448613377, "grad_norm": 0.07854799926280975, "learning_rate": 0.00033402954559483966, "loss": 0.0721, "num_input_tokens_seen": 171333760, "step": 79315 }, { "epoch": 12.939641109298531, "grad_norm": 0.0014028697041794658, "learning_rate": 0.0003339624033371945, "loss": 0.1161, "num_input_tokens_seen": 171345088, "step": 79320 }, { "epoch": 12.940456769983687, "grad_norm": 0.40298980474472046, "learning_rate": 0.00033389526444451215, "loss": 0.0158, "num_input_tokens_seen": 171355360, "step": 79325 }, { "epoch": 12.941272430668842, "grad_norm": 0.00499499449506402, "learning_rate": 0.00033382812891815267, "loss": 0.0167, "num_input_tokens_seen": 171365856, "step": 79330 }, { "epoch": 12.942088091353996, "grad_norm": 0.0031798086129128933, "learning_rate": 0.00033376099675947726, "loss": 0.0016, "num_input_tokens_seen": 171375904, "step": 79335 }, { "epoch": 12.942903752039152, "grad_norm": 0.2039460837841034, "learning_rate": 0.0003336938679698459, "loss": 0.0109, "num_input_tokens_seen": 171386144, "step": 79340 }, { "epoch": 12.943719412724306, "grad_norm": 0.6421544551849365, "learning_rate": 0.0003336267425506194, "loss": 0.0613, "num_input_tokens_seen": 171397248, "step": 79345 }, { "epoch": 12.944535073409462, "grad_norm": 0.017462583258748055, "learning_rate": 0.0003335596205031579, "loss": 0.018, "num_input_tokens_seen": 171407680, "step": 79350 }, { "epoch": 12.945350734094617, "grad_norm": 0.1923362910747528, "learning_rate": 0.00033349250182882205, "loss": 0.0152, "num_input_tokens_seen": 171418368, "step": 79355 }, { "epoch": 12.946166394779771, "grad_norm": 0.8219704627990723, "learning_rate": 0.0003334253865289717, "loss": 0.0385, "num_input_tokens_seen": 171428352, "step": 79360 }, { "epoch": 12.946982055464927, "grad_norm": 0.002316342433914542, "learning_rate": 0.00033335827460496725, "loss": 0.0296, "num_input_tokens_seen": 171439424, "step": 79365 }, { "epoch": 12.947797716150081, "grad_norm": 0.00106413708999753, "learning_rate": 0.0003332911660581688, "loss": 0.0838, "num_input_tokens_seen": 171449632, "step": 79370 }, { "epoch": 12.948613376835237, "grad_norm": 0.5249331593513489, "learning_rate": 0.0003332240608899363, "loss": 0.0539, "num_input_tokens_seen": 171458592, "step": 79375 }, { "epoch": 12.949429037520392, "grad_norm": 0.003496425226330757, "learning_rate": 0.0003331569591016298, "loss": 0.0227, "num_input_tokens_seen": 171469120, "step": 79380 }, { "epoch": 12.950244698205546, "grad_norm": 0.11744219064712524, "learning_rate": 0.0003330898606946091, "loss": 0.0088, "num_input_tokens_seen": 171480032, "step": 79385 }, { "epoch": 12.951060358890702, "grad_norm": 0.02061287686228752, "learning_rate": 0.0003330227656702342, "loss": 0.0075, "num_input_tokens_seen": 171491552, "step": 79390 }, { "epoch": 12.951876019575856, "grad_norm": 0.001456076861359179, "learning_rate": 0.00033295567402986476, "loss": 0.0062, "num_input_tokens_seen": 171502976, "step": 79395 }, { "epoch": 12.952691680261012, "grad_norm": 0.8078159689903259, "learning_rate": 0.0003328885857748605, "loss": 0.1798, "num_input_tokens_seen": 171513152, "step": 79400 }, { "epoch": 12.953507340946166, "grad_norm": 0.008021415211260319, "learning_rate": 0.00033282150090658115, "loss": 0.1147, "num_input_tokens_seen": 171524416, "step": 79405 }, { "epoch": 12.954323001631321, "grad_norm": 0.007471402175724506, "learning_rate": 0.0003327544194263861, "loss": 0.0029, "num_input_tokens_seen": 171535328, "step": 79410 }, { "epoch": 12.955138662316477, "grad_norm": 0.017298957332968712, "learning_rate": 0.0003326873413356347, "loss": 0.005, "num_input_tokens_seen": 171547008, "step": 79415 }, { "epoch": 12.955954323001631, "grad_norm": 0.8735957145690918, "learning_rate": 0.0003326202666356869, "loss": 0.044, "num_input_tokens_seen": 171559104, "step": 79420 }, { "epoch": 12.956769983686787, "grad_norm": 0.4984961748123169, "learning_rate": 0.0003325531953279015, "loss": 0.0768, "num_input_tokens_seen": 171569248, "step": 79425 }, { "epoch": 12.95758564437194, "grad_norm": 0.00716983899474144, "learning_rate": 0.0003324861274136382, "loss": 0.0034, "num_input_tokens_seen": 171579776, "step": 79430 }, { "epoch": 12.958401305057096, "grad_norm": 0.5368052124977112, "learning_rate": 0.0003324190628942558, "loss": 0.0639, "num_input_tokens_seen": 171590432, "step": 79435 }, { "epoch": 12.959216965742252, "grad_norm": 0.01023983582854271, "learning_rate": 0.000332352001771114, "loss": 0.0163, "num_input_tokens_seen": 171601760, "step": 79440 }, { "epoch": 12.960032626427406, "grad_norm": 0.03688769415020943, "learning_rate": 0.0003322849440455713, "loss": 0.0031, "num_input_tokens_seen": 171613376, "step": 79445 }, { "epoch": 12.960848287112562, "grad_norm": 0.009764597751200199, "learning_rate": 0.0003322178897189871, "loss": 0.0192, "num_input_tokens_seen": 171625056, "step": 79450 }, { "epoch": 12.961663947797716, "grad_norm": 0.006400657817721367, "learning_rate": 0.00033215083879272015, "loss": 0.0113, "num_input_tokens_seen": 171635712, "step": 79455 }, { "epoch": 12.962479608482871, "grad_norm": 0.0073861065320670605, "learning_rate": 0.00033208379126812947, "loss": 0.0061, "num_input_tokens_seen": 171646432, "step": 79460 }, { "epoch": 12.963295269168025, "grad_norm": 0.0030887688044458628, "learning_rate": 0.0003320167471465736, "loss": 0.0068, "num_input_tokens_seen": 171657472, "step": 79465 }, { "epoch": 12.964110929853181, "grad_norm": 0.35004812479019165, "learning_rate": 0.0003319497064294117, "loss": 0.0852, "num_input_tokens_seen": 171668864, "step": 79470 }, { "epoch": 12.964926590538337, "grad_norm": 0.012314059771597385, "learning_rate": 0.0003318826691180019, "loss": 0.0392, "num_input_tokens_seen": 171680832, "step": 79475 }, { "epoch": 12.96574225122349, "grad_norm": 0.004713066387921572, "learning_rate": 0.00033181563521370337, "loss": 0.0612, "num_input_tokens_seen": 171692640, "step": 79480 }, { "epoch": 12.966557911908646, "grad_norm": 0.01903359591960907, "learning_rate": 0.0003317486047178742, "loss": 0.0807, "num_input_tokens_seen": 171702752, "step": 79485 }, { "epoch": 12.9673735725938, "grad_norm": 0.026393504813313484, "learning_rate": 0.00033168157763187285, "loss": 0.0161, "num_input_tokens_seen": 171713120, "step": 79490 }, { "epoch": 12.968189233278956, "grad_norm": 0.0038926454726606607, "learning_rate": 0.0003316145539570581, "loss": 0.0089, "num_input_tokens_seen": 171725216, "step": 79495 }, { "epoch": 12.969004893964112, "grad_norm": 0.0027679800987243652, "learning_rate": 0.00033154753369478787, "loss": 0.0049, "num_input_tokens_seen": 171735072, "step": 79500 }, { "epoch": 12.969820554649266, "grad_norm": 0.003413753118366003, "learning_rate": 0.00033148051684642074, "loss": 0.0104, "num_input_tokens_seen": 171745536, "step": 79505 }, { "epoch": 12.970636215334421, "grad_norm": 0.0008030119352042675, "learning_rate": 0.00033141350341331447, "loss": 0.0053, "num_input_tokens_seen": 171756768, "step": 79510 }, { "epoch": 12.971451876019575, "grad_norm": 0.09928394109010696, "learning_rate": 0.00033134649339682773, "loss": 0.0125, "num_input_tokens_seen": 171767520, "step": 79515 }, { "epoch": 12.97226753670473, "grad_norm": 0.4272739589214325, "learning_rate": 0.000331279486798318, "loss": 0.0468, "num_input_tokens_seen": 171777920, "step": 79520 }, { "epoch": 12.973083197389887, "grad_norm": 0.25293394923210144, "learning_rate": 0.0003312124836191437, "loss": 0.0519, "num_input_tokens_seen": 171787648, "step": 79525 }, { "epoch": 12.97389885807504, "grad_norm": 0.006494682747870684, "learning_rate": 0.00033114548386066234, "loss": 0.0326, "num_input_tokens_seen": 171798176, "step": 79530 }, { "epoch": 12.974714518760196, "grad_norm": 0.0020080001559108496, "learning_rate": 0.00033107848752423203, "loss": 0.1135, "num_input_tokens_seen": 171808608, "step": 79535 }, { "epoch": 12.97553017944535, "grad_norm": 0.010064424015581608, "learning_rate": 0.0003310114946112105, "loss": 0.0463, "num_input_tokens_seen": 171820256, "step": 79540 }, { "epoch": 12.976345840130506, "grad_norm": 0.03084593638777733, "learning_rate": 0.00033094450512295535, "loss": 0.0052, "num_input_tokens_seen": 171830432, "step": 79545 }, { "epoch": 12.977161500815662, "grad_norm": 0.0262883510440588, "learning_rate": 0.00033087751906082436, "loss": 0.0138, "num_input_tokens_seen": 171841216, "step": 79550 }, { "epoch": 12.977977161500815, "grad_norm": 0.005067694932222366, "learning_rate": 0.000330810536426175, "loss": 0.0054, "num_input_tokens_seen": 171851616, "step": 79555 }, { "epoch": 12.978792822185971, "grad_norm": 0.039184898138046265, "learning_rate": 0.0003307435572203645, "loss": 0.0037, "num_input_tokens_seen": 171862912, "step": 79560 }, { "epoch": 12.979608482871125, "grad_norm": 0.008203844539821148, "learning_rate": 0.00033067658144475087, "loss": 0.0184, "num_input_tokens_seen": 171873504, "step": 79565 }, { "epoch": 12.98042414355628, "grad_norm": 0.008222805336117744, "learning_rate": 0.0003306096091006909, "loss": 0.013, "num_input_tokens_seen": 171883040, "step": 79570 }, { "epoch": 12.981239804241435, "grad_norm": 0.009524693712592125, "learning_rate": 0.0003305426401895423, "loss": 0.0042, "num_input_tokens_seen": 171893984, "step": 79575 }, { "epoch": 12.98205546492659, "grad_norm": 0.17951497435569763, "learning_rate": 0.0003304756747126618, "loss": 0.0818, "num_input_tokens_seen": 171905024, "step": 79580 }, { "epoch": 12.982871125611746, "grad_norm": 0.08185935020446777, "learning_rate": 0.00033040871267140705, "loss": 0.0199, "num_input_tokens_seen": 171915776, "step": 79585 }, { "epoch": 12.9836867862969, "grad_norm": 0.01454191654920578, "learning_rate": 0.00033034175406713464, "loss": 0.0299, "num_input_tokens_seen": 171926496, "step": 79590 }, { "epoch": 12.984502446982056, "grad_norm": 0.001568776206113398, "learning_rate": 0.0003302747989012019, "loss": 0.0074, "num_input_tokens_seen": 171936896, "step": 79595 }, { "epoch": 12.98531810766721, "grad_norm": 0.1458302140235901, "learning_rate": 0.00033020784717496576, "loss": 0.0087, "num_input_tokens_seen": 171947744, "step": 79600 }, { "epoch": 12.986133768352365, "grad_norm": 0.0026035963091999292, "learning_rate": 0.0003301408988897829, "loss": 0.0048, "num_input_tokens_seen": 171957216, "step": 79605 }, { "epoch": 12.986949429037521, "grad_norm": 0.1463485211133957, "learning_rate": 0.00033007395404701035, "loss": 0.0555, "num_input_tokens_seen": 171968640, "step": 79610 }, { "epoch": 12.987765089722675, "grad_norm": 0.0034880635794252157, "learning_rate": 0.0003300070126480045, "loss": 0.0025, "num_input_tokens_seen": 171979904, "step": 79615 }, { "epoch": 12.98858075040783, "grad_norm": 0.0010988858994096518, "learning_rate": 0.00032994007469412234, "loss": 0.0043, "num_input_tokens_seen": 171990944, "step": 79620 }, { "epoch": 12.989396411092985, "grad_norm": 0.012164515443146229, "learning_rate": 0.0003298731401867202, "loss": 0.1356, "num_input_tokens_seen": 172002432, "step": 79625 }, { "epoch": 12.99021207177814, "grad_norm": 0.05736779049038887, "learning_rate": 0.0003298062091271548, "loss": 0.01, "num_input_tokens_seen": 172013504, "step": 79630 }, { "epoch": 12.991027732463294, "grad_norm": 0.018071817234158516, "learning_rate": 0.00032973928151678233, "loss": 0.0131, "num_input_tokens_seen": 172025376, "step": 79635 }, { "epoch": 12.99184339314845, "grad_norm": 0.0019351065857335925, "learning_rate": 0.00032967235735695955, "loss": 0.0056, "num_input_tokens_seen": 172036320, "step": 79640 }, { "epoch": 12.992659053833606, "grad_norm": 0.005769283045083284, "learning_rate": 0.00032960543664904224, "loss": 0.0284, "num_input_tokens_seen": 172047584, "step": 79645 }, { "epoch": 12.99347471451876, "grad_norm": 0.0038038466591387987, "learning_rate": 0.0003295385193943872, "loss": 0.0029, "num_input_tokens_seen": 172059072, "step": 79650 }, { "epoch": 12.994290375203915, "grad_norm": 0.004908800590783358, "learning_rate": 0.00032947160559435, "loss": 0.0057, "num_input_tokens_seen": 172069920, "step": 79655 }, { "epoch": 12.99510603588907, "grad_norm": 0.0024959484580904245, "learning_rate": 0.00032940469525028735, "loss": 0.0516, "num_input_tokens_seen": 172079840, "step": 79660 }, { "epoch": 12.995921696574225, "grad_norm": 0.07145286351442337, "learning_rate": 0.0003293377883635547, "loss": 0.0155, "num_input_tokens_seen": 172092320, "step": 79665 }, { "epoch": 12.99673735725938, "grad_norm": 1.0221548080444336, "learning_rate": 0.0003292708849355085, "loss": 0.0672, "num_input_tokens_seen": 172102336, "step": 79670 }, { "epoch": 12.997553017944535, "grad_norm": 0.007001963909715414, "learning_rate": 0.0003292039849675042, "loss": 0.0359, "num_input_tokens_seen": 172113120, "step": 79675 }, { "epoch": 12.99836867862969, "grad_norm": 0.009530258364975452, "learning_rate": 0.0003291370884608979, "loss": 0.0039, "num_input_tokens_seen": 172123136, "step": 79680 }, { "epoch": 12.999184339314844, "grad_norm": 0.004639983177185059, "learning_rate": 0.00032907019541704533, "loss": 0.0163, "num_input_tokens_seen": 172133984, "step": 79685 }, { "epoch": 13.0, "grad_norm": 0.06852234899997711, "learning_rate": 0.00032900330583730196, "loss": 0.0084, "num_input_tokens_seen": 172144032, "step": 79690 }, { "epoch": 13.0, "eval_loss": 0.19773797690868378, "eval_runtime": 104.1935, "eval_samples_per_second": 26.153, "eval_steps_per_second": 6.546, "num_input_tokens_seen": 172144032, "step": 79690 }, { "epoch": 13.000815660685156, "grad_norm": 0.009670889936387539, "learning_rate": 0.0003289364197230236, "loss": 0.0033, "num_input_tokens_seen": 172156000, "step": 79695 }, { "epoch": 13.00163132137031, "grad_norm": 0.004153914283961058, "learning_rate": 0.0003288695370755657, "loss": 0.0047, "num_input_tokens_seen": 172165952, "step": 79700 }, { "epoch": 13.002446982055465, "grad_norm": 0.016005946323275566, "learning_rate": 0.0003288026578962836, "loss": 0.0043, "num_input_tokens_seen": 172177600, "step": 79705 }, { "epoch": 13.00326264274062, "grad_norm": 0.009066256694495678, "learning_rate": 0.0003287357821865329, "loss": 0.0522, "num_input_tokens_seen": 172187872, "step": 79710 }, { "epoch": 13.004078303425775, "grad_norm": 0.011497425846755505, "learning_rate": 0.0003286689099476689, "loss": 0.1064, "num_input_tokens_seen": 172199136, "step": 79715 }, { "epoch": 13.00489396411093, "grad_norm": 0.02010742947459221, "learning_rate": 0.00032860204118104674, "loss": 0.0035, "num_input_tokens_seen": 172210720, "step": 79720 }, { "epoch": 13.005709624796085, "grad_norm": 0.004298577085137367, "learning_rate": 0.00032853517588802173, "loss": 0.004, "num_input_tokens_seen": 172221984, "step": 79725 }, { "epoch": 13.00652528548124, "grad_norm": 0.00979915913194418, "learning_rate": 0.0003284683140699487, "loss": 0.0051, "num_input_tokens_seen": 172231520, "step": 79730 }, { "epoch": 13.007340946166394, "grad_norm": 0.009911553002893925, "learning_rate": 0.00032840145572818314, "loss": 0.0046, "num_input_tokens_seen": 172242560, "step": 79735 }, { "epoch": 13.00815660685155, "grad_norm": 0.00278778956271708, "learning_rate": 0.0003283346008640795, "loss": 0.0044, "num_input_tokens_seen": 172252864, "step": 79740 }, { "epoch": 13.008972267536704, "grad_norm": 0.00484444759786129, "learning_rate": 0.0003282677494789933, "loss": 0.0034, "num_input_tokens_seen": 172262496, "step": 79745 }, { "epoch": 13.00978792822186, "grad_norm": 0.004565055947750807, "learning_rate": 0.0003282009015742787, "loss": 0.0043, "num_input_tokens_seen": 172273376, "step": 79750 }, { "epoch": 13.010603588907015, "grad_norm": 0.010577378794550896, "learning_rate": 0.00032813405715129097, "loss": 0.003, "num_input_tokens_seen": 172284256, "step": 79755 }, { "epoch": 13.01141924959217, "grad_norm": 0.3631739616394043, "learning_rate": 0.00032806721621138444, "loss": 0.0748, "num_input_tokens_seen": 172295232, "step": 79760 }, { "epoch": 13.012234910277325, "grad_norm": 0.038808513432741165, "learning_rate": 0.00032800037875591406, "loss": 0.0509, "num_input_tokens_seen": 172306272, "step": 79765 }, { "epoch": 13.013050570962479, "grad_norm": 0.016878092661499977, "learning_rate": 0.000327933544786234, "loss": 0.0085, "num_input_tokens_seen": 172316800, "step": 79770 }, { "epoch": 13.013866231647635, "grad_norm": 0.08253694325685501, "learning_rate": 0.00032786671430369915, "loss": 0.0082, "num_input_tokens_seen": 172328064, "step": 79775 }, { "epoch": 13.01468189233279, "grad_norm": 0.004199854098260403, "learning_rate": 0.0003277998873096635, "loss": 0.1454, "num_input_tokens_seen": 172338016, "step": 79780 }, { "epoch": 13.015497553017944, "grad_norm": 0.3190031051635742, "learning_rate": 0.00032773306380548176, "loss": 0.1243, "num_input_tokens_seen": 172349440, "step": 79785 }, { "epoch": 13.0163132137031, "grad_norm": 0.021624628454446793, "learning_rate": 0.0003276662437925079, "loss": 0.0026, "num_input_tokens_seen": 172359776, "step": 79790 }, { "epoch": 13.017128874388254, "grad_norm": 0.021654745563864708, "learning_rate": 0.0003275994272720963, "loss": 0.0032, "num_input_tokens_seen": 172370560, "step": 79795 }, { "epoch": 13.01794453507341, "grad_norm": 0.06402740627527237, "learning_rate": 0.0003275326142456009, "loss": 0.0157, "num_input_tokens_seen": 172380992, "step": 79800 }, { "epoch": 13.018760195758565, "grad_norm": 0.004721821751445532, "learning_rate": 0.00032746580471437606, "loss": 0.0225, "num_input_tokens_seen": 172392928, "step": 79805 }, { "epoch": 13.01957585644372, "grad_norm": 0.0027865879237651825, "learning_rate": 0.0003273989986797753, "loss": 0.0153, "num_input_tokens_seen": 172401888, "step": 79810 }, { "epoch": 13.020391517128875, "grad_norm": 0.007285828702151775, "learning_rate": 0.00032733219614315283, "loss": 0.0126, "num_input_tokens_seen": 172412352, "step": 79815 }, { "epoch": 13.021207177814029, "grad_norm": 0.008828174322843552, "learning_rate": 0.00032726539710586266, "loss": 0.0189, "num_input_tokens_seen": 172422720, "step": 79820 }, { "epoch": 13.022022838499185, "grad_norm": 0.004288358148187399, "learning_rate": 0.0003271986015692582, "loss": 0.0029, "num_input_tokens_seen": 172431584, "step": 79825 }, { "epoch": 13.022838499184338, "grad_norm": 0.05239563062787056, "learning_rate": 0.0003271318095346934, "loss": 0.1232, "num_input_tokens_seen": 172444032, "step": 79830 }, { "epoch": 13.023654159869494, "grad_norm": 0.0009064357727766037, "learning_rate": 0.00032706502100352165, "loss": 0.0035, "num_input_tokens_seen": 172454976, "step": 79835 }, { "epoch": 13.02446982055465, "grad_norm": 0.03271019458770752, "learning_rate": 0.00032699823597709675, "loss": 0.0663, "num_input_tokens_seen": 172465600, "step": 79840 }, { "epoch": 13.025285481239804, "grad_norm": 0.020236380398273468, "learning_rate": 0.00032693145445677194, "loss": 0.0086, "num_input_tokens_seen": 172476864, "step": 79845 }, { "epoch": 13.02610114192496, "grad_norm": 0.020625924691557884, "learning_rate": 0.00032686467644390085, "loss": 0.0027, "num_input_tokens_seen": 172487328, "step": 79850 }, { "epoch": 13.026916802610113, "grad_norm": 0.0036424091085791588, "learning_rate": 0.00032679790193983666, "loss": 0.0074, "num_input_tokens_seen": 172496320, "step": 79855 }, { "epoch": 13.02773246329527, "grad_norm": 0.025376515462994576, "learning_rate": 0.0003267311309459328, "loss": 0.0243, "num_input_tokens_seen": 172505696, "step": 79860 }, { "epoch": 13.028548123980425, "grad_norm": 0.0028710965998470783, "learning_rate": 0.00032666436346354236, "loss": 0.144, "num_input_tokens_seen": 172516896, "step": 79865 }, { "epoch": 13.029363784665579, "grad_norm": 0.005142997018992901, "learning_rate": 0.0003265975994940185, "loss": 0.0068, "num_input_tokens_seen": 172527360, "step": 79870 }, { "epoch": 13.030179445350734, "grad_norm": 0.00245002587325871, "learning_rate": 0.00032653083903871406, "loss": 0.0029, "num_input_tokens_seen": 172537728, "step": 79875 }, { "epoch": 13.030995106035888, "grad_norm": 0.00920133013278246, "learning_rate": 0.0003264640820989825, "loss": 0.003, "num_input_tokens_seen": 172549344, "step": 79880 }, { "epoch": 13.031810766721044, "grad_norm": 0.28441938757896423, "learning_rate": 0.0003263973286761762, "loss": 0.0296, "num_input_tokens_seen": 172558880, "step": 79885 }, { "epoch": 13.0326264274062, "grad_norm": 0.00399240804836154, "learning_rate": 0.0003263305787716486, "loss": 0.1267, "num_input_tokens_seen": 172570752, "step": 79890 }, { "epoch": 13.033442088091354, "grad_norm": 0.024981455877423286, "learning_rate": 0.00032626383238675184, "loss": 0.04, "num_input_tokens_seen": 172581856, "step": 79895 }, { "epoch": 13.03425774877651, "grad_norm": 0.0030545040499418974, "learning_rate": 0.0003261970895228391, "loss": 0.0176, "num_input_tokens_seen": 172592256, "step": 79900 }, { "epoch": 13.035073409461663, "grad_norm": 0.03148532286286354, "learning_rate": 0.00032613035018126267, "loss": 0.0069, "num_input_tokens_seen": 172603104, "step": 79905 }, { "epoch": 13.035889070146819, "grad_norm": 0.07724172621965408, "learning_rate": 0.0003260636143633755, "loss": 0.0066, "num_input_tokens_seen": 172613312, "step": 79910 }, { "epoch": 13.036704730831975, "grad_norm": 0.003198714228346944, "learning_rate": 0.0003259968820705296, "loss": 0.0024, "num_input_tokens_seen": 172622912, "step": 79915 }, { "epoch": 13.037520391517129, "grad_norm": 0.004381487611681223, "learning_rate": 0.0003259301533040776, "loss": 0.0075, "num_input_tokens_seen": 172634112, "step": 79920 }, { "epoch": 13.038336052202284, "grad_norm": 0.4412562847137451, "learning_rate": 0.00032586342806537207, "loss": 0.0686, "num_input_tokens_seen": 172644800, "step": 79925 }, { "epoch": 13.039151712887438, "grad_norm": 0.002459079958498478, "learning_rate": 0.0003257967063557649, "loss": 0.0045, "num_input_tokens_seen": 172655072, "step": 79930 }, { "epoch": 13.039967373572594, "grad_norm": 0.026812493801116943, "learning_rate": 0.0003257299881766087, "loss": 0.0036, "num_input_tokens_seen": 172666912, "step": 79935 }, { "epoch": 13.040783034257748, "grad_norm": 0.0030856519006192684, "learning_rate": 0.0003256632735292551, "loss": 0.0047, "num_input_tokens_seen": 172678368, "step": 79940 }, { "epoch": 13.041598694942904, "grad_norm": 0.0029760266188532114, "learning_rate": 0.00032559656241505663, "loss": 0.0033, "num_input_tokens_seen": 172688224, "step": 79945 }, { "epoch": 13.04241435562806, "grad_norm": 0.005138032604008913, "learning_rate": 0.0003255298548353649, "loss": 0.0982, "num_input_tokens_seen": 172698784, "step": 79950 }, { "epoch": 13.043230016313213, "grad_norm": 0.004156198818236589, "learning_rate": 0.0003254631507915322, "loss": 0.0042, "num_input_tokens_seen": 172708480, "step": 79955 }, { "epoch": 13.044045676998369, "grad_norm": 0.001793356379494071, "learning_rate": 0.00032539645028490993, "loss": 0.0283, "num_input_tokens_seen": 172718080, "step": 79960 }, { "epoch": 13.044861337683523, "grad_norm": 0.0031504526268690825, "learning_rate": 0.0003253297533168503, "loss": 0.0066, "num_input_tokens_seen": 172728864, "step": 79965 }, { "epoch": 13.045676998368679, "grad_norm": 0.003528774017468095, "learning_rate": 0.0003252630598887046, "loss": 0.0865, "num_input_tokens_seen": 172740320, "step": 79970 }, { "epoch": 13.046492659053834, "grad_norm": 0.16039009392261505, "learning_rate": 0.00032519637000182495, "loss": 0.0091, "num_input_tokens_seen": 172752064, "step": 79975 }, { "epoch": 13.047308319738988, "grad_norm": 0.007757482118904591, "learning_rate": 0.0003251296836575623, "loss": 0.0066, "num_input_tokens_seen": 172763072, "step": 79980 }, { "epoch": 13.048123980424144, "grad_norm": 0.013547541573643684, "learning_rate": 0.00032506300085726874, "loss": 0.0147, "num_input_tokens_seen": 172773632, "step": 79985 }, { "epoch": 13.048939641109298, "grad_norm": 0.0315958634018898, "learning_rate": 0.0003249963216022951, "loss": 0.0067, "num_input_tokens_seen": 172784128, "step": 79990 }, { "epoch": 13.049755301794454, "grad_norm": 0.01640213653445244, "learning_rate": 0.0003249296458939932, "loss": 0.0266, "num_input_tokens_seen": 172794368, "step": 79995 }, { "epoch": 13.05057096247961, "grad_norm": 0.012024256400763988, "learning_rate": 0.0003248629737337141, "loss": 0.0068, "num_input_tokens_seen": 172805600, "step": 80000 }, { "epoch": 13.051386623164763, "grad_norm": 0.004929613322019577, "learning_rate": 0.000324796305122809, "loss": 0.013, "num_input_tokens_seen": 172815296, "step": 80005 }, { "epoch": 13.052202283849919, "grad_norm": 0.028229599818587303, "learning_rate": 0.000324729640062629, "loss": 0.0078, "num_input_tokens_seen": 172825280, "step": 80010 }, { "epoch": 13.053017944535073, "grad_norm": 0.002132690977305174, "learning_rate": 0.0003246629785545252, "loss": 0.0037, "num_input_tokens_seen": 172836576, "step": 80015 }, { "epoch": 13.053833605220229, "grad_norm": 0.00830269604921341, "learning_rate": 0.0003245963205998485, "loss": 0.0079, "num_input_tokens_seen": 172847296, "step": 80020 }, { "epoch": 13.054649265905383, "grad_norm": 0.009801110252737999, "learning_rate": 0.00032452966619994997, "loss": 0.0026, "num_input_tokens_seen": 172857440, "step": 80025 }, { "epoch": 13.055464926590538, "grad_norm": 0.03717103973031044, "learning_rate": 0.00032446301535618034, "loss": 0.0036, "num_input_tokens_seen": 172869344, "step": 80030 }, { "epoch": 13.056280587275694, "grad_norm": 0.01708907075226307, "learning_rate": 0.0003243963680698904, "loss": 0.0031, "num_input_tokens_seen": 172878528, "step": 80035 }, { "epoch": 13.057096247960848, "grad_norm": 0.034726761281490326, "learning_rate": 0.0003243297243424308, "loss": 0.0395, "num_input_tokens_seen": 172888992, "step": 80040 }, { "epoch": 13.057911908646004, "grad_norm": 0.0035679759457707405, "learning_rate": 0.0003242630841751522, "loss": 0.0021, "num_input_tokens_seen": 172899776, "step": 80045 }, { "epoch": 13.058727569331158, "grad_norm": 0.044592756778001785, "learning_rate": 0.00032419644756940527, "loss": 0.0035, "num_input_tokens_seen": 172910368, "step": 80050 }, { "epoch": 13.059543230016313, "grad_norm": 0.0022370279766619205, "learning_rate": 0.0003241298145265401, "loss": 0.0049, "num_input_tokens_seen": 172921664, "step": 80055 }, { "epoch": 13.060358890701469, "grad_norm": 0.002283082576468587, "learning_rate": 0.00032406318504790753, "loss": 0.0028, "num_input_tokens_seen": 172933472, "step": 80060 }, { "epoch": 13.061174551386623, "grad_norm": 0.3229667842388153, "learning_rate": 0.0003239965591348576, "loss": 0.0932, "num_input_tokens_seen": 172943872, "step": 80065 }, { "epoch": 13.061990212071779, "grad_norm": 0.01312661450356245, "learning_rate": 0.00032392993678874085, "loss": 0.0307, "num_input_tokens_seen": 172954912, "step": 80070 }, { "epoch": 13.062805872756933, "grad_norm": 0.0027966343332082033, "learning_rate": 0.0003238633180109071, "loss": 0.0726, "num_input_tokens_seen": 172964736, "step": 80075 }, { "epoch": 13.063621533442088, "grad_norm": 0.00816719327121973, "learning_rate": 0.00032379670280270677, "loss": 0.0045, "num_input_tokens_seen": 172974848, "step": 80080 }, { "epoch": 13.064437194127244, "grad_norm": 0.0017934865318238735, "learning_rate": 0.0003237300911654897, "loss": 0.0526, "num_input_tokens_seen": 172984448, "step": 80085 }, { "epoch": 13.065252854812398, "grad_norm": 0.33631932735443115, "learning_rate": 0.0003236634831006061, "loss": 0.0124, "num_input_tokens_seen": 172995776, "step": 80090 }, { "epoch": 13.066068515497554, "grad_norm": 0.002418245654553175, "learning_rate": 0.0003235968786094055, "loss": 0.0054, "num_input_tokens_seen": 173006016, "step": 80095 }, { "epoch": 13.066884176182707, "grad_norm": 0.5294049978256226, "learning_rate": 0.0003235302776932382, "loss": 0.0995, "num_input_tokens_seen": 173016960, "step": 80100 }, { "epoch": 13.067699836867863, "grad_norm": 0.38888972997665405, "learning_rate": 0.00032346368035345344, "loss": 0.2369, "num_input_tokens_seen": 173027648, "step": 80105 }, { "epoch": 13.068515497553017, "grad_norm": 0.6325135231018066, "learning_rate": 0.0003233970865914013, "loss": 0.216, "num_input_tokens_seen": 173039424, "step": 80110 }, { "epoch": 13.069331158238173, "grad_norm": 0.0035932499449700117, "learning_rate": 0.0003233304964084311, "loss": 0.0453, "num_input_tokens_seen": 173050048, "step": 80115 }, { "epoch": 13.070146818923329, "grad_norm": 0.02140737511217594, "learning_rate": 0.0003232639098058927, "loss": 0.0095, "num_input_tokens_seen": 173059904, "step": 80120 }, { "epoch": 13.070962479608482, "grad_norm": 0.07032874971628189, "learning_rate": 0.00032319732678513514, "loss": 0.007, "num_input_tokens_seen": 173070880, "step": 80125 }, { "epoch": 13.071778140293638, "grad_norm": 0.0047665243037045, "learning_rate": 0.00032313074734750813, "loss": 0.0199, "num_input_tokens_seen": 173081280, "step": 80130 }, { "epoch": 13.072593800978792, "grad_norm": 0.0695071592926979, "learning_rate": 0.000323064171494361, "loss": 0.0165, "num_input_tokens_seen": 173092960, "step": 80135 }, { "epoch": 13.073409461663948, "grad_norm": 0.08060871809720993, "learning_rate": 0.00032299759922704277, "loss": 0.0156, "num_input_tokens_seen": 173105216, "step": 80140 }, { "epoch": 13.074225122349104, "grad_norm": 0.008816416375339031, "learning_rate": 0.0003229310305469029, "loss": 0.0043, "num_input_tokens_seen": 173116352, "step": 80145 }, { "epoch": 13.075040783034257, "grad_norm": 0.026547571644186974, "learning_rate": 0.00032286446545529016, "loss": 0.0114, "num_input_tokens_seen": 173127008, "step": 80150 }, { "epoch": 13.075856443719413, "grad_norm": 1.0129142999649048, "learning_rate": 0.0003227979039535538, "loss": 0.0597, "num_input_tokens_seen": 173137856, "step": 80155 }, { "epoch": 13.076672104404567, "grad_norm": 0.07595764100551605, "learning_rate": 0.0003227313460430427, "loss": 0.0088, "num_input_tokens_seen": 173149952, "step": 80160 }, { "epoch": 13.077487765089723, "grad_norm": 0.029978493228554726, "learning_rate": 0.0003226647917251058, "loss": 0.0054, "num_input_tokens_seen": 173160512, "step": 80165 }, { "epoch": 13.078303425774878, "grad_norm": 0.01935637556016445, "learning_rate": 0.0003225982410010918, "loss": 0.0049, "num_input_tokens_seen": 173171872, "step": 80170 }, { "epoch": 13.079119086460032, "grad_norm": 0.06953626126050949, "learning_rate": 0.00032253169387234953, "loss": 0.0098, "num_input_tokens_seen": 173183552, "step": 80175 }, { "epoch": 13.079934747145188, "grad_norm": 1.5633295774459839, "learning_rate": 0.0003224651503402276, "loss": 0.06, "num_input_tokens_seen": 173194144, "step": 80180 }, { "epoch": 13.080750407830342, "grad_norm": 0.01591937430202961, "learning_rate": 0.00032239861040607464, "loss": 0.0083, "num_input_tokens_seen": 173205024, "step": 80185 }, { "epoch": 13.081566068515498, "grad_norm": 0.00395574327558279, "learning_rate": 0.0003223320740712391, "loss": 0.0015, "num_input_tokens_seen": 173214176, "step": 80190 }, { "epoch": 13.082381729200652, "grad_norm": 0.5557219386100769, "learning_rate": 0.0003222655413370696, "loss": 0.061, "num_input_tokens_seen": 173224832, "step": 80195 }, { "epoch": 13.083197389885807, "grad_norm": 0.05771186947822571, "learning_rate": 0.00032219901220491417, "loss": 0.0263, "num_input_tokens_seen": 173234528, "step": 80200 }, { "epoch": 13.084013050570963, "grad_norm": 0.005979506764560938, "learning_rate": 0.0003221324866761215, "loss": 0.0042, "num_input_tokens_seen": 173245248, "step": 80205 }, { "epoch": 13.084828711256117, "grad_norm": 0.029441453516483307, "learning_rate": 0.0003220659647520395, "loss": 0.0248, "num_input_tokens_seen": 173255776, "step": 80210 }, { "epoch": 13.085644371941273, "grad_norm": 0.7436314225196838, "learning_rate": 0.00032199944643401655, "loss": 0.0732, "num_input_tokens_seen": 173266272, "step": 80215 }, { "epoch": 13.086460032626427, "grad_norm": 0.006093572359532118, "learning_rate": 0.00032193293172340056, "loss": 0.0016, "num_input_tokens_seen": 173276064, "step": 80220 }, { "epoch": 13.087275693311582, "grad_norm": 0.07572884112596512, "learning_rate": 0.0003218664206215397, "loss": 0.0072, "num_input_tokens_seen": 173286880, "step": 80225 }, { "epoch": 13.088091353996738, "grad_norm": 0.0063800751231610775, "learning_rate": 0.00032179991312978164, "loss": 0.0129, "num_input_tokens_seen": 173297728, "step": 80230 }, { "epoch": 13.088907014681892, "grad_norm": 0.014107972383499146, "learning_rate": 0.00032173340924947436, "loss": 0.0048, "num_input_tokens_seen": 173308512, "step": 80235 }, { "epoch": 13.089722675367048, "grad_norm": 0.03823497146368027, "learning_rate": 0.00032166690898196594, "loss": 0.0065, "num_input_tokens_seen": 173320128, "step": 80240 }, { "epoch": 13.090538336052202, "grad_norm": 0.021396825090050697, "learning_rate": 0.0003216004123286036, "loss": 0.0048, "num_input_tokens_seen": 173331168, "step": 80245 }, { "epoch": 13.091353996737357, "grad_norm": 0.02987091988325119, "learning_rate": 0.0003215339192907355, "loss": 0.0152, "num_input_tokens_seen": 173341536, "step": 80250 }, { "epoch": 13.092169657422513, "grad_norm": 0.3899349570274353, "learning_rate": 0.00032146742986970865, "loss": 0.0587, "num_input_tokens_seen": 173352000, "step": 80255 }, { "epoch": 13.092985318107667, "grad_norm": 0.06607574969530106, "learning_rate": 0.000321400944066871, "loss": 0.1516, "num_input_tokens_seen": 173362368, "step": 80260 }, { "epoch": 13.093800978792823, "grad_norm": 0.021596498787403107, "learning_rate": 0.00032133446188356964, "loss": 0.0084, "num_input_tokens_seen": 173374016, "step": 80265 }, { "epoch": 13.094616639477977, "grad_norm": 0.0013468860415741801, "learning_rate": 0.00032126798332115223, "loss": 0.0017, "num_input_tokens_seen": 173384160, "step": 80270 }, { "epoch": 13.095432300163132, "grad_norm": 0.09484121203422546, "learning_rate": 0.00032120150838096576, "loss": 0.0071, "num_input_tokens_seen": 173395296, "step": 80275 }, { "epoch": 13.096247960848286, "grad_norm": 0.0008138703415170312, "learning_rate": 0.00032113503706435767, "loss": 0.0039, "num_input_tokens_seen": 173406304, "step": 80280 }, { "epoch": 13.097063621533442, "grad_norm": 0.11847636848688126, "learning_rate": 0.00032106856937267475, "loss": 0.0186, "num_input_tokens_seen": 173416160, "step": 80285 }, { "epoch": 13.097879282218598, "grad_norm": 0.029739893972873688, "learning_rate": 0.00032100210530726446, "loss": 0.1203, "num_input_tokens_seen": 173427328, "step": 80290 }, { "epoch": 13.098694942903752, "grad_norm": 0.0190031286329031, "learning_rate": 0.00032093564486947347, "loss": 0.0093, "num_input_tokens_seen": 173438784, "step": 80295 }, { "epoch": 13.099510603588907, "grad_norm": 0.012608149088919163, "learning_rate": 0.0003208691880606488, "loss": 0.0026, "num_input_tokens_seen": 173450464, "step": 80300 }, { "epoch": 13.100326264274061, "grad_norm": 0.5554617643356323, "learning_rate": 0.0003208027348821373, "loss": 0.0652, "num_input_tokens_seen": 173461664, "step": 80305 }, { "epoch": 13.101141924959217, "grad_norm": 0.026764320209622383, "learning_rate": 0.00032073628533528574, "loss": 0.0069, "num_input_tokens_seen": 173472704, "step": 80310 }, { "epoch": 13.101957585644373, "grad_norm": 0.014278876595199108, "learning_rate": 0.0003206698394214407, "loss": 0.0062, "num_input_tokens_seen": 173483616, "step": 80315 }, { "epoch": 13.102773246329527, "grad_norm": 0.009230894036591053, "learning_rate": 0.00032060339714194897, "loss": 0.0322, "num_input_tokens_seen": 173495104, "step": 80320 }, { "epoch": 13.103588907014682, "grad_norm": 0.010614125989377499, "learning_rate": 0.0003205369584981568, "loss": 0.0987, "num_input_tokens_seen": 173505376, "step": 80325 }, { "epoch": 13.104404567699836, "grad_norm": 0.008995750918984413, "learning_rate": 0.000320470523491411, "loss": 0.027, "num_input_tokens_seen": 173517024, "step": 80330 }, { "epoch": 13.105220228384992, "grad_norm": 0.38214293122291565, "learning_rate": 0.00032040409212305765, "loss": 0.0829, "num_input_tokens_seen": 173528256, "step": 80335 }, { "epoch": 13.106035889070148, "grad_norm": 0.00634557381272316, "learning_rate": 0.0003203376643944433, "loss": 0.0079, "num_input_tokens_seen": 173538240, "step": 80340 }, { "epoch": 13.106851549755302, "grad_norm": 0.009608942084014416, "learning_rate": 0.0003202712403069141, "loss": 0.0064, "num_input_tokens_seen": 173548160, "step": 80345 }, { "epoch": 13.107667210440457, "grad_norm": 0.010670358315110207, "learning_rate": 0.00032020481986181606, "loss": 0.0033, "num_input_tokens_seen": 173559776, "step": 80350 }, { "epoch": 13.108482871125611, "grad_norm": 0.019212661311030388, "learning_rate": 0.0003201384030604957, "loss": 0.0661, "num_input_tokens_seen": 173570368, "step": 80355 }, { "epoch": 13.109298531810767, "grad_norm": 0.3519008159637451, "learning_rate": 0.0003200719899042985, "loss": 0.0225, "num_input_tokens_seen": 173581792, "step": 80360 }, { "epoch": 13.11011419249592, "grad_norm": 0.09790132939815521, "learning_rate": 0.00032000558039457094, "loss": 0.011, "num_input_tokens_seen": 173592928, "step": 80365 }, { "epoch": 13.110929853181077, "grad_norm": 0.008577114902436733, "learning_rate": 0.0003199391745326585, "loss": 0.0028, "num_input_tokens_seen": 173604032, "step": 80370 }, { "epoch": 13.111745513866232, "grad_norm": 0.006519824266433716, "learning_rate": 0.0003198727723199072, "loss": 0.0074, "num_input_tokens_seen": 173614848, "step": 80375 }, { "epoch": 13.112561174551386, "grad_norm": 0.001890502288006246, "learning_rate": 0.0003198063737576625, "loss": 0.0039, "num_input_tokens_seen": 173625472, "step": 80380 }, { "epoch": 13.113376835236542, "grad_norm": 0.2293672114610672, "learning_rate": 0.0003197399788472705, "loss": 0.0216, "num_input_tokens_seen": 173637376, "step": 80385 }, { "epoch": 13.114192495921696, "grad_norm": 0.005673011764883995, "learning_rate": 0.0003196735875900762, "loss": 0.0033, "num_input_tokens_seen": 173648800, "step": 80390 }, { "epoch": 13.115008156606851, "grad_norm": 0.004373501054942608, "learning_rate": 0.00031960719998742567, "loss": 0.0113, "num_input_tokens_seen": 173659200, "step": 80395 }, { "epoch": 13.115823817292007, "grad_norm": 0.024961533024907112, "learning_rate": 0.0003195408160406638, "loss": 0.0129, "num_input_tokens_seen": 173671168, "step": 80400 }, { "epoch": 13.116639477977161, "grad_norm": 0.0037182255182415247, "learning_rate": 0.00031947443575113655, "loss": 0.011, "num_input_tokens_seen": 173681280, "step": 80405 }, { "epoch": 13.117455138662317, "grad_norm": 0.023325273767113686, "learning_rate": 0.00031940805912018854, "loss": 0.1272, "num_input_tokens_seen": 173693024, "step": 80410 }, { "epoch": 13.11827079934747, "grad_norm": 0.005407822318375111, "learning_rate": 0.0003193416861491656, "loss": 0.0109, "num_input_tokens_seen": 173702944, "step": 80415 }, { "epoch": 13.119086460032626, "grad_norm": 0.013056616298854351, "learning_rate": 0.00031927531683941234, "loss": 0.0054, "num_input_tokens_seen": 173714592, "step": 80420 }, { "epoch": 13.119902120717782, "grad_norm": 0.016717007383704185, "learning_rate": 0.0003192089511922742, "loss": 0.0647, "num_input_tokens_seen": 173726592, "step": 80425 }, { "epoch": 13.120717781402936, "grad_norm": 0.2105436474084854, "learning_rate": 0.0003191425892090959, "loss": 0.0068, "num_input_tokens_seen": 173736896, "step": 80430 }, { "epoch": 13.121533442088092, "grad_norm": 0.03821544349193573, "learning_rate": 0.0003190762308912226, "loss": 0.0942, "num_input_tokens_seen": 173748352, "step": 80435 }, { "epoch": 13.122349102773246, "grad_norm": 0.019991006702184677, "learning_rate": 0.0003190098762399989, "loss": 0.0113, "num_input_tokens_seen": 173759616, "step": 80440 }, { "epoch": 13.123164763458401, "grad_norm": 0.007260370999574661, "learning_rate": 0.0003189435252567697, "loss": 0.0209, "num_input_tokens_seen": 173769632, "step": 80445 }, { "epoch": 13.123980424143557, "grad_norm": 0.006024550646543503, "learning_rate": 0.00031887717794287963, "loss": 0.0035, "num_input_tokens_seen": 173780992, "step": 80450 }, { "epoch": 13.124796084828711, "grad_norm": 0.001860006363131106, "learning_rate": 0.0003188108342996732, "loss": 0.0171, "num_input_tokens_seen": 173792128, "step": 80455 }, { "epoch": 13.125611745513867, "grad_norm": 0.008678854443132877, "learning_rate": 0.0003187444943284953, "loss": 0.0036, "num_input_tokens_seen": 173802656, "step": 80460 }, { "epoch": 13.12642740619902, "grad_norm": 0.006701524835079908, "learning_rate": 0.00031867815803068996, "loss": 0.0072, "num_input_tokens_seen": 173813792, "step": 80465 }, { "epoch": 13.127243066884176, "grad_norm": 0.0022508781403303146, "learning_rate": 0.0003186118254076018, "loss": 0.0035, "num_input_tokens_seen": 173824800, "step": 80470 }, { "epoch": 13.12805872756933, "grad_norm": 0.02744651958346367, "learning_rate": 0.00031854549646057517, "loss": 0.0129, "num_input_tokens_seen": 173835168, "step": 80475 }, { "epoch": 13.128874388254486, "grad_norm": 0.544084370136261, "learning_rate": 0.00031847917119095425, "loss": 0.1083, "num_input_tokens_seen": 173847168, "step": 80480 }, { "epoch": 13.129690048939642, "grad_norm": 0.05963525548577309, "learning_rate": 0.0003184128496000832, "loss": 0.0823, "num_input_tokens_seen": 173857120, "step": 80485 }, { "epoch": 13.130505709624796, "grad_norm": 0.005323616787791252, "learning_rate": 0.00031834653168930614, "loss": 0.0125, "num_input_tokens_seen": 173868064, "step": 80490 }, { "epoch": 13.131321370309951, "grad_norm": 0.11876530200242996, "learning_rate": 0.0003182802174599669, "loss": 0.0094, "num_input_tokens_seen": 173878560, "step": 80495 }, { "epoch": 13.132137030995105, "grad_norm": 0.011408278718590736, "learning_rate": 0.00031821390691340985, "loss": 0.0021, "num_input_tokens_seen": 173890176, "step": 80500 }, { "epoch": 13.132952691680261, "grad_norm": 0.007186530157923698, "learning_rate": 0.0003181476000509783, "loss": 0.0035, "num_input_tokens_seen": 173900928, "step": 80505 }, { "epoch": 13.133768352365417, "grad_norm": 0.06655506044626236, "learning_rate": 0.00031808129687401664, "loss": 0.0123, "num_input_tokens_seen": 173912128, "step": 80510 }, { "epoch": 13.13458401305057, "grad_norm": 0.0020698008593171835, "learning_rate": 0.00031801499738386797, "loss": 0.0049, "num_input_tokens_seen": 173923200, "step": 80515 }, { "epoch": 13.135399673735726, "grad_norm": 0.00747555959969759, "learning_rate": 0.0003179487015818765, "loss": 0.0039, "num_input_tokens_seen": 173933120, "step": 80520 }, { "epoch": 13.13621533442088, "grad_norm": 0.12066885083913803, "learning_rate": 0.00031788240946938534, "loss": 0.0349, "num_input_tokens_seen": 173944160, "step": 80525 }, { "epoch": 13.137030995106036, "grad_norm": 0.047779396176338196, "learning_rate": 0.00031781612104773836, "loss": 0.0247, "num_input_tokens_seen": 173955040, "step": 80530 }, { "epoch": 13.137846655791192, "grad_norm": 0.021179448813199997, "learning_rate": 0.00031774983631827866, "loss": 0.004, "num_input_tokens_seen": 173966272, "step": 80535 }, { "epoch": 13.138662316476346, "grad_norm": 0.002796266693621874, "learning_rate": 0.00031768355528234986, "loss": 0.0051, "num_input_tokens_seen": 173976352, "step": 80540 }, { "epoch": 13.139477977161501, "grad_norm": 0.0028553269803524017, "learning_rate": 0.0003176172779412949, "loss": 0.0065, "num_input_tokens_seen": 173987008, "step": 80545 }, { "epoch": 13.140293637846655, "grad_norm": 0.07305045425891876, "learning_rate": 0.00031755100429645746, "loss": 0.0135, "num_input_tokens_seen": 173997856, "step": 80550 }, { "epoch": 13.141109298531811, "grad_norm": 0.15813440084457397, "learning_rate": 0.00031748473434918014, "loss": 0.0102, "num_input_tokens_seen": 174009120, "step": 80555 }, { "epoch": 13.141924959216965, "grad_norm": 0.4499052166938782, "learning_rate": 0.0003174184681008061, "loss": 0.0158, "num_input_tokens_seen": 174019776, "step": 80560 }, { "epoch": 13.14274061990212, "grad_norm": 0.006134378258138895, "learning_rate": 0.00031735220555267874, "loss": 0.0038, "num_input_tokens_seen": 174030816, "step": 80565 }, { "epoch": 13.143556280587276, "grad_norm": 0.013704081997275352, "learning_rate": 0.0003172859467061404, "loss": 0.0618, "num_input_tokens_seen": 174041472, "step": 80570 }, { "epoch": 13.14437194127243, "grad_norm": 0.0008135527605190873, "learning_rate": 0.0003172196915625344, "loss": 0.002, "num_input_tokens_seen": 174054368, "step": 80575 }, { "epoch": 13.145187601957586, "grad_norm": 0.024581970646977425, "learning_rate": 0.0003171534401232029, "loss": 0.0936, "num_input_tokens_seen": 174065504, "step": 80580 }, { "epoch": 13.14600326264274, "grad_norm": 0.022131305187940598, "learning_rate": 0.0003170871923894892, "loss": 0.0075, "num_input_tokens_seen": 174075744, "step": 80585 }, { "epoch": 13.146818923327896, "grad_norm": 0.028195692226290703, "learning_rate": 0.0003170209483627353, "loss": 0.0041, "num_input_tokens_seen": 174085920, "step": 80590 }, { "epoch": 13.147634584013051, "grad_norm": 0.08796575665473938, "learning_rate": 0.00031695470804428427, "loss": 0.0071, "num_input_tokens_seen": 174095968, "step": 80595 }, { "epoch": 13.148450244698205, "grad_norm": 0.005800274666398764, "learning_rate": 0.0003168884714354781, "loss": 0.0551, "num_input_tokens_seen": 174106496, "step": 80600 }, { "epoch": 13.149265905383361, "grad_norm": 0.6200029850006104, "learning_rate": 0.0003168222385376596, "loss": 0.0266, "num_input_tokens_seen": 174116256, "step": 80605 }, { "epoch": 13.150081566068515, "grad_norm": 0.02263057976961136, "learning_rate": 0.0003167560093521705, "loss": 0.1138, "num_input_tokens_seen": 174127648, "step": 80610 }, { "epoch": 13.15089722675367, "grad_norm": 0.002096653450280428, "learning_rate": 0.00031668978388035347, "loss": 0.0062, "num_input_tokens_seen": 174137856, "step": 80615 }, { "epoch": 13.151712887438826, "grad_norm": 0.00246584415435791, "learning_rate": 0.0003166235621235505, "loss": 0.0066, "num_input_tokens_seen": 174148160, "step": 80620 }, { "epoch": 13.15252854812398, "grad_norm": 0.15061096847057343, "learning_rate": 0.00031655734408310367, "loss": 0.026, "num_input_tokens_seen": 174158144, "step": 80625 }, { "epoch": 13.153344208809136, "grad_norm": 0.03953423351049423, "learning_rate": 0.000316491129760355, "loss": 0.0116, "num_input_tokens_seen": 174168096, "step": 80630 }, { "epoch": 13.15415986949429, "grad_norm": 0.002793251071125269, "learning_rate": 0.0003164249191566464, "loss": 0.0034, "num_input_tokens_seen": 174178752, "step": 80635 }, { "epoch": 13.154975530179446, "grad_norm": 0.00592702254652977, "learning_rate": 0.00031635871227331957, "loss": 0.0725, "num_input_tokens_seen": 174190720, "step": 80640 }, { "epoch": 13.1557911908646, "grad_norm": 0.04241395369172096, "learning_rate": 0.00031629250911171657, "loss": 0.004, "num_input_tokens_seen": 174202080, "step": 80645 }, { "epoch": 13.156606851549755, "grad_norm": 0.014272456057369709, "learning_rate": 0.0003162263096731788, "loss": 0.0305, "num_input_tokens_seen": 174213728, "step": 80650 }, { "epoch": 13.15742251223491, "grad_norm": 0.013360538519918919, "learning_rate": 0.0003161601139590482, "loss": 0.0037, "num_input_tokens_seen": 174224320, "step": 80655 }, { "epoch": 13.158238172920065, "grad_norm": 0.4321325123310089, "learning_rate": 0.0003160939219706658, "loss": 0.1981, "num_input_tokens_seen": 174235808, "step": 80660 }, { "epoch": 13.15905383360522, "grad_norm": 0.004693038295954466, "learning_rate": 0.00031602773370937345, "loss": 0.002, "num_input_tokens_seen": 174246848, "step": 80665 }, { "epoch": 13.159869494290374, "grad_norm": 0.005606858525425196, "learning_rate": 0.00031596154917651266, "loss": 0.0071, "num_input_tokens_seen": 174258720, "step": 80670 }, { "epoch": 13.16068515497553, "grad_norm": 0.04412202537059784, "learning_rate": 0.0003158953683734244, "loss": 0.0239, "num_input_tokens_seen": 174268736, "step": 80675 }, { "epoch": 13.161500815660686, "grad_norm": 0.04025116190314293, "learning_rate": 0.00031582919130145016, "loss": 0.0068, "num_input_tokens_seen": 174279136, "step": 80680 }, { "epoch": 13.16231647634584, "grad_norm": 0.01368759199976921, "learning_rate": 0.0003157630179619308, "loss": 0.0037, "num_input_tokens_seen": 174291168, "step": 80685 }, { "epoch": 13.163132137030995, "grad_norm": 0.03054218553006649, "learning_rate": 0.00031569684835620784, "loss": 0.0106, "num_input_tokens_seen": 174301440, "step": 80690 }, { "epoch": 13.16394779771615, "grad_norm": 0.04629860818386078, "learning_rate": 0.00031563068248562185, "loss": 0.011, "num_input_tokens_seen": 174312640, "step": 80695 }, { "epoch": 13.164763458401305, "grad_norm": 0.019359026104211807, "learning_rate": 0.00031556452035151416, "loss": 0.0132, "num_input_tokens_seen": 174322880, "step": 80700 }, { "epoch": 13.16557911908646, "grad_norm": 0.029904767870903015, "learning_rate": 0.00031549836195522517, "loss": 0.0043, "num_input_tokens_seen": 174333600, "step": 80705 }, { "epoch": 13.166394779771615, "grad_norm": 0.0014057289808988571, "learning_rate": 0.00031543220729809626, "loss": 0.0035, "num_input_tokens_seen": 174344256, "step": 80710 }, { "epoch": 13.16721044045677, "grad_norm": 0.002946326043456793, "learning_rate": 0.00031536605638146756, "loss": 0.0092, "num_input_tokens_seen": 174353888, "step": 80715 }, { "epoch": 13.168026101141924, "grad_norm": 0.0019200635142624378, "learning_rate": 0.0003152999092066801, "loss": 0.0028, "num_input_tokens_seen": 174365312, "step": 80720 }, { "epoch": 13.16884176182708, "grad_norm": 0.003745671361684799, "learning_rate": 0.0003152337657750741, "loss": 0.0026, "num_input_tokens_seen": 174375168, "step": 80725 }, { "epoch": 13.169657422512234, "grad_norm": 0.0020364460069686174, "learning_rate": 0.00031516762608799047, "loss": 0.0073, "num_input_tokens_seen": 174387264, "step": 80730 }, { "epoch": 13.17047308319739, "grad_norm": 0.052907831966876984, "learning_rate": 0.0003151014901467691, "loss": 0.0101, "num_input_tokens_seen": 174397152, "step": 80735 }, { "epoch": 13.171288743882545, "grad_norm": 0.017299756407737732, "learning_rate": 0.00031503535795275096, "loss": 0.0206, "num_input_tokens_seen": 174407136, "step": 80740 }, { "epoch": 13.1721044045677, "grad_norm": 0.058468643575906754, "learning_rate": 0.00031496922950727556, "loss": 0.0256, "num_input_tokens_seen": 174416896, "step": 80745 }, { "epoch": 13.172920065252855, "grad_norm": 0.016621742397546768, "learning_rate": 0.00031490310481168375, "loss": 0.0082, "num_input_tokens_seen": 174426976, "step": 80750 }, { "epoch": 13.173735725938009, "grad_norm": 0.0437234528362751, "learning_rate": 0.0003148369838673151, "loss": 0.0062, "num_input_tokens_seen": 174436544, "step": 80755 }, { "epoch": 13.174551386623165, "grad_norm": 0.006686724256724119, "learning_rate": 0.00031477086667551003, "loss": 0.014, "num_input_tokens_seen": 174447040, "step": 80760 }, { "epoch": 13.17536704730832, "grad_norm": 0.007379279471933842, "learning_rate": 0.00031470475323760826, "loss": 0.0077, "num_input_tokens_seen": 174456640, "step": 80765 }, { "epoch": 13.176182707993474, "grad_norm": 0.004275395534932613, "learning_rate": 0.0003146386435549496, "loss": 0.0015, "num_input_tokens_seen": 174467328, "step": 80770 }, { "epoch": 13.17699836867863, "grad_norm": 0.014936030842363834, "learning_rate": 0.0003145725376288742, "loss": 0.0272, "num_input_tokens_seen": 174478176, "step": 80775 }, { "epoch": 13.177814029363784, "grad_norm": 0.0015776983927935362, "learning_rate": 0.00031450643546072145, "loss": 0.0035, "num_input_tokens_seen": 174490432, "step": 80780 }, { "epoch": 13.17862969004894, "grad_norm": 0.3135847747325897, "learning_rate": 0.0003144403370518311, "loss": 0.0104, "num_input_tokens_seen": 174501120, "step": 80785 }, { "epoch": 13.179445350734095, "grad_norm": 0.012622885406017303, "learning_rate": 0.00031437424240354274, "loss": 0.1161, "num_input_tokens_seen": 174511232, "step": 80790 }, { "epoch": 13.18026101141925, "grad_norm": 0.10842663049697876, "learning_rate": 0.00031430815151719583, "loss": 0.124, "num_input_tokens_seen": 174521664, "step": 80795 }, { "epoch": 13.181076672104405, "grad_norm": 0.15451642870903015, "learning_rate": 0.00031424206439412984, "loss": 0.0168, "num_input_tokens_seen": 174531424, "step": 80800 }, { "epoch": 13.181892332789559, "grad_norm": 0.04291224852204323, "learning_rate": 0.00031417598103568404, "loss": 0.025, "num_input_tokens_seen": 174542016, "step": 80805 }, { "epoch": 13.182707993474715, "grad_norm": 0.2300167977809906, "learning_rate": 0.00031410990144319756, "loss": 0.0131, "num_input_tokens_seen": 174553088, "step": 80810 }, { "epoch": 13.18352365415987, "grad_norm": 0.001681014895439148, "learning_rate": 0.00031404382561801006, "loss": 0.0275, "num_input_tokens_seen": 174563840, "step": 80815 }, { "epoch": 13.184339314845024, "grad_norm": 0.007472775410860777, "learning_rate": 0.00031397775356146004, "loss": 0.0659, "num_input_tokens_seen": 174575648, "step": 80820 }, { "epoch": 13.18515497553018, "grad_norm": 0.0014233127003535628, "learning_rate": 0.000313911685274887, "loss": 0.0134, "num_input_tokens_seen": 174587392, "step": 80825 }, { "epoch": 13.185970636215334, "grad_norm": 0.003380796406418085, "learning_rate": 0.0003138456207596296, "loss": 0.0472, "num_input_tokens_seen": 174598912, "step": 80830 }, { "epoch": 13.18678629690049, "grad_norm": 0.004419084172695875, "learning_rate": 0.0003137795600170271, "loss": 0.0046, "num_input_tokens_seen": 174610112, "step": 80835 }, { "epoch": 13.187601957585644, "grad_norm": 0.0024323707912117243, "learning_rate": 0.0003137135030484177, "loss": 0.0029, "num_input_tokens_seen": 174620928, "step": 80840 }, { "epoch": 13.1884176182708, "grad_norm": 0.0012152445269748569, "learning_rate": 0.00031364744985514084, "loss": 0.0028, "num_input_tokens_seen": 174631712, "step": 80845 }, { "epoch": 13.189233278955955, "grad_norm": 0.02515444904565811, "learning_rate": 0.00031358140043853455, "loss": 0.0042, "num_input_tokens_seen": 174642176, "step": 80850 }, { "epoch": 13.190048939641109, "grad_norm": 0.0368582084774971, "learning_rate": 0.00031351535479993785, "loss": 0.0051, "num_input_tokens_seen": 174653152, "step": 80855 }, { "epoch": 13.190864600326265, "grad_norm": 0.022137103602290154, "learning_rate": 0.0003134493129406889, "loss": 0.0068, "num_input_tokens_seen": 174664160, "step": 80860 }, { "epoch": 13.191680261011419, "grad_norm": 0.003924847114831209, "learning_rate": 0.00031338327486212647, "loss": 0.0013, "num_input_tokens_seen": 174674432, "step": 80865 }, { "epoch": 13.192495921696574, "grad_norm": 0.002630516653880477, "learning_rate": 0.00031331724056558847, "loss": 0.0098, "num_input_tokens_seen": 174685088, "step": 80870 }, { "epoch": 13.19331158238173, "grad_norm": 0.011913495138287544, "learning_rate": 0.0003132512100524134, "loss": 0.0735, "num_input_tokens_seen": 174696256, "step": 80875 }, { "epoch": 13.194127243066884, "grad_norm": 0.0055811344645917416, "learning_rate": 0.00031318518332393975, "loss": 0.011, "num_input_tokens_seen": 174707328, "step": 80880 }, { "epoch": 13.19494290375204, "grad_norm": 0.003324878169223666, "learning_rate": 0.0003131191603815051, "loss": 0.0064, "num_input_tokens_seen": 174717248, "step": 80885 }, { "epoch": 13.195758564437194, "grad_norm": 0.010468652471899986, "learning_rate": 0.000313053141226448, "loss": 0.0043, "num_input_tokens_seen": 174728160, "step": 80890 }, { "epoch": 13.19657422512235, "grad_norm": 0.004916087724268436, "learning_rate": 0.0003129871258601059, "loss": 0.0087, "num_input_tokens_seen": 174738688, "step": 80895 }, { "epoch": 13.197389885807505, "grad_norm": 0.05585349351167679, "learning_rate": 0.0003129211142838171, "loss": 0.0146, "num_input_tokens_seen": 174750016, "step": 80900 }, { "epoch": 13.198205546492659, "grad_norm": 0.0034299406688660383, "learning_rate": 0.0003128551064989191, "loss": 0.0039, "num_input_tokens_seen": 174759456, "step": 80905 }, { "epoch": 13.199021207177815, "grad_norm": 0.33458200097084045, "learning_rate": 0.00031278910250674994, "loss": 0.0168, "num_input_tokens_seen": 174769440, "step": 80910 }, { "epoch": 13.199836867862969, "grad_norm": 0.009169838391244411, "learning_rate": 0.00031272310230864695, "loss": 0.0061, "num_input_tokens_seen": 174779488, "step": 80915 }, { "epoch": 13.200652528548124, "grad_norm": 0.01070548314601183, "learning_rate": 0.0003126571059059481, "loss": 0.0055, "num_input_tokens_seen": 174790048, "step": 80920 }, { "epoch": 13.201468189233278, "grad_norm": 0.013349352404475212, "learning_rate": 0.00031259111329999035, "loss": 0.003, "num_input_tokens_seen": 174801056, "step": 80925 }, { "epoch": 13.202283849918434, "grad_norm": 0.0030847955495119095, "learning_rate": 0.00031252512449211163, "loss": 0.0017, "num_input_tokens_seen": 174811872, "step": 80930 }, { "epoch": 13.20309951060359, "grad_norm": 0.2001817226409912, "learning_rate": 0.0003124591394836491, "loss": 0.0055, "num_input_tokens_seen": 174822464, "step": 80935 }, { "epoch": 13.203915171288743, "grad_norm": 0.009598773904144764, "learning_rate": 0.00031239315827593994, "loss": 0.0042, "num_input_tokens_seen": 174833984, "step": 80940 }, { "epoch": 13.2047308319739, "grad_norm": 0.0008926258306019008, "learning_rate": 0.0003123271808703215, "loss": 0.008, "num_input_tokens_seen": 174844608, "step": 80945 }, { "epoch": 13.205546492659053, "grad_norm": 0.00110113644041121, "learning_rate": 0.0003122612072681308, "loss": 0.0029, "num_input_tokens_seen": 174856000, "step": 80950 }, { "epoch": 13.206362153344209, "grad_norm": 0.044156067073345184, "learning_rate": 0.00031219523747070475, "loss": 0.0154, "num_input_tokens_seen": 174867200, "step": 80955 }, { "epoch": 13.207177814029365, "grad_norm": 0.24586060643196106, "learning_rate": 0.00031212927147938066, "loss": 0.0109, "num_input_tokens_seen": 174877312, "step": 80960 }, { "epoch": 13.207993474714518, "grad_norm": 0.008321182802319527, "learning_rate": 0.0003120633092954951, "loss": 0.002, "num_input_tokens_seen": 174888960, "step": 80965 }, { "epoch": 13.208809135399674, "grad_norm": 0.0031696436926722527, "learning_rate": 0.0003119973509203851, "loss": 0.0027, "num_input_tokens_seen": 174900448, "step": 80970 }, { "epoch": 13.209624796084828, "grad_norm": 0.0024241674691438675, "learning_rate": 0.00031193139635538714, "loss": 0.0031, "num_input_tokens_seen": 174910208, "step": 80975 }, { "epoch": 13.210440456769984, "grad_norm": 0.0023144527804106474, "learning_rate": 0.00031186544560183796, "loss": 0.0045, "num_input_tokens_seen": 174921664, "step": 80980 }, { "epoch": 13.21125611745514, "grad_norm": 0.0020555031951516867, "learning_rate": 0.00031179949866107443, "loss": 0.0658, "num_input_tokens_seen": 174932416, "step": 80985 }, { "epoch": 13.212071778140293, "grad_norm": 0.005608719773590565, "learning_rate": 0.0003117335555344326, "loss": 0.0849, "num_input_tokens_seen": 174942048, "step": 80990 }, { "epoch": 13.21288743882545, "grad_norm": 0.07525213807821274, "learning_rate": 0.00031166761622324936, "loss": 0.0628, "num_input_tokens_seen": 174952960, "step": 80995 }, { "epoch": 13.213703099510603, "grad_norm": 0.005778506398200989, "learning_rate": 0.00031160168072886054, "loss": 0.0014, "num_input_tokens_seen": 174963392, "step": 81000 }, { "epoch": 13.214518760195759, "grad_norm": 0.22658313810825348, "learning_rate": 0.00031153574905260287, "loss": 0.0072, "num_input_tokens_seen": 174974624, "step": 81005 }, { "epoch": 13.215334420880913, "grad_norm": 0.001897740876302123, "learning_rate": 0.000311469821195812, "loss": 0.0024, "num_input_tokens_seen": 174985024, "step": 81010 }, { "epoch": 13.216150081566068, "grad_norm": 0.03674355894327164, "learning_rate": 0.00031140389715982476, "loss": 0.1143, "num_input_tokens_seen": 174995104, "step": 81015 }, { "epoch": 13.216965742251224, "grad_norm": 0.47961094975471497, "learning_rate": 0.00031133797694597655, "loss": 0.127, "num_input_tokens_seen": 175006464, "step": 81020 }, { "epoch": 13.217781402936378, "grad_norm": 0.012490352615714073, "learning_rate": 0.0003112720605556037, "loss": 0.0024, "num_input_tokens_seen": 175017280, "step": 81025 }, { "epoch": 13.218597063621534, "grad_norm": 0.00828443095088005, "learning_rate": 0.00031120614799004184, "loss": 0.0141, "num_input_tokens_seen": 175028800, "step": 81030 }, { "epoch": 13.219412724306688, "grad_norm": 0.0018339533125981688, "learning_rate": 0.0003111402392506271, "loss": 0.0065, "num_input_tokens_seen": 175036800, "step": 81035 }, { "epoch": 13.220228384991843, "grad_norm": 0.5048210024833679, "learning_rate": 0.0003110743343386947, "loss": 0.048, "num_input_tokens_seen": 175047840, "step": 81040 }, { "epoch": 13.221044045676999, "grad_norm": 0.007345435209572315, "learning_rate": 0.0003110084332555808, "loss": 0.0202, "num_input_tokens_seen": 175059552, "step": 81045 }, { "epoch": 13.221859706362153, "grad_norm": 0.4212218225002289, "learning_rate": 0.00031094253600262063, "loss": 0.1887, "num_input_tokens_seen": 175070400, "step": 81050 }, { "epoch": 13.222675367047309, "grad_norm": 0.0020032948814332485, "learning_rate": 0.00031087664258115, "loss": 0.0059, "num_input_tokens_seen": 175080992, "step": 81055 }, { "epoch": 13.223491027732463, "grad_norm": 0.017578184604644775, "learning_rate": 0.0003108107529925038, "loss": 0.1716, "num_input_tokens_seen": 175090976, "step": 81060 }, { "epoch": 13.224306688417618, "grad_norm": 0.005086452234536409, "learning_rate": 0.0003107448672380181, "loss": 0.0103, "num_input_tokens_seen": 175101408, "step": 81065 }, { "epoch": 13.225122349102774, "grad_norm": 0.003535451367497444, "learning_rate": 0.0003106789853190274, "loss": 0.007, "num_input_tokens_seen": 175110912, "step": 81070 }, { "epoch": 13.225938009787928, "grad_norm": 0.08481805771589279, "learning_rate": 0.0003106131072368674, "loss": 0.0081, "num_input_tokens_seen": 175122112, "step": 81075 }, { "epoch": 13.226753670473084, "grad_norm": 0.03594028204679489, "learning_rate": 0.00031054723299287303, "loss": 0.0036, "num_input_tokens_seen": 175131808, "step": 81080 }, { "epoch": 13.227569331158238, "grad_norm": 0.0058160750195384026, "learning_rate": 0.00031048136258837923, "loss": 0.0075, "num_input_tokens_seen": 175142752, "step": 81085 }, { "epoch": 13.228384991843393, "grad_norm": 0.3698887228965759, "learning_rate": 0.0003104154960247211, "loss": 0.0261, "num_input_tokens_seen": 175152928, "step": 81090 }, { "epoch": 13.229200652528547, "grad_norm": 0.24206924438476562, "learning_rate": 0.0003103496333032334, "loss": 0.0204, "num_input_tokens_seen": 175164352, "step": 81095 }, { "epoch": 13.230016313213703, "grad_norm": 0.001961946953088045, "learning_rate": 0.00031028377442525104, "loss": 0.0115, "num_input_tokens_seen": 175175840, "step": 81100 }, { "epoch": 13.230831973898859, "grad_norm": 0.025502916425466537, "learning_rate": 0.0003102179193921086, "loss": 0.0073, "num_input_tokens_seen": 175186848, "step": 81105 }, { "epoch": 13.231647634584013, "grad_norm": 0.05788380652666092, "learning_rate": 0.00031015206820514087, "loss": 0.0067, "num_input_tokens_seen": 175197888, "step": 81110 }, { "epoch": 13.232463295269168, "grad_norm": 0.010292734019458294, "learning_rate": 0.0003100862208656823, "loss": 0.0454, "num_input_tokens_seen": 175208000, "step": 81115 }, { "epoch": 13.233278955954322, "grad_norm": 0.006660535931587219, "learning_rate": 0.0003100203773750674, "loss": 0.0299, "num_input_tokens_seen": 175219008, "step": 81120 }, { "epoch": 13.234094616639478, "grad_norm": 0.003208763664588332, "learning_rate": 0.00030995453773463035, "loss": 0.0183, "num_input_tokens_seen": 175229728, "step": 81125 }, { "epoch": 13.234910277324634, "grad_norm": 0.013174341060221195, "learning_rate": 0.00030988870194570596, "loss": 0.0169, "num_input_tokens_seen": 175241536, "step": 81130 }, { "epoch": 13.235725938009788, "grad_norm": 0.00958284828811884, "learning_rate": 0.00030982287000962805, "loss": 0.0043, "num_input_tokens_seen": 175252992, "step": 81135 }, { "epoch": 13.236541598694943, "grad_norm": 0.0013074814341962337, "learning_rate": 0.000309757041927731, "loss": 0.0028, "num_input_tokens_seen": 175263424, "step": 81140 }, { "epoch": 13.237357259380097, "grad_norm": 0.02158118039369583, "learning_rate": 0.00030969121770134877, "loss": 0.01, "num_input_tokens_seen": 175274304, "step": 81145 }, { "epoch": 13.238172920065253, "grad_norm": 0.011400923132896423, "learning_rate": 0.0003096253973318156, "loss": 0.0184, "num_input_tokens_seen": 175284864, "step": 81150 }, { "epoch": 13.238988580750409, "grad_norm": 0.00538475438952446, "learning_rate": 0.000309559580820465, "loss": 0.0028, "num_input_tokens_seen": 175295488, "step": 81155 }, { "epoch": 13.239804241435563, "grad_norm": 0.013732331804931164, "learning_rate": 0.0003094937681686314, "loss": 0.0118, "num_input_tokens_seen": 175305216, "step": 81160 }, { "epoch": 13.240619902120718, "grad_norm": 0.05636392906308174, "learning_rate": 0.00030942795937764794, "loss": 0.0198, "num_input_tokens_seen": 175316832, "step": 81165 }, { "epoch": 13.241435562805872, "grad_norm": 0.0052260602824389935, "learning_rate": 0.00030936215444884893, "loss": 0.0071, "num_input_tokens_seen": 175328480, "step": 81170 }, { "epoch": 13.242251223491028, "grad_norm": 0.005099338013678789, "learning_rate": 0.00030929635338356745, "loss": 0.0098, "num_input_tokens_seen": 175340736, "step": 81175 }, { "epoch": 13.243066884176184, "grad_norm": 0.000985561404377222, "learning_rate": 0.0003092305561831375, "loss": 0.0034, "num_input_tokens_seen": 175351072, "step": 81180 }, { "epoch": 13.243882544861338, "grad_norm": 0.0050900704227387905, "learning_rate": 0.0003091647628488922, "loss": 0.002, "num_input_tokens_seen": 175361984, "step": 81185 }, { "epoch": 13.244698205546493, "grad_norm": 0.06734821945428848, "learning_rate": 0.0003090989733821652, "loss": 0.0092, "num_input_tokens_seen": 175373120, "step": 81190 }, { "epoch": 13.245513866231647, "grad_norm": 0.002881730208173394, "learning_rate": 0.0003090331877842895, "loss": 0.0019, "num_input_tokens_seen": 175385248, "step": 81195 }, { "epoch": 13.246329526916803, "grad_norm": 0.005004175938665867, "learning_rate": 0.00030896740605659845, "loss": 0.0079, "num_input_tokens_seen": 175395776, "step": 81200 }, { "epoch": 13.247145187601957, "grad_norm": 0.000822130125015974, "learning_rate": 0.00030890162820042553, "loss": 0.0016, "num_input_tokens_seen": 175405920, "step": 81205 }, { "epoch": 13.247960848287113, "grad_norm": 0.33728986978530884, "learning_rate": 0.00030883585421710334, "loss": 0.0201, "num_input_tokens_seen": 175415936, "step": 81210 }, { "epoch": 13.248776508972268, "grad_norm": 0.0008757903706282377, "learning_rate": 0.00030877008410796526, "loss": 0.0006, "num_input_tokens_seen": 175425856, "step": 81215 }, { "epoch": 13.249592169657422, "grad_norm": 0.00804536696523428, "learning_rate": 0.00030870431787434385, "loss": 0.0031, "num_input_tokens_seen": 175436800, "step": 81220 }, { "epoch": 13.250407830342578, "grad_norm": 0.3633720278739929, "learning_rate": 0.00030863855551757223, "loss": 0.1165, "num_input_tokens_seen": 175447072, "step": 81225 }, { "epoch": 13.251223491027732, "grad_norm": 0.15613357722759247, "learning_rate": 0.0003085727970389829, "loss": 0.0103, "num_input_tokens_seen": 175457472, "step": 81230 }, { "epoch": 13.252039151712887, "grad_norm": 0.0012791818007826805, "learning_rate": 0.0003085070424399089, "loss": 0.0071, "num_input_tokens_seen": 175468672, "step": 81235 }, { "epoch": 13.252854812398043, "grad_norm": 0.02506769821047783, "learning_rate": 0.00030844129172168236, "loss": 0.0057, "num_input_tokens_seen": 175479008, "step": 81240 }, { "epoch": 13.253670473083197, "grad_norm": 0.002241175388917327, "learning_rate": 0.0003083755448856361, "loss": 0.0022, "num_input_tokens_seen": 175490144, "step": 81245 }, { "epoch": 13.254486133768353, "grad_norm": 0.006746441125869751, "learning_rate": 0.00030830980193310265, "loss": 0.0983, "num_input_tokens_seen": 175501376, "step": 81250 }, { "epoch": 13.255301794453507, "grad_norm": 0.0012192511931061745, "learning_rate": 0.00030824406286541415, "loss": 0.152, "num_input_tokens_seen": 175511808, "step": 81255 }, { "epoch": 13.256117455138662, "grad_norm": 0.016503628343343735, "learning_rate": 0.00030817832768390306, "loss": 0.0109, "num_input_tokens_seen": 175523584, "step": 81260 }, { "epoch": 13.256933115823816, "grad_norm": 0.009415880776941776, "learning_rate": 0.0003081125963899014, "loss": 0.0033, "num_input_tokens_seen": 175533856, "step": 81265 }, { "epoch": 13.257748776508972, "grad_norm": 0.039036672562360764, "learning_rate": 0.0003080468689847414, "loss": 0.0214, "num_input_tokens_seen": 175543840, "step": 81270 }, { "epoch": 13.258564437194128, "grad_norm": 0.006212171167135239, "learning_rate": 0.00030798114546975525, "loss": 0.045, "num_input_tokens_seen": 175555968, "step": 81275 }, { "epoch": 13.259380097879282, "grad_norm": 0.06656394898891449, "learning_rate": 0.00030791542584627455, "loss": 0.0046, "num_input_tokens_seen": 175567424, "step": 81280 }, { "epoch": 13.260195758564437, "grad_norm": 0.0009761211695149541, "learning_rate": 0.0003078497101156317, "loss": 0.0053, "num_input_tokens_seen": 175578016, "step": 81285 }, { "epoch": 13.261011419249591, "grad_norm": 0.000771388178691268, "learning_rate": 0.00030778399827915796, "loss": 0.003, "num_input_tokens_seen": 175588576, "step": 81290 }, { "epoch": 13.261827079934747, "grad_norm": 0.000772759725805372, "learning_rate": 0.0003077182903381856, "loss": 0.0016, "num_input_tokens_seen": 175598848, "step": 81295 }, { "epoch": 13.262642740619903, "grad_norm": 0.01377933844923973, "learning_rate": 0.0003076525862940458, "loss": 0.1336, "num_input_tokens_seen": 175609504, "step": 81300 }, { "epoch": 13.263458401305057, "grad_norm": 0.6922663450241089, "learning_rate": 0.00030758688614807033, "loss": 0.0214, "num_input_tokens_seen": 175621184, "step": 81305 }, { "epoch": 13.264274061990212, "grad_norm": 0.0026870991569012403, "learning_rate": 0.0003075211899015909, "loss": 0.0131, "num_input_tokens_seen": 175630816, "step": 81310 }, { "epoch": 13.265089722675366, "grad_norm": 0.002322110114619136, "learning_rate": 0.0003074554975559386, "loss": 0.005, "num_input_tokens_seen": 175640064, "step": 81315 }, { "epoch": 13.265905383360522, "grad_norm": 0.009497624821960926, "learning_rate": 0.000307389809112445, "loss": 0.002, "num_input_tokens_seen": 175650720, "step": 81320 }, { "epoch": 13.266721044045678, "grad_norm": 0.015067550353705883, "learning_rate": 0.0003073241245724411, "loss": 0.1275, "num_input_tokens_seen": 175661920, "step": 81325 }, { "epoch": 13.267536704730832, "grad_norm": 0.006270645186305046, "learning_rate": 0.00030725844393725846, "loss": 0.0038, "num_input_tokens_seen": 175672224, "step": 81330 }, { "epoch": 13.268352365415987, "grad_norm": 0.016796309500932693, "learning_rate": 0.00030719276720822774, "loss": 0.0499, "num_input_tokens_seen": 175681792, "step": 81335 }, { "epoch": 13.269168026101141, "grad_norm": 1.0089938640594482, "learning_rate": 0.0003071270943866804, "loss": 0.0447, "num_input_tokens_seen": 175692736, "step": 81340 }, { "epoch": 13.269983686786297, "grad_norm": 0.17437076568603516, "learning_rate": 0.000307061425473947, "loss": 0.0069, "num_input_tokens_seen": 175705088, "step": 81345 }, { "epoch": 13.270799347471453, "grad_norm": 0.36235466599464417, "learning_rate": 0.00030699576047135875, "loss": 0.0071, "num_input_tokens_seen": 175715392, "step": 81350 }, { "epoch": 13.271615008156607, "grad_norm": 0.0024113464169204235, "learning_rate": 0.0003069300993802461, "loss": 0.002, "num_input_tokens_seen": 175726464, "step": 81355 }, { "epoch": 13.272430668841762, "grad_norm": 0.020136456936597824, "learning_rate": 0.00030686444220194, "loss": 0.0067, "num_input_tokens_seen": 175737120, "step": 81360 }, { "epoch": 13.273246329526916, "grad_norm": 0.3934916853904724, "learning_rate": 0.00030679878893777085, "loss": 0.1054, "num_input_tokens_seen": 175747328, "step": 81365 }, { "epoch": 13.274061990212072, "grad_norm": 0.0014037713408470154, "learning_rate": 0.0003067331395890696, "loss": 0.0101, "num_input_tokens_seen": 175758784, "step": 81370 }, { "epoch": 13.274877650897226, "grad_norm": 0.001718403771519661, "learning_rate": 0.0003066674941571661, "loss": 0.0027, "num_input_tokens_seen": 175770656, "step": 81375 }, { "epoch": 13.275693311582382, "grad_norm": 0.0006151496199890971, "learning_rate": 0.0003066018526433914, "loss": 0.0019, "num_input_tokens_seen": 175780992, "step": 81380 }, { "epoch": 13.276508972267537, "grad_norm": 0.0005740922060795128, "learning_rate": 0.00030653621504907533, "loss": 0.0177, "num_input_tokens_seen": 175791680, "step": 81385 }, { "epoch": 13.277324632952691, "grad_norm": 0.04168350249528885, "learning_rate": 0.0003064705813755483, "loss": 0.0109, "num_input_tokens_seen": 175801664, "step": 81390 }, { "epoch": 13.278140293637847, "grad_norm": 0.028292395174503326, "learning_rate": 0.0003064049516241405, "loss": 0.0043, "num_input_tokens_seen": 175811264, "step": 81395 }, { "epoch": 13.278955954323001, "grad_norm": 0.012449781410396099, "learning_rate": 0.00030633932579618195, "loss": 0.0051, "num_input_tokens_seen": 175823616, "step": 81400 }, { "epoch": 13.279771615008157, "grad_norm": 0.010674958117306232, "learning_rate": 0.00030627370389300256, "loss": 0.0072, "num_input_tokens_seen": 175834464, "step": 81405 }, { "epoch": 13.280587275693312, "grad_norm": 0.05215312913060188, "learning_rate": 0.0003062080859159323, "loss": 0.0028, "num_input_tokens_seen": 175845728, "step": 81410 }, { "epoch": 13.281402936378466, "grad_norm": 0.0022178348153829575, "learning_rate": 0.0003061424718663011, "loss": 0.0164, "num_input_tokens_seen": 175856576, "step": 81415 }, { "epoch": 13.282218597063622, "grad_norm": 0.02007582038640976, "learning_rate": 0.00030607686174543864, "loss": 0.0042, "num_input_tokens_seen": 175867424, "step": 81420 }, { "epoch": 13.283034257748776, "grad_norm": 0.34574416279792786, "learning_rate": 0.00030601125555467456, "loss": 0.0755, "num_input_tokens_seen": 175878496, "step": 81425 }, { "epoch": 13.283849918433932, "grad_norm": 0.003219359088689089, "learning_rate": 0.0003059456532953385, "loss": 0.0046, "num_input_tokens_seen": 175889152, "step": 81430 }, { "epoch": 13.284665579119087, "grad_norm": 0.029977506026625633, "learning_rate": 0.00030588005496876, "loss": 0.0027, "num_input_tokens_seen": 175899328, "step": 81435 }, { "epoch": 13.285481239804241, "grad_norm": 0.0027198202442377806, "learning_rate": 0.00030581446057626827, "loss": 0.0013, "num_input_tokens_seen": 175908480, "step": 81440 }, { "epoch": 13.286296900489397, "grad_norm": 0.028357025235891342, "learning_rate": 0.00030574887011919306, "loss": 0.0755, "num_input_tokens_seen": 175918752, "step": 81445 }, { "epoch": 13.28711256117455, "grad_norm": 0.0038904561661183834, "learning_rate": 0.0003056832835988632, "loss": 0.0129, "num_input_tokens_seen": 175929344, "step": 81450 }, { "epoch": 13.287928221859707, "grad_norm": 0.004254198633134365, "learning_rate": 0.00030561770101660837, "loss": 0.0765, "num_input_tokens_seen": 175937792, "step": 81455 }, { "epoch": 13.28874388254486, "grad_norm": 0.010023529641330242, "learning_rate": 0.0003055521223737572, "loss": 0.0023, "num_input_tokens_seen": 175948960, "step": 81460 }, { "epoch": 13.289559543230016, "grad_norm": 0.007340825628489256, "learning_rate": 0.0003054865476716391, "loss": 0.0018, "num_input_tokens_seen": 175960512, "step": 81465 }, { "epoch": 13.290375203915172, "grad_norm": 0.7565301656723022, "learning_rate": 0.0003054209769115827, "loss": 0.0896, "num_input_tokens_seen": 175971904, "step": 81470 }, { "epoch": 13.291190864600326, "grad_norm": 0.0019012526609003544, "learning_rate": 0.0003053554100949173, "loss": 0.0042, "num_input_tokens_seen": 175983136, "step": 81475 }, { "epoch": 13.292006525285482, "grad_norm": 0.007661410607397556, "learning_rate": 0.0003052898472229711, "loss": 0.0024, "num_input_tokens_seen": 175994272, "step": 81480 }, { "epoch": 13.292822185970635, "grad_norm": 0.20898938179016113, "learning_rate": 0.0003052242882970735, "loss": 0.0078, "num_input_tokens_seen": 176005056, "step": 81485 }, { "epoch": 13.293637846655791, "grad_norm": 0.00497079873457551, "learning_rate": 0.0003051587333185525, "loss": 0.0102, "num_input_tokens_seen": 176015104, "step": 81490 }, { "epoch": 13.294453507340947, "grad_norm": 0.011369774118065834, "learning_rate": 0.00030509318228873715, "loss": 0.0291, "num_input_tokens_seen": 176026080, "step": 81495 }, { "epoch": 13.2952691680261, "grad_norm": 0.003535378258675337, "learning_rate": 0.00030502763520895556, "loss": 0.0019, "num_input_tokens_seen": 176036096, "step": 81500 }, { "epoch": 13.296084828711257, "grad_norm": 0.030523095279932022, "learning_rate": 0.00030496209208053643, "loss": 0.004, "num_input_tokens_seen": 176048128, "step": 81505 }, { "epoch": 13.29690048939641, "grad_norm": 0.0029729788657277822, "learning_rate": 0.0003048965529048078, "loss": 0.0946, "num_input_tokens_seen": 176058848, "step": 81510 }, { "epoch": 13.297716150081566, "grad_norm": 0.004624223802238703, "learning_rate": 0.00030483101768309797, "loss": 0.0067, "num_input_tokens_seen": 176069888, "step": 81515 }, { "epoch": 13.298531810766722, "grad_norm": 0.05178157985210419, "learning_rate": 0.00030476548641673537, "loss": 0.0042, "num_input_tokens_seen": 176081216, "step": 81520 }, { "epoch": 13.299347471451876, "grad_norm": 0.06961380690336227, "learning_rate": 0.0003046999591070476, "loss": 0.0042, "num_input_tokens_seen": 176092288, "step": 81525 }, { "epoch": 13.300163132137031, "grad_norm": 0.09609808772802353, "learning_rate": 0.0003046344357553632, "loss": 0.0458, "num_input_tokens_seen": 176103456, "step": 81530 }, { "epoch": 13.300978792822185, "grad_norm": 0.6587584018707275, "learning_rate": 0.0003045689163630095, "loss": 0.1092, "num_input_tokens_seen": 176114560, "step": 81535 }, { "epoch": 13.301794453507341, "grad_norm": 0.043220143765211105, "learning_rate": 0.000304503400931315, "loss": 0.0121, "num_input_tokens_seen": 176126112, "step": 81540 }, { "epoch": 13.302610114192497, "grad_norm": 0.0008794625173322856, "learning_rate": 0.00030443788946160676, "loss": 0.0022, "num_input_tokens_seen": 176137632, "step": 81545 }, { "epoch": 13.30342577487765, "grad_norm": 0.001853870926424861, "learning_rate": 0.000304372381955213, "loss": 0.0051, "num_input_tokens_seen": 176148512, "step": 81550 }, { "epoch": 13.304241435562806, "grad_norm": 0.0031397638376802206, "learning_rate": 0.00030430687841346096, "loss": 0.0021, "num_input_tokens_seen": 176159680, "step": 81555 }, { "epoch": 13.30505709624796, "grad_norm": 0.013141549192368984, "learning_rate": 0.00030424137883767826, "loss": 0.0033, "num_input_tokens_seen": 176169568, "step": 81560 }, { "epoch": 13.305872756933116, "grad_norm": 0.0030862074345350266, "learning_rate": 0.00030417588322919243, "loss": 0.0033, "num_input_tokens_seen": 176181120, "step": 81565 }, { "epoch": 13.30668841761827, "grad_norm": 0.10375366359949112, "learning_rate": 0.00030411039158933075, "loss": 0.0037, "num_input_tokens_seen": 176191264, "step": 81570 }, { "epoch": 13.307504078303426, "grad_norm": 0.0012237022165209055, "learning_rate": 0.0003040449039194205, "loss": 0.0048, "num_input_tokens_seen": 176202464, "step": 81575 }, { "epoch": 13.308319738988581, "grad_norm": 0.008354654535651207, "learning_rate": 0.00030397942022078884, "loss": 0.0634, "num_input_tokens_seen": 176213920, "step": 81580 }, { "epoch": 13.309135399673735, "grad_norm": 0.022409193217754364, "learning_rate": 0.00030391394049476275, "loss": 0.002, "num_input_tokens_seen": 176224672, "step": 81585 }, { "epoch": 13.309951060358891, "grad_norm": 0.0038927465211600065, "learning_rate": 0.00030384846474266965, "loss": 0.0267, "num_input_tokens_seen": 176234176, "step": 81590 }, { "epoch": 13.310766721044045, "grad_norm": 0.004279454704374075, "learning_rate": 0.0003037829929658361, "loss": 0.0043, "num_input_tokens_seen": 176245792, "step": 81595 }, { "epoch": 13.3115823817292, "grad_norm": 0.006501410156488419, "learning_rate": 0.0003037175251655892, "loss": 0.0762, "num_input_tokens_seen": 176255392, "step": 81600 }, { "epoch": 13.312398042414356, "grad_norm": 0.07710537314414978, "learning_rate": 0.0003036520613432555, "loss": 0.0291, "num_input_tokens_seen": 176266368, "step": 81605 }, { "epoch": 13.31321370309951, "grad_norm": 0.01355685107409954, "learning_rate": 0.0003035866015001621, "loss": 0.1107, "num_input_tokens_seen": 176278080, "step": 81610 }, { "epoch": 13.314029363784666, "grad_norm": 0.00818372517824173, "learning_rate": 0.00030352114563763515, "loss": 0.003, "num_input_tokens_seen": 176289792, "step": 81615 }, { "epoch": 13.31484502446982, "grad_norm": 0.39141932129859924, "learning_rate": 0.00030345569375700145, "loss": 0.1082, "num_input_tokens_seen": 176299840, "step": 81620 }, { "epoch": 13.315660685154976, "grad_norm": 0.4454955458641052, "learning_rate": 0.0003033902458595877, "loss": 0.0579, "num_input_tokens_seen": 176310688, "step": 81625 }, { "epoch": 13.31647634584013, "grad_norm": 0.0065238154493272305, "learning_rate": 0.00030332480194671975, "loss": 0.0022, "num_input_tokens_seen": 176321312, "step": 81630 }, { "epoch": 13.317292006525285, "grad_norm": 0.0018881976138800383, "learning_rate": 0.0003032593620197245, "loss": 0.0037, "num_input_tokens_seen": 176333344, "step": 81635 }, { "epoch": 13.318107667210441, "grad_norm": 0.0015710759907960892, "learning_rate": 0.0003031939260799276, "loss": 0.0308, "num_input_tokens_seen": 176343552, "step": 81640 }, { "epoch": 13.318923327895595, "grad_norm": 0.8399984240531921, "learning_rate": 0.00030312849412865564, "loss": 0.0926, "num_input_tokens_seen": 176354912, "step": 81645 }, { "epoch": 13.31973898858075, "grad_norm": 0.366643488407135, "learning_rate": 0.00030306306616723424, "loss": 0.0228, "num_input_tokens_seen": 176366048, "step": 81650 }, { "epoch": 13.320554649265905, "grad_norm": 0.031812455505132675, "learning_rate": 0.00030299764219698987, "loss": 0.0104, "num_input_tokens_seen": 176377120, "step": 81655 }, { "epoch": 13.32137030995106, "grad_norm": 0.08288736641407013, "learning_rate": 0.00030293222221924805, "loss": 0.0131, "num_input_tokens_seen": 176387968, "step": 81660 }, { "epoch": 13.322185970636216, "grad_norm": 0.003492532530799508, "learning_rate": 0.0003028668062353349, "loss": 0.0057, "num_input_tokens_seen": 176397472, "step": 81665 }, { "epoch": 13.32300163132137, "grad_norm": 0.004395823460072279, "learning_rate": 0.0003028013942465758, "loss": 0.0439, "num_input_tokens_seen": 176408224, "step": 81670 }, { "epoch": 13.323817292006526, "grad_norm": 0.002135923132300377, "learning_rate": 0.00030273598625429687, "loss": 0.0013, "num_input_tokens_seen": 176419552, "step": 81675 }, { "epoch": 13.32463295269168, "grad_norm": 0.002409202978014946, "learning_rate": 0.00030267058225982315, "loss": 0.0033, "num_input_tokens_seen": 176430272, "step": 81680 }, { "epoch": 13.325448613376835, "grad_norm": 0.0041789524257183075, "learning_rate": 0.00030260518226448064, "loss": 0.0046, "num_input_tokens_seen": 176440064, "step": 81685 }, { "epoch": 13.326264274061991, "grad_norm": 0.00487999664619565, "learning_rate": 0.00030253978626959435, "loss": 0.0181, "num_input_tokens_seen": 176450400, "step": 81690 }, { "epoch": 13.327079934747145, "grad_norm": 0.00734216021373868, "learning_rate": 0.00030247439427649, "loss": 0.0113, "num_input_tokens_seen": 176459968, "step": 81695 }, { "epoch": 13.3278955954323, "grad_norm": 0.0037031807005405426, "learning_rate": 0.0003024090062864924, "loss": 0.0018, "num_input_tokens_seen": 176470080, "step": 81700 }, { "epoch": 13.328711256117455, "grad_norm": 0.023402415215969086, "learning_rate": 0.00030234362230092705, "loss": 0.0032, "num_input_tokens_seen": 176481120, "step": 81705 }, { "epoch": 13.32952691680261, "grad_norm": 0.012723736464977264, "learning_rate": 0.0003022782423211189, "loss": 0.0144, "num_input_tokens_seen": 176492864, "step": 81710 }, { "epoch": 13.330342577487766, "grad_norm": 0.01561590563505888, "learning_rate": 0.0003022128663483931, "loss": 0.009, "num_input_tokens_seen": 176503232, "step": 81715 }, { "epoch": 13.33115823817292, "grad_norm": 0.009133810177445412, "learning_rate": 0.0003021474943840743, "loss": 0.0068, "num_input_tokens_seen": 176513024, "step": 81720 }, { "epoch": 13.331973898858076, "grad_norm": 0.0011698536109179258, "learning_rate": 0.00030208212642948755, "loss": 0.0057, "num_input_tokens_seen": 176524288, "step": 81725 }, { "epoch": 13.33278955954323, "grad_norm": 0.3569418489933014, "learning_rate": 0.0003020167624859577, "loss": 0.018, "num_input_tokens_seen": 176535744, "step": 81730 }, { "epoch": 13.333605220228385, "grad_norm": 0.13893947005271912, "learning_rate": 0.00030195140255480927, "loss": 0.008, "num_input_tokens_seen": 176546848, "step": 81735 }, { "epoch": 13.33442088091354, "grad_norm": 0.004899358842521906, "learning_rate": 0.0003018860466373669, "loss": 0.0664, "num_input_tokens_seen": 176558336, "step": 81740 }, { "epoch": 13.335236541598695, "grad_norm": 0.006934888660907745, "learning_rate": 0.0003018206947349551, "loss": 0.0036, "num_input_tokens_seen": 176570304, "step": 81745 }, { "epoch": 13.33605220228385, "grad_norm": 0.6603171825408936, "learning_rate": 0.00030175534684889836, "loss": 0.0089, "num_input_tokens_seen": 176580224, "step": 81750 }, { "epoch": 13.336867862969005, "grad_norm": 0.07441363483667374, "learning_rate": 0.00030169000298052096, "loss": 0.0081, "num_input_tokens_seen": 176590784, "step": 81755 }, { "epoch": 13.33768352365416, "grad_norm": 0.021589141339063644, "learning_rate": 0.00030162466313114734, "loss": 0.0076, "num_input_tokens_seen": 176601248, "step": 81760 }, { "epoch": 13.338499184339314, "grad_norm": 0.007798145059496164, "learning_rate": 0.00030155932730210145, "loss": 0.0069, "num_input_tokens_seen": 176612704, "step": 81765 }, { "epoch": 13.33931484502447, "grad_norm": 0.018131252378225327, "learning_rate": 0.00030149399549470767, "loss": 0.0031, "num_input_tokens_seen": 176623424, "step": 81770 }, { "epoch": 13.340130505709626, "grad_norm": 0.014698930084705353, "learning_rate": 0.00030142866771028974, "loss": 0.0048, "num_input_tokens_seen": 176634048, "step": 81775 }, { "epoch": 13.34094616639478, "grad_norm": 0.0022598044015467167, "learning_rate": 0.00030136334395017197, "loss": 0.1079, "num_input_tokens_seen": 176644704, "step": 81780 }, { "epoch": 13.341761827079935, "grad_norm": 0.0038392143324017525, "learning_rate": 0.0003012980242156778, "loss": 0.0019, "num_input_tokens_seen": 176654848, "step": 81785 }, { "epoch": 13.34257748776509, "grad_norm": 0.04017645865678787, "learning_rate": 0.00030123270850813147, "loss": 0.0367, "num_input_tokens_seen": 176666048, "step": 81790 }, { "epoch": 13.343393148450245, "grad_norm": 0.00553273456171155, "learning_rate": 0.0003011673968288562, "loss": 0.0073, "num_input_tokens_seen": 176677472, "step": 81795 }, { "epoch": 13.3442088091354, "grad_norm": 0.012419447302818298, "learning_rate": 0.00030110208917917607, "loss": 0.0023, "num_input_tokens_seen": 176688864, "step": 81800 }, { "epoch": 13.345024469820554, "grad_norm": 0.03526076301932335, "learning_rate": 0.00030103678556041427, "loss": 0.0053, "num_input_tokens_seen": 176700864, "step": 81805 }, { "epoch": 13.34584013050571, "grad_norm": 0.0011652401881292462, "learning_rate": 0.00030097148597389456, "loss": 0.0067, "num_input_tokens_seen": 176711936, "step": 81810 }, { "epoch": 13.346655791190864, "grad_norm": 0.0031598478090018034, "learning_rate": 0.00030090619042094, "loss": 0.0289, "num_input_tokens_seen": 176722464, "step": 81815 }, { "epoch": 13.34747145187602, "grad_norm": 0.002571272198110819, "learning_rate": 0.0003008408989028743, "loss": 0.0023, "num_input_tokens_seen": 176733440, "step": 81820 }, { "epoch": 13.348287112561174, "grad_norm": 0.012336530722677708, "learning_rate": 0.00030077561142102024, "loss": 0.0098, "num_input_tokens_seen": 176743104, "step": 81825 }, { "epoch": 13.34910277324633, "grad_norm": 0.007683815434575081, "learning_rate": 0.0003007103279767013, "loss": 0.009, "num_input_tokens_seen": 176753792, "step": 81830 }, { "epoch": 13.349918433931485, "grad_norm": 0.06616160273551941, "learning_rate": 0.0003006450485712402, "loss": 0.0036, "num_input_tokens_seen": 176764832, "step": 81835 }, { "epoch": 13.350734094616639, "grad_norm": 0.0150661151856184, "learning_rate": 0.00030057977320596007, "loss": 0.007, "num_input_tokens_seen": 176776128, "step": 81840 }, { "epoch": 13.351549755301795, "grad_norm": 0.0010423744097352028, "learning_rate": 0.00030051450188218397, "loss": 0.0071, "num_input_tokens_seen": 176787072, "step": 81845 }, { "epoch": 13.352365415986949, "grad_norm": 0.10020679235458374, "learning_rate": 0.0003004492346012345, "loss": 0.019, "num_input_tokens_seen": 176798816, "step": 81850 }, { "epoch": 13.353181076672104, "grad_norm": 0.0933566614985466, "learning_rate": 0.0003003839713644345, "loss": 0.0031, "num_input_tokens_seen": 176809984, "step": 81855 }, { "epoch": 13.35399673735726, "grad_norm": 0.023334039375185966, "learning_rate": 0.0003003187121731064, "loss": 0.0031, "num_input_tokens_seen": 176820288, "step": 81860 }, { "epoch": 13.354812398042414, "grad_norm": 0.0006777640082873404, "learning_rate": 0.0003002534570285731, "loss": 0.0032, "num_input_tokens_seen": 176830080, "step": 81865 }, { "epoch": 13.35562805872757, "grad_norm": 0.0137909771874547, "learning_rate": 0.00030018820593215675, "loss": 0.0277, "num_input_tokens_seen": 176841088, "step": 81870 }, { "epoch": 13.356443719412724, "grad_norm": 0.0035228354390710592, "learning_rate": 0.0003001229588851799, "loss": 0.0044, "num_input_tokens_seen": 176852576, "step": 81875 }, { "epoch": 13.35725938009788, "grad_norm": 0.010440012440085411, "learning_rate": 0.0003000577158889649, "loss": 0.0767, "num_input_tokens_seen": 176863584, "step": 81880 }, { "epoch": 13.358075040783035, "grad_norm": 0.001993312034755945, "learning_rate": 0.00029999247694483395, "loss": 0.0017, "num_input_tokens_seen": 176875584, "step": 81885 }, { "epoch": 13.358890701468189, "grad_norm": 0.04758576303720474, "learning_rate": 0.00029992724205410914, "loss": 0.004, "num_input_tokens_seen": 176886752, "step": 81890 }, { "epoch": 13.359706362153345, "grad_norm": 0.2109840214252472, "learning_rate": 0.0002998620112181126, "loss": 0.0065, "num_input_tokens_seen": 176896672, "step": 81895 }, { "epoch": 13.360522022838499, "grad_norm": 0.043509677052497864, "learning_rate": 0.0002997967844381662, "loss": 0.0056, "num_input_tokens_seen": 176907840, "step": 81900 }, { "epoch": 13.361337683523654, "grad_norm": 0.016660314053297043, "learning_rate": 0.00029973156171559214, "loss": 0.0402, "num_input_tokens_seen": 176918368, "step": 81905 }, { "epoch": 13.362153344208808, "grad_norm": 0.002962973900139332, "learning_rate": 0.0002996663430517118, "loss": 0.0023, "num_input_tokens_seen": 176929600, "step": 81910 }, { "epoch": 13.362969004893964, "grad_norm": 0.0031862088944762945, "learning_rate": 0.0002996011284478474, "loss": 0.0023, "num_input_tokens_seen": 176939360, "step": 81915 }, { "epoch": 13.36378466557912, "grad_norm": 0.004130939487367868, "learning_rate": 0.00029953591790532014, "loss": 0.0028, "num_input_tokens_seen": 176950272, "step": 81920 }, { "epoch": 13.364600326264274, "grad_norm": 0.0006332904449664056, "learning_rate": 0.000299470711425452, "loss": 0.0013, "num_input_tokens_seen": 176959648, "step": 81925 }, { "epoch": 13.36541598694943, "grad_norm": 0.013381035067141056, "learning_rate": 0.0002994055090095641, "loss": 0.0059, "num_input_tokens_seen": 176969600, "step": 81930 }, { "epoch": 13.366231647634583, "grad_norm": 0.002639003796502948, "learning_rate": 0.00029934031065897824, "loss": 0.0027, "num_input_tokens_seen": 176980608, "step": 81935 }, { "epoch": 13.367047308319739, "grad_norm": 0.004228521604090929, "learning_rate": 0.00029927511637501536, "loss": 0.0027, "num_input_tokens_seen": 176990656, "step": 81940 }, { "epoch": 13.367862969004895, "grad_norm": 0.4222617447376251, "learning_rate": 0.0002992099261589968, "loss": 0.0184, "num_input_tokens_seen": 177001120, "step": 81945 }, { "epoch": 13.368678629690049, "grad_norm": 0.0035730917006731033, "learning_rate": 0.00029914474001224413, "loss": 0.0054, "num_input_tokens_seen": 177012448, "step": 81950 }, { "epoch": 13.369494290375204, "grad_norm": 0.10068176686763763, "learning_rate": 0.0002990795579360778, "loss": 0.1867, "num_input_tokens_seen": 177023168, "step": 81955 }, { "epoch": 13.370309951060358, "grad_norm": 0.008547582663595676, "learning_rate": 0.00029901437993181936, "loss": 0.0062, "num_input_tokens_seen": 177035136, "step": 81960 }, { "epoch": 13.371125611745514, "grad_norm": 0.4465034306049347, "learning_rate": 0.0002989492060007893, "loss": 0.0537, "num_input_tokens_seen": 177046208, "step": 81965 }, { "epoch": 13.37194127243067, "grad_norm": 0.0010136293713003397, "learning_rate": 0.0002988840361443088, "loss": 0.0012, "num_input_tokens_seen": 177056256, "step": 81970 }, { "epoch": 13.372756933115824, "grad_norm": 0.001000964897684753, "learning_rate": 0.0002988188703636983, "loss": 0.0265, "num_input_tokens_seen": 177067040, "step": 81975 }, { "epoch": 13.37357259380098, "grad_norm": 0.9909574389457703, "learning_rate": 0.0002987537086602787, "loss": 0.0442, "num_input_tokens_seen": 177077120, "step": 81980 }, { "epoch": 13.374388254486133, "grad_norm": 0.0032265952322632074, "learning_rate": 0.0002986885510353703, "loss": 0.1605, "num_input_tokens_seen": 177087744, "step": 81985 }, { "epoch": 13.375203915171289, "grad_norm": 0.013295507058501244, "learning_rate": 0.00029862339749029413, "loss": 0.0092, "num_input_tokens_seen": 177100096, "step": 81990 }, { "epoch": 13.376019575856443, "grad_norm": 0.00572703592479229, "learning_rate": 0.0002985582480263699, "loss": 0.0115, "num_input_tokens_seen": 177110816, "step": 81995 }, { "epoch": 13.376835236541599, "grad_norm": 0.013873127289116383, "learning_rate": 0.00029849310264491865, "loss": 0.0026, "num_input_tokens_seen": 177121792, "step": 82000 }, { "epoch": 13.377650897226754, "grad_norm": 1.48568856716156, "learning_rate": 0.00029842796134726, "loss": 0.0551, "num_input_tokens_seen": 177131936, "step": 82005 }, { "epoch": 13.378466557911908, "grad_norm": 0.036852333694696426, "learning_rate": 0.0002983628241347147, "loss": 0.0021, "num_input_tokens_seen": 177142848, "step": 82010 }, { "epoch": 13.379282218597064, "grad_norm": 0.10424373298883438, "learning_rate": 0.0002982976910086024, "loss": 0.017, "num_input_tokens_seen": 177153312, "step": 82015 }, { "epoch": 13.380097879282218, "grad_norm": 0.08175593614578247, "learning_rate": 0.0002982325619702433, "loss": 0.0063, "num_input_tokens_seen": 177164224, "step": 82020 }, { "epoch": 13.380913539967374, "grad_norm": 0.006303591188043356, "learning_rate": 0.0002981674370209573, "loss": 0.0893, "num_input_tokens_seen": 177174944, "step": 82025 }, { "epoch": 13.38172920065253, "grad_norm": 0.0339217446744442, "learning_rate": 0.00029810231616206426, "loss": 0.0165, "num_input_tokens_seen": 177185184, "step": 82030 }, { "epoch": 13.382544861337683, "grad_norm": 0.002143233548849821, "learning_rate": 0.00029803719939488387, "loss": 0.0025, "num_input_tokens_seen": 177197536, "step": 82035 }, { "epoch": 13.383360522022839, "grad_norm": 0.0740416944026947, "learning_rate": 0.0002979720867207358, "loss": 0.0063, "num_input_tokens_seen": 177209184, "step": 82040 }, { "epoch": 13.384176182707993, "grad_norm": 0.005400381051003933, "learning_rate": 0.0002979069781409397, "loss": 0.0072, "num_input_tokens_seen": 177220416, "step": 82045 }, { "epoch": 13.384991843393149, "grad_norm": 0.012265348806977272, "learning_rate": 0.00029784187365681516, "loss": 0.0061, "num_input_tokens_seen": 177232032, "step": 82050 }, { "epoch": 13.385807504078304, "grad_norm": 0.002419403288513422, "learning_rate": 0.00029777677326968144, "loss": 0.0047, "num_input_tokens_seen": 177243328, "step": 82055 }, { "epoch": 13.386623164763458, "grad_norm": 0.0014468590961769223, "learning_rate": 0.0002977116769808579, "loss": 0.0027, "num_input_tokens_seen": 177254368, "step": 82060 }, { "epoch": 13.387438825448614, "grad_norm": 0.0005729938857257366, "learning_rate": 0.000297646584791664, "loss": 0.0059, "num_input_tokens_seen": 177264960, "step": 82065 }, { "epoch": 13.388254486133768, "grad_norm": 0.023177186027169228, "learning_rate": 0.0002975814967034185, "loss": 0.0064, "num_input_tokens_seen": 177275936, "step": 82070 }, { "epoch": 13.389070146818923, "grad_norm": 0.02205835096538067, "learning_rate": 0.000297516412717441, "loss": 0.0033, "num_input_tokens_seen": 177286144, "step": 82075 }, { "epoch": 13.38988580750408, "grad_norm": 0.004381283186376095, "learning_rate": 0.0002974513328350501, "loss": 0.0034, "num_input_tokens_seen": 177297408, "step": 82080 }, { "epoch": 13.390701468189233, "grad_norm": 0.004533675499260426, "learning_rate": 0.00029738625705756514, "loss": 0.0061, "num_input_tokens_seen": 177309568, "step": 82085 }, { "epoch": 13.391517128874389, "grad_norm": 0.002455994486808777, "learning_rate": 0.0002973211853863044, "loss": 0.0036, "num_input_tokens_seen": 177320352, "step": 82090 }, { "epoch": 13.392332789559543, "grad_norm": 0.0036011829506605864, "learning_rate": 0.0002972561178225872, "loss": 0.0024, "num_input_tokens_seen": 177330048, "step": 82095 }, { "epoch": 13.393148450244698, "grad_norm": 0.0032857232727110386, "learning_rate": 0.00029719105436773187, "loss": 0.0017, "num_input_tokens_seen": 177340896, "step": 82100 }, { "epoch": 13.393964110929852, "grad_norm": 0.002111183013767004, "learning_rate": 0.00029712599502305714, "loss": 0.0053, "num_input_tokens_seen": 177352480, "step": 82105 }, { "epoch": 13.394779771615008, "grad_norm": 0.0007205134606920183, "learning_rate": 0.0002970609397898814, "loss": 0.0022, "num_input_tokens_seen": 177361856, "step": 82110 }, { "epoch": 13.395595432300164, "grad_norm": 0.0033076724503189325, "learning_rate": 0.0002969958886695233, "loss": 0.1464, "num_input_tokens_seen": 177372768, "step": 82115 }, { "epoch": 13.396411092985318, "grad_norm": 0.0014311681734398007, "learning_rate": 0.00029693084166330084, "loss": 0.009, "num_input_tokens_seen": 177383360, "step": 82120 }, { "epoch": 13.397226753670473, "grad_norm": 0.017201535403728485, "learning_rate": 0.00029686579877253276, "loss": 0.0025, "num_input_tokens_seen": 177393888, "step": 82125 }, { "epoch": 13.398042414355627, "grad_norm": 0.00801269244402647, "learning_rate": 0.0002968007599985367, "loss": 0.0014, "num_input_tokens_seen": 177404992, "step": 82130 }, { "epoch": 13.398858075040783, "grad_norm": 0.018369020894169807, "learning_rate": 0.0002967357253426313, "loss": 0.0023, "num_input_tokens_seen": 177415136, "step": 82135 }, { "epoch": 13.399673735725939, "grad_norm": 0.008904990740120411, "learning_rate": 0.000296670694806134, "loss": 0.0965, "num_input_tokens_seen": 177424256, "step": 82140 }, { "epoch": 13.400489396411093, "grad_norm": 0.003861672943457961, "learning_rate": 0.00029660566839036315, "loss": 0.0014, "num_input_tokens_seen": 177435456, "step": 82145 }, { "epoch": 13.401305057096248, "grad_norm": 0.447512149810791, "learning_rate": 0.0002965406460966364, "loss": 0.0375, "num_input_tokens_seen": 177445536, "step": 82150 }, { "epoch": 13.402120717781402, "grad_norm": 0.00251060351729393, "learning_rate": 0.00029647562792627145, "loss": 0.0217, "num_input_tokens_seen": 177457184, "step": 82155 }, { "epoch": 13.402936378466558, "grad_norm": 0.016238614916801453, "learning_rate": 0.0002964106138805864, "loss": 0.0031, "num_input_tokens_seen": 177467488, "step": 82160 }, { "epoch": 13.403752039151712, "grad_norm": 0.010289501398801804, "learning_rate": 0.00029634560396089827, "loss": 0.0092, "num_input_tokens_seen": 177477856, "step": 82165 }, { "epoch": 13.404567699836868, "grad_norm": 0.0029544297140091658, "learning_rate": 0.00029628059816852497, "loss": 0.0494, "num_input_tokens_seen": 177488480, "step": 82170 }, { "epoch": 13.405383360522023, "grad_norm": 0.012710874900221825, "learning_rate": 0.0002962155965047837, "loss": 0.029, "num_input_tokens_seen": 177498656, "step": 82175 }, { "epoch": 13.406199021207177, "grad_norm": 0.0044509959407150745, "learning_rate": 0.00029615059897099196, "loss": 0.0113, "num_input_tokens_seen": 177510016, "step": 82180 }, { "epoch": 13.407014681892333, "grad_norm": 0.019407030194997787, "learning_rate": 0.0002960856055684668, "loss": 0.0017, "num_input_tokens_seen": 177521216, "step": 82185 }, { "epoch": 13.407830342577487, "grad_norm": 0.004138377495110035, "learning_rate": 0.0002960206162985256, "loss": 0.0646, "num_input_tokens_seen": 177530784, "step": 82190 }, { "epoch": 13.408646003262643, "grad_norm": 0.0222416240721941, "learning_rate": 0.0002959556311624855, "loss": 0.0104, "num_input_tokens_seen": 177540800, "step": 82195 }, { "epoch": 13.409461663947798, "grad_norm": 0.0034956561867147684, "learning_rate": 0.0002958906501616632, "loss": 0.0054, "num_input_tokens_seen": 177551328, "step": 82200 }, { "epoch": 13.410277324632952, "grad_norm": 0.0069087352603673935, "learning_rate": 0.0002958256732973759, "loss": 0.0024, "num_input_tokens_seen": 177563872, "step": 82205 }, { "epoch": 13.411092985318108, "grad_norm": 0.0064132362604141235, "learning_rate": 0.00029576070057094034, "loss": 0.0173, "num_input_tokens_seen": 177575808, "step": 82210 }, { "epoch": 13.411908646003262, "grad_norm": 0.4843251407146454, "learning_rate": 0.00029569573198367317, "loss": 0.05, "num_input_tokens_seen": 177586304, "step": 82215 }, { "epoch": 13.412724306688418, "grad_norm": 0.007687193341553211, "learning_rate": 0.00029563076753689137, "loss": 0.0161, "num_input_tokens_seen": 177597760, "step": 82220 }, { "epoch": 13.413539967373573, "grad_norm": 0.018417716026306152, "learning_rate": 0.00029556580723191116, "loss": 0.0154, "num_input_tokens_seen": 177608992, "step": 82225 }, { "epoch": 13.414355628058727, "grad_norm": 0.03263521566987038, "learning_rate": 0.00029550085107004937, "loss": 0.0021, "num_input_tokens_seen": 177620096, "step": 82230 }, { "epoch": 13.415171288743883, "grad_norm": 0.006521139293909073, "learning_rate": 0.0002954358990526221, "loss": 0.0106, "num_input_tokens_seen": 177630848, "step": 82235 }, { "epoch": 13.415986949429037, "grad_norm": 0.010773967020213604, "learning_rate": 0.000295370951180946, "loss": 0.1575, "num_input_tokens_seen": 177641600, "step": 82240 }, { "epoch": 13.416802610114193, "grad_norm": 0.032416198402643204, "learning_rate": 0.00029530600745633693, "loss": 0.0056, "num_input_tokens_seen": 177652992, "step": 82245 }, { "epoch": 13.417618270799348, "grad_norm": 0.0022988219279795885, "learning_rate": 0.0002952410678801116, "loss": 0.0608, "num_input_tokens_seen": 177664384, "step": 82250 }, { "epoch": 13.418433931484502, "grad_norm": 0.002156211994588375, "learning_rate": 0.0002951761324535855, "loss": 0.0025, "num_input_tokens_seen": 177674784, "step": 82255 }, { "epoch": 13.419249592169658, "grad_norm": 0.002031585667282343, "learning_rate": 0.00029511120117807493, "loss": 0.0055, "num_input_tokens_seen": 177684768, "step": 82260 }, { "epoch": 13.420065252854812, "grad_norm": 0.002410220680758357, "learning_rate": 0.00029504627405489605, "loss": 0.0031, "num_input_tokens_seen": 177695456, "step": 82265 }, { "epoch": 13.420880913539968, "grad_norm": 0.11466678231954575, "learning_rate": 0.0002949813510853641, "loss": 0.0986, "num_input_tokens_seen": 177706912, "step": 82270 }, { "epoch": 13.421696574225122, "grad_norm": 0.0004403699131216854, "learning_rate": 0.00029491643227079543, "loss": 0.0071, "num_input_tokens_seen": 177718240, "step": 82275 }, { "epoch": 13.422512234910277, "grad_norm": 0.019405366852879524, "learning_rate": 0.00029485151761250527, "loss": 0.1391, "num_input_tokens_seen": 177728384, "step": 82280 }, { "epoch": 13.423327895595433, "grad_norm": 0.0068669854663312435, "learning_rate": 0.0002947866071118095, "loss": 0.0063, "num_input_tokens_seen": 177739328, "step": 82285 }, { "epoch": 13.424143556280587, "grad_norm": 0.4046556353569031, "learning_rate": 0.00029472170077002324, "loss": 0.129, "num_input_tokens_seen": 177749120, "step": 82290 }, { "epoch": 13.424959216965743, "grad_norm": 0.0009423498995602131, "learning_rate": 0.0002946567985884624, "loss": 0.0036, "num_input_tokens_seen": 177759808, "step": 82295 }, { "epoch": 13.425774877650896, "grad_norm": 0.004357687663286924, "learning_rate": 0.0002945919005684418, "loss": 0.0039, "num_input_tokens_seen": 177768512, "step": 82300 }, { "epoch": 13.426590538336052, "grad_norm": 0.0007036282331682742, "learning_rate": 0.0002945270067112771, "loss": 0.0063, "num_input_tokens_seen": 177779104, "step": 82305 }, { "epoch": 13.427406199021208, "grad_norm": 0.004518335685133934, "learning_rate": 0.0002944621170182831, "loss": 0.0024, "num_input_tokens_seen": 177789024, "step": 82310 }, { "epoch": 13.428221859706362, "grad_norm": 0.002193318447098136, "learning_rate": 0.00029439723149077523, "loss": 0.0025, "num_input_tokens_seen": 177799392, "step": 82315 }, { "epoch": 13.429037520391518, "grad_norm": 0.050887856632471085, "learning_rate": 0.0002943323501300681, "loss": 0.0037, "num_input_tokens_seen": 177809088, "step": 82320 }, { "epoch": 13.429853181076671, "grad_norm": 0.05680084228515625, "learning_rate": 0.00029426747293747685, "loss": 0.0064, "num_input_tokens_seen": 177820608, "step": 82325 }, { "epoch": 13.430668841761827, "grad_norm": 0.00340280425734818, "learning_rate": 0.00029420259991431633, "loss": 0.006, "num_input_tokens_seen": 177832352, "step": 82330 }, { "epoch": 13.431484502446983, "grad_norm": 0.018648672848939896, "learning_rate": 0.0002941377310619011, "loss": 0.0082, "num_input_tokens_seen": 177843616, "step": 82335 }, { "epoch": 13.432300163132137, "grad_norm": 1.2718156576156616, "learning_rate": 0.00029407286638154597, "loss": 0.0984, "num_input_tokens_seen": 177854144, "step": 82340 }, { "epoch": 13.433115823817293, "grad_norm": 0.009632064029574394, "learning_rate": 0.00029400800587456544, "loss": 0.0028, "num_input_tokens_seen": 177865152, "step": 82345 }, { "epoch": 13.433931484502446, "grad_norm": 0.0709516853094101, "learning_rate": 0.00029394314954227387, "loss": 0.0043, "num_input_tokens_seen": 177876416, "step": 82350 }, { "epoch": 13.434747145187602, "grad_norm": 0.0647798404097557, "learning_rate": 0.000293878297385986, "loss": 0.0214, "num_input_tokens_seen": 177887424, "step": 82355 }, { "epoch": 13.435562805872756, "grad_norm": 0.007885068655014038, "learning_rate": 0.0002938134494070157, "loss": 0.0053, "num_input_tokens_seen": 177899392, "step": 82360 }, { "epoch": 13.436378466557912, "grad_norm": 0.016093425452709198, "learning_rate": 0.00029374860560667747, "loss": 0.0134, "num_input_tokens_seen": 177910048, "step": 82365 }, { "epoch": 13.437194127243067, "grad_norm": 0.01708192005753517, "learning_rate": 0.00029368376598628545, "loss": 0.0023, "num_input_tokens_seen": 177920224, "step": 82370 }, { "epoch": 13.438009787928221, "grad_norm": 0.0012648747069761157, "learning_rate": 0.00029361893054715365, "loss": 0.0066, "num_input_tokens_seen": 177930880, "step": 82375 }, { "epoch": 13.438825448613377, "grad_norm": 0.007827023044228554, "learning_rate": 0.000293554099290596, "loss": 0.0212, "num_input_tokens_seen": 177942016, "step": 82380 }, { "epoch": 13.439641109298531, "grad_norm": 0.009230383671820164, "learning_rate": 0.0002934892722179264, "loss": 0.0015, "num_input_tokens_seen": 177950560, "step": 82385 }, { "epoch": 13.440456769983687, "grad_norm": 0.0039056178648024797, "learning_rate": 0.0002934244493304588, "loss": 0.1685, "num_input_tokens_seen": 177961088, "step": 82390 }, { "epoch": 13.441272430668842, "grad_norm": 0.0029746955260634422, "learning_rate": 0.0002933596306295066, "loss": 0.0086, "num_input_tokens_seen": 177970656, "step": 82395 }, { "epoch": 13.442088091353996, "grad_norm": 0.009408979676663876, "learning_rate": 0.0002932948161163839, "loss": 0.1229, "num_input_tokens_seen": 177981792, "step": 82400 }, { "epoch": 13.442903752039152, "grad_norm": 0.06069672852754593, "learning_rate": 0.0002932300057924037, "loss": 0.0038, "num_input_tokens_seen": 177992480, "step": 82405 }, { "epoch": 13.443719412724306, "grad_norm": 0.0016424978384748101, "learning_rate": 0.0002931651996588799, "loss": 0.0783, "num_input_tokens_seen": 178002944, "step": 82410 }, { "epoch": 13.444535073409462, "grad_norm": 0.12447664141654968, "learning_rate": 0.0002931003977171256, "loss": 0.009, "num_input_tokens_seen": 178012704, "step": 82415 }, { "epoch": 13.445350734094617, "grad_norm": 0.0165871512144804, "learning_rate": 0.00029303559996845434, "loss": 0.0542, "num_input_tokens_seen": 178024160, "step": 82420 }, { "epoch": 13.446166394779771, "grad_norm": 0.003005496459081769, "learning_rate": 0.00029297080641417907, "loss": 0.034, "num_input_tokens_seen": 178035552, "step": 82425 }, { "epoch": 13.446982055464927, "grad_norm": 0.0012617846950888634, "learning_rate": 0.0002929060170556132, "loss": 0.1092, "num_input_tokens_seen": 178046848, "step": 82430 }, { "epoch": 13.447797716150081, "grad_norm": 0.6911592483520508, "learning_rate": 0.00029284123189406944, "loss": 0.1113, "num_input_tokens_seen": 178058432, "step": 82435 }, { "epoch": 13.448613376835237, "grad_norm": 0.03883085772395134, "learning_rate": 0.00029277645093086114, "loss": 0.0076, "num_input_tokens_seen": 178070496, "step": 82440 }, { "epoch": 13.449429037520392, "grad_norm": 0.0312496330589056, "learning_rate": 0.00029271167416730073, "loss": 0.0099, "num_input_tokens_seen": 178080960, "step": 82445 }, { "epoch": 13.450244698205546, "grad_norm": 0.008447905071079731, "learning_rate": 0.0002926469016047013, "loss": 0.0788, "num_input_tokens_seen": 178092000, "step": 82450 }, { "epoch": 13.451060358890702, "grad_norm": 0.031177420169115067, "learning_rate": 0.00029258213324437533, "loss": 0.0185, "num_input_tokens_seen": 178102336, "step": 82455 }, { "epoch": 13.451876019575856, "grad_norm": 0.10189617425203323, "learning_rate": 0.00029251736908763584, "loss": 0.0469, "num_input_tokens_seen": 178113248, "step": 82460 }, { "epoch": 13.452691680261012, "grad_norm": 0.6185612678527832, "learning_rate": 0.00029245260913579477, "loss": 0.0228, "num_input_tokens_seen": 178124192, "step": 82465 }, { "epoch": 13.453507340946166, "grad_norm": 0.013284931890666485, "learning_rate": 0.00029238785339016487, "loss": 0.0224, "num_input_tokens_seen": 178135424, "step": 82470 }, { "epoch": 13.454323001631321, "grad_norm": 0.19014760851860046, "learning_rate": 0.0002923231018520588, "loss": 0.0096, "num_input_tokens_seen": 178145952, "step": 82475 }, { "epoch": 13.455138662316477, "grad_norm": 0.006610923446714878, "learning_rate": 0.0002922583545227882, "loss": 0.0039, "num_input_tokens_seen": 178156768, "step": 82480 }, { "epoch": 13.455954323001631, "grad_norm": 0.14724524319171906, "learning_rate": 0.00029219361140366587, "loss": 0.0108, "num_input_tokens_seen": 178167616, "step": 82485 }, { "epoch": 13.456769983686787, "grad_norm": 0.026569068431854248, "learning_rate": 0.0002921288724960034, "loss": 0.0036, "num_input_tokens_seen": 178179008, "step": 82490 }, { "epoch": 13.45758564437194, "grad_norm": 0.009068429470062256, "learning_rate": 0.00029206413780111305, "loss": 0.0427, "num_input_tokens_seen": 178189312, "step": 82495 }, { "epoch": 13.458401305057096, "grad_norm": 0.002162341959774494, "learning_rate": 0.00029199940732030686, "loss": 0.0084, "num_input_tokens_seen": 178198688, "step": 82500 }, { "epoch": 13.459216965742252, "grad_norm": 0.008699199184775352, "learning_rate": 0.0002919346810548965, "loss": 0.052, "num_input_tokens_seen": 178208768, "step": 82505 }, { "epoch": 13.460032626427406, "grad_norm": 1.6339211463928223, "learning_rate": 0.00029186995900619373, "loss": 0.0332, "num_input_tokens_seen": 178219776, "step": 82510 }, { "epoch": 13.460848287112562, "grad_norm": 0.012227796018123627, "learning_rate": 0.00029180524117551035, "loss": 0.0082, "num_input_tokens_seen": 178230560, "step": 82515 }, { "epoch": 13.461663947797716, "grad_norm": 0.000886613386683166, "learning_rate": 0.0002917405275641578, "loss": 0.0126, "num_input_tokens_seen": 178240704, "step": 82520 }, { "epoch": 13.462479608482871, "grad_norm": 0.04731287062168121, "learning_rate": 0.00029167581817344775, "loss": 0.0819, "num_input_tokens_seen": 178252096, "step": 82525 }, { "epoch": 13.463295269168025, "grad_norm": 0.0025267750024795532, "learning_rate": 0.00029161111300469143, "loss": 0.0022, "num_input_tokens_seen": 178262400, "step": 82530 }, { "epoch": 13.464110929853181, "grad_norm": 0.04110927879810333, "learning_rate": 0.0002915464120592003, "loss": 0.0065, "num_input_tokens_seen": 178273568, "step": 82535 }, { "epoch": 13.464926590538337, "grad_norm": 0.007446099538356066, "learning_rate": 0.0002914817153382856, "loss": 0.0027, "num_input_tokens_seen": 178284576, "step": 82540 }, { "epoch": 13.46574225122349, "grad_norm": 0.005990834906697273, "learning_rate": 0.00029141702284325846, "loss": 0.0069, "num_input_tokens_seen": 178295456, "step": 82545 }, { "epoch": 13.466557911908646, "grad_norm": 0.017972951754927635, "learning_rate": 0.0002913523345754299, "loss": 0.0037, "num_input_tokens_seen": 178306848, "step": 82550 }, { "epoch": 13.4673735725938, "grad_norm": 0.0014292324194684625, "learning_rate": 0.0002912876505361111, "loss": 0.0045, "num_input_tokens_seen": 178318688, "step": 82555 }, { "epoch": 13.468189233278956, "grad_norm": 0.010235908441245556, "learning_rate": 0.00029122297072661264, "loss": 0.0165, "num_input_tokens_seen": 178328288, "step": 82560 }, { "epoch": 13.469004893964112, "grad_norm": 0.019145233556628227, "learning_rate": 0.00029115829514824565, "loss": 0.018, "num_input_tokens_seen": 178337600, "step": 82565 }, { "epoch": 13.469820554649266, "grad_norm": 0.0088666882365942, "learning_rate": 0.00029109362380232075, "loss": 0.0045, "num_input_tokens_seen": 178347552, "step": 82570 }, { "epoch": 13.470636215334421, "grad_norm": 0.036711398512125015, "learning_rate": 0.0002910289566901485, "loss": 0.0779, "num_input_tokens_seen": 178359520, "step": 82575 }, { "epoch": 13.471451876019575, "grad_norm": 0.004469173029065132, "learning_rate": 0.0002909642938130394, "loss": 0.0034, "num_input_tokens_seen": 178371104, "step": 82580 }, { "epoch": 13.47226753670473, "grad_norm": 0.005990062840282917, "learning_rate": 0.0002908996351723043, "loss": 0.0013, "num_input_tokens_seen": 178381984, "step": 82585 }, { "epoch": 13.473083197389887, "grad_norm": 0.0013577784411609173, "learning_rate": 0.0002908349807692533, "loss": 0.008, "num_input_tokens_seen": 178392032, "step": 82590 }, { "epoch": 13.47389885807504, "grad_norm": 0.29914239048957825, "learning_rate": 0.00029077033060519674, "loss": 0.0557, "num_input_tokens_seen": 178402272, "step": 82595 }, { "epoch": 13.474714518760196, "grad_norm": 0.006352984346449375, "learning_rate": 0.0002907056846814449, "loss": 0.0029, "num_input_tokens_seen": 178412992, "step": 82600 }, { "epoch": 13.47553017944535, "grad_norm": 0.003830127650871873, "learning_rate": 0.00029064104299930785, "loss": 0.0168, "num_input_tokens_seen": 178423904, "step": 82605 }, { "epoch": 13.476345840130506, "grad_norm": 0.0034729652106761932, "learning_rate": 0.00029057640556009567, "loss": 0.0399, "num_input_tokens_seen": 178433376, "step": 82610 }, { "epoch": 13.477161500815662, "grad_norm": 0.0034876209683716297, "learning_rate": 0.0002905117723651183, "loss": 0.094, "num_input_tokens_seen": 178444480, "step": 82615 }, { "epoch": 13.477977161500815, "grad_norm": 0.06251949816942215, "learning_rate": 0.0002904471434156856, "loss": 0.0109, "num_input_tokens_seen": 178455328, "step": 82620 }, { "epoch": 13.478792822185971, "grad_norm": 0.006139388307929039, "learning_rate": 0.0002903825187131074, "loss": 0.0031, "num_input_tokens_seen": 178465888, "step": 82625 }, { "epoch": 13.479608482871125, "grad_norm": 0.01711699180305004, "learning_rate": 0.00029031789825869334, "loss": 0.0057, "num_input_tokens_seen": 178476288, "step": 82630 }, { "epoch": 13.48042414355628, "grad_norm": 0.009003594517707825, "learning_rate": 0.0002902532820537531, "loss": 0.0763, "num_input_tokens_seen": 178486688, "step": 82635 }, { "epoch": 13.481239804241435, "grad_norm": 0.0052217403426766396, "learning_rate": 0.00029018867009959623, "loss": 0.0043, "num_input_tokens_seen": 178496928, "step": 82640 }, { "epoch": 13.48205546492659, "grad_norm": 0.15717969834804535, "learning_rate": 0.0002901240623975321, "loss": 0.0928, "num_input_tokens_seen": 178507296, "step": 82645 }, { "epoch": 13.482871125611746, "grad_norm": 0.13155721127986908, "learning_rate": 0.00029005945894887, "loss": 0.119, "num_input_tokens_seen": 178518592, "step": 82650 }, { "epoch": 13.4836867862969, "grad_norm": 0.026101280003786087, "learning_rate": 0.0002899948597549194, "loss": 0.0057, "num_input_tokens_seen": 178528896, "step": 82655 }, { "epoch": 13.484502446982056, "grad_norm": 0.35371142625808716, "learning_rate": 0.00028993026481698934, "loss": 0.1464, "num_input_tokens_seen": 178539712, "step": 82660 }, { "epoch": 13.48531810766721, "grad_norm": 0.16254015266895294, "learning_rate": 0.00028986567413638895, "loss": 0.0077, "num_input_tokens_seen": 178549728, "step": 82665 }, { "epoch": 13.486133768352365, "grad_norm": 0.010674665682017803, "learning_rate": 0.00028980108771442726, "loss": 0.0255, "num_input_tokens_seen": 178561248, "step": 82670 }, { "epoch": 13.486949429037521, "grad_norm": 0.0005801775259897113, "learning_rate": 0.00028973650555241316, "loss": 0.0074, "num_input_tokens_seen": 178572256, "step": 82675 }, { "epoch": 13.487765089722675, "grad_norm": 0.04390391334891319, "learning_rate": 0.0002896719276516555, "loss": 0.0084, "num_input_tokens_seen": 178583584, "step": 82680 }, { "epoch": 13.48858075040783, "grad_norm": 0.718368649482727, "learning_rate": 0.0002896073540134631, "loss": 0.0385, "num_input_tokens_seen": 178594720, "step": 82685 }, { "epoch": 13.489396411092985, "grad_norm": 0.02619067020714283, "learning_rate": 0.00028954278463914435, "loss": 0.0058, "num_input_tokens_seen": 178604480, "step": 82690 }, { "epoch": 13.49021207177814, "grad_norm": 0.005772117991000414, "learning_rate": 0.00028947821953000845, "loss": 0.0516, "num_input_tokens_seen": 178614272, "step": 82695 }, { "epoch": 13.491027732463296, "grad_norm": 0.0048257578164339066, "learning_rate": 0.00028941365868736315, "loss": 0.0043, "num_input_tokens_seen": 178624608, "step": 82700 }, { "epoch": 13.49184339314845, "grad_norm": 0.1376570165157318, "learning_rate": 0.00028934910211251755, "loss": 0.0073, "num_input_tokens_seen": 178635552, "step": 82705 }, { "epoch": 13.492659053833606, "grad_norm": 0.0016758694546297193, "learning_rate": 0.0002892845498067792, "loss": 0.1607, "num_input_tokens_seen": 178647008, "step": 82710 }, { "epoch": 13.49347471451876, "grad_norm": 0.00668095238506794, "learning_rate": 0.0002892200017714572, "loss": 0.0149, "num_input_tokens_seen": 178659040, "step": 82715 }, { "epoch": 13.494290375203915, "grad_norm": 0.056710727512836456, "learning_rate": 0.00028915545800785883, "loss": 0.044, "num_input_tokens_seen": 178669600, "step": 82720 }, { "epoch": 13.49510603588907, "grad_norm": 0.02826865203678608, "learning_rate": 0.0002890909185172928, "loss": 0.0159, "num_input_tokens_seen": 178678560, "step": 82725 }, { "epoch": 13.495921696574225, "grad_norm": 1.417874813079834, "learning_rate": 0.00028902638330106684, "loss": 0.0397, "num_input_tokens_seen": 178689504, "step": 82730 }, { "epoch": 13.49673735725938, "grad_norm": 0.0027501648291945457, "learning_rate": 0.0002889618523604889, "loss": 0.1004, "num_input_tokens_seen": 178700608, "step": 82735 }, { "epoch": 13.497553017944535, "grad_norm": 0.003090892219915986, "learning_rate": 0.0002888973256968667, "loss": 0.0373, "num_input_tokens_seen": 178711744, "step": 82740 }, { "epoch": 13.49836867862969, "grad_norm": 0.009183863177895546, "learning_rate": 0.000288832803311508, "loss": 0.0142, "num_input_tokens_seen": 178722016, "step": 82745 }, { "epoch": 13.499184339314844, "grad_norm": 0.0019286853494122624, "learning_rate": 0.00028876828520572043, "loss": 0.0031, "num_input_tokens_seen": 178733056, "step": 82750 }, { "epoch": 13.5, "grad_norm": 0.013043577782809734, "learning_rate": 0.0002887037713808116, "loss": 0.0038, "num_input_tokens_seen": 178744384, "step": 82755 }, { "epoch": 13.500815660685156, "grad_norm": 0.005044702906161547, "learning_rate": 0.0002886392618380888, "loss": 0.0636, "num_input_tokens_seen": 178755104, "step": 82760 }, { "epoch": 13.50163132137031, "grad_norm": 0.08455964177846909, "learning_rate": 0.00028857475657885956, "loss": 0.0137, "num_input_tokens_seen": 178766208, "step": 82765 }, { "epoch": 13.502446982055465, "grad_norm": 0.1594812124967575, "learning_rate": 0.00028851025560443103, "loss": 0.0424, "num_input_tokens_seen": 178776672, "step": 82770 }, { "epoch": 13.50326264274062, "grad_norm": 0.0038309351075440645, "learning_rate": 0.0002884457589161105, "loss": 0.0946, "num_input_tokens_seen": 178787648, "step": 82775 }, { "epoch": 13.504078303425775, "grad_norm": 0.004739274736493826, "learning_rate": 0.000288381266515205, "loss": 0.004, "num_input_tokens_seen": 178799328, "step": 82780 }, { "epoch": 13.50489396411093, "grad_norm": 0.010969329625368118, "learning_rate": 0.0002883167784030216, "loss": 0.0991, "num_input_tokens_seen": 178809984, "step": 82785 }, { "epoch": 13.505709624796085, "grad_norm": 0.20839625597000122, "learning_rate": 0.00028825229458086726, "loss": 0.0167, "num_input_tokens_seen": 178819392, "step": 82790 }, { "epoch": 13.50652528548124, "grad_norm": 0.0025618516374379396, "learning_rate": 0.0002881878150500486, "loss": 0.1612, "num_input_tokens_seen": 178830624, "step": 82795 }, { "epoch": 13.507340946166394, "grad_norm": 0.116312175989151, "learning_rate": 0.00028812333981187297, "loss": 0.1245, "num_input_tokens_seen": 178840512, "step": 82800 }, { "epoch": 13.50815660685155, "grad_norm": 0.020539624616503716, "learning_rate": 0.00028805886886764623, "loss": 0.0041, "num_input_tokens_seen": 178851168, "step": 82805 }, { "epoch": 13.508972267536706, "grad_norm": 0.052292123436927795, "learning_rate": 0.00028799440221867576, "loss": 0.007, "num_input_tokens_seen": 178862112, "step": 82810 }, { "epoch": 13.50978792822186, "grad_norm": 0.04409019276499748, "learning_rate": 0.00028792993986626725, "loss": 0.0053, "num_input_tokens_seen": 178873888, "step": 82815 }, { "epoch": 13.510603588907015, "grad_norm": 0.10002454370260239, "learning_rate": 0.000287865481811728, "loss": 0.0152, "num_input_tokens_seen": 178884448, "step": 82820 }, { "epoch": 13.51141924959217, "grad_norm": 0.004765619989484549, "learning_rate": 0.00028780102805636346, "loss": 0.0023, "num_input_tokens_seen": 178895648, "step": 82825 }, { "epoch": 13.512234910277325, "grad_norm": 0.011683987453579903, "learning_rate": 0.0002877365786014806, "loss": 0.0039, "num_input_tokens_seen": 178906304, "step": 82830 }, { "epoch": 13.513050570962479, "grad_norm": 0.0020222472958266735, "learning_rate": 0.00028767213344838493, "loss": 0.0322, "num_input_tokens_seen": 178917440, "step": 82835 }, { "epoch": 13.513866231647635, "grad_norm": 0.0032288068905472755, "learning_rate": 0.00028760769259838327, "loss": 0.1212, "num_input_tokens_seen": 178929216, "step": 82840 }, { "epoch": 13.51468189233279, "grad_norm": 0.008228391408920288, "learning_rate": 0.00028754325605278067, "loss": 0.1432, "num_input_tokens_seen": 178940512, "step": 82845 }, { "epoch": 13.515497553017944, "grad_norm": 0.0787166953086853, "learning_rate": 0.00028747882381288393, "loss": 0.016, "num_input_tokens_seen": 178951040, "step": 82850 }, { "epoch": 13.5163132137031, "grad_norm": 0.007508369162678719, "learning_rate": 0.00028741439587999805, "loss": 0.0067, "num_input_tokens_seen": 178962784, "step": 82855 }, { "epoch": 13.517128874388254, "grad_norm": 0.0014908368466421962, "learning_rate": 0.00028734997225542954, "loss": 0.0084, "num_input_tokens_seen": 178973696, "step": 82860 }, { "epoch": 13.51794453507341, "grad_norm": 0.519140362739563, "learning_rate": 0.0002872855529404832, "loss": 0.0338, "num_input_tokens_seen": 178984960, "step": 82865 }, { "epoch": 13.518760195758565, "grad_norm": 0.015057248063385487, "learning_rate": 0.0002872211379364651, "loss": 0.0158, "num_input_tokens_seen": 178995904, "step": 82870 }, { "epoch": 13.51957585644372, "grad_norm": 0.19566139578819275, "learning_rate": 0.00028715672724468065, "loss": 0.0091, "num_input_tokens_seen": 179006784, "step": 82875 }, { "epoch": 13.520391517128875, "grad_norm": 0.012074259109795094, "learning_rate": 0.0002870923208664351, "loss": 0.0082, "num_input_tokens_seen": 179017472, "step": 82880 }, { "epoch": 13.521207177814029, "grad_norm": 0.01812121272087097, "learning_rate": 0.0002870279188030338, "loss": 0.0047, "num_input_tokens_seen": 179028960, "step": 82885 }, { "epoch": 13.522022838499185, "grad_norm": 0.08663219213485718, "learning_rate": 0.00028696352105578185, "loss": 0.0073, "num_input_tokens_seen": 179040448, "step": 82890 }, { "epoch": 13.522838499184338, "grad_norm": 0.008986803703010082, "learning_rate": 0.0002868991276259844, "loss": 0.0028, "num_input_tokens_seen": 179052064, "step": 82895 }, { "epoch": 13.523654159869494, "grad_norm": 0.001685730996541679, "learning_rate": 0.0002868347385149465, "loss": 0.0104, "num_input_tokens_seen": 179062464, "step": 82900 }, { "epoch": 13.52446982055465, "grad_norm": 0.033600907772779465, "learning_rate": 0.000286770353723973, "loss": 0.0366, "num_input_tokens_seen": 179073152, "step": 82905 }, { "epoch": 13.525285481239804, "grad_norm": 0.014643524773418903, "learning_rate": 0.00028670597325436886, "loss": 0.0072, "num_input_tokens_seen": 179083328, "step": 82910 }, { "epoch": 13.52610114192496, "grad_norm": 0.010406344197690487, "learning_rate": 0.0002866415971074387, "loss": 0.0086, "num_input_tokens_seen": 179093920, "step": 82915 }, { "epoch": 13.526916802610113, "grad_norm": 0.006890468765050173, "learning_rate": 0.000286577225284487, "loss": 0.0045, "num_input_tokens_seen": 179105344, "step": 82920 }, { "epoch": 13.52773246329527, "grad_norm": 0.0029026255942881107, "learning_rate": 0.00028651285778681906, "loss": 0.0105, "num_input_tokens_seen": 179116704, "step": 82925 }, { "epoch": 13.528548123980425, "grad_norm": 0.003112967126071453, "learning_rate": 0.00028644849461573847, "loss": 0.0316, "num_input_tokens_seen": 179128032, "step": 82930 }, { "epoch": 13.529363784665579, "grad_norm": 0.026661496609449387, "learning_rate": 0.0002863841357725504, "loss": 0.0105, "num_input_tokens_seen": 179139104, "step": 82935 }, { "epoch": 13.530179445350734, "grad_norm": 0.011198869906365871, "learning_rate": 0.00028631978125855844, "loss": 0.0066, "num_input_tokens_seen": 179149984, "step": 82940 }, { "epoch": 13.530995106035888, "grad_norm": 0.29773199558258057, "learning_rate": 0.0002862554310750676, "loss": 0.0216, "num_input_tokens_seen": 179161184, "step": 82945 }, { "epoch": 13.531810766721044, "grad_norm": 0.010510992258787155, "learning_rate": 0.0002861910852233812, "loss": 0.0101, "num_input_tokens_seen": 179173024, "step": 82950 }, { "epoch": 13.5326264274062, "grad_norm": 0.002821574453264475, "learning_rate": 0.00028612674370480406, "loss": 0.0054, "num_input_tokens_seen": 179183520, "step": 82955 }, { "epoch": 13.533442088091354, "grad_norm": 0.005407446064054966, "learning_rate": 0.0002860624065206394, "loss": 0.0724, "num_input_tokens_seen": 179194240, "step": 82960 }, { "epoch": 13.53425774877651, "grad_norm": 0.0026232078671455383, "learning_rate": 0.0002859980736721918, "loss": 0.0075, "num_input_tokens_seen": 179203424, "step": 82965 }, { "epoch": 13.535073409461663, "grad_norm": 0.06447270512580872, "learning_rate": 0.0002859337451607644, "loss": 0.0889, "num_input_tokens_seen": 179214880, "step": 82970 }, { "epoch": 13.535889070146819, "grad_norm": 0.005597327370196581, "learning_rate": 0.0002858694209876616, "loss": 0.0119, "num_input_tokens_seen": 179226080, "step": 82975 }, { "epoch": 13.536704730831975, "grad_norm": 0.04041731357574463, "learning_rate": 0.00028580510115418624, "loss": 0.1063, "num_input_tokens_seen": 179237856, "step": 82980 }, { "epoch": 13.537520391517129, "grad_norm": 0.0014987689210101962, "learning_rate": 0.0002857407856616426, "loss": 0.0037, "num_input_tokens_seen": 179248928, "step": 82985 }, { "epoch": 13.538336052202284, "grad_norm": 0.018002718687057495, "learning_rate": 0.0002856764745113334, "loss": 0.0121, "num_input_tokens_seen": 179260128, "step": 82990 }, { "epoch": 13.539151712887438, "grad_norm": 0.3203493356704712, "learning_rate": 0.00028561216770456267, "loss": 0.0239, "num_input_tokens_seen": 179270912, "step": 82995 }, { "epoch": 13.539967373572594, "grad_norm": 0.01611095853149891, "learning_rate": 0.000285547865242633, "loss": 0.0031, "num_input_tokens_seen": 179280864, "step": 83000 }, { "epoch": 13.540783034257748, "grad_norm": 0.012098937295377254, "learning_rate": 0.000285483567126848, "loss": 0.0111, "num_input_tokens_seen": 179292288, "step": 83005 }, { "epoch": 13.541598694942904, "grad_norm": 0.009693530388176441, "learning_rate": 0.0002854192733585107, "loss": 0.0047, "num_input_tokens_seen": 179302432, "step": 83010 }, { "epoch": 13.54241435562806, "grad_norm": 0.0010672948556020856, "learning_rate": 0.000285354983938924, "loss": 0.0031, "num_input_tokens_seen": 179313440, "step": 83015 }, { "epoch": 13.543230016313213, "grad_norm": 0.05182463303208351, "learning_rate": 0.0002852906988693909, "loss": 0.0099, "num_input_tokens_seen": 179324960, "step": 83020 }, { "epoch": 13.544045676998369, "grad_norm": 0.10404568165540695, "learning_rate": 0.0002852264181512142, "loss": 0.0073, "num_input_tokens_seen": 179335424, "step": 83025 }, { "epoch": 13.544861337683523, "grad_norm": 0.013150133192539215, "learning_rate": 0.00028516214178569656, "loss": 0.0091, "num_input_tokens_seen": 179347392, "step": 83030 }, { "epoch": 13.545676998368679, "grad_norm": 0.05724117159843445, "learning_rate": 0.0002850978697741406, "loss": 0.0117, "num_input_tokens_seen": 179357920, "step": 83035 }, { "epoch": 13.546492659053834, "grad_norm": 0.004341833759099245, "learning_rate": 0.000285033602117849, "loss": 0.0033, "num_input_tokens_seen": 179368736, "step": 83040 }, { "epoch": 13.547308319738988, "grad_norm": 0.03877821937203407, "learning_rate": 0.0002849693388181241, "loss": 0.0161, "num_input_tokens_seen": 179380320, "step": 83045 }, { "epoch": 13.548123980424144, "grad_norm": 0.015632281079888344, "learning_rate": 0.00028490507987626837, "loss": 0.0901, "num_input_tokens_seen": 179389792, "step": 83050 }, { "epoch": 13.548939641109298, "grad_norm": 0.0029565368313342333, "learning_rate": 0.00028484082529358403, "loss": 0.0129, "num_input_tokens_seen": 179401504, "step": 83055 }, { "epoch": 13.549755301794454, "grad_norm": 0.6263442039489746, "learning_rate": 0.0002847765750713733, "loss": 0.1232, "num_input_tokens_seen": 179412736, "step": 83060 }, { "epoch": 13.550570962479608, "grad_norm": 0.009329462423920631, "learning_rate": 0.0002847123292109382, "loss": 0.0026, "num_input_tokens_seen": 179424288, "step": 83065 }, { "epoch": 13.551386623164763, "grad_norm": 0.004615492187440395, "learning_rate": 0.0002846480877135812, "loss": 0.0025, "num_input_tokens_seen": 179435904, "step": 83070 }, { "epoch": 13.552202283849919, "grad_norm": 0.08717557042837143, "learning_rate": 0.00028458385058060355, "loss": 0.0095, "num_input_tokens_seen": 179446688, "step": 83075 }, { "epoch": 13.553017944535073, "grad_norm": 0.05257457494735718, "learning_rate": 0.0002845196178133078, "loss": 0.104, "num_input_tokens_seen": 179456992, "step": 83080 }, { "epoch": 13.553833605220229, "grad_norm": 0.0037878549192100763, "learning_rate": 0.00028445538941299493, "loss": 0.0027, "num_input_tokens_seen": 179468000, "step": 83085 }, { "epoch": 13.554649265905383, "grad_norm": 0.015577950514853, "learning_rate": 0.00028439116538096743, "loss": 0.0133, "num_input_tokens_seen": 179478880, "step": 83090 }, { "epoch": 13.555464926590538, "grad_norm": 0.011540939100086689, "learning_rate": 0.0002843269457185261, "loss": 0.1051, "num_input_tokens_seen": 179490208, "step": 83095 }, { "epoch": 13.556280587275694, "grad_norm": 0.005807126872241497, "learning_rate": 0.00028426273042697327, "loss": 0.0278, "num_input_tokens_seen": 179501056, "step": 83100 }, { "epoch": 13.557096247960848, "grad_norm": 0.3871236741542816, "learning_rate": 0.0002841985195076094, "loss": 0.1398, "num_input_tokens_seen": 179511872, "step": 83105 }, { "epoch": 13.557911908646004, "grad_norm": 0.002544855000451207, "learning_rate": 0.0002841343129617365, "loss": 0.0028, "num_input_tokens_seen": 179522784, "step": 83110 }, { "epoch": 13.558727569331158, "grad_norm": 0.006097372155636549, "learning_rate": 0.0002840701107906557, "loss": 0.0069, "num_input_tokens_seen": 179534528, "step": 83115 }, { "epoch": 13.559543230016313, "grad_norm": 0.22003108263015747, "learning_rate": 0.00028400591299566793, "loss": 0.0302, "num_input_tokens_seen": 179545312, "step": 83120 }, { "epoch": 13.560358890701469, "grad_norm": 0.006511087529361248, "learning_rate": 0.00028394171957807433, "loss": 0.0388, "num_input_tokens_seen": 179554720, "step": 83125 }, { "epoch": 13.561174551386623, "grad_norm": 0.013055982068181038, "learning_rate": 0.000283877530539176, "loss": 0.1136, "num_input_tokens_seen": 179566304, "step": 83130 }, { "epoch": 13.561990212071779, "grad_norm": 1.0412561893463135, "learning_rate": 0.00028381334588027353, "loss": 0.0204, "num_input_tokens_seen": 179575968, "step": 83135 }, { "epoch": 13.562805872756933, "grad_norm": 0.006631617899984121, "learning_rate": 0.00028374916560266794, "loss": 0.0116, "num_input_tokens_seen": 179586688, "step": 83140 }, { "epoch": 13.563621533442088, "grad_norm": 0.004046297632157803, "learning_rate": 0.0002836849897076598, "loss": 0.0019, "num_input_tokens_seen": 179597280, "step": 83145 }, { "epoch": 13.564437194127244, "grad_norm": 0.054977841675281525, "learning_rate": 0.00028362081819654984, "loss": 0.0079, "num_input_tokens_seen": 179607904, "step": 83150 }, { "epoch": 13.565252854812398, "grad_norm": 0.00645839050412178, "learning_rate": 0.00028355665107063845, "loss": 0.0023, "num_input_tokens_seen": 179620256, "step": 83155 }, { "epoch": 13.566068515497554, "grad_norm": 0.0034914060961455107, "learning_rate": 0.00028349248833122603, "loss": 0.0818, "num_input_tokens_seen": 179630720, "step": 83160 }, { "epoch": 13.566884176182707, "grad_norm": 0.03189859911799431, "learning_rate": 0.0002834283299796131, "loss": 0.0569, "num_input_tokens_seen": 179640352, "step": 83165 }, { "epoch": 13.567699836867863, "grad_norm": 0.010987777262926102, "learning_rate": 0.00028336417601709975, "loss": 0.0198, "num_input_tokens_seen": 179652128, "step": 83170 }, { "epoch": 13.568515497553017, "grad_norm": 0.009719179011881351, "learning_rate": 0.0002833000264449862, "loss": 0.0786, "num_input_tokens_seen": 179663584, "step": 83175 }, { "epoch": 13.569331158238173, "grad_norm": 0.008665206842124462, "learning_rate": 0.00028323588126457255, "loss": 0.0055, "num_input_tokens_seen": 179674912, "step": 83180 }, { "epoch": 13.570146818923329, "grad_norm": 0.002040495164692402, "learning_rate": 0.00028317174047715873, "loss": 0.093, "num_input_tokens_seen": 179685664, "step": 83185 }, { "epoch": 13.570962479608482, "grad_norm": 0.010339323431253433, "learning_rate": 0.0002831076040840446, "loss": 0.0843, "num_input_tokens_seen": 179696608, "step": 83190 }, { "epoch": 13.571778140293638, "grad_norm": 0.03784068301320076, "learning_rate": 0.0002830434720865301, "loss": 0.0898, "num_input_tokens_seen": 179706560, "step": 83195 }, { "epoch": 13.572593800978792, "grad_norm": 0.011570471338927746, "learning_rate": 0.0002829793444859148, "loss": 0.0027, "num_input_tokens_seen": 179718336, "step": 83200 }, { "epoch": 13.573409461663948, "grad_norm": 0.0015834379009902477, "learning_rate": 0.0002829152212834984, "loss": 0.0662, "num_input_tokens_seen": 179728896, "step": 83205 }, { "epoch": 13.574225122349104, "grad_norm": 0.025010643526911736, "learning_rate": 0.0002828511024805803, "loss": 0.0212, "num_input_tokens_seen": 179739424, "step": 83210 }, { "epoch": 13.575040783034257, "grad_norm": 0.03622874245047569, "learning_rate": 0.0002827869880784605, "loss": 0.0386, "num_input_tokens_seen": 179749024, "step": 83215 }, { "epoch": 13.575856443719413, "grad_norm": 0.2895975112915039, "learning_rate": 0.00028272287807843744, "loss": 0.0153, "num_input_tokens_seen": 179759712, "step": 83220 }, { "epoch": 13.576672104404567, "grad_norm": 0.03724903613328934, "learning_rate": 0.00028265877248181113, "loss": 0.1597, "num_input_tokens_seen": 179769344, "step": 83225 }, { "epoch": 13.577487765089723, "grad_norm": 0.005164381116628647, "learning_rate": 0.0002825946712898806, "loss": 0.0158, "num_input_tokens_seen": 179781216, "step": 83230 }, { "epoch": 13.578303425774878, "grad_norm": 0.08302219212055206, "learning_rate": 0.0002825305745039447, "loss": 0.1403, "num_input_tokens_seen": 179792448, "step": 83235 }, { "epoch": 13.579119086460032, "grad_norm": 0.05873296037316322, "learning_rate": 0.00028246648212530267, "loss": 0.0066, "num_input_tokens_seen": 179802528, "step": 83240 }, { "epoch": 13.579934747145188, "grad_norm": 0.04700169339776039, "learning_rate": 0.00028240239415525337, "loss": 0.0096, "num_input_tokens_seen": 179813216, "step": 83245 }, { "epoch": 13.580750407830342, "grad_norm": 0.05630794167518616, "learning_rate": 0.0002823383105950955, "loss": 0.0401, "num_input_tokens_seen": 179824864, "step": 83250 }, { "epoch": 13.581566068515498, "grad_norm": 0.035866476595401764, "learning_rate": 0.00028227423144612794, "loss": 0.0995, "num_input_tokens_seen": 179836416, "step": 83255 }, { "epoch": 13.582381729200652, "grad_norm": 0.021164124831557274, "learning_rate": 0.00028221015670964935, "loss": 0.0109, "num_input_tokens_seen": 179848064, "step": 83260 }, { "epoch": 13.583197389885807, "grad_norm": 0.40941694378852844, "learning_rate": 0.0002821460863869582, "loss": 0.0318, "num_input_tokens_seen": 179859968, "step": 83265 }, { "epoch": 13.584013050570963, "grad_norm": 0.0200693067163229, "learning_rate": 0.0002820820204793529, "loss": 0.0065, "num_input_tokens_seen": 179870816, "step": 83270 }, { "epoch": 13.584828711256117, "grad_norm": 0.6771963238716125, "learning_rate": 0.0002820179589881319, "loss": 0.0799, "num_input_tokens_seen": 179881952, "step": 83275 }, { "epoch": 13.585644371941273, "grad_norm": 0.01566133089363575, "learning_rate": 0.00028195390191459356, "loss": 0.0265, "num_input_tokens_seen": 179892896, "step": 83280 }, { "epoch": 13.586460032626427, "grad_norm": 0.00984056293964386, "learning_rate": 0.000281889849260036, "loss": 0.0339, "num_input_tokens_seen": 179903584, "step": 83285 }, { "epoch": 13.587275693311582, "grad_norm": 0.014284429140388966, "learning_rate": 0.00028182580102575726, "loss": 0.0325, "num_input_tokens_seen": 179914528, "step": 83290 }, { "epoch": 13.588091353996738, "grad_norm": 0.010695664212107658, "learning_rate": 0.00028176175721305555, "loss": 0.0376, "num_input_tokens_seen": 179925184, "step": 83295 }, { "epoch": 13.588907014681892, "grad_norm": 0.04023078456521034, "learning_rate": 0.0002816977178232286, "loss": 0.0056, "num_input_tokens_seen": 179934880, "step": 83300 }, { "epoch": 13.589722675367048, "grad_norm": 0.11148897558450699, "learning_rate": 0.0002816336828575744, "loss": 0.0134, "num_input_tokens_seen": 179944768, "step": 83305 }, { "epoch": 13.590538336052202, "grad_norm": 0.02717600390315056, "learning_rate": 0.0002815696523173906, "loss": 0.0038, "num_input_tokens_seen": 179954240, "step": 83310 }, { "epoch": 13.591353996737357, "grad_norm": 0.05596160888671875, "learning_rate": 0.0002815056262039749, "loss": 0.0077, "num_input_tokens_seen": 179965568, "step": 83315 }, { "epoch": 13.592169657422513, "grad_norm": 0.008291252888739109, "learning_rate": 0.0002814416045186249, "loss": 0.0573, "num_input_tokens_seen": 179976032, "step": 83320 }, { "epoch": 13.592985318107667, "grad_norm": 1.1229381561279297, "learning_rate": 0.00028137758726263796, "loss": 0.0458, "num_input_tokens_seen": 179987232, "step": 83325 }, { "epoch": 13.593800978792823, "grad_norm": 0.07029259949922562, "learning_rate": 0.0002813135744373114, "loss": 0.0212, "num_input_tokens_seen": 179998176, "step": 83330 }, { "epoch": 13.594616639477977, "grad_norm": 0.0030100038275122643, "learning_rate": 0.000281249566043943, "loss": 0.018, "num_input_tokens_seen": 180008768, "step": 83335 }, { "epoch": 13.595432300163132, "grad_norm": 0.005541645456105471, "learning_rate": 0.0002811855620838294, "loss": 0.0071, "num_input_tokens_seen": 180018208, "step": 83340 }, { "epoch": 13.596247960848288, "grad_norm": 0.008036543615162373, "learning_rate": 0.00028112156255826826, "loss": 0.0709, "num_input_tokens_seen": 180029792, "step": 83345 }, { "epoch": 13.597063621533442, "grad_norm": 0.0052529601380229, "learning_rate": 0.000281057567468556, "loss": 0.009, "num_input_tokens_seen": 180041184, "step": 83350 }, { "epoch": 13.597879282218598, "grad_norm": 0.1867624670267105, "learning_rate": 0.00028099357681599004, "loss": 0.011, "num_input_tokens_seen": 180052032, "step": 83355 }, { "epoch": 13.598694942903752, "grad_norm": 0.04028286039829254, "learning_rate": 0.0002809295906018671, "loss": 0.0057, "num_input_tokens_seen": 180061664, "step": 83360 }, { "epoch": 13.599510603588907, "grad_norm": 0.056735120713710785, "learning_rate": 0.00028086560882748386, "loss": 0.1783, "num_input_tokens_seen": 180072960, "step": 83365 }, { "epoch": 13.600326264274061, "grad_norm": 0.0013902663486078382, "learning_rate": 0.00028080163149413705, "loss": 0.0133, "num_input_tokens_seen": 180084576, "step": 83370 }, { "epoch": 13.601141924959217, "grad_norm": 0.0006005800678394735, "learning_rate": 0.0002807376586031233, "loss": 0.0062, "num_input_tokens_seen": 180095008, "step": 83375 }, { "epoch": 13.601957585644373, "grad_norm": 0.002407664433121681, "learning_rate": 0.0002806736901557391, "loss": 0.0853, "num_input_tokens_seen": 180106784, "step": 83380 }, { "epoch": 13.602773246329527, "grad_norm": 0.04227178543806076, "learning_rate": 0.00028060972615328065, "loss": 0.013, "num_input_tokens_seen": 180117152, "step": 83385 }, { "epoch": 13.603588907014682, "grad_norm": 0.00501489220187068, "learning_rate": 0.00028054576659704457, "loss": 0.0408, "num_input_tokens_seen": 180128576, "step": 83390 }, { "epoch": 13.604404567699836, "grad_norm": 0.00669768825173378, "learning_rate": 0.00028048181148832685, "loss": 0.0052, "num_input_tokens_seen": 180139264, "step": 83395 }, { "epoch": 13.605220228384992, "grad_norm": 0.02733282558619976, "learning_rate": 0.00028041786082842366, "loss": 0.0031, "num_input_tokens_seen": 180148736, "step": 83400 }, { "epoch": 13.606035889070148, "grad_norm": 0.0024951754603534937, "learning_rate": 0.0002803539146186311, "loss": 0.0068, "num_input_tokens_seen": 180158944, "step": 83405 }, { "epoch": 13.606851549755302, "grad_norm": 0.0043714833445847034, "learning_rate": 0.0002802899728602452, "loss": 0.0069, "num_input_tokens_seen": 180169664, "step": 83410 }, { "epoch": 13.607667210440457, "grad_norm": 0.0753655731678009, "learning_rate": 0.00028022603555456164, "loss": 0.0284, "num_input_tokens_seen": 180180064, "step": 83415 }, { "epoch": 13.608482871125611, "grad_norm": 0.013464689254760742, "learning_rate": 0.00028016210270287635, "loss": 0.0039, "num_input_tokens_seen": 180190912, "step": 83420 }, { "epoch": 13.609298531810767, "grad_norm": 0.06028769165277481, "learning_rate": 0.00028009817430648483, "loss": 0.0051, "num_input_tokens_seen": 180202880, "step": 83425 }, { "epoch": 13.61011419249592, "grad_norm": 0.014267779886722565, "learning_rate": 0.00028003425036668287, "loss": 0.0056, "num_input_tokens_seen": 180213728, "step": 83430 }, { "epoch": 13.610929853181077, "grad_norm": 0.0189402736723423, "learning_rate": 0.00027997033088476554, "loss": 0.0193, "num_input_tokens_seen": 180223936, "step": 83435 }, { "epoch": 13.611745513866232, "grad_norm": 0.18437455594539642, "learning_rate": 0.000279906415862029, "loss": 0.0112, "num_input_tokens_seen": 180234624, "step": 83440 }, { "epoch": 13.612561174551386, "grad_norm": 0.2045287787914276, "learning_rate": 0.00027984250529976783, "loss": 0.0131, "num_input_tokens_seen": 180245088, "step": 83445 }, { "epoch": 13.613376835236542, "grad_norm": 0.06810999661684036, "learning_rate": 0.000279778599199278, "loss": 0.0113, "num_input_tokens_seen": 180257344, "step": 83450 }, { "epoch": 13.614192495921696, "grad_norm": 0.01237307209521532, "learning_rate": 0.0002797146975618538, "loss": 0.0069, "num_input_tokens_seen": 180268672, "step": 83455 }, { "epoch": 13.615008156606851, "grad_norm": 0.0055781882256269455, "learning_rate": 0.0002796508003887911, "loss": 0.0053, "num_input_tokens_seen": 180280640, "step": 83460 }, { "epoch": 13.615823817292007, "grad_norm": 0.08466996997594833, "learning_rate": 0.00027958690768138406, "loss": 0.0102, "num_input_tokens_seen": 180290656, "step": 83465 }, { "epoch": 13.616639477977161, "grad_norm": 0.2827630639076233, "learning_rate": 0.0002795230194409283, "loss": 0.0136, "num_input_tokens_seen": 180300672, "step": 83470 }, { "epoch": 13.617455138662317, "grad_norm": 0.037059321999549866, "learning_rate": 0.00027945913566871793, "loss": 0.0044, "num_input_tokens_seen": 180311008, "step": 83475 }, { "epoch": 13.61827079934747, "grad_norm": 0.38653436303138733, "learning_rate": 0.0002793952563660483, "loss": 0.0219, "num_input_tokens_seen": 180322048, "step": 83480 }, { "epoch": 13.619086460032626, "grad_norm": 0.250503808259964, "learning_rate": 0.0002793313815342133, "loss": 0.0205, "num_input_tokens_seen": 180332960, "step": 83485 }, { "epoch": 13.619902120717782, "grad_norm": 0.4236673414707184, "learning_rate": 0.0002792675111745081, "loss": 0.0141, "num_input_tokens_seen": 180343040, "step": 83490 }, { "epoch": 13.620717781402936, "grad_norm": 0.009462445043027401, "learning_rate": 0.0002792036452882265, "loss": 0.1319, "num_input_tokens_seen": 180353792, "step": 83495 }, { "epoch": 13.621533442088092, "grad_norm": 0.005843911319971085, "learning_rate": 0.00027913978387666326, "loss": 0.0058, "num_input_tokens_seen": 180363264, "step": 83500 }, { "epoch": 13.622349102773246, "grad_norm": 0.06309421360492706, "learning_rate": 0.0002790759269411125, "loss": 0.0062, "num_input_tokens_seen": 180374272, "step": 83505 }, { "epoch": 13.623164763458401, "grad_norm": 0.005838216748088598, "learning_rate": 0.00027901207448286836, "loss": 0.112, "num_input_tokens_seen": 180385952, "step": 83510 }, { "epoch": 13.623980424143557, "grad_norm": 0.15519148111343384, "learning_rate": 0.0002789482265032249, "loss": 0.0082, "num_input_tokens_seen": 180396192, "step": 83515 }, { "epoch": 13.624796084828711, "grad_norm": 0.024984611198306084, "learning_rate": 0.00027888438300347607, "loss": 0.1304, "num_input_tokens_seen": 180406976, "step": 83520 }, { "epoch": 13.625611745513867, "grad_norm": 0.3052389323711395, "learning_rate": 0.00027882054398491564, "loss": 0.0616, "num_input_tokens_seen": 180418752, "step": 83525 }, { "epoch": 13.62642740619902, "grad_norm": 0.007396694738417864, "learning_rate": 0.0002787567094488375, "loss": 0.0045, "num_input_tokens_seen": 180430048, "step": 83530 }, { "epoch": 13.627243066884176, "grad_norm": 0.019618911668658257, "learning_rate": 0.00027869287939653534, "loss": 0.0045, "num_input_tokens_seen": 180441792, "step": 83535 }, { "epoch": 13.62805872756933, "grad_norm": 0.012303023599088192, "learning_rate": 0.0002786290538293027, "loss": 0.0082, "num_input_tokens_seen": 180453152, "step": 83540 }, { "epoch": 13.628874388254486, "grad_norm": 0.09575760364532471, "learning_rate": 0.00027856523274843314, "loss": 0.0049, "num_input_tokens_seen": 180463296, "step": 83545 }, { "epoch": 13.629690048939642, "grad_norm": 0.014896899461746216, "learning_rate": 0.00027850141615521983, "loss": 0.1152, "num_input_tokens_seen": 180473856, "step": 83550 }, { "epoch": 13.630505709624796, "grad_norm": 0.14120285212993622, "learning_rate": 0.0002784376040509567, "loss": 0.0099, "num_input_tokens_seen": 180483264, "step": 83555 }, { "epoch": 13.631321370309951, "grad_norm": 0.01524326205253601, "learning_rate": 0.00027837379643693615, "loss": 0.0083, "num_input_tokens_seen": 180493376, "step": 83560 }, { "epoch": 13.632137030995105, "grad_norm": 0.161854088306427, "learning_rate": 0.0002783099933144523, "loss": 0.0526, "num_input_tokens_seen": 180504064, "step": 83565 }, { "epoch": 13.632952691680261, "grad_norm": 0.0047560338862240314, "learning_rate": 0.00027824619468479715, "loss": 0.0112, "num_input_tokens_seen": 180515392, "step": 83570 }, { "epoch": 13.633768352365417, "grad_norm": 0.027127450332045555, "learning_rate": 0.00027818240054926463, "loss": 0.007, "num_input_tokens_seen": 180525216, "step": 83575 }, { "epoch": 13.63458401305057, "grad_norm": 0.0030362384859472513, "learning_rate": 0.0002781186109091467, "loss": 0.0076, "num_input_tokens_seen": 180537024, "step": 83580 }, { "epoch": 13.635399673735726, "grad_norm": 0.00605916790664196, "learning_rate": 0.0002780548257657371, "loss": 0.0026, "num_input_tokens_seen": 180547744, "step": 83585 }, { "epoch": 13.63621533442088, "grad_norm": 0.06712619960308075, "learning_rate": 0.00027799104512032756, "loss": 0.0096, "num_input_tokens_seen": 180557472, "step": 83590 }, { "epoch": 13.637030995106036, "grad_norm": 0.0055035678669810295, "learning_rate": 0.0002779272689742115, "loss": 0.0017, "num_input_tokens_seen": 180568320, "step": 83595 }, { "epoch": 13.63784665579119, "grad_norm": 0.009830011986196041, "learning_rate": 0.0002778634973286807, "loss": 0.0036, "num_input_tokens_seen": 180579040, "step": 83600 }, { "epoch": 13.638662316476346, "grad_norm": 0.059241216629743576, "learning_rate": 0.00027779973018502834, "loss": 0.0062, "num_input_tokens_seen": 180590016, "step": 83605 }, { "epoch": 13.639477977161501, "grad_norm": 0.002657790668308735, "learning_rate": 0.0002777359675445459, "loss": 0.0021, "num_input_tokens_seen": 180600960, "step": 83610 }, { "epoch": 13.640293637846655, "grad_norm": 0.11882360279560089, "learning_rate": 0.00027767220940852646, "loss": 0.0083, "num_input_tokens_seen": 180610848, "step": 83615 }, { "epoch": 13.641109298531811, "grad_norm": 0.004164085257798433, "learning_rate": 0.0002776084557782613, "loss": 0.0014, "num_input_tokens_seen": 180622048, "step": 83620 }, { "epoch": 13.641924959216965, "grad_norm": 0.007447056006640196, "learning_rate": 0.00027754470665504336, "loss": 0.0055, "num_input_tokens_seen": 180634016, "step": 83625 }, { "epoch": 13.64274061990212, "grad_norm": 0.007610693573951721, "learning_rate": 0.0002774809620401637, "loss": 0.002, "num_input_tokens_seen": 180644576, "step": 83630 }, { "epoch": 13.643556280587276, "grad_norm": 0.017725123092532158, "learning_rate": 0.000277417221934915, "loss": 0.0033, "num_input_tokens_seen": 180656544, "step": 83635 }, { "epoch": 13.64437194127243, "grad_norm": 0.002556213643401861, "learning_rate": 0.00027735348634058834, "loss": 0.0045, "num_input_tokens_seen": 180668480, "step": 83640 }, { "epoch": 13.645187601957586, "grad_norm": 2.327058792114258, "learning_rate": 0.0002772897552584759, "loss": 0.1113, "num_input_tokens_seen": 180679424, "step": 83645 }, { "epoch": 13.64600326264274, "grad_norm": 0.0008912270423024893, "learning_rate": 0.000277226028689869, "loss": 0.0042, "num_input_tokens_seen": 180690208, "step": 83650 }, { "epoch": 13.646818923327896, "grad_norm": 0.03987288847565651, "learning_rate": 0.00027716230663605933, "loss": 0.013, "num_input_tokens_seen": 180700704, "step": 83655 }, { "epoch": 13.647634584013051, "grad_norm": 0.09051557630300522, "learning_rate": 0.00027709858909833823, "loss": 0.0127, "num_input_tokens_seen": 180711488, "step": 83660 }, { "epoch": 13.648450244698205, "grad_norm": 0.2718043923377991, "learning_rate": 0.000277034876077997, "loss": 0.0173, "num_input_tokens_seen": 180722400, "step": 83665 }, { "epoch": 13.649265905383361, "grad_norm": 0.012056714855134487, "learning_rate": 0.00027697116757632677, "loss": 0.0162, "num_input_tokens_seen": 180733472, "step": 83670 }, { "epoch": 13.650081566068515, "grad_norm": 0.0026014503091573715, "learning_rate": 0.0002769074635946188, "loss": 0.0022, "num_input_tokens_seen": 180744416, "step": 83675 }, { "epoch": 13.65089722675367, "grad_norm": 0.006171499844640493, "learning_rate": 0.0002768437641341641, "loss": 0.0018, "num_input_tokens_seen": 180756480, "step": 83680 }, { "epoch": 13.651712887438826, "grad_norm": 0.018329549580812454, "learning_rate": 0.00027678006919625367, "loss": 0.0038, "num_input_tokens_seen": 180766912, "step": 83685 }, { "epoch": 13.65252854812398, "grad_norm": 0.09856487810611725, "learning_rate": 0.00027671637878217824, "loss": 0.0099, "num_input_tokens_seen": 180777888, "step": 83690 }, { "epoch": 13.653344208809136, "grad_norm": 0.0025012048427015543, "learning_rate": 0.0002766526928932285, "loss": 0.0018, "num_input_tokens_seen": 180789408, "step": 83695 }, { "epoch": 13.65415986949429, "grad_norm": 0.007546972017735243, "learning_rate": 0.0002765890115306956, "loss": 0.0046, "num_input_tokens_seen": 180799712, "step": 83700 }, { "epoch": 13.654975530179446, "grad_norm": 0.023198723793029785, "learning_rate": 0.0002765253346958695, "loss": 0.0029, "num_input_tokens_seen": 180811104, "step": 83705 }, { "epoch": 13.655791190864601, "grad_norm": 0.0014068408636376262, "learning_rate": 0.00027646166239004134, "loss": 0.0065, "num_input_tokens_seen": 180823296, "step": 83710 }, { "epoch": 13.656606851549755, "grad_norm": 0.005566820967942476, "learning_rate": 0.0002763979946145008, "loss": 0.0051, "num_input_tokens_seen": 180834336, "step": 83715 }, { "epoch": 13.65742251223491, "grad_norm": 0.0010641274275258183, "learning_rate": 0.00027633433137053885, "loss": 0.0171, "num_input_tokens_seen": 180844192, "step": 83720 }, { "epoch": 13.658238172920065, "grad_norm": 0.006609211675822735, "learning_rate": 0.00027627067265944514, "loss": 0.0055, "num_input_tokens_seen": 180855616, "step": 83725 }, { "epoch": 13.65905383360522, "grad_norm": 0.06236208230257034, "learning_rate": 0.0002762070184825104, "loss": 0.0218, "num_input_tokens_seen": 180867552, "step": 83730 }, { "epoch": 13.659869494290374, "grad_norm": 0.0006166854873299599, "learning_rate": 0.00027614336884102393, "loss": 0.0025, "num_input_tokens_seen": 180878208, "step": 83735 }, { "epoch": 13.66068515497553, "grad_norm": 0.7124009728431702, "learning_rate": 0.0002760797237362765, "loss": 0.0983, "num_input_tokens_seen": 180889664, "step": 83740 }, { "epoch": 13.661500815660686, "grad_norm": 0.029279787093400955, "learning_rate": 0.00027601608316955715, "loss": 0.0246, "num_input_tokens_seen": 180901280, "step": 83745 }, { "epoch": 13.66231647634584, "grad_norm": 0.0014781494392082095, "learning_rate": 0.0002759524471421562, "loss": 0.0012, "num_input_tokens_seen": 180911712, "step": 83750 }, { "epoch": 13.663132137030995, "grad_norm": 0.007471561431884766, "learning_rate": 0.00027588881565536303, "loss": 0.0023, "num_input_tokens_seen": 180922368, "step": 83755 }, { "epoch": 13.66394779771615, "grad_norm": 0.0038479752838611603, "learning_rate": 0.00027582518871046744, "loss": 0.0024, "num_input_tokens_seen": 180933504, "step": 83760 }, { "epoch": 13.664763458401305, "grad_norm": 0.03010265901684761, "learning_rate": 0.00027576156630875875, "loss": 0.0081, "num_input_tokens_seen": 180943296, "step": 83765 }, { "epoch": 13.66557911908646, "grad_norm": 0.0018516803393140435, "learning_rate": 0.0002756979484515264, "loss": 0.0027, "num_input_tokens_seen": 180954944, "step": 83770 }, { "epoch": 13.666394779771615, "grad_norm": 0.00570902694016695, "learning_rate": 0.00027563433514005966, "loss": 0.0198, "num_input_tokens_seen": 180965504, "step": 83775 }, { "epoch": 13.66721044045677, "grad_norm": 0.5824912190437317, "learning_rate": 0.0002755707263756477, "loss": 0.1139, "num_input_tokens_seen": 180977504, "step": 83780 }, { "epoch": 13.668026101141924, "grad_norm": 0.01962290145456791, "learning_rate": 0.0002755071221595798, "loss": 0.0032, "num_input_tokens_seen": 180988160, "step": 83785 }, { "epoch": 13.66884176182708, "grad_norm": 0.004911630880087614, "learning_rate": 0.0002754435224931447, "loss": 0.0109, "num_input_tokens_seen": 180998368, "step": 83790 }, { "epoch": 13.669657422512234, "grad_norm": 0.00739239202812314, "learning_rate": 0.00027537992737763163, "loss": 0.0415, "num_input_tokens_seen": 181009024, "step": 83795 }, { "epoch": 13.67047308319739, "grad_norm": 0.04392838850617409, "learning_rate": 0.00027531633681432925, "loss": 0.0236, "num_input_tokens_seen": 181020640, "step": 83800 }, { "epoch": 13.671288743882545, "grad_norm": 0.004304789938032627, "learning_rate": 0.0002752527508045263, "loss": 0.1498, "num_input_tokens_seen": 181030368, "step": 83805 }, { "epoch": 13.6721044045677, "grad_norm": 0.0005669619422405958, "learning_rate": 0.0002751891693495115, "loss": 0.0048, "num_input_tokens_seen": 181040896, "step": 83810 }, { "epoch": 13.672920065252855, "grad_norm": 0.011217975057661533, "learning_rate": 0.00027512559245057333, "loss": 0.064, "num_input_tokens_seen": 181052256, "step": 83815 }, { "epoch": 13.673735725938009, "grad_norm": 0.0023022620007395744, "learning_rate": 0.00027506202010900037, "loss": 0.0026, "num_input_tokens_seen": 181062912, "step": 83820 }, { "epoch": 13.674551386623165, "grad_norm": 0.0035705927293747663, "learning_rate": 0.00027499845232608087, "loss": 0.0021, "num_input_tokens_seen": 181073632, "step": 83825 }, { "epoch": 13.67536704730832, "grad_norm": 0.01892230100929737, "learning_rate": 0.00027493488910310316, "loss": 0.0078, "num_input_tokens_seen": 181084576, "step": 83830 }, { "epoch": 13.676182707993474, "grad_norm": 0.008273656480014324, "learning_rate": 0.0002748713304413555, "loss": 0.0102, "num_input_tokens_seen": 181095360, "step": 83835 }, { "epoch": 13.67699836867863, "grad_norm": 0.009142505936324596, "learning_rate": 0.0002748077763421257, "loss": 0.1122, "num_input_tokens_seen": 181107712, "step": 83840 }, { "epoch": 13.677814029363784, "grad_norm": 0.06141829863190651, "learning_rate": 0.0002747442268067024, "loss": 0.0052, "num_input_tokens_seen": 181119008, "step": 83845 }, { "epoch": 13.67862969004894, "grad_norm": 0.0016691337805241346, "learning_rate": 0.00027468068183637265, "loss": 0.0104, "num_input_tokens_seen": 181130720, "step": 83850 }, { "epoch": 13.679445350734095, "grad_norm": 0.0046843248419463634, "learning_rate": 0.0002746171414324249, "loss": 0.0013, "num_input_tokens_seen": 181142976, "step": 83855 }, { "epoch": 13.68026101141925, "grad_norm": 0.44291582703590393, "learning_rate": 0.00027455360559614677, "loss": 0.027, "num_input_tokens_seen": 181153920, "step": 83860 }, { "epoch": 13.681076672104405, "grad_norm": 0.00250981654971838, "learning_rate": 0.00027449007432882576, "loss": 0.0043, "num_input_tokens_seen": 181163712, "step": 83865 }, { "epoch": 13.681892332789559, "grad_norm": 0.011784011498093605, "learning_rate": 0.00027442654763174955, "loss": 0.0846, "num_input_tokens_seen": 181173568, "step": 83870 }, { "epoch": 13.682707993474715, "grad_norm": 0.008331255055963993, "learning_rate": 0.00027436302550620545, "loss": 0.0074, "num_input_tokens_seen": 181182752, "step": 83875 }, { "epoch": 13.68352365415987, "grad_norm": 0.008108728565275669, "learning_rate": 0.0002742995079534809, "loss": 0.0083, "num_input_tokens_seen": 181193216, "step": 83880 }, { "epoch": 13.684339314845024, "grad_norm": 0.006428302265703678, "learning_rate": 0.0002742359949748632, "loss": 0.0036, "num_input_tokens_seen": 181204512, "step": 83885 }, { "epoch": 13.68515497553018, "grad_norm": 0.005887574050575495, "learning_rate": 0.0002741724865716394, "loss": 0.0097, "num_input_tokens_seen": 181215520, "step": 83890 }, { "epoch": 13.685970636215334, "grad_norm": 0.021212387830018997, "learning_rate": 0.0002741089827450966, "loss": 0.0233, "num_input_tokens_seen": 181225984, "step": 83895 }, { "epoch": 13.68678629690049, "grad_norm": 0.19733788073062897, "learning_rate": 0.0002740454834965219, "loss": 0.0804, "num_input_tokens_seen": 181237952, "step": 83900 }, { "epoch": 13.687601957585644, "grad_norm": 0.058703526854515076, "learning_rate": 0.0002739819888272021, "loss": 0.0144, "num_input_tokens_seen": 181247840, "step": 83905 }, { "epoch": 13.6884176182708, "grad_norm": 0.0021352344192564487, "learning_rate": 0.000273918498738424, "loss": 0.1252, "num_input_tokens_seen": 181258048, "step": 83910 }, { "epoch": 13.689233278955955, "grad_norm": 0.001603231648914516, "learning_rate": 0.00027385501323147433, "loss": 0.0109, "num_input_tokens_seen": 181268288, "step": 83915 }, { "epoch": 13.690048939641109, "grad_norm": 0.0029747539665549994, "learning_rate": 0.00027379153230763976, "loss": 0.0035, "num_input_tokens_seen": 181278304, "step": 83920 }, { "epoch": 13.690864600326265, "grad_norm": 0.0037416473496705294, "learning_rate": 0.00027372805596820673, "loss": 0.0032, "num_input_tokens_seen": 181289312, "step": 83925 }, { "epoch": 13.691680261011419, "grad_norm": 0.0018563204212114215, "learning_rate": 0.0002736645842144616, "loss": 0.004, "num_input_tokens_seen": 181300928, "step": 83930 }, { "epoch": 13.692495921696574, "grad_norm": 0.042291343212127686, "learning_rate": 0.00027360111704769093, "loss": 0.0041, "num_input_tokens_seen": 181311840, "step": 83935 }, { "epoch": 13.69331158238173, "grad_norm": 0.0007176153594627976, "learning_rate": 0.00027353765446918075, "loss": 0.0386, "num_input_tokens_seen": 181321696, "step": 83940 }, { "epoch": 13.694127243066884, "grad_norm": 0.0034198344219475985, "learning_rate": 0.0002734741964802173, "loss": 0.0029, "num_input_tokens_seen": 181333088, "step": 83945 }, { "epoch": 13.69494290375204, "grad_norm": 0.0027348636649549007, "learning_rate": 0.00027341074308208667, "loss": 0.0026, "num_input_tokens_seen": 181344736, "step": 83950 }, { "epoch": 13.695758564437194, "grad_norm": 0.0024968599900603294, "learning_rate": 0.00027334729427607476, "loss": 0.1133, "num_input_tokens_seen": 181355136, "step": 83955 }, { "epoch": 13.69657422512235, "grad_norm": 0.003038478083908558, "learning_rate": 0.00027328385006346746, "loss": 0.0012, "num_input_tokens_seen": 181365824, "step": 83960 }, { "epoch": 13.697389885807503, "grad_norm": 0.1116863489151001, "learning_rate": 0.00027322041044555045, "loss": 0.0469, "num_input_tokens_seen": 181376576, "step": 83965 }, { "epoch": 13.698205546492659, "grad_norm": 0.016957435756921768, "learning_rate": 0.00027315697542360944, "loss": 0.0081, "num_input_tokens_seen": 181387744, "step": 83970 }, { "epoch": 13.699021207177815, "grad_norm": 0.08671362698078156, "learning_rate": 0.00027309354499893045, "loss": 0.006, "num_input_tokens_seen": 181398912, "step": 83975 }, { "epoch": 13.699836867862969, "grad_norm": 0.0035883912350982428, "learning_rate": 0.00027303011917279826, "loss": 0.0014, "num_input_tokens_seen": 181410112, "step": 83980 }, { "epoch": 13.700652528548124, "grad_norm": 0.21623927354812622, "learning_rate": 0.00027296669794649875, "loss": 0.011, "num_input_tokens_seen": 181421600, "step": 83985 }, { "epoch": 13.701468189233278, "grad_norm": 0.001589043764397502, "learning_rate": 0.0002729032813213172, "loss": 0.0017, "num_input_tokens_seen": 181432192, "step": 83990 }, { "epoch": 13.702283849918434, "grad_norm": 0.0007788580842316151, "learning_rate": 0.00027283986929853873, "loss": 0.0078, "num_input_tokens_seen": 181443200, "step": 83995 }, { "epoch": 13.70309951060359, "grad_norm": 0.005699070170521736, "learning_rate": 0.0002727764618794485, "loss": 0.0345, "num_input_tokens_seen": 181453728, "step": 84000 }, { "epoch": 13.703915171288743, "grad_norm": 0.00218218844383955, "learning_rate": 0.00027271305906533146, "loss": 0.0417, "num_input_tokens_seen": 181464192, "step": 84005 }, { "epoch": 13.7047308319739, "grad_norm": 0.026671582832932472, "learning_rate": 0.00027264966085747267, "loss": 0.0071, "num_input_tokens_seen": 181475040, "step": 84010 }, { "epoch": 13.705546492659053, "grad_norm": 0.010932182893157005, "learning_rate": 0.00027258626725715684, "loss": 0.0182, "num_input_tokens_seen": 181485952, "step": 84015 }, { "epoch": 13.706362153344209, "grad_norm": 0.0035217327531427145, "learning_rate": 0.0002725228782656689, "loss": 0.0092, "num_input_tokens_seen": 181497504, "step": 84020 }, { "epoch": 13.707177814029365, "grad_norm": 0.0011677873553708196, "learning_rate": 0.00027245949388429334, "loss": 0.0076, "num_input_tokens_seen": 181508896, "step": 84025 }, { "epoch": 13.707993474714518, "grad_norm": 0.007658544462174177, "learning_rate": 0.0002723961141143148, "loss": 0.0066, "num_input_tokens_seen": 181519904, "step": 84030 }, { "epoch": 13.708809135399674, "grad_norm": 0.009968415834009647, "learning_rate": 0.0002723327389570177, "loss": 0.0185, "num_input_tokens_seen": 181530688, "step": 84035 }, { "epoch": 13.709624796084828, "grad_norm": 0.05588683858513832, "learning_rate": 0.00027226936841368655, "loss": 0.0064, "num_input_tokens_seen": 181541792, "step": 84040 }, { "epoch": 13.710440456769984, "grad_norm": 0.0008577611879445612, "learning_rate": 0.00027220600248560557, "loss": 0.0713, "num_input_tokens_seen": 181552864, "step": 84045 }, { "epoch": 13.71125611745514, "grad_norm": 0.4839954376220703, "learning_rate": 0.00027214264117405884, "loss": 0.0166, "num_input_tokens_seen": 181563264, "step": 84050 }, { "epoch": 13.712071778140293, "grad_norm": 0.006796311587095261, "learning_rate": 0.0002720792844803306, "loss": 0.0013, "num_input_tokens_seen": 181574848, "step": 84055 }, { "epoch": 13.71288743882545, "grad_norm": 0.0028383873868733644, "learning_rate": 0.00027201593240570475, "loss": 0.0028, "num_input_tokens_seen": 181584864, "step": 84060 }, { "epoch": 13.713703099510603, "grad_norm": 0.011330782435834408, "learning_rate": 0.00027195258495146525, "loss": 0.0236, "num_input_tokens_seen": 181596320, "step": 84065 }, { "epoch": 13.714518760195759, "grad_norm": 0.004811130929738283, "learning_rate": 0.00027188924211889593, "loss": 0.002, "num_input_tokens_seen": 181605632, "step": 84070 }, { "epoch": 13.715334420880914, "grad_norm": 0.0025700810365378857, "learning_rate": 0.0002718259039092803, "loss": 0.0044, "num_input_tokens_seen": 181617888, "step": 84075 }, { "epoch": 13.716150081566068, "grad_norm": 0.022676723077893257, "learning_rate": 0.0002717625703239026, "loss": 0.0028, "num_input_tokens_seen": 181629088, "step": 84080 }, { "epoch": 13.716965742251224, "grad_norm": 0.05975797772407532, "learning_rate": 0.00027169924136404553, "loss": 0.0042, "num_input_tokens_seen": 181640064, "step": 84085 }, { "epoch": 13.717781402936378, "grad_norm": 0.0030763172544538975, "learning_rate": 0.00027163591703099335, "loss": 0.0517, "num_input_tokens_seen": 181650432, "step": 84090 }, { "epoch": 13.718597063621534, "grad_norm": 0.007155897095799446, "learning_rate": 0.0002715725973260286, "loss": 0.0326, "num_input_tokens_seen": 181660512, "step": 84095 }, { "epoch": 13.719412724306688, "grad_norm": 0.12784910202026367, "learning_rate": 0.00027150928225043545, "loss": 0.0048, "num_input_tokens_seen": 181672000, "step": 84100 }, { "epoch": 13.720228384991843, "grad_norm": 0.006302386522293091, "learning_rate": 0.00027144597180549603, "loss": 0.0012, "num_input_tokens_seen": 181682688, "step": 84105 }, { "epoch": 13.721044045676999, "grad_norm": 0.004140893928706646, "learning_rate": 0.0002713826659924944, "loss": 0.0049, "num_input_tokens_seen": 181693056, "step": 84110 }, { "epoch": 13.721859706362153, "grad_norm": 0.009677722118794918, "learning_rate": 0.00027131936481271265, "loss": 0.0483, "num_input_tokens_seen": 181703072, "step": 84115 }, { "epoch": 13.722675367047309, "grad_norm": 0.0024610969703644514, "learning_rate": 0.00027125606826743445, "loss": 0.0525, "num_input_tokens_seen": 181714112, "step": 84120 }, { "epoch": 13.723491027732463, "grad_norm": 0.01368603203445673, "learning_rate": 0.0002711927763579418, "loss": 0.0082, "num_input_tokens_seen": 181725088, "step": 84125 }, { "epoch": 13.724306688417618, "grad_norm": 0.00212348741479218, "learning_rate": 0.00027112948908551807, "loss": 0.0019, "num_input_tokens_seen": 181734368, "step": 84130 }, { "epoch": 13.725122349102774, "grad_norm": 0.0046083335764706135, "learning_rate": 0.00027106620645144555, "loss": 0.0172, "num_input_tokens_seen": 181744576, "step": 84135 }, { "epoch": 13.725938009787928, "grad_norm": 0.004685471300035715, "learning_rate": 0.00027100292845700676, "loss": 0.0795, "num_input_tokens_seen": 181754496, "step": 84140 }, { "epoch": 13.726753670473084, "grad_norm": 0.002028076443821192, "learning_rate": 0.0002709396551034842, "loss": 0.0427, "num_input_tokens_seen": 181763840, "step": 84145 }, { "epoch": 13.727569331158238, "grad_norm": 0.0031546044629067183, "learning_rate": 0.00027087638639215994, "loss": 0.005, "num_input_tokens_seen": 181774656, "step": 84150 }, { "epoch": 13.728384991843393, "grad_norm": 0.0017633294919505715, "learning_rate": 0.00027081312232431654, "loss": 0.0322, "num_input_tokens_seen": 181785696, "step": 84155 }, { "epoch": 13.729200652528547, "grad_norm": 0.03633604198694229, "learning_rate": 0.00027074986290123596, "loss": 0.0109, "num_input_tokens_seen": 181796288, "step": 84160 }, { "epoch": 13.730016313213703, "grad_norm": 0.013025002554059029, "learning_rate": 0.0002706866081242001, "loss": 0.028, "num_input_tokens_seen": 181805376, "step": 84165 }, { "epoch": 13.730831973898859, "grad_norm": 0.003769845236092806, "learning_rate": 0.0002706233579944911, "loss": 0.0287, "num_input_tokens_seen": 181814752, "step": 84170 }, { "epoch": 13.731647634584013, "grad_norm": 0.006845514755696058, "learning_rate": 0.00027056011251339073, "loss": 0.0957, "num_input_tokens_seen": 181826496, "step": 84175 }, { "epoch": 13.732463295269168, "grad_norm": 0.009542626328766346, "learning_rate": 0.0002704968716821806, "loss": 0.0032, "num_input_tokens_seen": 181836096, "step": 84180 }, { "epoch": 13.733278955954322, "grad_norm": 0.0029800382908433676, "learning_rate": 0.00027043363550214287, "loss": 0.0934, "num_input_tokens_seen": 181846976, "step": 84185 }, { "epoch": 13.734094616639478, "grad_norm": 0.06575489789247513, "learning_rate": 0.00027037040397455837, "loss": 0.0069, "num_input_tokens_seen": 181856160, "step": 84190 }, { "epoch": 13.734910277324634, "grad_norm": 0.002500128000974655, "learning_rate": 0.0002703071771007093, "loss": 0.0008, "num_input_tokens_seen": 181866496, "step": 84195 }, { "epoch": 13.735725938009788, "grad_norm": 1.1656486988067627, "learning_rate": 0.0002702439548818763, "loss": 0.0549, "num_input_tokens_seen": 181876736, "step": 84200 }, { "epoch": 13.736541598694943, "grad_norm": 0.0023803820367902517, "learning_rate": 0.0002701807373193414, "loss": 0.1148, "num_input_tokens_seen": 181887616, "step": 84205 }, { "epoch": 13.737357259380097, "grad_norm": 0.008336534723639488, "learning_rate": 0.000270117524414385, "loss": 0.0364, "num_input_tokens_seen": 181897536, "step": 84210 }, { "epoch": 13.738172920065253, "grad_norm": 0.003979240078479052, "learning_rate": 0.000270054316168289, "loss": 0.0057, "num_input_tokens_seen": 181909504, "step": 84215 }, { "epoch": 13.738988580750409, "grad_norm": 0.003386629745364189, "learning_rate": 0.0002699911125823336, "loss": 0.004, "num_input_tokens_seen": 181921088, "step": 84220 }, { "epoch": 13.739804241435563, "grad_norm": 0.0035156127996742725, "learning_rate": 0.0002699279136578005, "loss": 0.1362, "num_input_tokens_seen": 181931712, "step": 84225 }, { "epoch": 13.740619902120718, "grad_norm": 0.00738931680098176, "learning_rate": 0.0002698647193959697, "loss": 0.0135, "num_input_tokens_seen": 181942816, "step": 84230 }, { "epoch": 13.741435562805872, "grad_norm": 0.0014010763261467218, "learning_rate": 0.00026980152979812265, "loss": 0.0037, "num_input_tokens_seen": 181954016, "step": 84235 }, { "epoch": 13.742251223491028, "grad_norm": 0.007098844274878502, "learning_rate": 0.0002697383448655393, "loss": 0.0134, "num_input_tokens_seen": 181964448, "step": 84240 }, { "epoch": 13.743066884176184, "grad_norm": 0.0010831760009750724, "learning_rate": 0.00026967516459950084, "loss": 0.0161, "num_input_tokens_seen": 181974880, "step": 84245 }, { "epoch": 13.743882544861338, "grad_norm": 0.46945667266845703, "learning_rate": 0.000269611989001287, "loss": 0.0636, "num_input_tokens_seen": 181986080, "step": 84250 }, { "epoch": 13.744698205546493, "grad_norm": 0.006263429298996925, "learning_rate": 0.0002695488180721789, "loss": 0.0072, "num_input_tokens_seen": 181995360, "step": 84255 }, { "epoch": 13.745513866231647, "grad_norm": 0.025617733597755432, "learning_rate": 0.0002694856518134559, "loss": 0.0214, "num_input_tokens_seen": 182007648, "step": 84260 }, { "epoch": 13.746329526916803, "grad_norm": 0.048633527010679245, "learning_rate": 0.000269422490226399, "loss": 0.0076, "num_input_tokens_seen": 182018080, "step": 84265 }, { "epoch": 13.747145187601957, "grad_norm": 0.017586344853043556, "learning_rate": 0.00026935933331228743, "loss": 0.1629, "num_input_tokens_seen": 182030368, "step": 84270 }, { "epoch": 13.747960848287113, "grad_norm": 0.563724935054779, "learning_rate": 0.00026929618107240173, "loss": 0.0444, "num_input_tokens_seen": 182041504, "step": 84275 }, { "epoch": 13.748776508972268, "grad_norm": 0.002480928786098957, "learning_rate": 0.0002692330335080216, "loss": 0.0222, "num_input_tokens_seen": 182050208, "step": 84280 }, { "epoch": 13.749592169657422, "grad_norm": 0.6787986159324646, "learning_rate": 0.00026916989062042684, "loss": 0.0167, "num_input_tokens_seen": 182060704, "step": 84285 }, { "epoch": 13.750407830342578, "grad_norm": 0.010774930939078331, "learning_rate": 0.0002691067524108971, "loss": 0.0108, "num_input_tokens_seen": 182072544, "step": 84290 }, { "epoch": 13.751223491027732, "grad_norm": 0.022650204598903656, "learning_rate": 0.00026904361888071193, "loss": 0.0087, "num_input_tokens_seen": 182082656, "step": 84295 }, { "epoch": 13.752039151712887, "grad_norm": 0.0077858190052211285, "learning_rate": 0.0002689804900311508, "loss": 0.0397, "num_input_tokens_seen": 182093088, "step": 84300 }, { "epoch": 13.752854812398043, "grad_norm": 0.011099128052592278, "learning_rate": 0.000268917365863493, "loss": 0.0061, "num_input_tokens_seen": 182103648, "step": 84305 }, { "epoch": 13.753670473083197, "grad_norm": 0.001572756445966661, "learning_rate": 0.000268854246379018, "loss": 0.0149, "num_input_tokens_seen": 182114976, "step": 84310 }, { "epoch": 13.754486133768353, "grad_norm": 0.012012061662971973, "learning_rate": 0.00026879113157900496, "loss": 0.0025, "num_input_tokens_seen": 182125056, "step": 84315 }, { "epoch": 13.755301794453507, "grad_norm": 0.004653714597225189, "learning_rate": 0.00026872802146473296, "loss": 0.0057, "num_input_tokens_seen": 182135104, "step": 84320 }, { "epoch": 13.756117455138662, "grad_norm": 2.9742019176483154, "learning_rate": 0.0002686649160374808, "loss": 0.0497, "num_input_tokens_seen": 182146144, "step": 84325 }, { "epoch": 13.756933115823816, "grad_norm": 0.0020722979679703712, "learning_rate": 0.0002686018152985279, "loss": 0.0117, "num_input_tokens_seen": 182158464, "step": 84330 }, { "epoch": 13.757748776508972, "grad_norm": 0.0032649089116603136, "learning_rate": 0.0002685387192491524, "loss": 0.0133, "num_input_tokens_seen": 182168640, "step": 84335 }, { "epoch": 13.758564437194128, "grad_norm": 0.014669415540993214, "learning_rate": 0.0002684756278906338, "loss": 0.0286, "num_input_tokens_seen": 182179712, "step": 84340 }, { "epoch": 13.759380097879282, "grad_norm": 0.08696259558200836, "learning_rate": 0.0002684125412242499, "loss": 0.0501, "num_input_tokens_seen": 182191296, "step": 84345 }, { "epoch": 13.760195758564437, "grad_norm": 0.19893871247768402, "learning_rate": 0.00026834945925128005, "loss": 0.1038, "num_input_tokens_seen": 182201696, "step": 84350 }, { "epoch": 13.761011419249591, "grad_norm": 0.004175386857241392, "learning_rate": 0.00026828638197300185, "loss": 0.005, "num_input_tokens_seen": 182211328, "step": 84355 }, { "epoch": 13.761827079934747, "grad_norm": 0.01928592659533024, "learning_rate": 0.0002682233093906945, "loss": 0.0027, "num_input_tokens_seen": 182222336, "step": 84360 }, { "epoch": 13.762642740619903, "grad_norm": 0.2888076603412628, "learning_rate": 0.00026816024150563546, "loss": 0.0144, "num_input_tokens_seen": 182233728, "step": 84365 }, { "epoch": 13.763458401305057, "grad_norm": 0.46259981393814087, "learning_rate": 0.00026809717831910353, "loss": 0.0295, "num_input_tokens_seen": 182243296, "step": 84370 }, { "epoch": 13.764274061990212, "grad_norm": 0.012987246736884117, "learning_rate": 0.0002680341198323761, "loss": 0.1032, "num_input_tokens_seen": 182254240, "step": 84375 }, { "epoch": 13.765089722675366, "grad_norm": 0.009005763567984104, "learning_rate": 0.0002679710660467319, "loss": 0.0064, "num_input_tokens_seen": 182265632, "step": 84380 }, { "epoch": 13.765905383360522, "grad_norm": 0.0051773120649158955, "learning_rate": 0.00026790801696344814, "loss": 0.0109, "num_input_tokens_seen": 182276960, "step": 84385 }, { "epoch": 13.766721044045678, "grad_norm": 0.01954108476638794, "learning_rate": 0.00026784497258380293, "loss": 0.045, "num_input_tokens_seen": 182288800, "step": 84390 }, { "epoch": 13.767536704730832, "grad_norm": 0.02403266169130802, "learning_rate": 0.0002677819329090738, "loss": 0.0131, "num_input_tokens_seen": 182299136, "step": 84395 }, { "epoch": 13.768352365415987, "grad_norm": 0.005993073806166649, "learning_rate": 0.00026771889794053845, "loss": 0.0096, "num_input_tokens_seen": 182309728, "step": 84400 }, { "epoch": 13.769168026101141, "grad_norm": 0.004992147441953421, "learning_rate": 0.00026765586767947433, "loss": 0.0061, "num_input_tokens_seen": 182320352, "step": 84405 }, { "epoch": 13.769983686786297, "grad_norm": 0.009609325788915157, "learning_rate": 0.00026759284212715873, "loss": 0.0066, "num_input_tokens_seen": 182330880, "step": 84410 }, { "epoch": 13.770799347471453, "grad_norm": 0.004481468815356493, "learning_rate": 0.000267529821284869, "loss": 0.0013, "num_input_tokens_seen": 182341664, "step": 84415 }, { "epoch": 13.771615008156607, "grad_norm": 0.00853494182229042, "learning_rate": 0.0002674668051538824, "loss": 0.0531, "num_input_tokens_seen": 182353024, "step": 84420 }, { "epoch": 13.772430668841762, "grad_norm": 0.05223782733082771, "learning_rate": 0.0002674037937354761, "loss": 0.0315, "num_input_tokens_seen": 182363840, "step": 84425 }, { "epoch": 13.773246329526916, "grad_norm": 0.009661542251706123, "learning_rate": 0.00026734078703092684, "loss": 0.0793, "num_input_tokens_seen": 182374368, "step": 84430 }, { "epoch": 13.774061990212072, "grad_norm": 0.004293730482459068, "learning_rate": 0.0002672777850415117, "loss": 0.0047, "num_input_tokens_seen": 182385024, "step": 84435 }, { "epoch": 13.774877650897226, "grad_norm": 0.018209824338555336, "learning_rate": 0.0002672147877685075, "loss": 0.1032, "num_input_tokens_seen": 182394656, "step": 84440 }, { "epoch": 13.775693311582382, "grad_norm": 0.001877213828265667, "learning_rate": 0.00026715179521319095, "loss": 0.0017, "num_input_tokens_seen": 182405536, "step": 84445 }, { "epoch": 13.776508972267537, "grad_norm": 0.0022872108966112137, "learning_rate": 0.00026708880737683863, "loss": 0.1174, "num_input_tokens_seen": 182416640, "step": 84450 }, { "epoch": 13.777324632952691, "grad_norm": 0.07921797782182693, "learning_rate": 0.00026702582426072705, "loss": 0.008, "num_input_tokens_seen": 182427712, "step": 84455 }, { "epoch": 13.778140293637847, "grad_norm": 0.39221569895744324, "learning_rate": 0.0002669628458661326, "loss": 0.1612, "num_input_tokens_seen": 182438432, "step": 84460 }, { "epoch": 13.778955954323001, "grad_norm": 0.02193089947104454, "learning_rate": 0.000266899872194332, "loss": 0.1029, "num_input_tokens_seen": 182448608, "step": 84465 }, { "epoch": 13.779771615008157, "grad_norm": 0.015678465366363525, "learning_rate": 0.0002668369032466009, "loss": 0.0052, "num_input_tokens_seen": 182459168, "step": 84470 }, { "epoch": 13.780587275693312, "grad_norm": 0.18530204892158508, "learning_rate": 0.0002667739390242161, "loss": 0.0063, "num_input_tokens_seen": 182469664, "step": 84475 }, { "epoch": 13.781402936378466, "grad_norm": 0.373296320438385, "learning_rate": 0.00026671097952845284, "loss": 0.0379, "num_input_tokens_seen": 182479136, "step": 84480 }, { "epoch": 13.782218597063622, "grad_norm": 0.0032780959736555815, "learning_rate": 0.00026664802476058803, "loss": 0.0055, "num_input_tokens_seen": 182489632, "step": 84485 }, { "epoch": 13.783034257748776, "grad_norm": 0.027845237404108047, "learning_rate": 0.00026658507472189654, "loss": 0.0056, "num_input_tokens_seen": 182501024, "step": 84490 }, { "epoch": 13.783849918433932, "grad_norm": 0.013598952442407608, "learning_rate": 0.0002665221294136548, "loss": 0.0163, "num_input_tokens_seen": 182511936, "step": 84495 }, { "epoch": 13.784665579119086, "grad_norm": 0.009678972885012627, "learning_rate": 0.0002664591888371384, "loss": 0.0924, "num_input_tokens_seen": 182521248, "step": 84500 }, { "epoch": 13.785481239804241, "grad_norm": 0.003536543343216181, "learning_rate": 0.00026639625299362276, "loss": 0.0078, "num_input_tokens_seen": 182532256, "step": 84505 }, { "epoch": 13.786296900489397, "grad_norm": 0.005610863212496042, "learning_rate": 0.00026633332188438335, "loss": 0.0093, "num_input_tokens_seen": 182543392, "step": 84510 }, { "epoch": 13.78711256117455, "grad_norm": 0.004347702953964472, "learning_rate": 0.00026627039551069563, "loss": 0.0015, "num_input_tokens_seen": 182553536, "step": 84515 }, { "epoch": 13.787928221859707, "grad_norm": 0.013319053687155247, "learning_rate": 0.00026620747387383494, "loss": 0.0597, "num_input_tokens_seen": 182564448, "step": 84520 }, { "epoch": 13.78874388254486, "grad_norm": 0.033942725509405136, "learning_rate": 0.0002661445569750762, "loss": 0.0065, "num_input_tokens_seen": 182575424, "step": 84525 }, { "epoch": 13.789559543230016, "grad_norm": 0.07782161235809326, "learning_rate": 0.00026608164481569486, "loss": 0.0079, "num_input_tokens_seen": 182586528, "step": 84530 }, { "epoch": 13.790375203915172, "grad_norm": 0.1057814285159111, "learning_rate": 0.0002660187373969656, "loss": 0.0152, "num_input_tokens_seen": 182596224, "step": 84535 }, { "epoch": 13.791190864600326, "grad_norm": 0.014159079641103745, "learning_rate": 0.00026595583472016355, "loss": 0.0036, "num_input_tokens_seen": 182607424, "step": 84540 }, { "epoch": 13.792006525285482, "grad_norm": 0.0020694267004728317, "learning_rate": 0.00026589293678656336, "loss": 0.0023, "num_input_tokens_seen": 182618368, "step": 84545 }, { "epoch": 13.792822185970635, "grad_norm": 0.017610616981983185, "learning_rate": 0.0002658300435974398, "loss": 0.0715, "num_input_tokens_seen": 182628832, "step": 84550 }, { "epoch": 13.793637846655791, "grad_norm": 0.011342491954565048, "learning_rate": 0.00026576715515406747, "loss": 0.0203, "num_input_tokens_seen": 182639552, "step": 84555 }, { "epoch": 13.794453507340947, "grad_norm": 0.0068123419769108295, "learning_rate": 0.0002657042714577209, "loss": 0.0311, "num_input_tokens_seen": 182650560, "step": 84560 }, { "epoch": 13.7952691680261, "grad_norm": 0.0036243554204702377, "learning_rate": 0.0002656413925096745, "loss": 0.004, "num_input_tokens_seen": 182660992, "step": 84565 }, { "epoch": 13.796084828711257, "grad_norm": 0.0034546160604804754, "learning_rate": 0.00026557851831120254, "loss": 0.0406, "num_input_tokens_seen": 182671648, "step": 84570 }, { "epoch": 13.79690048939641, "grad_norm": 0.03969002887606621, "learning_rate": 0.00026551564886357937, "loss": 0.1201, "num_input_tokens_seen": 182683264, "step": 84575 }, { "epoch": 13.797716150081566, "grad_norm": 0.0044247061014175415, "learning_rate": 0.00026545278416807895, "loss": 0.002, "num_input_tokens_seen": 182694976, "step": 84580 }, { "epoch": 13.798531810766722, "grad_norm": 0.04211275652050972, "learning_rate": 0.00026538992422597547, "loss": 0.0143, "num_input_tokens_seen": 182706752, "step": 84585 }, { "epoch": 13.799347471451876, "grad_norm": 0.0638275295495987, "learning_rate": 0.0002653270690385428, "loss": 0.0599, "num_input_tokens_seen": 182718848, "step": 84590 }, { "epoch": 13.800163132137031, "grad_norm": 0.006323820445686579, "learning_rate": 0.00026526421860705474, "loss": 0.0086, "num_input_tokens_seen": 182728928, "step": 84595 }, { "epoch": 13.800978792822185, "grad_norm": 0.007789155002683401, "learning_rate": 0.0002652013729327849, "loss": 0.0041, "num_input_tokens_seen": 182740832, "step": 84600 }, { "epoch": 13.801794453507341, "grad_norm": 0.022198403254151344, "learning_rate": 0.00026513853201700727, "loss": 0.1351, "num_input_tokens_seen": 182752352, "step": 84605 }, { "epoch": 13.802610114192497, "grad_norm": 0.007201942149549723, "learning_rate": 0.00026507569586099527, "loss": 0.0069, "num_input_tokens_seen": 182763488, "step": 84610 }, { "epoch": 13.80342577487765, "grad_norm": 0.009613179601728916, "learning_rate": 0.0002650128644660223, "loss": 0.0054, "num_input_tokens_seen": 182774048, "step": 84615 }, { "epoch": 13.804241435562806, "grad_norm": 0.055438682436943054, "learning_rate": 0.0002649500378333617, "loss": 0.0239, "num_input_tokens_seen": 182784480, "step": 84620 }, { "epoch": 13.80505709624796, "grad_norm": 0.00259758229367435, "learning_rate": 0.0002648872159642868, "loss": 0.0156, "num_input_tokens_seen": 182794912, "step": 84625 }, { "epoch": 13.805872756933116, "grad_norm": 0.01642073690891266, "learning_rate": 0.00026482439886007077, "loss": 0.0083, "num_input_tokens_seen": 182805792, "step": 84630 }, { "epoch": 13.80668841761827, "grad_norm": 0.5491905212402344, "learning_rate": 0.00026476158652198655, "loss": 0.0656, "num_input_tokens_seen": 182818080, "step": 84635 }, { "epoch": 13.807504078303426, "grad_norm": 0.00983067974448204, "learning_rate": 0.00026469877895130727, "loss": 0.211, "num_input_tokens_seen": 182828608, "step": 84640 }, { "epoch": 13.808319738988581, "grad_norm": 0.1057048887014389, "learning_rate": 0.00026463597614930575, "loss": 0.0174, "num_input_tokens_seen": 182839488, "step": 84645 }, { "epoch": 13.809135399673735, "grad_norm": 0.01889374665915966, "learning_rate": 0.00026457317811725466, "loss": 0.0118, "num_input_tokens_seen": 182849856, "step": 84650 }, { "epoch": 13.809951060358891, "grad_norm": 0.012557010166347027, "learning_rate": 0.00026451038485642687, "loss": 0.0055, "num_input_tokens_seen": 182857760, "step": 84655 }, { "epoch": 13.810766721044045, "grad_norm": 0.0034552598372101784, "learning_rate": 0.0002644475963680948, "loss": 0.0596, "num_input_tokens_seen": 182868000, "step": 84660 }, { "epoch": 13.8115823817292, "grad_norm": 0.1016710177063942, "learning_rate": 0.0002643848126535311, "loss": 0.0273, "num_input_tokens_seen": 182880032, "step": 84665 }, { "epoch": 13.812398042414356, "grad_norm": 0.0039877621456980705, "learning_rate": 0.000264322033714008, "loss": 0.0034, "num_input_tokens_seen": 182891136, "step": 84670 }, { "epoch": 13.81321370309951, "grad_norm": 0.009610519744455814, "learning_rate": 0.0002642592595507979, "loss": 0.0055, "num_input_tokens_seen": 182902944, "step": 84675 }, { "epoch": 13.814029363784666, "grad_norm": 0.047046490013599396, "learning_rate": 0.0002641964901651729, "loss": 0.0061, "num_input_tokens_seen": 182913440, "step": 84680 }, { "epoch": 13.81484502446982, "grad_norm": 0.002050283830612898, "learning_rate": 0.0002641337255584052, "loss": 0.0189, "num_input_tokens_seen": 182923872, "step": 84685 }, { "epoch": 13.815660685154976, "grad_norm": 0.016913149505853653, "learning_rate": 0.0002640709657317668, "loss": 0.0074, "num_input_tokens_seen": 182934432, "step": 84690 }, { "epoch": 13.81647634584013, "grad_norm": 0.007640815805643797, "learning_rate": 0.0002640082106865295, "loss": 0.0087, "num_input_tokens_seen": 182946464, "step": 84695 }, { "epoch": 13.817292006525285, "grad_norm": 0.008956543169915676, "learning_rate": 0.00026394546042396525, "loss": 0.0204, "num_input_tokens_seen": 182957728, "step": 84700 }, { "epoch": 13.818107667210441, "grad_norm": 1.4677728414535522, "learning_rate": 0.0002638827149453457, "loss": 0.1167, "num_input_tokens_seen": 182966976, "step": 84705 }, { "epoch": 13.818923327895595, "grad_norm": 0.018476588651537895, "learning_rate": 0.0002638199742519425, "loss": 0.0567, "num_input_tokens_seen": 182977920, "step": 84710 }, { "epoch": 13.81973898858075, "grad_norm": 0.03129766136407852, "learning_rate": 0.00026375723834502686, "loss": 0.008, "num_input_tokens_seen": 182989504, "step": 84715 }, { "epoch": 13.820554649265905, "grad_norm": 0.061310023069381714, "learning_rate": 0.0002636945072258709, "loss": 0.1149, "num_input_tokens_seen": 183000992, "step": 84720 }, { "epoch": 13.82137030995106, "grad_norm": 0.04653109982609749, "learning_rate": 0.00026363178089574516, "loss": 0.0465, "num_input_tokens_seen": 183011776, "step": 84725 }, { "epoch": 13.822185970636216, "grad_norm": 0.0033070375211536884, "learning_rate": 0.0002635690593559216, "loss": 0.0212, "num_input_tokens_seen": 183023072, "step": 84730 }, { "epoch": 13.82300163132137, "grad_norm": 0.0212293341755867, "learning_rate": 0.0002635063426076706, "loss": 0.0363, "num_input_tokens_seen": 183034144, "step": 84735 }, { "epoch": 13.823817292006526, "grad_norm": 0.0049853515811264515, "learning_rate": 0.000263443630652264, "loss": 0.0179, "num_input_tokens_seen": 183044320, "step": 84740 }, { "epoch": 13.82463295269168, "grad_norm": 0.002307276474311948, "learning_rate": 0.00026338092349097186, "loss": 0.0022, "num_input_tokens_seen": 183054368, "step": 84745 }, { "epoch": 13.825448613376835, "grad_norm": 0.004544033668935299, "learning_rate": 0.00026331822112506576, "loss": 0.0037, "num_input_tokens_seen": 183064800, "step": 84750 }, { "epoch": 13.826264274061991, "grad_norm": 0.002783995820209384, "learning_rate": 0.0002632555235558161, "loss": 0.005, "num_input_tokens_seen": 183075168, "step": 84755 }, { "epoch": 13.827079934747145, "grad_norm": 0.04571967199444771, "learning_rate": 0.00026319283078449365, "loss": 0.0074, "num_input_tokens_seen": 183087296, "step": 84760 }, { "epoch": 13.8278955954323, "grad_norm": 0.6793679594993591, "learning_rate": 0.0002631301428123688, "loss": 0.1606, "num_input_tokens_seen": 183097184, "step": 84765 }, { "epoch": 13.828711256117455, "grad_norm": 0.04691294580698013, "learning_rate": 0.00026306745964071223, "loss": 0.0063, "num_input_tokens_seen": 183107712, "step": 84770 }, { "epoch": 13.82952691680261, "grad_norm": 0.08239693194627762, "learning_rate": 0.00026300478127079405, "loss": 0.0297, "num_input_tokens_seen": 183118080, "step": 84775 }, { "epoch": 13.830342577487766, "grad_norm": 0.0026116548106074333, "learning_rate": 0.0002629421077038846, "loss": 0.0378, "num_input_tokens_seen": 183129216, "step": 84780 }, { "epoch": 13.83115823817292, "grad_norm": 0.03541423752903938, "learning_rate": 0.00026287943894125415, "loss": 0.0059, "num_input_tokens_seen": 183140736, "step": 84785 }, { "epoch": 13.831973898858076, "grad_norm": 0.00940261036157608, "learning_rate": 0.0002628167749841727, "loss": 0.005, "num_input_tokens_seen": 183151296, "step": 84790 }, { "epoch": 13.83278955954323, "grad_norm": 0.006862754467874765, "learning_rate": 0.0002627541158339101, "loss": 0.0065, "num_input_tokens_seen": 183163488, "step": 84795 }, { "epoch": 13.833605220228385, "grad_norm": 0.015600350685417652, "learning_rate": 0.0002626914614917364, "loss": 0.0099, "num_input_tokens_seen": 183174272, "step": 84800 }, { "epoch": 13.83442088091354, "grad_norm": 0.008726726286113262, "learning_rate": 0.0002626288119589212, "loss": 0.0078, "num_input_tokens_seen": 183184384, "step": 84805 }, { "epoch": 13.835236541598695, "grad_norm": 0.009681164287030697, "learning_rate": 0.0002625661672367343, "loss": 0.0034, "num_input_tokens_seen": 183195552, "step": 84810 }, { "epoch": 13.83605220228385, "grad_norm": 0.012375738471746445, "learning_rate": 0.00026250352732644524, "loss": 0.0035, "num_input_tokens_seen": 183207328, "step": 84815 }, { "epoch": 13.836867862969005, "grad_norm": 1.4686344861984253, "learning_rate": 0.0002624408922293232, "loss": 0.0164, "num_input_tokens_seen": 183218048, "step": 84820 }, { "epoch": 13.83768352365416, "grad_norm": 0.004356234800070524, "learning_rate": 0.0002623782619466383, "loss": 0.0714, "num_input_tokens_seen": 183228928, "step": 84825 }, { "epoch": 13.838499184339314, "grad_norm": 0.0027383537963032722, "learning_rate": 0.00026231563647965896, "loss": 0.0056, "num_input_tokens_seen": 183239776, "step": 84830 }, { "epoch": 13.83931484502447, "grad_norm": 0.9869494438171387, "learning_rate": 0.00026225301582965524, "loss": 0.0909, "num_input_tokens_seen": 183250144, "step": 84835 }, { "epoch": 13.840130505709626, "grad_norm": 0.003971732687205076, "learning_rate": 0.0002621903999978953, "loss": 0.022, "num_input_tokens_seen": 183261344, "step": 84840 }, { "epoch": 13.84094616639478, "grad_norm": 0.004560328088700771, "learning_rate": 0.0002621277889856489, "loss": 0.0167, "num_input_tokens_seen": 183273120, "step": 84845 }, { "epoch": 13.841761827079935, "grad_norm": 0.0031801187433302402, "learning_rate": 0.0002620651827941843, "loss": 0.0026, "num_input_tokens_seen": 183284384, "step": 84850 }, { "epoch": 13.84257748776509, "grad_norm": 0.218495711684227, "learning_rate": 0.00026200258142477107, "loss": 0.0102, "num_input_tokens_seen": 183294720, "step": 84855 }, { "epoch": 13.843393148450245, "grad_norm": 0.0040638744831085205, "learning_rate": 0.00026193998487867697, "loss": 0.0164, "num_input_tokens_seen": 183305792, "step": 84860 }, { "epoch": 13.844208809135399, "grad_norm": 0.6690198183059692, "learning_rate": 0.0002618773931571715, "loss": 0.1063, "num_input_tokens_seen": 183316448, "step": 84865 }, { "epoch": 13.845024469820554, "grad_norm": 0.010879079811275005, "learning_rate": 0.00026181480626152236, "loss": 0.0068, "num_input_tokens_seen": 183326880, "step": 84870 }, { "epoch": 13.84584013050571, "grad_norm": 0.30008864402770996, "learning_rate": 0.0002617522241929987, "loss": 0.0098, "num_input_tokens_seen": 183337856, "step": 84875 }, { "epoch": 13.846655791190864, "grad_norm": 0.00918416865170002, "learning_rate": 0.0002616896469528681, "loss": 0.0109, "num_input_tokens_seen": 183348256, "step": 84880 }, { "epoch": 13.84747145187602, "grad_norm": 0.0027300086803734303, "learning_rate": 0.00026162707454239944, "loss": 0.0145, "num_input_tokens_seen": 183360160, "step": 84885 }, { "epoch": 13.848287112561174, "grad_norm": 0.047441281378269196, "learning_rate": 0.00026156450696286014, "loss": 0.0451, "num_input_tokens_seen": 183371648, "step": 84890 }, { "epoch": 13.84910277324633, "grad_norm": 0.002008978510275483, "learning_rate": 0.0002615019442155189, "loss": 0.0036, "num_input_tokens_seen": 183384032, "step": 84895 }, { "epoch": 13.849918433931485, "grad_norm": 0.002454567002132535, "learning_rate": 0.00026143938630164316, "loss": 0.0367, "num_input_tokens_seen": 183395072, "step": 84900 }, { "epoch": 13.850734094616639, "grad_norm": 0.013349570333957672, "learning_rate": 0.00026137683322250094, "loss": 0.0039, "num_input_tokens_seen": 183405824, "step": 84905 }, { "epoch": 13.851549755301795, "grad_norm": 0.06632737070322037, "learning_rate": 0.00026131428497935995, "loss": 0.003, "num_input_tokens_seen": 183416704, "step": 84910 }, { "epoch": 13.852365415986949, "grad_norm": 0.4345989525318146, "learning_rate": 0.0002612517415734877, "loss": 0.0761, "num_input_tokens_seen": 183427616, "step": 84915 }, { "epoch": 13.853181076672104, "grad_norm": 0.15262597799301147, "learning_rate": 0.00026118920300615187, "loss": 0.0094, "num_input_tokens_seen": 183436960, "step": 84920 }, { "epoch": 13.85399673735726, "grad_norm": 0.00835461262613535, "learning_rate": 0.0002611266692786197, "loss": 0.0061, "num_input_tokens_seen": 183447296, "step": 84925 }, { "epoch": 13.854812398042414, "grad_norm": 0.10195163637399673, "learning_rate": 0.00026106414039215865, "loss": 0.0077, "num_input_tokens_seen": 183458048, "step": 84930 }, { "epoch": 13.85562805872757, "grad_norm": 0.030093245208263397, "learning_rate": 0.00026100161634803594, "loss": 0.0028, "num_input_tokens_seen": 183469376, "step": 84935 }, { "epoch": 13.856443719412724, "grad_norm": 0.007934209890663624, "learning_rate": 0.0002609390971475186, "loss": 0.0029, "num_input_tokens_seen": 183480064, "step": 84940 }, { "epoch": 13.85725938009788, "grad_norm": 0.1264921873807907, "learning_rate": 0.00026087658279187357, "loss": 0.0071, "num_input_tokens_seen": 183490880, "step": 84945 }, { "epoch": 13.858075040783035, "grad_norm": 1.145666241645813, "learning_rate": 0.0002608140732823684, "loss": 0.0163, "num_input_tokens_seen": 183502048, "step": 84950 }, { "epoch": 13.858890701468189, "grad_norm": 0.013281790539622307, "learning_rate": 0.00026075156862026896, "loss": 0.0027, "num_input_tokens_seen": 183512960, "step": 84955 }, { "epoch": 13.859706362153345, "grad_norm": 0.003783997381106019, "learning_rate": 0.00026068906880684297, "loss": 0.0039, "num_input_tokens_seen": 183523840, "step": 84960 }, { "epoch": 13.860522022838499, "grad_norm": 0.20053230226039886, "learning_rate": 0.0002606265738433561, "loss": 0.0741, "num_input_tokens_seen": 183534176, "step": 84965 }, { "epoch": 13.861337683523654, "grad_norm": 0.02767985127866268, "learning_rate": 0.0002605640837310758, "loss": 0.0041, "num_input_tokens_seen": 183544320, "step": 84970 }, { "epoch": 13.86215334420881, "grad_norm": 0.0010478844633325934, "learning_rate": 0.0002605015984712678, "loss": 0.0343, "num_input_tokens_seen": 183553600, "step": 84975 }, { "epoch": 13.862969004893964, "grad_norm": 0.002999087329953909, "learning_rate": 0.000260439118065199, "loss": 0.0269, "num_input_tokens_seen": 183563744, "step": 84980 }, { "epoch": 13.86378466557912, "grad_norm": 0.015121141448616982, "learning_rate": 0.000260376642514135, "loss": 0.009, "num_input_tokens_seen": 183574592, "step": 84985 }, { "epoch": 13.864600326264274, "grad_norm": 0.014611300081014633, "learning_rate": 0.00026031417181934276, "loss": 0.0026, "num_input_tokens_seen": 183585344, "step": 84990 }, { "epoch": 13.86541598694943, "grad_norm": 0.0058423797599971294, "learning_rate": 0.0002602517059820875, "loss": 0.0028, "num_input_tokens_seen": 183596544, "step": 84995 }, { "epoch": 13.866231647634583, "grad_norm": 0.04339960962533951, "learning_rate": 0.0002601892450036359, "loss": 0.0038, "num_input_tokens_seen": 183606976, "step": 85000 }, { "epoch": 13.867047308319739, "grad_norm": 0.003346919547766447, "learning_rate": 0.0002601267888852531, "loss": 0.0084, "num_input_tokens_seen": 183618720, "step": 85005 }, { "epoch": 13.867862969004895, "grad_norm": 0.005301931872963905, "learning_rate": 0.0002600643376282056, "loss": 0.0903, "num_input_tokens_seen": 183629760, "step": 85010 }, { "epoch": 13.868678629690049, "grad_norm": 0.005934323649853468, "learning_rate": 0.0002600018912337584, "loss": 0.156, "num_input_tokens_seen": 183639584, "step": 85015 }, { "epoch": 13.869494290375204, "grad_norm": 0.006170249078422785, "learning_rate": 0.00025993944970317763, "loss": 0.065, "num_input_tokens_seen": 183651040, "step": 85020 }, { "epoch": 13.870309951060358, "grad_norm": 0.005769214127212763, "learning_rate": 0.00025987701303772806, "loss": 0.0222, "num_input_tokens_seen": 183663200, "step": 85025 }, { "epoch": 13.871125611745514, "grad_norm": 0.17763355374336243, "learning_rate": 0.00025981458123867566, "loss": 0.0116, "num_input_tokens_seen": 183672864, "step": 85030 }, { "epoch": 13.87194127243067, "grad_norm": 0.016868766397237778, "learning_rate": 0.0002597521543072854, "loss": 0.0018, "num_input_tokens_seen": 183683296, "step": 85035 }, { "epoch": 13.872756933115824, "grad_norm": 0.0015195843297988176, "learning_rate": 0.00025968973224482257, "loss": 0.0126, "num_input_tokens_seen": 183694176, "step": 85040 }, { "epoch": 13.87357259380098, "grad_norm": 0.07358266413211823, "learning_rate": 0.00025962731505255215, "loss": 0.0057, "num_input_tokens_seen": 183706304, "step": 85045 }, { "epoch": 13.874388254486133, "grad_norm": 0.0072519490495324135, "learning_rate": 0.0002595649027317392, "loss": 0.0041, "num_input_tokens_seen": 183717312, "step": 85050 }, { "epoch": 13.875203915171289, "grad_norm": 0.013897925615310669, "learning_rate": 0.0002595024952836484, "loss": 0.2139, "num_input_tokens_seen": 183728928, "step": 85055 }, { "epoch": 13.876019575856443, "grad_norm": 0.0067464206367731094, "learning_rate": 0.00025944009270954463, "loss": 0.007, "num_input_tokens_seen": 183740896, "step": 85060 }, { "epoch": 13.876835236541599, "grad_norm": 0.009500235319137573, "learning_rate": 0.00025937769501069264, "loss": 0.003, "num_input_tokens_seen": 183749856, "step": 85065 }, { "epoch": 13.877650897226754, "grad_norm": 0.6065912246704102, "learning_rate": 0.00025931530218835684, "loss": 0.1251, "num_input_tokens_seen": 183760256, "step": 85070 }, { "epoch": 13.878466557911908, "grad_norm": 0.013110343366861343, "learning_rate": 0.00025925291424380183, "loss": 0.0033, "num_input_tokens_seen": 183771616, "step": 85075 }, { "epoch": 13.879282218597064, "grad_norm": 0.10349424928426743, "learning_rate": 0.00025919053117829185, "loss": 0.0053, "num_input_tokens_seen": 183781984, "step": 85080 }, { "epoch": 13.880097879282218, "grad_norm": 0.10486262291669846, "learning_rate": 0.0002591281529930913, "loss": 0.0494, "num_input_tokens_seen": 183793440, "step": 85085 }, { "epoch": 13.880913539967374, "grad_norm": 0.13725866377353668, "learning_rate": 0.0002590657796894641, "loss": 0.0872, "num_input_tokens_seen": 183803072, "step": 85090 }, { "epoch": 13.88172920065253, "grad_norm": 0.003872843226417899, "learning_rate": 0.0002590034112686749, "loss": 0.0015, "num_input_tokens_seen": 183814176, "step": 85095 }, { "epoch": 13.882544861337683, "grad_norm": 0.005437849089503288, "learning_rate": 0.0002589410477319869, "loss": 0.0028, "num_input_tokens_seen": 183823712, "step": 85100 }, { "epoch": 13.883360522022839, "grad_norm": 0.0025222674012184143, "learning_rate": 0.0002588786890806647, "loss": 0.0515, "num_input_tokens_seen": 183834048, "step": 85105 }, { "epoch": 13.884176182707993, "grad_norm": 0.006505226716399193, "learning_rate": 0.0002588163353159715, "loss": 0.0485, "num_input_tokens_seen": 183844864, "step": 85110 }, { "epoch": 13.884991843393149, "grad_norm": 0.007280663587152958, "learning_rate": 0.00025875398643917147, "loss": 0.0031, "num_input_tokens_seen": 183856672, "step": 85115 }, { "epoch": 13.885807504078304, "grad_norm": 0.26501476764678955, "learning_rate": 0.00025869164245152765, "loss": 0.0507, "num_input_tokens_seen": 183867840, "step": 85120 }, { "epoch": 13.886623164763458, "grad_norm": 0.003495575860142708, "learning_rate": 0.00025862930335430426, "loss": 0.0043, "num_input_tokens_seen": 183878688, "step": 85125 }, { "epoch": 13.887438825448614, "grad_norm": 0.8573818206787109, "learning_rate": 0.0002585669691487637, "loss": 0.0293, "num_input_tokens_seen": 183889920, "step": 85130 }, { "epoch": 13.888254486133768, "grad_norm": 0.09581318497657776, "learning_rate": 0.00025850463983617005, "loss": 0.0196, "num_input_tokens_seen": 183901888, "step": 85135 }, { "epoch": 13.889070146818923, "grad_norm": 0.010820590890944004, "learning_rate": 0.0002584423154177863, "loss": 0.0068, "num_input_tokens_seen": 183912672, "step": 85140 }, { "epoch": 13.88988580750408, "grad_norm": 0.002558765932917595, "learning_rate": 0.0002583799958948754, "loss": 0.0057, "num_input_tokens_seen": 183922752, "step": 85145 }, { "epoch": 13.890701468189233, "grad_norm": 0.007388087455183268, "learning_rate": 0.00025831768126870035, "loss": 0.0068, "num_input_tokens_seen": 183934272, "step": 85150 }, { "epoch": 13.891517128874389, "grad_norm": 0.013861283659934998, "learning_rate": 0.00025825537154052414, "loss": 0.0042, "num_input_tokens_seen": 183944320, "step": 85155 }, { "epoch": 13.892332789559543, "grad_norm": 0.38176852464675903, "learning_rate": 0.00025819306671160953, "loss": 0.1258, "num_input_tokens_seen": 183954688, "step": 85160 }, { "epoch": 13.893148450244698, "grad_norm": 0.06643305718898773, "learning_rate": 0.00025813076678321914, "loss": 0.0046, "num_input_tokens_seen": 183965312, "step": 85165 }, { "epoch": 13.893964110929852, "grad_norm": 0.012475523166358471, "learning_rate": 0.0002580684717566156, "loss": 0.0137, "num_input_tokens_seen": 183977056, "step": 85170 }, { "epoch": 13.894779771615008, "grad_norm": 0.015131724998354912, "learning_rate": 0.0002580061816330614, "loss": 0.096, "num_input_tokens_seen": 183988224, "step": 85175 }, { "epoch": 13.895595432300164, "grad_norm": 0.28440892696380615, "learning_rate": 0.00025794389641381894, "loss": 0.0517, "num_input_tokens_seen": 183999136, "step": 85180 }, { "epoch": 13.896411092985318, "grad_norm": 0.01567523181438446, "learning_rate": 0.0002578816161001505, "loss": 0.0035, "num_input_tokens_seen": 184008928, "step": 85185 }, { "epoch": 13.897226753670473, "grad_norm": 0.00586994644254446, "learning_rate": 0.0002578193406933182, "loss": 0.0591, "num_input_tokens_seen": 184019520, "step": 85190 }, { "epoch": 13.898042414355627, "grad_norm": 0.002432781970128417, "learning_rate": 0.00025775707019458415, "loss": 0.0084, "num_input_tokens_seen": 184030592, "step": 85195 }, { "epoch": 13.898858075040783, "grad_norm": 0.005925891920924187, "learning_rate": 0.0002576948046052105, "loss": 0.185, "num_input_tokens_seen": 184041216, "step": 85200 }, { "epoch": 13.899673735725939, "grad_norm": 0.039722055196762085, "learning_rate": 0.000257632543926459, "loss": 0.0128, "num_input_tokens_seen": 184051712, "step": 85205 }, { "epoch": 13.900489396411093, "grad_norm": 0.01062532514333725, "learning_rate": 0.0002575702881595914, "loss": 0.0024, "num_input_tokens_seen": 184063424, "step": 85210 }, { "epoch": 13.901305057096248, "grad_norm": 0.02566523477435112, "learning_rate": 0.0002575080373058695, "loss": 0.0604, "num_input_tokens_seen": 184074912, "step": 85215 }, { "epoch": 13.902120717781402, "grad_norm": 0.4470478296279907, "learning_rate": 0.0002574457913665548, "loss": 0.0136, "num_input_tokens_seen": 184085696, "step": 85220 }, { "epoch": 13.902936378466558, "grad_norm": 0.049299828708171844, "learning_rate": 0.00025738355034290886, "loss": 0.0032, "num_input_tokens_seen": 184095552, "step": 85225 }, { "epoch": 13.903752039151712, "grad_norm": 0.06083039939403534, "learning_rate": 0.00025732131423619303, "loss": 0.0059, "num_input_tokens_seen": 184107008, "step": 85230 }, { "epoch": 13.904567699836868, "grad_norm": 0.028353175148367882, "learning_rate": 0.0002572590830476685, "loss": 0.0139, "num_input_tokens_seen": 184117120, "step": 85235 }, { "epoch": 13.905383360522023, "grad_norm": 0.37317419052124023, "learning_rate": 0.0002571968567785967, "loss": 0.139, "num_input_tokens_seen": 184129408, "step": 85240 }, { "epoch": 13.906199021207177, "grad_norm": 0.009255973622202873, "learning_rate": 0.0002571346354302387, "loss": 0.0156, "num_input_tokens_seen": 184140384, "step": 85245 }, { "epoch": 13.907014681892333, "grad_norm": 0.005792307201772928, "learning_rate": 0.0002570724190038554, "loss": 0.004, "num_input_tokens_seen": 184150720, "step": 85250 }, { "epoch": 13.907830342577487, "grad_norm": 0.001610096194781363, "learning_rate": 0.00025701020750070765, "loss": 0.0021, "num_input_tokens_seen": 184161344, "step": 85255 }, { "epoch": 13.908646003262643, "grad_norm": 0.008702469989657402, "learning_rate": 0.0002569480009220563, "loss": 0.026, "num_input_tokens_seen": 184171936, "step": 85260 }, { "epoch": 13.909461663947798, "grad_norm": 0.009642422199249268, "learning_rate": 0.00025688579926916213, "loss": 0.0096, "num_input_tokens_seen": 184183200, "step": 85265 }, { "epoch": 13.910277324632952, "grad_norm": 0.009435923770070076, "learning_rate": 0.0002568236025432855, "loss": 0.0263, "num_input_tokens_seen": 184194880, "step": 85270 }, { "epoch": 13.911092985318108, "grad_norm": 0.5569436550140381, "learning_rate": 0.00025676141074568713, "loss": 0.0775, "num_input_tokens_seen": 184205248, "step": 85275 }, { "epoch": 13.911908646003262, "grad_norm": 0.023024236783385277, "learning_rate": 0.00025669922387762747, "loss": 0.0051, "num_input_tokens_seen": 184214976, "step": 85280 }, { "epoch": 13.912724306688418, "grad_norm": 0.4745536744594574, "learning_rate": 0.00025663704194036653, "loss": 0.1285, "num_input_tokens_seen": 184226240, "step": 85285 }, { "epoch": 13.913539967373573, "grad_norm": 0.04086502268910408, "learning_rate": 0.0002565748649351647, "loss": 0.0046, "num_input_tokens_seen": 184237248, "step": 85290 }, { "epoch": 13.914355628058727, "grad_norm": 0.005516092758625746, "learning_rate": 0.0002565126928632821, "loss": 0.0065, "num_input_tokens_seen": 184248640, "step": 85295 }, { "epoch": 13.915171288743883, "grad_norm": 0.02265411801636219, "learning_rate": 0.00025645052572597856, "loss": 0.0034, "num_input_tokens_seen": 184259328, "step": 85300 }, { "epoch": 13.915986949429037, "grad_norm": 0.026116758584976196, "learning_rate": 0.0002563883635245141, "loss": 0.017, "num_input_tokens_seen": 184269984, "step": 85305 }, { "epoch": 13.916802610114193, "grad_norm": 0.0021625554654747248, "learning_rate": 0.0002563262062601486, "loss": 0.0723, "num_input_tokens_seen": 184279712, "step": 85310 }, { "epoch": 13.917618270799348, "grad_norm": 0.02202964387834072, "learning_rate": 0.0002562640539341415, "loss": 0.005, "num_input_tokens_seen": 184289760, "step": 85315 }, { "epoch": 13.918433931484502, "grad_norm": 0.10059604048728943, "learning_rate": 0.0002562019065477527, "loss": 0.0061, "num_input_tokens_seen": 184300512, "step": 85320 }, { "epoch": 13.919249592169658, "grad_norm": 0.08960135281085968, "learning_rate": 0.00025613976410224145, "loss": 0.0109, "num_input_tokens_seen": 184312224, "step": 85325 }, { "epoch": 13.920065252854812, "grad_norm": 0.07900179177522659, "learning_rate": 0.00025607762659886726, "loss": 0.0702, "num_input_tokens_seen": 184323360, "step": 85330 }, { "epoch": 13.920880913539968, "grad_norm": 0.024132825434207916, "learning_rate": 0.00025601549403888934, "loss": 0.0092, "num_input_tokens_seen": 184333152, "step": 85335 }, { "epoch": 13.921696574225122, "grad_norm": 0.02644437924027443, "learning_rate": 0.00025595336642356706, "loss": 0.0067, "num_input_tokens_seen": 184343488, "step": 85340 }, { "epoch": 13.922512234910277, "grad_norm": 0.01221081055700779, "learning_rate": 0.0002558912437541594, "loss": 0.0037, "num_input_tokens_seen": 184354080, "step": 85345 }, { "epoch": 13.923327895595433, "grad_norm": 0.20858865976333618, "learning_rate": 0.0002558291260319253, "loss": 0.0064, "num_input_tokens_seen": 184364352, "step": 85350 }, { "epoch": 13.924143556280587, "grad_norm": 0.006945399101823568, "learning_rate": 0.0002557670132581235, "loss": 0.0041, "num_input_tokens_seen": 184376672, "step": 85355 }, { "epoch": 13.924959216965743, "grad_norm": 0.470683753490448, "learning_rate": 0.00025570490543401345, "loss": 0.0933, "num_input_tokens_seen": 184387104, "step": 85360 }, { "epoch": 13.925774877650896, "grad_norm": 0.4291492700576782, "learning_rate": 0.00025564280256085305, "loss": 0.0118, "num_input_tokens_seen": 184396736, "step": 85365 }, { "epoch": 13.926590538336052, "grad_norm": 0.04624152556061745, "learning_rate": 0.0002555807046399016, "loss": 0.0194, "num_input_tokens_seen": 184406208, "step": 85370 }, { "epoch": 13.927406199021208, "grad_norm": 0.021825360134243965, "learning_rate": 0.00025551861167241675, "loss": 0.0034, "num_input_tokens_seen": 184417280, "step": 85375 }, { "epoch": 13.928221859706362, "grad_norm": 0.33354225754737854, "learning_rate": 0.00025545652365965767, "loss": 0.0929, "num_input_tokens_seen": 184428768, "step": 85380 }, { "epoch": 13.929037520391518, "grad_norm": 0.02854643389582634, "learning_rate": 0.00025539444060288235, "loss": 0.0055, "num_input_tokens_seen": 184439904, "step": 85385 }, { "epoch": 13.929853181076671, "grad_norm": 0.0056158872321248055, "learning_rate": 0.000255332362503349, "loss": 0.0103, "num_input_tokens_seen": 184450496, "step": 85390 }, { "epoch": 13.930668841761827, "grad_norm": 0.027426814660429955, "learning_rate": 0.00025527028936231567, "loss": 0.0137, "num_input_tokens_seen": 184461152, "step": 85395 }, { "epoch": 13.931484502446983, "grad_norm": 0.03571975603699684, "learning_rate": 0.0002552082211810405, "loss": 0.0196, "num_input_tokens_seen": 184472544, "step": 85400 }, { "epoch": 13.932300163132137, "grad_norm": 0.0027300782967358828, "learning_rate": 0.0002551461579607811, "loss": 0.0026, "num_input_tokens_seen": 184482240, "step": 85405 }, { "epoch": 13.933115823817293, "grad_norm": 0.08104506134986877, "learning_rate": 0.00025508409970279554, "loss": 0.0108, "num_input_tokens_seen": 184492448, "step": 85410 }, { "epoch": 13.933931484502446, "grad_norm": 0.0030523252207785845, "learning_rate": 0.00025502204640834135, "loss": 0.0523, "num_input_tokens_seen": 184502816, "step": 85415 }, { "epoch": 13.934747145187602, "grad_norm": 0.04271979257464409, "learning_rate": 0.0002549599980786762, "loss": 0.055, "num_input_tokens_seen": 184514080, "step": 85420 }, { "epoch": 13.935562805872756, "grad_norm": 0.0023610251955688, "learning_rate": 0.0002548979547150576, "loss": 0.0028, "num_input_tokens_seen": 184525312, "step": 85425 }, { "epoch": 13.936378466557912, "grad_norm": 0.00905569177120924, "learning_rate": 0.0002548359163187428, "loss": 0.0094, "num_input_tokens_seen": 184536224, "step": 85430 }, { "epoch": 13.937194127243067, "grad_norm": 0.017019646242260933, "learning_rate": 0.0002547738828909891, "loss": 0.0426, "num_input_tokens_seen": 184547776, "step": 85435 }, { "epoch": 13.938009787928221, "grad_norm": 0.004658792167901993, "learning_rate": 0.0002547118544330539, "loss": 0.1296, "num_input_tokens_seen": 184558656, "step": 85440 }, { "epoch": 13.938825448613377, "grad_norm": 0.001412428799085319, "learning_rate": 0.0002546498309461941, "loss": 0.0019, "num_input_tokens_seen": 184568640, "step": 85445 }, { "epoch": 13.939641109298531, "grad_norm": 3.1154069900512695, "learning_rate": 0.00025458781243166667, "loss": 0.0838, "num_input_tokens_seen": 184580736, "step": 85450 }, { "epoch": 13.940456769983687, "grad_norm": 0.03391305357217789, "learning_rate": 0.0002545257988907286, "loss": 0.0034, "num_input_tokens_seen": 184593184, "step": 85455 }, { "epoch": 13.941272430668842, "grad_norm": 0.00259930407628417, "learning_rate": 0.0002544637903246364, "loss": 0.0108, "num_input_tokens_seen": 184603392, "step": 85460 }, { "epoch": 13.942088091353996, "grad_norm": 0.0017219664296135306, "learning_rate": 0.0002544017867346474, "loss": 0.0688, "num_input_tokens_seen": 184614528, "step": 85465 }, { "epoch": 13.942903752039152, "grad_norm": 0.017398755997419357, "learning_rate": 0.0002543397881220173, "loss": 0.0081, "num_input_tokens_seen": 184625888, "step": 85470 }, { "epoch": 13.943719412724306, "grad_norm": 0.05955229327082634, "learning_rate": 0.00025427779448800345, "loss": 0.0093, "num_input_tokens_seen": 184636000, "step": 85475 }, { "epoch": 13.944535073409462, "grad_norm": 0.012460576370358467, "learning_rate": 0.0002542158058338615, "loss": 0.0027, "num_input_tokens_seen": 184647808, "step": 85480 }, { "epoch": 13.945350734094617, "grad_norm": 0.0023759007453918457, "learning_rate": 0.00025415382216084837, "loss": 0.0084, "num_input_tokens_seen": 184660000, "step": 85485 }, { "epoch": 13.946166394779771, "grad_norm": 0.16356733441352844, "learning_rate": 0.0002540918434702195, "loss": 0.0066, "num_input_tokens_seen": 184670240, "step": 85490 }, { "epoch": 13.946982055464927, "grad_norm": 0.00025423121405765414, "learning_rate": 0.0002540298697632318, "loss": 0.0015, "num_input_tokens_seen": 184681056, "step": 85495 }, { "epoch": 13.947797716150081, "grad_norm": 0.11059121787548065, "learning_rate": 0.0002539679010411404, "loss": 0.0095, "num_input_tokens_seen": 184691264, "step": 85500 }, { "epoch": 13.948613376835237, "grad_norm": 0.020271632820367813, "learning_rate": 0.00025390593730520206, "loss": 0.0056, "num_input_tokens_seen": 184701824, "step": 85505 }, { "epoch": 13.949429037520392, "grad_norm": 0.4572683572769165, "learning_rate": 0.00025384397855667164, "loss": 0.0541, "num_input_tokens_seen": 184712640, "step": 85510 }, { "epoch": 13.950244698205546, "grad_norm": 0.005310698878020048, "learning_rate": 0.0002537820247968057, "loss": 0.0183, "num_input_tokens_seen": 184723552, "step": 85515 }, { "epoch": 13.951060358890702, "grad_norm": 0.005224080290645361, "learning_rate": 0.00025372007602685894, "loss": 0.0073, "num_input_tokens_seen": 184735008, "step": 85520 }, { "epoch": 13.951876019575856, "grad_norm": 0.007012546062469482, "learning_rate": 0.00025365813224808746, "loss": 0.0024, "num_input_tokens_seen": 184745856, "step": 85525 }, { "epoch": 13.952691680261012, "grad_norm": 0.0013683406868949533, "learning_rate": 0.00025359619346174644, "loss": 0.0022, "num_input_tokens_seen": 184756000, "step": 85530 }, { "epoch": 13.953507340946166, "grad_norm": 0.027201242744922638, "learning_rate": 0.0002535342596690912, "loss": 0.0067, "num_input_tokens_seen": 184766752, "step": 85535 }, { "epoch": 13.954323001631321, "grad_norm": 0.6355860829353333, "learning_rate": 0.0002534723308713768, "loss": 0.0863, "num_input_tokens_seen": 184778592, "step": 85540 }, { "epoch": 13.955138662316477, "grad_norm": 0.002448199549689889, "learning_rate": 0.0002534104070698584, "loss": 0.0034, "num_input_tokens_seen": 184789376, "step": 85545 }, { "epoch": 13.955954323001631, "grad_norm": 0.0003716732608154416, "learning_rate": 0.00025334848826579095, "loss": 0.008, "num_input_tokens_seen": 184801152, "step": 85550 }, { "epoch": 13.956769983686787, "grad_norm": 0.3382447361946106, "learning_rate": 0.0002532865744604292, "loss": 0.0286, "num_input_tokens_seen": 184812256, "step": 85555 }, { "epoch": 13.95758564437194, "grad_norm": 0.003654716769233346, "learning_rate": 0.000253224665655028, "loss": 0.0095, "num_input_tokens_seen": 184823680, "step": 85560 }, { "epoch": 13.958401305057096, "grad_norm": 0.12044371664524078, "learning_rate": 0.0002531627618508421, "loss": 0.0155, "num_input_tokens_seen": 184835072, "step": 85565 }, { "epoch": 13.959216965742252, "grad_norm": 0.6087353825569153, "learning_rate": 0.00025310086304912584, "loss": 0.0122, "num_input_tokens_seen": 184844864, "step": 85570 }, { "epoch": 13.960032626427406, "grad_norm": 0.018098818138241768, "learning_rate": 0.0002530389692511337, "loss": 0.0032, "num_input_tokens_seen": 184855776, "step": 85575 }, { "epoch": 13.960848287112562, "grad_norm": 0.014368905685842037, "learning_rate": 0.0002529770804581205, "loss": 0.1265, "num_input_tokens_seen": 184866272, "step": 85580 }, { "epoch": 13.961663947797716, "grad_norm": 0.012142996303737164, "learning_rate": 0.0002529151966713398, "loss": 0.0033, "num_input_tokens_seen": 184877312, "step": 85585 }, { "epoch": 13.962479608482871, "grad_norm": 0.002565637230873108, "learning_rate": 0.00025285331789204633, "loss": 0.0027, "num_input_tokens_seen": 184888192, "step": 85590 }, { "epoch": 13.963295269168025, "grad_norm": 0.0035322627518326044, "learning_rate": 0.0002527914441214937, "loss": 0.0052, "num_input_tokens_seen": 184896448, "step": 85595 }, { "epoch": 13.964110929853181, "grad_norm": 0.3586525022983551, "learning_rate": 0.00025272957536093634, "loss": 0.0117, "num_input_tokens_seen": 184908032, "step": 85600 }, { "epoch": 13.964926590538337, "grad_norm": 0.0012097652070224285, "learning_rate": 0.00025266771161162736, "loss": 0.0018, "num_input_tokens_seen": 184918176, "step": 85605 }, { "epoch": 13.96574225122349, "grad_norm": 0.005765705835074186, "learning_rate": 0.00025260585287482153, "loss": 0.0018, "num_input_tokens_seen": 184927104, "step": 85610 }, { "epoch": 13.966557911908646, "grad_norm": 0.0335857979953289, "learning_rate": 0.0002525439991517714, "loss": 0.004, "num_input_tokens_seen": 184938240, "step": 85615 }, { "epoch": 13.9673735725938, "grad_norm": 0.004372979048639536, "learning_rate": 0.0002524821504437316, "loss": 0.0012, "num_input_tokens_seen": 184949088, "step": 85620 }, { "epoch": 13.968189233278956, "grad_norm": 0.0033910067286342382, "learning_rate": 0.0002524203067519545, "loss": 0.0017, "num_input_tokens_seen": 184960096, "step": 85625 }, { "epoch": 13.969004893964112, "grad_norm": 0.021418794989585876, "learning_rate": 0.00025235846807769433, "loss": 0.0166, "num_input_tokens_seen": 184969504, "step": 85630 }, { "epoch": 13.969820554649266, "grad_norm": 0.004271005280315876, "learning_rate": 0.0002522966344222036, "loss": 0.1278, "num_input_tokens_seen": 184980320, "step": 85635 }, { "epoch": 13.970636215334421, "grad_norm": 0.026607416570186615, "learning_rate": 0.00025223480578673627, "loss": 0.0158, "num_input_tokens_seen": 184991616, "step": 85640 }, { "epoch": 13.971451876019575, "grad_norm": 0.005434891674667597, "learning_rate": 0.00025217298217254446, "loss": 0.012, "num_input_tokens_seen": 185000352, "step": 85645 }, { "epoch": 13.97226753670473, "grad_norm": 0.00019001559121534228, "learning_rate": 0.0002521111635808819, "loss": 0.0244, "num_input_tokens_seen": 185011328, "step": 85650 }, { "epoch": 13.973083197389887, "grad_norm": 0.0019092840375378728, "learning_rate": 0.0002520493500130008, "loss": 0.0061, "num_input_tokens_seen": 185021408, "step": 85655 }, { "epoch": 13.97389885807504, "grad_norm": 0.0820712149143219, "learning_rate": 0.0002519875414701545, "loss": 0.0187, "num_input_tokens_seen": 185031392, "step": 85660 }, { "epoch": 13.974714518760196, "grad_norm": 0.01357912179082632, "learning_rate": 0.0002519257379535949, "loss": 0.0098, "num_input_tokens_seen": 185042176, "step": 85665 }, { "epoch": 13.97553017944535, "grad_norm": 0.0026590253692120314, "learning_rate": 0.00025186393946457516, "loss": 0.1236, "num_input_tokens_seen": 185053248, "step": 85670 }, { "epoch": 13.976345840130506, "grad_norm": 0.25764936208724976, "learning_rate": 0.0002518021460043474, "loss": 0.0109, "num_input_tokens_seen": 185063872, "step": 85675 }, { "epoch": 13.977161500815662, "grad_norm": 0.0026979451067745686, "learning_rate": 0.0002517403575741641, "loss": 0.0074, "num_input_tokens_seen": 185076192, "step": 85680 }, { "epoch": 13.977977161500815, "grad_norm": 0.27346065640449524, "learning_rate": 0.0002516785741752773, "loss": 0.0194, "num_input_tokens_seen": 185086656, "step": 85685 }, { "epoch": 13.978792822185971, "grad_norm": 0.0007937068003229797, "learning_rate": 0.0002516167958089393, "loss": 0.0018, "num_input_tokens_seen": 185097728, "step": 85690 }, { "epoch": 13.979608482871125, "grad_norm": 0.01872055046260357, "learning_rate": 0.00025155502247640196, "loss": 0.0173, "num_input_tokens_seen": 185109120, "step": 85695 }, { "epoch": 13.98042414355628, "grad_norm": 0.04877206310629845, "learning_rate": 0.0002514932541789173, "loss": 0.0046, "num_input_tokens_seen": 185117792, "step": 85700 }, { "epoch": 13.981239804241435, "grad_norm": 0.005501462146639824, "learning_rate": 0.0002514314909177371, "loss": 0.002, "num_input_tokens_seen": 185128928, "step": 85705 }, { "epoch": 13.98205546492659, "grad_norm": 0.01914447546005249, "learning_rate": 0.00025136973269411305, "loss": 0.0059, "num_input_tokens_seen": 185138400, "step": 85710 }, { "epoch": 13.982871125611746, "grad_norm": 0.0917380303144455, "learning_rate": 0.0002513079795092968, "loss": 0.0073, "num_input_tokens_seen": 185149760, "step": 85715 }, { "epoch": 13.9836867862969, "grad_norm": 0.0017335203010588884, "learning_rate": 0.0002512462313645396, "loss": 0.0126, "num_input_tokens_seen": 185160864, "step": 85720 }, { "epoch": 13.984502446982056, "grad_norm": 0.023793328553438187, "learning_rate": 0.0002511844882610935, "loss": 0.0031, "num_input_tokens_seen": 185169888, "step": 85725 }, { "epoch": 13.98531810766721, "grad_norm": 0.20731094479560852, "learning_rate": 0.00025112275020020903, "loss": 0.0397, "num_input_tokens_seen": 185181632, "step": 85730 }, { "epoch": 13.986133768352365, "grad_norm": 0.09553972631692886, "learning_rate": 0.0002510610171831381, "loss": 0.0038, "num_input_tokens_seen": 185192576, "step": 85735 }, { "epoch": 13.986949429037521, "grad_norm": 0.008362910710275173, "learning_rate": 0.00025099928921113113, "loss": 0.0123, "num_input_tokens_seen": 185203968, "step": 85740 }, { "epoch": 13.987765089722675, "grad_norm": 0.1448216736316681, "learning_rate": 0.0002509375662854397, "loss": 0.0107, "num_input_tokens_seen": 185215808, "step": 85745 }, { "epoch": 13.98858075040783, "grad_norm": 0.07602065801620483, "learning_rate": 0.0002508758484073142, "loss": 0.033, "num_input_tokens_seen": 185226720, "step": 85750 }, { "epoch": 13.989396411092985, "grad_norm": 0.0024841674603521824, "learning_rate": 0.00025081413557800604, "loss": 0.0387, "num_input_tokens_seen": 185236704, "step": 85755 }, { "epoch": 13.99021207177814, "grad_norm": 0.10913265496492386, "learning_rate": 0.0002507524277987651, "loss": 0.0089, "num_input_tokens_seen": 185248256, "step": 85760 }, { "epoch": 13.991027732463294, "grad_norm": 0.005257087759673595, "learning_rate": 0.0002506907250708428, "loss": 0.0047, "num_input_tokens_seen": 185258976, "step": 85765 }, { "epoch": 13.99184339314845, "grad_norm": 0.003243145067244768, "learning_rate": 0.0002506290273954888, "loss": 0.0076, "num_input_tokens_seen": 185270048, "step": 85770 }, { "epoch": 13.992659053833606, "grad_norm": 0.033470362424850464, "learning_rate": 0.00025056733477395415, "loss": 0.0084, "num_input_tokens_seen": 185281856, "step": 85775 }, { "epoch": 13.99347471451876, "grad_norm": 0.015511090867221355, "learning_rate": 0.0002505056472074889, "loss": 0.0025, "num_input_tokens_seen": 185293152, "step": 85780 }, { "epoch": 13.994290375203915, "grad_norm": 0.0026244802866131067, "learning_rate": 0.0002504439646973432, "loss": 0.0656, "num_input_tokens_seen": 185305184, "step": 85785 }, { "epoch": 13.99510603588907, "grad_norm": 0.005293934140354395, "learning_rate": 0.00025038228724476715, "loss": 0.0078, "num_input_tokens_seen": 185316480, "step": 85790 }, { "epoch": 13.995921696574225, "grad_norm": 0.3593876361846924, "learning_rate": 0.00025032061485101066, "loss": 0.0218, "num_input_tokens_seen": 185325888, "step": 85795 }, { "epoch": 13.99673735725938, "grad_norm": 0.0052078114822506905, "learning_rate": 0.0002502589475173237, "loss": 0.0028, "num_input_tokens_seen": 185335808, "step": 85800 }, { "epoch": 13.997553017944535, "grad_norm": 0.011453598737716675, "learning_rate": 0.000250197285244956, "loss": 0.0037, "num_input_tokens_seen": 185347040, "step": 85805 }, { "epoch": 13.99836867862969, "grad_norm": 0.00860854797065258, "learning_rate": 0.0002501356280351572, "loss": 0.0203, "num_input_tokens_seen": 185357888, "step": 85810 }, { "epoch": 13.999184339314844, "grad_norm": 0.0036592965479940176, "learning_rate": 0.00025007397588917683, "loss": 0.0063, "num_input_tokens_seen": 185368896, "step": 85815 }, { "epoch": 14.0, "grad_norm": 0.8848853707313538, "learning_rate": 0.0002500123288082644, "loss": 0.2741, "num_input_tokens_seen": 185378480, "step": 85820 }, { "epoch": 14.0, "eval_loss": 0.2316729873418808, "eval_runtime": 104.2082, "eval_samples_per_second": 26.15, "eval_steps_per_second": 6.545, "num_input_tokens_seen": 185378480, "step": 85820 }, { "epoch": 14.000815660685156, "grad_norm": 0.0017862764652818441, "learning_rate": 0.00024995068679366933, "loss": 0.001, "num_input_tokens_seen": 185389712, "step": 85825 }, { "epoch": 14.00163132137031, "grad_norm": 0.00797954760491848, "learning_rate": 0.00024988904984664075, "loss": 0.0041, "num_input_tokens_seen": 185400880, "step": 85830 }, { "epoch": 14.002446982055465, "grad_norm": 0.09861485660076141, "learning_rate": 0.00024982741796842787, "loss": 0.0046, "num_input_tokens_seen": 185411248, "step": 85835 }, { "epoch": 14.00326264274062, "grad_norm": 0.013391591608524323, "learning_rate": 0.00024976579116027975, "loss": 0.1774, "num_input_tokens_seen": 185422608, "step": 85840 }, { "epoch": 14.004078303425775, "grad_norm": 0.0022156049963086843, "learning_rate": 0.00024970416942344533, "loss": 0.0014, "num_input_tokens_seen": 185433040, "step": 85845 }, { "epoch": 14.00489396411093, "grad_norm": 0.009373231790959835, "learning_rate": 0.00024964255275917335, "loss": 0.002, "num_input_tokens_seen": 185444656, "step": 85850 }, { "epoch": 14.005709624796085, "grad_norm": 0.08181966841220856, "learning_rate": 0.00024958094116871274, "loss": 0.0051, "num_input_tokens_seen": 185456368, "step": 85855 }, { "epoch": 14.00652528548124, "grad_norm": 0.003635540371760726, "learning_rate": 0.000249519334653312, "loss": 0.003, "num_input_tokens_seen": 185465872, "step": 85860 }, { "epoch": 14.007340946166394, "grad_norm": 0.0031431580428034067, "learning_rate": 0.0002494577332142195, "loss": 0.0015, "num_input_tokens_seen": 185477520, "step": 85865 }, { "epoch": 14.00815660685155, "grad_norm": 0.04018649458885193, "learning_rate": 0.0002493961368526843, "loss": 0.0112, "num_input_tokens_seen": 185488688, "step": 85870 }, { "epoch": 14.008972267536704, "grad_norm": 0.017326852306723595, "learning_rate": 0.0002493345455699538, "loss": 0.0032, "num_input_tokens_seen": 185499632, "step": 85875 }, { "epoch": 14.00978792822186, "grad_norm": 0.12720097601413727, "learning_rate": 0.000249272959367277, "loss": 0.0134, "num_input_tokens_seen": 185510928, "step": 85880 }, { "epoch": 14.010603588907015, "grad_norm": 0.006081325467675924, "learning_rate": 0.0002492113782459017, "loss": 0.0023, "num_input_tokens_seen": 185521712, "step": 85885 }, { "epoch": 14.01141924959217, "grad_norm": 0.075434111058712, "learning_rate": 0.00024914980220707605, "loss": 0.0024, "num_input_tokens_seen": 185532272, "step": 85890 }, { "epoch": 14.012234910277325, "grad_norm": 0.01512030977755785, "learning_rate": 0.00024908823125204785, "loss": 0.0018, "num_input_tokens_seen": 185543280, "step": 85895 }, { "epoch": 14.013050570962479, "grad_norm": 0.08885496854782104, "learning_rate": 0.00024902666538206494, "loss": 0.0064, "num_input_tokens_seen": 185553392, "step": 85900 }, { "epoch": 14.013866231647635, "grad_norm": 0.022448772564530373, "learning_rate": 0.000248965104598375, "loss": 0.0022, "num_input_tokens_seen": 185564464, "step": 85905 }, { "epoch": 14.01468189233279, "grad_norm": 0.09179883450269699, "learning_rate": 0.0002489035489022257, "loss": 0.0146, "num_input_tokens_seen": 185575664, "step": 85910 }, { "epoch": 14.015497553017944, "grad_norm": 0.00924753863364458, "learning_rate": 0.0002488419982948646, "loss": 0.0129, "num_input_tokens_seen": 185586224, "step": 85915 }, { "epoch": 14.0163132137031, "grad_norm": 0.007630279287695885, "learning_rate": 0.0002487804527775389, "loss": 0.0089, "num_input_tokens_seen": 185596112, "step": 85920 }, { "epoch": 14.017128874388254, "grad_norm": 0.006223857868462801, "learning_rate": 0.0002487189123514961, "loss": 0.0053, "num_input_tokens_seen": 185606544, "step": 85925 }, { "epoch": 14.01794453507341, "grad_norm": 0.019415950402617455, "learning_rate": 0.0002486573770179833, "loss": 0.0046, "num_input_tokens_seen": 185615824, "step": 85930 }, { "epoch": 14.018760195758565, "grad_norm": 0.024973466992378235, "learning_rate": 0.00024859584677824757, "loss": 0.0171, "num_input_tokens_seen": 185627312, "step": 85935 }, { "epoch": 14.01957585644372, "grad_norm": 0.020961524918675423, "learning_rate": 0.00024853432163353596, "loss": 0.0049, "num_input_tokens_seen": 185638064, "step": 85940 }, { "epoch": 14.020391517128875, "grad_norm": 0.06421557813882828, "learning_rate": 0.00024847280158509535, "loss": 0.0089, "num_input_tokens_seen": 185649200, "step": 85945 }, { "epoch": 14.021207177814029, "grad_norm": 0.032138630747795105, "learning_rate": 0.00024841128663417243, "loss": 0.1026, "num_input_tokens_seen": 185658928, "step": 85950 }, { "epoch": 14.022022838499185, "grad_norm": 0.035478681325912476, "learning_rate": 0.000248349776782014, "loss": 0.0041, "num_input_tokens_seen": 185670544, "step": 85955 }, { "epoch": 14.022838499184338, "grad_norm": 0.016271864995360374, "learning_rate": 0.0002482882720298666, "loss": 0.0032, "num_input_tokens_seen": 185681808, "step": 85960 }, { "epoch": 14.023654159869494, "grad_norm": 0.03173820301890373, "learning_rate": 0.0002482267723789767, "loss": 0.0087, "num_input_tokens_seen": 185692720, "step": 85965 }, { "epoch": 14.02446982055465, "grad_norm": 0.004889285191893578, "learning_rate": 0.0002481652778305906, "loss": 0.0028, "num_input_tokens_seen": 185702992, "step": 85970 }, { "epoch": 14.025285481239804, "grad_norm": 0.0015836816746741533, "learning_rate": 0.00024810378838595467, "loss": 0.0017, "num_input_tokens_seen": 185714416, "step": 85975 }, { "epoch": 14.02610114192496, "grad_norm": 0.30528175830841064, "learning_rate": 0.00024804230404631495, "loss": 0.1344, "num_input_tokens_seen": 185724624, "step": 85980 }, { "epoch": 14.026916802610113, "grad_norm": 0.011321947909891605, "learning_rate": 0.0002479808248129174, "loss": 0.0047, "num_input_tokens_seen": 185735248, "step": 85985 }, { "epoch": 14.02773246329527, "grad_norm": 0.007731277495622635, "learning_rate": 0.00024791935068700855, "loss": 0.0023, "num_input_tokens_seen": 185745168, "step": 85990 }, { "epoch": 14.028548123980425, "grad_norm": 0.0016969919670373201, "learning_rate": 0.0002478578816698335, "loss": 0.0021, "num_input_tokens_seen": 185757200, "step": 85995 }, { "epoch": 14.029363784665579, "grad_norm": 0.06662982702255249, "learning_rate": 0.00024779641776263866, "loss": 0.0056, "num_input_tokens_seen": 185767664, "step": 86000 }, { "epoch": 14.030179445350734, "grad_norm": 0.013063081540167332, "learning_rate": 0.00024773495896666904, "loss": 0.0066, "num_input_tokens_seen": 185777040, "step": 86005 }, { "epoch": 14.030995106035888, "grad_norm": 0.0027070636861026287, "learning_rate": 0.0002476735052831706, "loss": 0.0099, "num_input_tokens_seen": 185788432, "step": 86010 }, { "epoch": 14.031810766721044, "grad_norm": 0.05823206901550293, "learning_rate": 0.0002476120567133888, "loss": 0.0028, "num_input_tokens_seen": 185799088, "step": 86015 }, { "epoch": 14.0326264274062, "grad_norm": 0.022384580224752426, "learning_rate": 0.0002475506132585687, "loss": 0.0028, "num_input_tokens_seen": 185808944, "step": 86020 }, { "epoch": 14.033442088091354, "grad_norm": 0.022329630330204964, "learning_rate": 0.0002474891749199558, "loss": 0.0029, "num_input_tokens_seen": 185819376, "step": 86025 }, { "epoch": 14.03425774877651, "grad_norm": 0.0012048856588080525, "learning_rate": 0.000247427741698795, "loss": 0.0026, "num_input_tokens_seen": 185829904, "step": 86030 }, { "epoch": 14.035073409461663, "grad_norm": 0.0027390222530812025, "learning_rate": 0.00024736631359633147, "loss": 0.0061, "num_input_tokens_seen": 185840528, "step": 86035 }, { "epoch": 14.035889070146819, "grad_norm": 0.01180250570178032, "learning_rate": 0.00024730489061381013, "loss": 0.002, "num_input_tokens_seen": 185851984, "step": 86040 }, { "epoch": 14.036704730831975, "grad_norm": 0.008089886978268623, "learning_rate": 0.00024724347275247564, "loss": 0.0603, "num_input_tokens_seen": 185862896, "step": 86045 }, { "epoch": 14.037520391517129, "grad_norm": 0.0009739563683979213, "learning_rate": 0.0002471820600135729, "loss": 0.0009, "num_input_tokens_seen": 185873904, "step": 86050 }, { "epoch": 14.038336052202284, "grad_norm": 0.002611349569633603, "learning_rate": 0.0002471206523983465, "loss": 0.0064, "num_input_tokens_seen": 185885232, "step": 86055 }, { "epoch": 14.039151712887438, "grad_norm": 0.0013060198398306966, "learning_rate": 0.00024705924990804076, "loss": 0.0075, "num_input_tokens_seen": 185895920, "step": 86060 }, { "epoch": 14.039967373572594, "grad_norm": 0.0006362605490721762, "learning_rate": 0.0002469978525439002, "loss": 0.002, "num_input_tokens_seen": 185906928, "step": 86065 }, { "epoch": 14.040783034257748, "grad_norm": 0.0027871110942214727, "learning_rate": 0.00024693646030716923, "loss": 0.0035, "num_input_tokens_seen": 185917840, "step": 86070 }, { "epoch": 14.041598694942904, "grad_norm": 0.044228702783584595, "learning_rate": 0.0002468750731990918, "loss": 0.0053, "num_input_tokens_seen": 185928304, "step": 86075 }, { "epoch": 14.04241435562806, "grad_norm": 0.016400212422013283, "learning_rate": 0.0002468136912209122, "loss": 0.1018, "num_input_tokens_seen": 185938992, "step": 86080 }, { "epoch": 14.043230016313213, "grad_norm": 0.03972737863659859, "learning_rate": 0.0002467523143738743, "loss": 0.0096, "num_input_tokens_seen": 185949584, "step": 86085 }, { "epoch": 14.044045676998369, "grad_norm": 0.0019362044986337423, "learning_rate": 0.00024669094265922204, "loss": 0.003, "num_input_tokens_seen": 185961296, "step": 86090 }, { "epoch": 14.044861337683523, "grad_norm": 0.2900139391422272, "learning_rate": 0.00024662957607819914, "loss": 0.0069, "num_input_tokens_seen": 185972400, "step": 86095 }, { "epoch": 14.045676998368679, "grad_norm": 0.0015170919941738248, "learning_rate": 0.00024656821463204913, "loss": 0.0109, "num_input_tokens_seen": 185982736, "step": 86100 }, { "epoch": 14.046492659053834, "grad_norm": 0.04802202805876732, "learning_rate": 0.0002465068583220161, "loss": 0.0033, "num_input_tokens_seen": 185993712, "step": 86105 }, { "epoch": 14.047308319738988, "grad_norm": 0.0018274127505719662, "learning_rate": 0.0002464455071493429, "loss": 0.0122, "num_input_tokens_seen": 186004368, "step": 86110 }, { "epoch": 14.048123980424144, "grad_norm": 0.0069192443042993546, "learning_rate": 0.00024638416111527346, "loss": 0.0012, "num_input_tokens_seen": 186014736, "step": 86115 }, { "epoch": 14.048939641109298, "grad_norm": 0.0008330877753905952, "learning_rate": 0.0002463228202210503, "loss": 0.0139, "num_input_tokens_seen": 186025392, "step": 86120 }, { "epoch": 14.049755301794454, "grad_norm": 0.002327044727280736, "learning_rate": 0.00024626148446791745, "loss": 0.0051, "num_input_tokens_seen": 186037008, "step": 86125 }, { "epoch": 14.05057096247961, "grad_norm": 0.003909484948962927, "learning_rate": 0.00024620015385711706, "loss": 0.0033, "num_input_tokens_seen": 186048048, "step": 86130 }, { "epoch": 14.051386623164763, "grad_norm": 0.006379165221005678, "learning_rate": 0.000246138828389893, "loss": 0.0064, "num_input_tokens_seen": 186058448, "step": 86135 }, { "epoch": 14.052202283849919, "grad_norm": 0.003839249489828944, "learning_rate": 0.0002460775080674872, "loss": 0.0026, "num_input_tokens_seen": 186068400, "step": 86140 }, { "epoch": 14.053017944535073, "grad_norm": 0.07193329930305481, "learning_rate": 0.0002460161928911432, "loss": 0.0051, "num_input_tokens_seen": 186080752, "step": 86145 }, { "epoch": 14.053833605220229, "grad_norm": 0.011074123904109001, "learning_rate": 0.0002459548828621028, "loss": 0.0067, "num_input_tokens_seen": 186092336, "step": 86150 }, { "epoch": 14.054649265905383, "grad_norm": 0.002707752399146557, "learning_rate": 0.00024589357798160925, "loss": 0.0011, "num_input_tokens_seen": 186103632, "step": 86155 }, { "epoch": 14.055464926590538, "grad_norm": 0.0034250058233737946, "learning_rate": 0.0002458322782509047, "loss": 0.0016, "num_input_tokens_seen": 186114256, "step": 86160 }, { "epoch": 14.056280587275694, "grad_norm": 0.010328114964067936, "learning_rate": 0.00024577098367123146, "loss": 0.0024, "num_input_tokens_seen": 186124816, "step": 86165 }, { "epoch": 14.057096247960848, "grad_norm": 0.0008839413640089333, "learning_rate": 0.00024570969424383174, "loss": 0.0036, "num_input_tokens_seen": 186136624, "step": 86170 }, { "epoch": 14.057911908646004, "grad_norm": 0.00860717985779047, "learning_rate": 0.00024564840996994764, "loss": 0.0072, "num_input_tokens_seen": 186146864, "step": 86175 }, { "epoch": 14.058727569331158, "grad_norm": 0.0009140484617091715, "learning_rate": 0.0002455871308508212, "loss": 0.0044, "num_input_tokens_seen": 186157904, "step": 86180 }, { "epoch": 14.059543230016313, "grad_norm": 0.8887136578559875, "learning_rate": 0.0002455258568876943, "loss": 0.1385, "num_input_tokens_seen": 186168464, "step": 86185 }, { "epoch": 14.060358890701469, "grad_norm": 0.0013078611809760332, "learning_rate": 0.0002454645880818087, "loss": 0.002, "num_input_tokens_seen": 186179568, "step": 86190 }, { "epoch": 14.061174551386623, "grad_norm": 0.7345595955848694, "learning_rate": 0.00024540332443440615, "loss": 0.0126, "num_input_tokens_seen": 186191152, "step": 86195 }, { "epoch": 14.061990212071779, "grad_norm": 0.027177168056368828, "learning_rate": 0.0002453420659467282, "loss": 0.0056, "num_input_tokens_seen": 186201776, "step": 86200 }, { "epoch": 14.062805872756933, "grad_norm": 0.009244061075150967, "learning_rate": 0.00024528081262001615, "loss": 0.0008, "num_input_tokens_seen": 186213392, "step": 86205 }, { "epoch": 14.063621533442088, "grad_norm": 0.0036014586221426725, "learning_rate": 0.000245219564455512, "loss": 0.0021, "num_input_tokens_seen": 186222448, "step": 86210 }, { "epoch": 14.064437194127244, "grad_norm": 0.006802697200328112, "learning_rate": 0.00024515832145445614, "loss": 0.0043, "num_input_tokens_seen": 186232624, "step": 86215 }, { "epoch": 14.065252854812398, "grad_norm": 0.004956814460456371, "learning_rate": 0.0002450970836180906, "loss": 0.006, "num_input_tokens_seen": 186242896, "step": 86220 }, { "epoch": 14.066068515497554, "grad_norm": 0.051006123423576355, "learning_rate": 0.0002450358509476556, "loss": 0.0054, "num_input_tokens_seen": 186253616, "step": 86225 }, { "epoch": 14.066884176182707, "grad_norm": 0.08910132199525833, "learning_rate": 0.00024497462344439297, "loss": 0.0032, "num_input_tokens_seen": 186264080, "step": 86230 }, { "epoch": 14.067699836867863, "grad_norm": 0.002252694685012102, "learning_rate": 0.0002449134011095427, "loss": 0.0855, "num_input_tokens_seen": 186275760, "step": 86235 }, { "epoch": 14.068515497553017, "grad_norm": 0.0041665323078632355, "learning_rate": 0.0002448521839443464, "loss": 0.1032, "num_input_tokens_seen": 186286864, "step": 86240 }, { "epoch": 14.069331158238173, "grad_norm": 0.00127582682762295, "learning_rate": 0.00024479097195004377, "loss": 0.032, "num_input_tokens_seen": 186296560, "step": 86245 }, { "epoch": 14.070146818923329, "grad_norm": 0.0045285290107131, "learning_rate": 0.0002447297651278763, "loss": 0.0039, "num_input_tokens_seen": 186307600, "step": 86250 }, { "epoch": 14.070962479608482, "grad_norm": 0.0880713164806366, "learning_rate": 0.0002446685634790836, "loss": 0.0075, "num_input_tokens_seen": 186318704, "step": 86255 }, { "epoch": 14.071778140293638, "grad_norm": 0.0022388026118278503, "learning_rate": 0.00024460736700490676, "loss": 0.0101, "num_input_tokens_seen": 186328016, "step": 86260 }, { "epoch": 14.072593800978792, "grad_norm": 0.005308662075549364, "learning_rate": 0.00024454617570658524, "loss": 0.0015, "num_input_tokens_seen": 186340048, "step": 86265 }, { "epoch": 14.073409461663948, "grad_norm": 0.010273891501128674, "learning_rate": 0.00024448498958535984, "loss": 0.004, "num_input_tokens_seen": 186351408, "step": 86270 }, { "epoch": 14.074225122349104, "grad_norm": 0.0025942821521312, "learning_rate": 0.00024442380864247, "loss": 0.0075, "num_input_tokens_seen": 186361904, "step": 86275 }, { "epoch": 14.075040783034257, "grad_norm": 0.0011069714091718197, "learning_rate": 0.00024436263287915623, "loss": 0.0047, "num_input_tokens_seen": 186372848, "step": 86280 }, { "epoch": 14.075856443719413, "grad_norm": 0.001931919134221971, "learning_rate": 0.00024430146229665754, "loss": 0.0014, "num_input_tokens_seen": 186384240, "step": 86285 }, { "epoch": 14.076672104404567, "grad_norm": 0.018992312252521515, "learning_rate": 0.0002442402968962146, "loss": 0.0064, "num_input_tokens_seen": 186394608, "step": 86290 }, { "epoch": 14.077487765089723, "grad_norm": 0.017261963337659836, "learning_rate": 0.00024417913667906604, "loss": 0.0025, "num_input_tokens_seen": 186404560, "step": 86295 }, { "epoch": 14.078303425774878, "grad_norm": 0.007981205359101295, "learning_rate": 0.00024411798164645205, "loss": 0.0014, "num_input_tokens_seen": 186415248, "step": 86300 }, { "epoch": 14.079119086460032, "grad_norm": 0.4204510450363159, "learning_rate": 0.00024405683179961176, "loss": 0.0144, "num_input_tokens_seen": 186426544, "step": 86305 }, { "epoch": 14.079934747145188, "grad_norm": 0.012655510567128658, "learning_rate": 0.00024399568713978444, "loss": 0.0017, "num_input_tokens_seen": 186437040, "step": 86310 }, { "epoch": 14.080750407830342, "grad_norm": 0.00166032905690372, "learning_rate": 0.00024393454766820927, "loss": 0.0016, "num_input_tokens_seen": 186448336, "step": 86315 }, { "epoch": 14.081566068515498, "grad_norm": 0.11170172691345215, "learning_rate": 0.00024387341338612535, "loss": 0.0052, "num_input_tokens_seen": 186459152, "step": 86320 }, { "epoch": 14.082381729200652, "grad_norm": 0.0015688682906329632, "learning_rate": 0.00024381228429477166, "loss": 0.002, "num_input_tokens_seen": 186471504, "step": 86325 }, { "epoch": 14.083197389885807, "grad_norm": 0.003982523921877146, "learning_rate": 0.00024375116039538697, "loss": 0.1025, "num_input_tokens_seen": 186481360, "step": 86330 }, { "epoch": 14.084013050570963, "grad_norm": 0.0194878950715065, "learning_rate": 0.0002436900416892101, "loss": 0.0028, "num_input_tokens_seen": 186493328, "step": 86335 }, { "epoch": 14.084828711256117, "grad_norm": 0.33860138058662415, "learning_rate": 0.00024362892817747972, "loss": 0.0053, "num_input_tokens_seen": 186504688, "step": 86340 }, { "epoch": 14.085644371941273, "grad_norm": 0.0014220515731722116, "learning_rate": 0.00024356781986143434, "loss": 0.0162, "num_input_tokens_seen": 186516144, "step": 86345 }, { "epoch": 14.086460032626427, "grad_norm": 0.002930557122454047, "learning_rate": 0.00024350671674231217, "loss": 0.1392, "num_input_tokens_seen": 186526288, "step": 86350 }, { "epoch": 14.087275693311582, "grad_norm": 0.01115860790014267, "learning_rate": 0.0002434456188213522, "loss": 0.0016, "num_input_tokens_seen": 186536432, "step": 86355 }, { "epoch": 14.088091353996738, "grad_norm": 0.004121196456253529, "learning_rate": 0.00024338452609979177, "loss": 0.0095, "num_input_tokens_seen": 186546064, "step": 86360 }, { "epoch": 14.088907014681892, "grad_norm": 0.005770614370703697, "learning_rate": 0.0002433234385788699, "loss": 0.0037, "num_input_tokens_seen": 186557872, "step": 86365 }, { "epoch": 14.089722675367048, "grad_norm": 0.004038193728774786, "learning_rate": 0.00024326235625982378, "loss": 0.0027, "num_input_tokens_seen": 186569136, "step": 86370 }, { "epoch": 14.090538336052202, "grad_norm": 0.03139469772577286, "learning_rate": 0.00024320127914389213, "loss": 0.0035, "num_input_tokens_seen": 186581232, "step": 86375 }, { "epoch": 14.091353996737357, "grad_norm": 0.16970932483673096, "learning_rate": 0.00024314020723231183, "loss": 0.0117, "num_input_tokens_seen": 186592144, "step": 86380 }, { "epoch": 14.092169657422513, "grad_norm": 0.04780832678079605, "learning_rate": 0.00024307914052632159, "loss": 0.0031, "num_input_tokens_seen": 186603504, "step": 86385 }, { "epoch": 14.092985318107667, "grad_norm": 0.002771410159766674, "learning_rate": 0.000243018079027158, "loss": 0.0022, "num_input_tokens_seen": 186615024, "step": 86390 }, { "epoch": 14.093800978792823, "grad_norm": 0.029407048597931862, "learning_rate": 0.0002429570227360595, "loss": 0.0024, "num_input_tokens_seen": 186624976, "step": 86395 }, { "epoch": 14.094616639477977, "grad_norm": 0.0027717319317162037, "learning_rate": 0.00024289597165426264, "loss": 0.0026, "num_input_tokens_seen": 186636752, "step": 86400 }, { "epoch": 14.095432300163132, "grad_norm": 0.016943685710430145, "learning_rate": 0.00024283492578300542, "loss": 0.0805, "num_input_tokens_seen": 186647632, "step": 86405 }, { "epoch": 14.096247960848286, "grad_norm": 0.002924926346167922, "learning_rate": 0.00024277388512352428, "loss": 0.0033, "num_input_tokens_seen": 186658896, "step": 86410 }, { "epoch": 14.097063621533442, "grad_norm": 0.014270029030740261, "learning_rate": 0.00024271284967705687, "loss": 0.016, "num_input_tokens_seen": 186671056, "step": 86415 }, { "epoch": 14.097879282218598, "grad_norm": 0.0008768712286837399, "learning_rate": 0.00024265181944483995, "loss": 0.0006, "num_input_tokens_seen": 186681872, "step": 86420 }, { "epoch": 14.098694942903752, "grad_norm": 0.004642259329557419, "learning_rate": 0.0002425907944281104, "loss": 0.0129, "num_input_tokens_seen": 186692880, "step": 86425 }, { "epoch": 14.099510603588907, "grad_norm": 0.017092658206820488, "learning_rate": 0.00024252977462810494, "loss": 0.0037, "num_input_tokens_seen": 186702704, "step": 86430 }, { "epoch": 14.100326264274061, "grad_norm": 0.0033077350817620754, "learning_rate": 0.0002424687600460602, "loss": 0.0097, "num_input_tokens_seen": 186712944, "step": 86435 }, { "epoch": 14.101141924959217, "grad_norm": 0.0017457004869356751, "learning_rate": 0.00024240775068321273, "loss": 0.0038, "num_input_tokens_seen": 186723408, "step": 86440 }, { "epoch": 14.101957585644373, "grad_norm": 0.0013185646384954453, "learning_rate": 0.00024234674654079901, "loss": 0.0053, "num_input_tokens_seen": 186735312, "step": 86445 }, { "epoch": 14.102773246329527, "grad_norm": 0.0159031692892313, "learning_rate": 0.00024228574762005534, "loss": 0.0064, "num_input_tokens_seen": 186746544, "step": 86450 }, { "epoch": 14.103588907014682, "grad_norm": 0.03777594491839409, "learning_rate": 0.00024222475392221787, "loss": 0.0022, "num_input_tokens_seen": 186757712, "step": 86455 }, { "epoch": 14.104404567699836, "grad_norm": 0.015740415081381798, "learning_rate": 0.0002421637654485228, "loss": 0.002, "num_input_tokens_seen": 186768144, "step": 86460 }, { "epoch": 14.105220228384992, "grad_norm": 2.156970977783203, "learning_rate": 0.00024210278220020614, "loss": 0.0342, "num_input_tokens_seen": 186778096, "step": 86465 }, { "epoch": 14.106035889070148, "grad_norm": 0.005683009047061205, "learning_rate": 0.00024204180417850373, "loss": 0.0037, "num_input_tokens_seen": 186789008, "step": 86470 }, { "epoch": 14.106851549755302, "grad_norm": 0.07511241734027863, "learning_rate": 0.00024198083138465143, "loss": 0.005, "num_input_tokens_seen": 186799280, "step": 86475 }, { "epoch": 14.107667210440457, "grad_norm": 0.05983182042837143, "learning_rate": 0.0002419198638198849, "loss": 0.0105, "num_input_tokens_seen": 186809808, "step": 86480 }, { "epoch": 14.108482871125611, "grad_norm": 0.011176004074513912, "learning_rate": 0.0002418589014854397, "loss": 0.0046, "num_input_tokens_seen": 186821104, "step": 86485 }, { "epoch": 14.109298531810767, "grad_norm": 0.024094609543681145, "learning_rate": 0.00024179794438255133, "loss": 0.0024, "num_input_tokens_seen": 186830864, "step": 86490 }, { "epoch": 14.11011419249592, "grad_norm": 0.020474612712860107, "learning_rate": 0.000241736992512455, "loss": 0.0016, "num_input_tokens_seen": 186841936, "step": 86495 }, { "epoch": 14.110929853181077, "grad_norm": 0.0037670359015464783, "learning_rate": 0.00024167604587638653, "loss": 0.0012, "num_input_tokens_seen": 186852944, "step": 86500 }, { "epoch": 14.111745513866232, "grad_norm": 0.020464560016989708, "learning_rate": 0.00024161510447558032, "loss": 0.0025, "num_input_tokens_seen": 186863152, "step": 86505 }, { "epoch": 14.112561174551386, "grad_norm": 0.030841641128063202, "learning_rate": 0.0002415541683112722, "loss": 0.0022, "num_input_tokens_seen": 186873904, "step": 86510 }, { "epoch": 14.113376835236542, "grad_norm": 0.0019353614188730717, "learning_rate": 0.0002414932373846963, "loss": 0.0033, "num_input_tokens_seen": 186884592, "step": 86515 }, { "epoch": 14.114192495921696, "grad_norm": 0.0009746397845447063, "learning_rate": 0.00024143231169708806, "loss": 0.0008, "num_input_tokens_seen": 186895536, "step": 86520 }, { "epoch": 14.115008156606851, "grad_norm": 0.01233255211263895, "learning_rate": 0.0002413713912496821, "loss": 0.0145, "num_input_tokens_seen": 186906448, "step": 86525 }, { "epoch": 14.115823817292007, "grad_norm": 0.005642162170261145, "learning_rate": 0.00024131047604371292, "loss": 0.0019, "num_input_tokens_seen": 186917328, "step": 86530 }, { "epoch": 14.116639477977161, "grad_norm": 0.012053254060447216, "learning_rate": 0.0002412495660804152, "loss": 0.0545, "num_input_tokens_seen": 186928336, "step": 86535 }, { "epoch": 14.117455138662317, "grad_norm": 0.01833995431661606, "learning_rate": 0.0002411886613610232, "loss": 0.0031, "num_input_tokens_seen": 186938896, "step": 86540 }, { "epoch": 14.11827079934747, "grad_norm": 0.007797705475240946, "learning_rate": 0.00024112776188677133, "loss": 0.0014, "num_input_tokens_seen": 186948688, "step": 86545 }, { "epoch": 14.119086460032626, "grad_norm": 0.01950424164533615, "learning_rate": 0.0002410668676588938, "loss": 0.0015, "num_input_tokens_seen": 186958928, "step": 86550 }, { "epoch": 14.119902120717782, "grad_norm": 0.6473231315612793, "learning_rate": 0.0002410059786786246, "loss": 0.0235, "num_input_tokens_seen": 186969744, "step": 86555 }, { "epoch": 14.120717781402936, "grad_norm": 0.07638765871524811, "learning_rate": 0.00024094509494719784, "loss": 0.0198, "num_input_tokens_seen": 186980528, "step": 86560 }, { "epoch": 14.121533442088092, "grad_norm": 0.03419485315680504, "learning_rate": 0.0002408842164658474, "loss": 0.0053, "num_input_tokens_seen": 186991280, "step": 86565 }, { "epoch": 14.122349102773246, "grad_norm": 0.0056074392050504684, "learning_rate": 0.00024082334323580695, "loss": 0.0012, "num_input_tokens_seen": 187002448, "step": 86570 }, { "epoch": 14.123164763458401, "grad_norm": 0.043900150805711746, "learning_rate": 0.0002407624752583103, "loss": 0.0037, "num_input_tokens_seen": 187013552, "step": 86575 }, { "epoch": 14.123980424143557, "grad_norm": 0.006590542383491993, "learning_rate": 0.00024070161253459093, "loss": 0.0052, "num_input_tokens_seen": 187023984, "step": 86580 }, { "epoch": 14.124796084828711, "grad_norm": 0.0019354376709088683, "learning_rate": 0.00024064075506588235, "loss": 0.0567, "num_input_tokens_seen": 187036496, "step": 86585 }, { "epoch": 14.125611745513867, "grad_norm": 0.013171792961657047, "learning_rate": 0.00024057990285341786, "loss": 0.0026, "num_input_tokens_seen": 187047056, "step": 86590 }, { "epoch": 14.12642740619902, "grad_norm": 0.06522294878959656, "learning_rate": 0.00024051905589843076, "loss": 0.0573, "num_input_tokens_seen": 187057712, "step": 86595 }, { "epoch": 14.127243066884176, "grad_norm": 0.0005969098419882357, "learning_rate": 0.00024045821420215412, "loss": 0.0051, "num_input_tokens_seen": 187069296, "step": 86600 }, { "epoch": 14.12805872756933, "grad_norm": 0.6809018850326538, "learning_rate": 0.0002403973777658211, "loss": 0.0124, "num_input_tokens_seen": 187080368, "step": 86605 }, { "epoch": 14.128874388254486, "grad_norm": 0.0005238762823864818, "learning_rate": 0.0002403365465906645, "loss": 0.003, "num_input_tokens_seen": 187091728, "step": 86610 }, { "epoch": 14.129690048939642, "grad_norm": 0.0008300320478156209, "learning_rate": 0.0002402757206779172, "loss": 0.0009, "num_input_tokens_seen": 187101584, "step": 86615 }, { "epoch": 14.130505709624796, "grad_norm": 0.006851747632026672, "learning_rate": 0.00024021490002881186, "loss": 0.0029, "num_input_tokens_seen": 187112112, "step": 86620 }, { "epoch": 14.131321370309951, "grad_norm": 0.004974581766873598, "learning_rate": 0.000240154084644581, "loss": 0.0284, "num_input_tokens_seen": 187123440, "step": 86625 }, { "epoch": 14.132137030995105, "grad_norm": 0.5447921752929688, "learning_rate": 0.0002400932745264574, "loss": 0.0427, "num_input_tokens_seen": 187134416, "step": 86630 }, { "epoch": 14.132952691680261, "grad_norm": 0.02428017184138298, "learning_rate": 0.00024003246967567332, "loss": 0.0018, "num_input_tokens_seen": 187146128, "step": 86635 }, { "epoch": 14.133768352365417, "grad_norm": 0.001342527917586267, "learning_rate": 0.00023997167009346104, "loss": 0.0012, "num_input_tokens_seen": 187156624, "step": 86640 }, { "epoch": 14.13458401305057, "grad_norm": 0.03645501285791397, "learning_rate": 0.00023991087578105274, "loss": 0.0031, "num_input_tokens_seen": 187167600, "step": 86645 }, { "epoch": 14.135399673735726, "grad_norm": 0.00650134077295661, "learning_rate": 0.00023985008673968052, "loss": 0.0191, "num_input_tokens_seen": 187179248, "step": 86650 }, { "epoch": 14.13621533442088, "grad_norm": 0.0668526217341423, "learning_rate": 0.00023978930297057627, "loss": 0.0066, "num_input_tokens_seen": 187189872, "step": 86655 }, { "epoch": 14.137030995106036, "grad_norm": 0.05055573210120201, "learning_rate": 0.0002397285244749719, "loss": 0.0036, "num_input_tokens_seen": 187201616, "step": 86660 }, { "epoch": 14.137846655791192, "grad_norm": 0.008112654089927673, "learning_rate": 0.00023966775125409918, "loss": 0.0029, "num_input_tokens_seen": 187211856, "step": 86665 }, { "epoch": 14.138662316476346, "grad_norm": 0.0011952114291489124, "learning_rate": 0.00023960698330918972, "loss": 0.002, "num_input_tokens_seen": 187222864, "step": 86670 }, { "epoch": 14.139477977161501, "grad_norm": 0.0007925685495138168, "learning_rate": 0.00023954622064147507, "loss": 0.0017, "num_input_tokens_seen": 187232912, "step": 86675 }, { "epoch": 14.140293637846655, "grad_norm": 0.05681190267205238, "learning_rate": 0.00023948546325218667, "loss": 0.0032, "num_input_tokens_seen": 187243984, "step": 86680 }, { "epoch": 14.141109298531811, "grad_norm": 0.036385323852300644, "learning_rate": 0.00023942471114255588, "loss": 0.0228, "num_input_tokens_seen": 187255280, "step": 86685 }, { "epoch": 14.141924959216965, "grad_norm": 0.0023627562914043665, "learning_rate": 0.00023936396431381386, "loss": 0.0032, "num_input_tokens_seen": 187265488, "step": 86690 }, { "epoch": 14.14274061990212, "grad_norm": 0.1351795792579651, "learning_rate": 0.00023930322276719175, "loss": 0.0041, "num_input_tokens_seen": 187277872, "step": 86695 }, { "epoch": 14.143556280587276, "grad_norm": 0.011718549765646458, "learning_rate": 0.0002392424865039205, "loss": 0.0008, "num_input_tokens_seen": 187288144, "step": 86700 }, { "epoch": 14.14437194127243, "grad_norm": 0.001978749642148614, "learning_rate": 0.0002391817555252311, "loss": 0.0029, "num_input_tokens_seen": 187297488, "step": 86705 }, { "epoch": 14.145187601957586, "grad_norm": 0.03943789377808571, "learning_rate": 0.0002391210298323543, "loss": 0.012, "num_input_tokens_seen": 187309904, "step": 86710 }, { "epoch": 14.14600326264274, "grad_norm": 0.0004417496966198087, "learning_rate": 0.00023906030942652073, "loss": 0.0018, "num_input_tokens_seen": 187321136, "step": 86715 }, { "epoch": 14.146818923327896, "grad_norm": 0.010585743933916092, "learning_rate": 0.00023899959430896106, "loss": 0.2156, "num_input_tokens_seen": 187331440, "step": 86720 }, { "epoch": 14.147634584013051, "grad_norm": 0.006574005354195833, "learning_rate": 0.00023893888448090573, "loss": 0.0024, "num_input_tokens_seen": 187342352, "step": 86725 }, { "epoch": 14.148450244698205, "grad_norm": 0.004500469658523798, "learning_rate": 0.00023887817994358484, "loss": 0.0024, "num_input_tokens_seen": 187352496, "step": 86730 }, { "epoch": 14.149265905383361, "grad_norm": 0.00272945174947381, "learning_rate": 0.0002388174806982293, "loss": 0.0015, "num_input_tokens_seen": 187362480, "step": 86735 }, { "epoch": 14.150081566068515, "grad_norm": 0.004126972518861294, "learning_rate": 0.00023875678674606848, "loss": 0.0034, "num_input_tokens_seen": 187374064, "step": 86740 }, { "epoch": 14.15089722675367, "grad_norm": 0.030439136549830437, "learning_rate": 0.00023869609808833316, "loss": 0.0111, "num_input_tokens_seen": 187384176, "step": 86745 }, { "epoch": 14.151712887438826, "grad_norm": 0.009578816592693329, "learning_rate": 0.0002386354147262525, "loss": 0.0014, "num_input_tokens_seen": 187395568, "step": 86750 }, { "epoch": 14.15252854812398, "grad_norm": 0.004324750974774361, "learning_rate": 0.0002385747366610571, "loss": 0.0011, "num_input_tokens_seen": 187407408, "step": 86755 }, { "epoch": 14.153344208809136, "grad_norm": 0.0028149730060249567, "learning_rate": 0.00023851406389397594, "loss": 0.003, "num_input_tokens_seen": 187418032, "step": 86760 }, { "epoch": 14.15415986949429, "grad_norm": 0.003918309696018696, "learning_rate": 0.00023845339642623937, "loss": 0.0042, "num_input_tokens_seen": 187428464, "step": 86765 }, { "epoch": 14.154975530179446, "grad_norm": 0.004220111761242151, "learning_rate": 0.00023839273425907615, "loss": 0.0045, "num_input_tokens_seen": 187438256, "step": 86770 }, { "epoch": 14.1557911908646, "grad_norm": 0.0007073664455674589, "learning_rate": 0.0002383320773937162, "loss": 0.0956, "num_input_tokens_seen": 187448496, "step": 86775 }, { "epoch": 14.156606851549755, "grad_norm": 0.0012666697148233652, "learning_rate": 0.00023827142583138873, "loss": 0.0027, "num_input_tokens_seen": 187459792, "step": 86780 }, { "epoch": 14.15742251223491, "grad_norm": 0.0030706448014825583, "learning_rate": 0.00023821077957332276, "loss": 0.002, "num_input_tokens_seen": 187471984, "step": 86785 }, { "epoch": 14.158238172920065, "grad_norm": 0.0018182602943852544, "learning_rate": 0.00023815013862074746, "loss": 0.004, "num_input_tokens_seen": 187482960, "step": 86790 }, { "epoch": 14.15905383360522, "grad_norm": 0.006480704993009567, "learning_rate": 0.0002380895029748918, "loss": 0.0023, "num_input_tokens_seen": 187493680, "step": 86795 }, { "epoch": 14.159869494290374, "grad_norm": 0.005142877344042063, "learning_rate": 0.00023802887263698464, "loss": 0.0995, "num_input_tokens_seen": 187505488, "step": 86800 }, { "epoch": 14.16068515497553, "grad_norm": 0.025212204083800316, "learning_rate": 0.00023796824760825464, "loss": 0.0031, "num_input_tokens_seen": 187516336, "step": 86805 }, { "epoch": 14.161500815660686, "grad_norm": 0.004730370827019215, "learning_rate": 0.0002379076278899306, "loss": 0.0015, "num_input_tokens_seen": 187526576, "step": 86810 }, { "epoch": 14.16231647634584, "grad_norm": 0.008115909993648529, "learning_rate": 0.0002378470134832409, "loss": 0.0023, "num_input_tokens_seen": 187537936, "step": 86815 }, { "epoch": 14.163132137030995, "grad_norm": 0.019025664776563644, "learning_rate": 0.00023778640438941408, "loss": 0.0023, "num_input_tokens_seen": 187547792, "step": 86820 }, { "epoch": 14.16394779771615, "grad_norm": 0.04850027337670326, "learning_rate": 0.00023772580060967834, "loss": 0.0032, "num_input_tokens_seen": 187559024, "step": 86825 }, { "epoch": 14.164763458401305, "grad_norm": 0.0032715355046093464, "learning_rate": 0.00023766520214526206, "loss": 0.005, "num_input_tokens_seen": 187570288, "step": 86830 }, { "epoch": 14.16557911908646, "grad_norm": 0.03740094229578972, "learning_rate": 0.00023760460899739322, "loss": 0.0042, "num_input_tokens_seen": 187580624, "step": 86835 }, { "epoch": 14.166394779771615, "grad_norm": 0.0004254610976204276, "learning_rate": 0.00023754402116729983, "loss": 0.0035, "num_input_tokens_seen": 187591248, "step": 86840 }, { "epoch": 14.16721044045677, "grad_norm": 0.003979322500526905, "learning_rate": 0.00023748343865620964, "loss": 0.0847, "num_input_tokens_seen": 187601136, "step": 86845 }, { "epoch": 14.168026101141924, "grad_norm": 0.0024420591071248055, "learning_rate": 0.00023742286146535098, "loss": 0.0011, "num_input_tokens_seen": 187611472, "step": 86850 }, { "epoch": 14.16884176182708, "grad_norm": 0.027474381029605865, "learning_rate": 0.00023736228959595073, "loss": 0.0055, "num_input_tokens_seen": 187621488, "step": 86855 }, { "epoch": 14.169657422512234, "grad_norm": 0.0005245811189524829, "learning_rate": 0.00023730172304923725, "loss": 0.0667, "num_input_tokens_seen": 187631696, "step": 86860 }, { "epoch": 14.17047308319739, "grad_norm": 0.002572552999481559, "learning_rate": 0.00023724116182643725, "loss": 0.009, "num_input_tokens_seen": 187642768, "step": 86865 }, { "epoch": 14.171288743882545, "grad_norm": 0.5119752287864685, "learning_rate": 0.00023718060592877878, "loss": 0.046, "num_input_tokens_seen": 187653488, "step": 86870 }, { "epoch": 14.1721044045677, "grad_norm": 0.0018368182936683297, "learning_rate": 0.00023712005535748838, "loss": 0.0024, "num_input_tokens_seen": 187663632, "step": 86875 }, { "epoch": 14.172920065252855, "grad_norm": 0.0015098400181159377, "learning_rate": 0.0002370595101137939, "loss": 0.0063, "num_input_tokens_seen": 187672912, "step": 86880 }, { "epoch": 14.173735725938009, "grad_norm": 0.014908955432474613, "learning_rate": 0.00023699897019892165, "loss": 0.001, "num_input_tokens_seen": 187683536, "step": 86885 }, { "epoch": 14.174551386623165, "grad_norm": 0.008537651039659977, "learning_rate": 0.00023693843561409928, "loss": 0.0108, "num_input_tokens_seen": 187695280, "step": 86890 }, { "epoch": 14.17536704730832, "grad_norm": 0.0010102560045197606, "learning_rate": 0.0002368779063605529, "loss": 0.001, "num_input_tokens_seen": 187705744, "step": 86895 }, { "epoch": 14.176182707993474, "grad_norm": 0.17667905986309052, "learning_rate": 0.00023681738243950984, "loss": 0.0188, "num_input_tokens_seen": 187716976, "step": 86900 }, { "epoch": 14.17699836867863, "grad_norm": 0.5679713487625122, "learning_rate": 0.00023675686385219607, "loss": 0.0884, "num_input_tokens_seen": 187728208, "step": 86905 }, { "epoch": 14.177814029363784, "grad_norm": 0.004683853592723608, "learning_rate": 0.0002366963505998388, "loss": 0.0113, "num_input_tokens_seen": 187738608, "step": 86910 }, { "epoch": 14.17862969004894, "grad_norm": 0.02120167389512062, "learning_rate": 0.00023663584268366356, "loss": 0.0052, "num_input_tokens_seen": 187750032, "step": 86915 }, { "epoch": 14.179445350734095, "grad_norm": 0.010204663500189781, "learning_rate": 0.00023657534010489733, "loss": 0.0141, "num_input_tokens_seen": 187760272, "step": 86920 }, { "epoch": 14.18026101141925, "grad_norm": 0.0016371725359931588, "learning_rate": 0.000236514842864766, "loss": 0.0654, "num_input_tokens_seen": 187770832, "step": 86925 }, { "epoch": 14.181076672104405, "grad_norm": 0.0017514110077172518, "learning_rate": 0.00023645435096449557, "loss": 0.0035, "num_input_tokens_seen": 187782192, "step": 86930 }, { "epoch": 14.181892332789559, "grad_norm": 0.0801762267947197, "learning_rate": 0.00023639386440531208, "loss": 0.0106, "num_input_tokens_seen": 187793456, "step": 86935 }, { "epoch": 14.182707993474715, "grad_norm": 0.5289106369018555, "learning_rate": 0.00023633338318844137, "loss": 0.1003, "num_input_tokens_seen": 187804976, "step": 86940 }, { "epoch": 14.18352365415987, "grad_norm": 0.006982157472521067, "learning_rate": 0.00023627290731510908, "loss": 0.0869, "num_input_tokens_seen": 187815120, "step": 86945 }, { "epoch": 14.184339314845024, "grad_norm": 0.010652474127709866, "learning_rate": 0.00023621243678654099, "loss": 0.0167, "num_input_tokens_seen": 187825776, "step": 86950 }, { "epoch": 14.18515497553018, "grad_norm": 0.16883718967437744, "learning_rate": 0.0002361519716039624, "loss": 0.0064, "num_input_tokens_seen": 187835920, "step": 86955 }, { "epoch": 14.185970636215334, "grad_norm": 0.024424118921160698, "learning_rate": 0.00023609151176859884, "loss": 0.0013, "num_input_tokens_seen": 187846832, "step": 86960 }, { "epoch": 14.18678629690049, "grad_norm": 0.09532174468040466, "learning_rate": 0.00023603105728167562, "loss": 0.0077, "num_input_tokens_seen": 187856624, "step": 86965 }, { "epoch": 14.187601957585644, "grad_norm": 0.026332538574934006, "learning_rate": 0.00023597060814441767, "loss": 0.1008, "num_input_tokens_seen": 187867120, "step": 86970 }, { "epoch": 14.1884176182708, "grad_norm": 0.0035820265766233206, "learning_rate": 0.00023591016435805067, "loss": 0.0036, "num_input_tokens_seen": 187877040, "step": 86975 }, { "epoch": 14.189233278955955, "grad_norm": 0.0029624279122799635, "learning_rate": 0.00023584972592379888, "loss": 0.0022, "num_input_tokens_seen": 187888304, "step": 86980 }, { "epoch": 14.190048939641109, "grad_norm": 0.03424012288451195, "learning_rate": 0.0002357892928428878, "loss": 0.0189, "num_input_tokens_seen": 187897680, "step": 86985 }, { "epoch": 14.190864600326265, "grad_norm": 0.0046348837204277515, "learning_rate": 0.00023572886511654157, "loss": 0.002, "num_input_tokens_seen": 187908784, "step": 86990 }, { "epoch": 14.191680261011419, "grad_norm": 0.0020335959270596504, "learning_rate": 0.00023566844274598548, "loss": 0.0019, "num_input_tokens_seen": 187919920, "step": 86995 }, { "epoch": 14.192495921696574, "grad_norm": 0.06452670693397522, "learning_rate": 0.00023560802573244333, "loss": 0.0028, "num_input_tokens_seen": 187931408, "step": 87000 }, { "epoch": 14.19331158238173, "grad_norm": 0.006201412994414568, "learning_rate": 0.00023554761407714036, "loss": 0.0519, "num_input_tokens_seen": 187940816, "step": 87005 }, { "epoch": 14.194127243066884, "grad_norm": 0.027852863073349, "learning_rate": 0.00023548720778130005, "loss": 0.0053, "num_input_tokens_seen": 187951664, "step": 87010 }, { "epoch": 14.19494290375204, "grad_norm": 0.09883508831262589, "learning_rate": 0.0002354268068461475, "loss": 0.018, "num_input_tokens_seen": 187962768, "step": 87015 }, { "epoch": 14.195758564437194, "grad_norm": 0.008603896014392376, "learning_rate": 0.00023536641127290588, "loss": 0.0037, "num_input_tokens_seen": 187972624, "step": 87020 }, { "epoch": 14.19657422512235, "grad_norm": 0.0015382606070488691, "learning_rate": 0.00023530602106280004, "loss": 0.2456, "num_input_tokens_seen": 187984592, "step": 87025 }, { "epoch": 14.197389885807505, "grad_norm": 0.21547020971775055, "learning_rate": 0.00023524563621705308, "loss": 0.0141, "num_input_tokens_seen": 187996688, "step": 87030 }, { "epoch": 14.198205546492659, "grad_norm": 0.018739258870482445, "learning_rate": 0.00023518525673688957, "loss": 0.003, "num_input_tokens_seen": 188007600, "step": 87035 }, { "epoch": 14.199021207177815, "grad_norm": 0.3604316711425781, "learning_rate": 0.0002351248826235324, "loss": 0.0733, "num_input_tokens_seen": 188018288, "step": 87040 }, { "epoch": 14.199836867862969, "grad_norm": 0.0037503130733966827, "learning_rate": 0.00023506451387820588, "loss": 0.0786, "num_input_tokens_seen": 188029456, "step": 87045 }, { "epoch": 14.200652528548124, "grad_norm": 0.015447917394340038, "learning_rate": 0.0002350041505021327, "loss": 0.0033, "num_input_tokens_seen": 188042864, "step": 87050 }, { "epoch": 14.201468189233278, "grad_norm": 0.04216151684522629, "learning_rate": 0.00023494379249653675, "loss": 0.0032, "num_input_tokens_seen": 188053904, "step": 87055 }, { "epoch": 14.202283849918434, "grad_norm": 0.13910488784313202, "learning_rate": 0.0002348834398626411, "loss": 0.0084, "num_input_tokens_seen": 188063504, "step": 87060 }, { "epoch": 14.20309951060359, "grad_norm": 0.0654771700501442, "learning_rate": 0.0002348230926016689, "loss": 0.0045, "num_input_tokens_seen": 188073712, "step": 87065 }, { "epoch": 14.203915171288743, "grad_norm": 0.011523857712745667, "learning_rate": 0.00023476275071484309, "loss": 0.0073, "num_input_tokens_seen": 188083888, "step": 87070 }, { "epoch": 14.2047308319739, "grad_norm": 0.003080186201259494, "learning_rate": 0.0002347024142033866, "loss": 0.0123, "num_input_tokens_seen": 188094640, "step": 87075 }, { "epoch": 14.205546492659053, "grad_norm": 0.5070033669471741, "learning_rate": 0.0002346420830685223, "loss": 0.1119, "num_input_tokens_seen": 188106256, "step": 87080 }, { "epoch": 14.206362153344209, "grad_norm": 0.008827326819300652, "learning_rate": 0.0002345817573114728, "loss": 0.0108, "num_input_tokens_seen": 188115536, "step": 87085 }, { "epoch": 14.207177814029365, "grad_norm": 0.003627562429755926, "learning_rate": 0.00023452143693346067, "loss": 0.0109, "num_input_tokens_seen": 188126800, "step": 87090 }, { "epoch": 14.207993474714518, "grad_norm": 0.007856626994907856, "learning_rate": 0.0002344611219357084, "loss": 0.05, "num_input_tokens_seen": 188137680, "step": 87095 }, { "epoch": 14.208809135399674, "grad_norm": 0.10289426147937775, "learning_rate": 0.0002344008123194384, "loss": 0.0075, "num_input_tokens_seen": 188149200, "step": 87100 }, { "epoch": 14.209624796084828, "grad_norm": 0.00227095908485353, "learning_rate": 0.0002343405080858728, "loss": 0.0009, "num_input_tokens_seen": 188160080, "step": 87105 }, { "epoch": 14.210440456769984, "grad_norm": 0.11845573782920837, "learning_rate": 0.00023428020923623382, "loss": 0.0087, "num_input_tokens_seen": 188171856, "step": 87110 }, { "epoch": 14.21125611745514, "grad_norm": 0.0016365720657631755, "learning_rate": 0.0002342199157717434, "loss": 0.0095, "num_input_tokens_seen": 188181936, "step": 87115 }, { "epoch": 14.212071778140293, "grad_norm": 0.0020826237741857767, "learning_rate": 0.00023415962769362386, "loss": 0.0029, "num_input_tokens_seen": 188193040, "step": 87120 }, { "epoch": 14.21288743882545, "grad_norm": 0.023198019713163376, "learning_rate": 0.00023409934500309633, "loss": 0.0093, "num_input_tokens_seen": 188202864, "step": 87125 }, { "epoch": 14.213703099510603, "grad_norm": 0.010550117120146751, "learning_rate": 0.00023403906770138328, "loss": 0.0084, "num_input_tokens_seen": 188214032, "step": 87130 }, { "epoch": 14.214518760195759, "grad_norm": 0.005151396617293358, "learning_rate": 0.00023397879578970554, "loss": 0.008, "num_input_tokens_seen": 188224784, "step": 87135 }, { "epoch": 14.215334420880913, "grad_norm": 0.010745275765657425, "learning_rate": 0.00023391852926928536, "loss": 0.0078, "num_input_tokens_seen": 188235152, "step": 87140 }, { "epoch": 14.216150081566068, "grad_norm": 0.004303690977394581, "learning_rate": 0.0002338582681413433, "loss": 0.0568, "num_input_tokens_seen": 188246288, "step": 87145 }, { "epoch": 14.216965742251224, "grad_norm": 0.008361046202480793, "learning_rate": 0.0002337980124071015, "loss": 0.0046, "num_input_tokens_seen": 188258160, "step": 87150 }, { "epoch": 14.217781402936378, "grad_norm": 0.0012672094162553549, "learning_rate": 0.0002337377620677803, "loss": 0.0008, "num_input_tokens_seen": 188268656, "step": 87155 }, { "epoch": 14.218597063621534, "grad_norm": 0.004940166603773832, "learning_rate": 0.00023367751712460134, "loss": 0.0013, "num_input_tokens_seen": 188279664, "step": 87160 }, { "epoch": 14.219412724306688, "grad_norm": 0.016097739338874817, "learning_rate": 0.00023361727757878527, "loss": 0.0072, "num_input_tokens_seen": 188290512, "step": 87165 }, { "epoch": 14.220228384991843, "grad_norm": 0.009004230611026287, "learning_rate": 0.00023355704343155305, "loss": 0.0047, "num_input_tokens_seen": 188300848, "step": 87170 }, { "epoch": 14.221044045676999, "grad_norm": 0.014903482049703598, "learning_rate": 0.00023349681468412537, "loss": 0.0047, "num_input_tokens_seen": 188310832, "step": 87175 }, { "epoch": 14.221859706362153, "grad_norm": 0.030258629471063614, "learning_rate": 0.00023343659133772277, "loss": 0.0055, "num_input_tokens_seen": 188322704, "step": 87180 }, { "epoch": 14.222675367047309, "grad_norm": 0.40277299284935, "learning_rate": 0.0002333763733935659, "loss": 0.0047, "num_input_tokens_seen": 188332656, "step": 87185 }, { "epoch": 14.223491027732463, "grad_norm": 0.001041868468746543, "learning_rate": 0.00023331616085287492, "loss": 0.002, "num_input_tokens_seen": 188343408, "step": 87190 }, { "epoch": 14.224306688417618, "grad_norm": 0.03579118847846985, "learning_rate": 0.00023325595371687037, "loss": 0.0271, "num_input_tokens_seen": 188354736, "step": 87195 }, { "epoch": 14.225122349102774, "grad_norm": 0.006182889919728041, "learning_rate": 0.00023319575198677223, "loss": 0.0046, "num_input_tokens_seen": 188364944, "step": 87200 }, { "epoch": 14.225938009787928, "grad_norm": 0.0006182396900840104, "learning_rate": 0.00023313555566380068, "loss": 0.0076, "num_input_tokens_seen": 188374928, "step": 87205 }, { "epoch": 14.226753670473084, "grad_norm": 0.006085644010454416, "learning_rate": 0.00023307536474917567, "loss": 0.0022, "num_input_tokens_seen": 188384816, "step": 87210 }, { "epoch": 14.227569331158238, "grad_norm": 0.052729446440935135, "learning_rate": 0.00023301517924411696, "loss": 0.0035, "num_input_tokens_seen": 188394128, "step": 87215 }, { "epoch": 14.228384991843393, "grad_norm": 0.003131187055259943, "learning_rate": 0.00023295499914984436, "loss": 0.002, "num_input_tokens_seen": 188405552, "step": 87220 }, { "epoch": 14.229200652528547, "grad_norm": 0.004549449775367975, "learning_rate": 0.00023289482446757747, "loss": 0.0029, "num_input_tokens_seen": 188416368, "step": 87225 }, { "epoch": 14.230016313213703, "grad_norm": 0.48102879524230957, "learning_rate": 0.0002328346551985358, "loss": 0.1339, "num_input_tokens_seen": 188426512, "step": 87230 }, { "epoch": 14.230831973898859, "grad_norm": 0.002627470064908266, "learning_rate": 0.00023277449134393875, "loss": 0.0024, "num_input_tokens_seen": 188437968, "step": 87235 }, { "epoch": 14.231647634584013, "grad_norm": 0.021344967186450958, "learning_rate": 0.00023271433290500567, "loss": 0.0099, "num_input_tokens_seen": 188448304, "step": 87240 }, { "epoch": 14.232463295269168, "grad_norm": 0.003346246900036931, "learning_rate": 0.00023265417988295567, "loss": 0.0014, "num_input_tokens_seen": 188458320, "step": 87245 }, { "epoch": 14.233278955954322, "grad_norm": 1.6928178071975708, "learning_rate": 0.0002325940322790079, "loss": 0.0198, "num_input_tokens_seen": 188469040, "step": 87250 }, { "epoch": 14.234094616639478, "grad_norm": 0.0035675603430718184, "learning_rate": 0.0002325338900943813, "loss": 0.0016, "num_input_tokens_seen": 188480880, "step": 87255 }, { "epoch": 14.234910277324634, "grad_norm": 0.004161432385444641, "learning_rate": 0.00023247375333029452, "loss": 0.0012, "num_input_tokens_seen": 188491824, "step": 87260 }, { "epoch": 14.235725938009788, "grad_norm": 0.04490378871560097, "learning_rate": 0.00023241362198796666, "loss": 0.0091, "num_input_tokens_seen": 188502896, "step": 87265 }, { "epoch": 14.236541598694943, "grad_norm": 0.4051935374736786, "learning_rate": 0.00023235349606861628, "loss": 0.0071, "num_input_tokens_seen": 188514800, "step": 87270 }, { "epoch": 14.237357259380097, "grad_norm": 0.005929006729274988, "learning_rate": 0.00023229337557346174, "loss": 0.0267, "num_input_tokens_seen": 188524048, "step": 87275 }, { "epoch": 14.238172920065253, "grad_norm": 0.0033224388025701046, "learning_rate": 0.00023223326050372163, "loss": 0.0026, "num_input_tokens_seen": 188535600, "step": 87280 }, { "epoch": 14.238988580750409, "grad_norm": 0.024302540346980095, "learning_rate": 0.0002321731508606142, "loss": 0.0253, "num_input_tokens_seen": 188546288, "step": 87285 }, { "epoch": 14.239804241435563, "grad_norm": 0.004064117558300495, "learning_rate": 0.0002321130466453576, "loss": 0.1404, "num_input_tokens_seen": 188557424, "step": 87290 }, { "epoch": 14.240619902120718, "grad_norm": 0.05272166058421135, "learning_rate": 0.0002320529478591699, "loss": 0.0123, "num_input_tokens_seen": 188566928, "step": 87295 }, { "epoch": 14.241435562805872, "grad_norm": 0.006723629776388407, "learning_rate": 0.00023199285450326918, "loss": 0.0062, "num_input_tokens_seen": 188576528, "step": 87300 }, { "epoch": 14.242251223491028, "grad_norm": 0.0016809606458991766, "learning_rate": 0.00023193276657887326, "loss": 0.004, "num_input_tokens_seen": 188586992, "step": 87305 }, { "epoch": 14.243066884176184, "grad_norm": 0.014636986888945103, "learning_rate": 0.00023187268408719986, "loss": 0.0013, "num_input_tokens_seen": 188597776, "step": 87310 }, { "epoch": 14.243882544861338, "grad_norm": 0.5428915023803711, "learning_rate": 0.00023181260702946673, "loss": 0.0146, "num_input_tokens_seen": 188608784, "step": 87315 }, { "epoch": 14.244698205546493, "grad_norm": 0.003895719302818179, "learning_rate": 0.00023175253540689124, "loss": 0.0031, "num_input_tokens_seen": 188620656, "step": 87320 }, { "epoch": 14.245513866231647, "grad_norm": 0.0009167763637378812, "learning_rate": 0.00023169246922069098, "loss": 0.0056, "num_input_tokens_seen": 188631920, "step": 87325 }, { "epoch": 14.246329526916803, "grad_norm": 0.035440631210803986, "learning_rate": 0.00023163240847208318, "loss": 0.0026, "num_input_tokens_seen": 188641744, "step": 87330 }, { "epoch": 14.247145187601957, "grad_norm": 0.6476132273674011, "learning_rate": 0.0002315723531622851, "loss": 0.1426, "num_input_tokens_seen": 188653168, "step": 87335 }, { "epoch": 14.247960848287113, "grad_norm": 0.10940902680158615, "learning_rate": 0.00023151230329251376, "loss": 0.0045, "num_input_tokens_seen": 188664144, "step": 87340 }, { "epoch": 14.248776508972268, "grad_norm": 0.002636961406096816, "learning_rate": 0.00023145225886398617, "loss": 0.033, "num_input_tokens_seen": 188674736, "step": 87345 }, { "epoch": 14.249592169657422, "grad_norm": 0.00327065447345376, "learning_rate": 0.0002313922198779193, "loss": 0.0024, "num_input_tokens_seen": 188686960, "step": 87350 }, { "epoch": 14.250407830342578, "grad_norm": 0.007365102879703045, "learning_rate": 0.00023133218633552982, "loss": 0.0091, "num_input_tokens_seen": 188698576, "step": 87355 }, { "epoch": 14.251223491027732, "grad_norm": 0.03469528630375862, "learning_rate": 0.00023127215823803444, "loss": 0.0061, "num_input_tokens_seen": 188709808, "step": 87360 }, { "epoch": 14.252039151712887, "grad_norm": 0.30641695857048035, "learning_rate": 0.00023121213558664966, "loss": 0.0111, "num_input_tokens_seen": 188721712, "step": 87365 }, { "epoch": 14.252854812398043, "grad_norm": 0.0382346473634243, "learning_rate": 0.00023115211838259175, "loss": 0.0027, "num_input_tokens_seen": 188731920, "step": 87370 }, { "epoch": 14.253670473083197, "grad_norm": 0.0007187232258729637, "learning_rate": 0.00023109210662707757, "loss": 0.0062, "num_input_tokens_seen": 188742448, "step": 87375 }, { "epoch": 14.254486133768353, "grad_norm": 0.004653709474951029, "learning_rate": 0.00023103210032132267, "loss": 0.0013, "num_input_tokens_seen": 188752976, "step": 87380 }, { "epoch": 14.255301794453507, "grad_norm": 0.0012320553651079535, "learning_rate": 0.0002309720994665438, "loss": 0.0124, "num_input_tokens_seen": 188763664, "step": 87385 }, { "epoch": 14.256117455138662, "grad_norm": 0.0008974694646894932, "learning_rate": 0.00023091210406395624, "loss": 0.0017, "num_input_tokens_seen": 188773936, "step": 87390 }, { "epoch": 14.256933115823816, "grad_norm": 0.008518456481397152, "learning_rate": 0.00023085211411477663, "loss": 0.001, "num_input_tokens_seen": 188784176, "step": 87395 }, { "epoch": 14.257748776508972, "grad_norm": 0.019535856321454048, "learning_rate": 0.00023079212962022, "loss": 0.0054, "num_input_tokens_seen": 188794736, "step": 87400 }, { "epoch": 14.258564437194128, "grad_norm": 0.015992237254977226, "learning_rate": 0.00023073215058150255, "loss": 0.0012, "num_input_tokens_seen": 188805200, "step": 87405 }, { "epoch": 14.259380097879282, "grad_norm": 0.0015473555540665984, "learning_rate": 0.00023067217699983966, "loss": 0.0373, "num_input_tokens_seen": 188816240, "step": 87410 }, { "epoch": 14.260195758564437, "grad_norm": 0.005111176986247301, "learning_rate": 0.00023061220887644679, "loss": 0.0025, "num_input_tokens_seen": 188824848, "step": 87415 }, { "epoch": 14.261011419249591, "grad_norm": 0.00272758980281651, "learning_rate": 0.00023055224621253923, "loss": 0.1147, "num_input_tokens_seen": 188835888, "step": 87420 }, { "epoch": 14.261827079934747, "grad_norm": 0.0052657704800367355, "learning_rate": 0.00023049228900933223, "loss": 0.0034, "num_input_tokens_seen": 188846608, "step": 87425 }, { "epoch": 14.262642740619903, "grad_norm": 0.005922100506722927, "learning_rate": 0.00023043233726804087, "loss": 0.0017, "num_input_tokens_seen": 188858096, "step": 87430 }, { "epoch": 14.263458401305057, "grad_norm": 0.09431520849466324, "learning_rate": 0.00023037239098988016, "loss": 0.016, "num_input_tokens_seen": 188868880, "step": 87435 }, { "epoch": 14.264274061990212, "grad_norm": 0.011786018498241901, "learning_rate": 0.00023031245017606506, "loss": 0.0122, "num_input_tokens_seen": 188880144, "step": 87440 }, { "epoch": 14.265089722675366, "grad_norm": 0.049001362174749374, "learning_rate": 0.00023025251482781023, "loss": 0.0072, "num_input_tokens_seen": 188890896, "step": 87445 }, { "epoch": 14.265905383360522, "grad_norm": 0.012988328002393246, "learning_rate": 0.00023019258494633038, "loss": 0.0042, "num_input_tokens_seen": 188901680, "step": 87450 }, { "epoch": 14.266721044045678, "grad_norm": 0.05504751577973366, "learning_rate": 0.0002301326605328401, "loss": 0.0049, "num_input_tokens_seen": 188912400, "step": 87455 }, { "epoch": 14.267536704730832, "grad_norm": 0.0019441379699856043, "learning_rate": 0.00023007274158855378, "loss": 0.0049, "num_input_tokens_seen": 188924048, "step": 87460 }, { "epoch": 14.268352365415987, "grad_norm": 0.01831858977675438, "learning_rate": 0.00023001282811468577, "loss": 0.0176, "num_input_tokens_seen": 188935536, "step": 87465 }, { "epoch": 14.269168026101141, "grad_norm": 0.3804619610309601, "learning_rate": 0.00022995292011245033, "loss": 0.0058, "num_input_tokens_seen": 188947216, "step": 87470 }, { "epoch": 14.269983686786297, "grad_norm": 0.0805104523897171, "learning_rate": 0.00022989301758306153, "loss": 0.0056, "num_input_tokens_seen": 188958288, "step": 87475 }, { "epoch": 14.270799347471453, "grad_norm": 0.00453876843675971, "learning_rate": 0.00022983312052773336, "loss": 0.003, "num_input_tokens_seen": 188967920, "step": 87480 }, { "epoch": 14.271615008156607, "grad_norm": 0.001278862589970231, "learning_rate": 0.0002297732289476796, "loss": 0.0046, "num_input_tokens_seen": 188977712, "step": 87485 }, { "epoch": 14.272430668841762, "grad_norm": 0.0043226066045463085, "learning_rate": 0.0002297133428441145, "loss": 0.0026, "num_input_tokens_seen": 188987728, "step": 87490 }, { "epoch": 14.273246329526916, "grad_norm": 0.005642724223434925, "learning_rate": 0.000229653462218251, "loss": 0.0047, "num_input_tokens_seen": 188999088, "step": 87495 }, { "epoch": 14.274061990212072, "grad_norm": 0.01294635608792305, "learning_rate": 0.00022959358707130346, "loss": 0.0259, "num_input_tokens_seen": 189010288, "step": 87500 }, { "epoch": 14.274877650897226, "grad_norm": 0.000444377918029204, "learning_rate": 0.00022953371740448453, "loss": 0.0061, "num_input_tokens_seen": 189021744, "step": 87505 }, { "epoch": 14.275693311582382, "grad_norm": 0.0009572534472681582, "learning_rate": 0.00022947385321900825, "loss": 0.0073, "num_input_tokens_seen": 189031568, "step": 87510 }, { "epoch": 14.276508972267537, "grad_norm": 0.0006329436437226832, "learning_rate": 0.00022941399451608725, "loss": 0.0006, "num_input_tokens_seen": 189042032, "step": 87515 }, { "epoch": 14.277324632952691, "grad_norm": 0.0018760806415230036, "learning_rate": 0.00022935414129693523, "loss": 0.0083, "num_input_tokens_seen": 189051856, "step": 87520 }, { "epoch": 14.278140293637847, "grad_norm": 0.017724091187119484, "learning_rate": 0.0002292942935627645, "loss": 0.0038, "num_input_tokens_seen": 189062096, "step": 87525 }, { "epoch": 14.278955954323001, "grad_norm": 0.000525585375726223, "learning_rate": 0.00022923445131478866, "loss": 0.1287, "num_input_tokens_seen": 189072656, "step": 87530 }, { "epoch": 14.279771615008157, "grad_norm": 0.1081756129860878, "learning_rate": 0.00022917461455421984, "loss": 0.0079, "num_input_tokens_seen": 189081904, "step": 87535 }, { "epoch": 14.280587275693312, "grad_norm": 0.043451886624097824, "learning_rate": 0.00022911478328227136, "loss": 0.1889, "num_input_tokens_seen": 189092176, "step": 87540 }, { "epoch": 14.281402936378466, "grad_norm": 0.005053219385445118, "learning_rate": 0.00022905495750015508, "loss": 0.0105, "num_input_tokens_seen": 189102096, "step": 87545 }, { "epoch": 14.282218597063622, "grad_norm": 0.1774892508983612, "learning_rate": 0.000228995137209084, "loss": 0.0058, "num_input_tokens_seen": 189113808, "step": 87550 }, { "epoch": 14.283034257748776, "grad_norm": 0.0033074861858040094, "learning_rate": 0.00022893532241027026, "loss": 0.0021, "num_input_tokens_seen": 189123312, "step": 87555 }, { "epoch": 14.283849918433932, "grad_norm": 0.0016520784702152014, "learning_rate": 0.00022887551310492605, "loss": 0.0038, "num_input_tokens_seen": 189135184, "step": 87560 }, { "epoch": 14.284665579119087, "grad_norm": 0.008472919464111328, "learning_rate": 0.00022881570929426354, "loss": 0.0043, "num_input_tokens_seen": 189145936, "step": 87565 }, { "epoch": 14.285481239804241, "grad_norm": 0.8978723287582397, "learning_rate": 0.00022875591097949472, "loss": 0.118, "num_input_tokens_seen": 189156304, "step": 87570 }, { "epoch": 14.286296900489397, "grad_norm": 0.02543191984295845, "learning_rate": 0.00022869611816183144, "loss": 0.0061, "num_input_tokens_seen": 189166448, "step": 87575 }, { "epoch": 14.28711256117455, "grad_norm": 0.0005691770347766578, "learning_rate": 0.00022863633084248549, "loss": 0.0024, "num_input_tokens_seen": 189177456, "step": 87580 }, { "epoch": 14.287928221859707, "grad_norm": 0.008937807753682137, "learning_rate": 0.00022857654902266856, "loss": 0.004, "num_input_tokens_seen": 189187248, "step": 87585 }, { "epoch": 14.28874388254486, "grad_norm": 0.0018283298704773188, "learning_rate": 0.00022851677270359217, "loss": 0.0235, "num_input_tokens_seen": 189197776, "step": 87590 }, { "epoch": 14.289559543230016, "grad_norm": 0.01428473275154829, "learning_rate": 0.0002284570018864678, "loss": 0.0025, "num_input_tokens_seen": 189208624, "step": 87595 }, { "epoch": 14.290375203915172, "grad_norm": 0.0034628030844032764, "learning_rate": 0.0002283972365725066, "loss": 0.0021, "num_input_tokens_seen": 189219568, "step": 87600 }, { "epoch": 14.291190864600326, "grad_norm": 0.012105366215109825, "learning_rate": 0.00022833747676292027, "loss": 0.0037, "num_input_tokens_seen": 189231120, "step": 87605 }, { "epoch": 14.292006525285482, "grad_norm": 0.3718734085559845, "learning_rate": 0.00022827772245891925, "loss": 0.0146, "num_input_tokens_seen": 189241904, "step": 87610 }, { "epoch": 14.292822185970635, "grad_norm": 0.08666027337312698, "learning_rate": 0.00022821797366171531, "loss": 0.0051, "num_input_tokens_seen": 189252336, "step": 87615 }, { "epoch": 14.293637846655791, "grad_norm": 0.01287077460438013, "learning_rate": 0.00022815823037251849, "loss": 0.0039, "num_input_tokens_seen": 189263184, "step": 87620 }, { "epoch": 14.294453507340947, "grad_norm": 0.0899038165807724, "learning_rate": 0.00022809849259254034, "loss": 0.0173, "num_input_tokens_seen": 189274384, "step": 87625 }, { "epoch": 14.2952691680261, "grad_norm": 0.03881601616740227, "learning_rate": 0.00022803876032299086, "loss": 0.0043, "num_input_tokens_seen": 189285648, "step": 87630 }, { "epoch": 14.296084828711257, "grad_norm": 0.04519505426287651, "learning_rate": 0.00022797903356508125, "loss": 0.0041, "num_input_tokens_seen": 189295920, "step": 87635 }, { "epoch": 14.29690048939641, "grad_norm": 0.003327068639919162, "learning_rate": 0.00022791931232002123, "loss": 0.0015, "num_input_tokens_seen": 189307280, "step": 87640 }, { "epoch": 14.297716150081566, "grad_norm": 0.04209542274475098, "learning_rate": 0.00022785959658902188, "loss": 0.0064, "num_input_tokens_seen": 189319568, "step": 87645 }, { "epoch": 14.298531810766722, "grad_norm": 0.0016900094924494624, "learning_rate": 0.00022779988637329263, "loss": 0.0014, "num_input_tokens_seen": 189329616, "step": 87650 }, { "epoch": 14.299347471451876, "grad_norm": 0.0013236630475148559, "learning_rate": 0.00022774018167404442, "loss": 0.0012, "num_input_tokens_seen": 189340496, "step": 87655 }, { "epoch": 14.300163132137031, "grad_norm": 0.022487998008728027, "learning_rate": 0.00022768048249248646, "loss": 0.0041, "num_input_tokens_seen": 189350416, "step": 87660 }, { "epoch": 14.300978792822185, "grad_norm": 0.0019900943152606487, "learning_rate": 0.00022762078882982928, "loss": 0.001, "num_input_tokens_seen": 189361840, "step": 87665 }, { "epoch": 14.301794453507341, "grad_norm": 0.007686574477702379, "learning_rate": 0.00022756110068728204, "loss": 0.0024, "num_input_tokens_seen": 189372880, "step": 87670 }, { "epoch": 14.302610114192497, "grad_norm": 0.00679362565279007, "learning_rate": 0.00022750141806605507, "loss": 0.0012, "num_input_tokens_seen": 189384944, "step": 87675 }, { "epoch": 14.30342577487765, "grad_norm": 0.028075747191905975, "learning_rate": 0.00022744174096735715, "loss": 0.0032, "num_input_tokens_seen": 189395824, "step": 87680 }, { "epoch": 14.304241435562806, "grad_norm": 1.1675291061401367, "learning_rate": 0.00022738206939239852, "loss": 0.019, "num_input_tokens_seen": 189406352, "step": 87685 }, { "epoch": 14.30505709624796, "grad_norm": 0.002868425566703081, "learning_rate": 0.0002273224033423877, "loss": 0.002, "num_input_tokens_seen": 189417584, "step": 87690 }, { "epoch": 14.305872756933116, "grad_norm": 0.21164196729660034, "learning_rate": 0.0002272627428185345, "loss": 0.0145, "num_input_tokens_seen": 189428944, "step": 87695 }, { "epoch": 14.30668841761827, "grad_norm": 0.042567916214466095, "learning_rate": 0.0002272030878220478, "loss": 0.0016, "num_input_tokens_seen": 189439632, "step": 87700 }, { "epoch": 14.307504078303426, "grad_norm": 0.058382321149110794, "learning_rate": 0.0002271434383541366, "loss": 0.0057, "num_input_tokens_seen": 189450672, "step": 87705 }, { "epoch": 14.308319738988581, "grad_norm": 0.01143584307283163, "learning_rate": 0.00022708379441600975, "loss": 0.0105, "num_input_tokens_seen": 189461200, "step": 87710 }, { "epoch": 14.309135399673735, "grad_norm": 0.0020317668095231056, "learning_rate": 0.000227024156008876, "loss": 0.0009, "num_input_tokens_seen": 189470480, "step": 87715 }, { "epoch": 14.309951060358891, "grad_norm": 0.006676700431853533, "learning_rate": 0.00022696452313394406, "loss": 0.0037, "num_input_tokens_seen": 189481520, "step": 87720 }, { "epoch": 14.310766721044045, "grad_norm": 0.0036642735358327627, "learning_rate": 0.0002269048957924224, "loss": 0.004, "num_input_tokens_seen": 189491280, "step": 87725 }, { "epoch": 14.3115823817292, "grad_norm": 0.0023649828508496284, "learning_rate": 0.0002268452739855195, "loss": 0.0078, "num_input_tokens_seen": 189501648, "step": 87730 }, { "epoch": 14.312398042414356, "grad_norm": 0.09851660579442978, "learning_rate": 0.00022678565771444364, "loss": 0.0031, "num_input_tokens_seen": 189513616, "step": 87735 }, { "epoch": 14.31321370309951, "grad_norm": 0.011403873562812805, "learning_rate": 0.00022672604698040306, "loss": 0.0149, "num_input_tokens_seen": 189523984, "step": 87740 }, { "epoch": 14.314029363784666, "grad_norm": 0.7276797294616699, "learning_rate": 0.00022666644178460555, "loss": 0.018, "num_input_tokens_seen": 189535088, "step": 87745 }, { "epoch": 14.31484502446982, "grad_norm": 0.0041549173183739185, "learning_rate": 0.00022660684212825978, "loss": 0.0015, "num_input_tokens_seen": 189546800, "step": 87750 }, { "epoch": 14.315660685154976, "grad_norm": 0.003960830625146627, "learning_rate": 0.00022654724801257276, "loss": 0.1172, "num_input_tokens_seen": 189558192, "step": 87755 }, { "epoch": 14.31647634584013, "grad_norm": 0.0012428623158484697, "learning_rate": 0.00022648765943875305, "loss": 0.002, "num_input_tokens_seen": 189568720, "step": 87760 }, { "epoch": 14.317292006525285, "grad_norm": 0.09902480244636536, "learning_rate": 0.00022642807640800756, "loss": 0.0084, "num_input_tokens_seen": 189579216, "step": 87765 }, { "epoch": 14.318107667210441, "grad_norm": 0.005643976386636496, "learning_rate": 0.0002263684989215445, "loss": 0.015, "num_input_tokens_seen": 189589648, "step": 87770 }, { "epoch": 14.318923327895595, "grad_norm": 0.015322118066251278, "learning_rate": 0.00022630892698057055, "loss": 0.0019, "num_input_tokens_seen": 189600592, "step": 87775 }, { "epoch": 14.31973898858075, "grad_norm": 0.29420289397239685, "learning_rate": 0.00022624936058629374, "loss": 0.0063, "num_input_tokens_seen": 189612176, "step": 87780 }, { "epoch": 14.320554649265905, "grad_norm": 0.004782841540873051, "learning_rate": 0.00022618979973992054, "loss": 0.0041, "num_input_tokens_seen": 189623600, "step": 87785 }, { "epoch": 14.32137030995106, "grad_norm": 0.00045195547863841057, "learning_rate": 0.00022613024444265883, "loss": 0.0041, "num_input_tokens_seen": 189633360, "step": 87790 }, { "epoch": 14.322185970636216, "grad_norm": 0.005995205603539944, "learning_rate": 0.00022607069469571473, "loss": 0.001, "num_input_tokens_seen": 189642288, "step": 87795 }, { "epoch": 14.32300163132137, "grad_norm": 0.009188726544380188, "learning_rate": 0.00022601115050029574, "loss": 0.0234, "num_input_tokens_seen": 189653040, "step": 87800 }, { "epoch": 14.323817292006526, "grad_norm": 0.0009074592380784452, "learning_rate": 0.0002259516118576083, "loss": 0.0017, "num_input_tokens_seen": 189663440, "step": 87805 }, { "epoch": 14.32463295269168, "grad_norm": 0.0021508075296878815, "learning_rate": 0.00022589207876885914, "loss": 0.0214, "num_input_tokens_seen": 189673392, "step": 87810 }, { "epoch": 14.325448613376835, "grad_norm": 0.033135004341602325, "learning_rate": 0.00022583255123525476, "loss": 0.1537, "num_input_tokens_seen": 189684144, "step": 87815 }, { "epoch": 14.326264274061991, "grad_norm": 0.003989869728684425, "learning_rate": 0.00022577302925800153, "loss": 0.0013, "num_input_tokens_seen": 189695440, "step": 87820 }, { "epoch": 14.327079934747145, "grad_norm": 0.0038525178097188473, "learning_rate": 0.0002257135128383057, "loss": 0.0013, "num_input_tokens_seen": 189705936, "step": 87825 }, { "epoch": 14.3278955954323, "grad_norm": 0.009815668687224388, "learning_rate": 0.00022565400197737352, "loss": 0.0371, "num_input_tokens_seen": 189716848, "step": 87830 }, { "epoch": 14.328711256117455, "grad_norm": 0.03508485481142998, "learning_rate": 0.000225594496676411, "loss": 0.0054, "num_input_tokens_seen": 189727088, "step": 87835 }, { "epoch": 14.32952691680261, "grad_norm": 0.0028070693369954824, "learning_rate": 0.0002255349969366241, "loss": 0.0021, "num_input_tokens_seen": 189738608, "step": 87840 }, { "epoch": 14.330342577487766, "grad_norm": 0.03543877974152565, "learning_rate": 0.0002254755027592187, "loss": 0.0081, "num_input_tokens_seen": 189749712, "step": 87845 }, { "epoch": 14.33115823817292, "grad_norm": 0.1265224665403366, "learning_rate": 0.00022541601414540052, "loss": 0.0086, "num_input_tokens_seen": 189760624, "step": 87850 }, { "epoch": 14.331973898858076, "grad_norm": 0.002698419615626335, "learning_rate": 0.00022535653109637512, "loss": 0.0019, "num_input_tokens_seen": 189770224, "step": 87855 }, { "epoch": 14.33278955954323, "grad_norm": 0.0011117426911368966, "learning_rate": 0.000225297053613348, "loss": 0.0014, "num_input_tokens_seen": 189780752, "step": 87860 }, { "epoch": 14.333605220228385, "grad_norm": 0.0030760967638343573, "learning_rate": 0.0002252375816975246, "loss": 0.0986, "num_input_tokens_seen": 189792240, "step": 87865 }, { "epoch": 14.33442088091354, "grad_norm": 0.3648514747619629, "learning_rate": 0.0002251781153501102, "loss": 0.0044, "num_input_tokens_seen": 189803120, "step": 87870 }, { "epoch": 14.335236541598695, "grad_norm": 0.006983945611864328, "learning_rate": 0.0002251186545723099, "loss": 0.0031, "num_input_tokens_seen": 189814416, "step": 87875 }, { "epoch": 14.33605220228385, "grad_norm": 0.01047761645168066, "learning_rate": 0.00022505919936532877, "loss": 0.0088, "num_input_tokens_seen": 189824016, "step": 87880 }, { "epoch": 14.336867862969005, "grad_norm": 0.016600316390395164, "learning_rate": 0.00022499974973037173, "loss": 0.0142, "num_input_tokens_seen": 189834416, "step": 87885 }, { "epoch": 14.33768352365416, "grad_norm": 0.0008033128106035292, "learning_rate": 0.0002249403056686435, "loss": 0.0016, "num_input_tokens_seen": 189844496, "step": 87890 }, { "epoch": 14.338499184339314, "grad_norm": 0.48811638355255127, "learning_rate": 0.0002248808671813492, "loss": 0.1388, "num_input_tokens_seen": 189855792, "step": 87895 }, { "epoch": 14.33931484502447, "grad_norm": 0.008555108681321144, "learning_rate": 0.00022482143426969282, "loss": 0.0044, "num_input_tokens_seen": 189867376, "step": 87900 }, { "epoch": 14.340130505709626, "grad_norm": 0.001388808828778565, "learning_rate": 0.00022476200693487936, "loss": 0.0041, "num_input_tokens_seen": 189878768, "step": 87905 }, { "epoch": 14.34094616639478, "grad_norm": 0.0068275779485702515, "learning_rate": 0.000224702585178113, "loss": 0.0032, "num_input_tokens_seen": 189889744, "step": 87910 }, { "epoch": 14.341761827079935, "grad_norm": 0.005051123443990946, "learning_rate": 0.00022464316900059795, "loss": 0.0016, "num_input_tokens_seen": 189900752, "step": 87915 }, { "epoch": 14.34257748776509, "grad_norm": 0.00225257920101285, "learning_rate": 0.0002245837584035384, "loss": 0.0038, "num_input_tokens_seen": 189910000, "step": 87920 }, { "epoch": 14.343393148450245, "grad_norm": 0.0017936478834599257, "learning_rate": 0.00022452435338813842, "loss": 0.1515, "num_input_tokens_seen": 189920240, "step": 87925 }, { "epoch": 14.3442088091354, "grad_norm": 0.0054099103435873985, "learning_rate": 0.00022446495395560186, "loss": 0.0035, "num_input_tokens_seen": 189930704, "step": 87930 }, { "epoch": 14.345024469820554, "grad_norm": 0.003530829679220915, "learning_rate": 0.00022440556010713253, "loss": 0.0072, "num_input_tokens_seen": 189941456, "step": 87935 }, { "epoch": 14.34584013050571, "grad_norm": 0.008934364654123783, "learning_rate": 0.00022434617184393418, "loss": 0.0069, "num_input_tokens_seen": 189952688, "step": 87940 }, { "epoch": 14.346655791190864, "grad_norm": 0.002388365101069212, "learning_rate": 0.00022428678916721029, "loss": 0.0331, "num_input_tokens_seen": 189963984, "step": 87945 }, { "epoch": 14.34747145187602, "grad_norm": 0.026174485683441162, "learning_rate": 0.00022422741207816444, "loss": 0.002, "num_input_tokens_seen": 189975024, "step": 87950 }, { "epoch": 14.348287112561174, "grad_norm": 0.00028221847605891526, "learning_rate": 0.00022416804057799988, "loss": 0.0013, "num_input_tokens_seen": 189986544, "step": 87955 }, { "epoch": 14.34910277324633, "grad_norm": 0.07184788584709167, "learning_rate": 0.00022410867466791996, "loss": 0.0021, "num_input_tokens_seen": 189997840, "step": 87960 }, { "epoch": 14.349918433931485, "grad_norm": 0.0035779913887381554, "learning_rate": 0.00022404931434912768, "loss": 0.0014, "num_input_tokens_seen": 190009360, "step": 87965 }, { "epoch": 14.350734094616639, "grad_norm": 0.02154787816107273, "learning_rate": 0.00022398995962282615, "loss": 0.0014, "num_input_tokens_seen": 190021328, "step": 87970 }, { "epoch": 14.351549755301795, "grad_norm": 0.05506477877497673, "learning_rate": 0.00022393061049021823, "loss": 0.0028, "num_input_tokens_seen": 190030256, "step": 87975 }, { "epoch": 14.352365415986949, "grad_norm": 0.0024936876725405455, "learning_rate": 0.0002238712669525067, "loss": 0.0014, "num_input_tokens_seen": 190040400, "step": 87980 }, { "epoch": 14.353181076672104, "grad_norm": 0.05352950841188431, "learning_rate": 0.0002238119290108942, "loss": 0.0029, "num_input_tokens_seen": 190051888, "step": 87985 }, { "epoch": 14.35399673735726, "grad_norm": 0.0007369217346422374, "learning_rate": 0.00022375259666658338, "loss": 0.0014, "num_input_tokens_seen": 190062576, "step": 87990 }, { "epoch": 14.354812398042414, "grad_norm": 0.0017139033880084753, "learning_rate": 0.0002236932699207766, "loss": 0.0022, "num_input_tokens_seen": 190073776, "step": 87995 }, { "epoch": 14.35562805872757, "grad_norm": 0.4416674077510834, "learning_rate": 0.00022363394877467625, "loss": 0.0075, "num_input_tokens_seen": 190085168, "step": 88000 }, { "epoch": 14.356443719412724, "grad_norm": 0.004349089693278074, "learning_rate": 0.0002235746332294845, "loss": 0.0022, "num_input_tokens_seen": 190096080, "step": 88005 }, { "epoch": 14.35725938009788, "grad_norm": 0.002140692202374339, "learning_rate": 0.00022351532328640335, "loss": 0.0021, "num_input_tokens_seen": 190107056, "step": 88010 }, { "epoch": 14.358075040783035, "grad_norm": 0.0060596526600420475, "learning_rate": 0.0002234560189466352, "loss": 0.0165, "num_input_tokens_seen": 190117968, "step": 88015 }, { "epoch": 14.358890701468189, "grad_norm": 0.004749086685478687, "learning_rate": 0.00022339672021138136, "loss": 0.0015, "num_input_tokens_seen": 190128112, "step": 88020 }, { "epoch": 14.359706362153345, "grad_norm": 0.005048302933573723, "learning_rate": 0.00022333742708184417, "loss": 0.0026, "num_input_tokens_seen": 190139120, "step": 88025 }, { "epoch": 14.360522022838499, "grad_norm": 0.04125377535820007, "learning_rate": 0.0002232781395592247, "loss": 0.0066, "num_input_tokens_seen": 190150256, "step": 88030 }, { "epoch": 14.361337683523654, "grad_norm": 0.001568611478433013, "learning_rate": 0.00022321885764472495, "loss": 0.0016, "num_input_tokens_seen": 190161904, "step": 88035 }, { "epoch": 14.362153344208808, "grad_norm": 0.014550375752151012, "learning_rate": 0.00022315958133954612, "loss": 0.0022, "num_input_tokens_seen": 190173104, "step": 88040 }, { "epoch": 14.362969004893964, "grad_norm": 0.005815689451992512, "learning_rate": 0.00022310031064488962, "loss": 0.0011, "num_input_tokens_seen": 190185200, "step": 88045 }, { "epoch": 14.36378466557912, "grad_norm": 0.0417536124587059, "learning_rate": 0.0002230410455619566, "loss": 0.0017, "num_input_tokens_seen": 190195664, "step": 88050 }, { "epoch": 14.364600326264274, "grad_norm": 0.0010946786496788263, "learning_rate": 0.00022298178609194807, "loss": 0.0033, "num_input_tokens_seen": 190207376, "step": 88055 }, { "epoch": 14.36541598694943, "grad_norm": 0.008044305257499218, "learning_rate": 0.00022292253223606513, "loss": 0.0018, "num_input_tokens_seen": 190217264, "step": 88060 }, { "epoch": 14.366231647634583, "grad_norm": 0.0005770522402599454, "learning_rate": 0.0002228632839955086, "loss": 0.0031, "num_input_tokens_seen": 190227600, "step": 88065 }, { "epoch": 14.367047308319739, "grad_norm": 0.030013680458068848, "learning_rate": 0.00022280404137147914, "loss": 0.0032, "num_input_tokens_seen": 190237584, "step": 88070 }, { "epoch": 14.367862969004895, "grad_norm": 0.002448596293106675, "learning_rate": 0.00022274480436517742, "loss": 0.0007, "num_input_tokens_seen": 190249040, "step": 88075 }, { "epoch": 14.368678629690049, "grad_norm": 0.010528423823416233, "learning_rate": 0.00022268557297780396, "loss": 0.0011, "num_input_tokens_seen": 190260240, "step": 88080 }, { "epoch": 14.369494290375204, "grad_norm": 0.0026562747079879045, "learning_rate": 0.00022262634721055918, "loss": 0.123, "num_input_tokens_seen": 190271856, "step": 88085 }, { "epoch": 14.370309951060358, "grad_norm": 0.044105686247348785, "learning_rate": 0.00022256712706464338, "loss": 0.0215, "num_input_tokens_seen": 190282832, "step": 88090 }, { "epoch": 14.371125611745514, "grad_norm": 0.03261735662817955, "learning_rate": 0.0002225079125412567, "loss": 0.0019, "num_input_tokens_seen": 190293296, "step": 88095 }, { "epoch": 14.37194127243067, "grad_norm": 0.004019154701381922, "learning_rate": 0.00022244870364159912, "loss": 0.1147, "num_input_tokens_seen": 190304752, "step": 88100 }, { "epoch": 14.372756933115824, "grad_norm": 0.00436245184391737, "learning_rate": 0.00022238950036687071, "loss": 0.0023, "num_input_tokens_seen": 190315280, "step": 88105 }, { "epoch": 14.37357259380098, "grad_norm": 0.002692397916689515, "learning_rate": 0.00022233030271827126, "loss": 0.0022, "num_input_tokens_seen": 190324880, "step": 88110 }, { "epoch": 14.374388254486133, "grad_norm": 0.17537789046764374, "learning_rate": 0.0002222711106970003, "loss": 0.1043, "num_input_tokens_seen": 190335792, "step": 88115 }, { "epoch": 14.375203915171289, "grad_norm": 0.02732802927494049, "learning_rate": 0.0002222119243042579, "loss": 0.0048, "num_input_tokens_seen": 190345936, "step": 88120 }, { "epoch": 14.376019575856443, "grad_norm": 0.0324014350771904, "learning_rate": 0.00022215274354124294, "loss": 0.0205, "num_input_tokens_seen": 190357488, "step": 88125 }, { "epoch": 14.376835236541599, "grad_norm": 0.0018137918086722493, "learning_rate": 0.00022209356840915552, "loss": 0.0031, "num_input_tokens_seen": 190368688, "step": 88130 }, { "epoch": 14.377650897226754, "grad_norm": 0.02366684377193451, "learning_rate": 0.00022203439890919403, "loss": 0.0018, "num_input_tokens_seen": 190379504, "step": 88135 }, { "epoch": 14.378466557911908, "grad_norm": 0.0013595132622867823, "learning_rate": 0.00022197523504255846, "loss": 0.0012, "num_input_tokens_seen": 190390256, "step": 88140 }, { "epoch": 14.379282218597064, "grad_norm": 0.0014586466131731868, "learning_rate": 0.00022191607681044712, "loss": 0.0029, "num_input_tokens_seen": 190400016, "step": 88145 }, { "epoch": 14.380097879282218, "grad_norm": 0.5295431017875671, "learning_rate": 0.00022185692421405962, "loss": 0.0821, "num_input_tokens_seen": 190410544, "step": 88150 }, { "epoch": 14.380913539967374, "grad_norm": 0.024179702624678612, "learning_rate": 0.000221797777254594, "loss": 0.0041, "num_input_tokens_seen": 190422032, "step": 88155 }, { "epoch": 14.38172920065253, "grad_norm": 0.05698935687541962, "learning_rate": 0.00022173863593324971, "loss": 0.0029, "num_input_tokens_seen": 190433392, "step": 88160 }, { "epoch": 14.382544861337683, "grad_norm": 0.002039377810433507, "learning_rate": 0.00022167950025122463, "loss": 0.0046, "num_input_tokens_seen": 190443760, "step": 88165 }, { "epoch": 14.383360522022839, "grad_norm": 0.09846755862236023, "learning_rate": 0.00022162037020971793, "loss": 0.0165, "num_input_tokens_seen": 190454288, "step": 88170 }, { "epoch": 14.384176182707993, "grad_norm": 0.002468029735609889, "learning_rate": 0.00022156124580992716, "loss": 0.0023, "num_input_tokens_seen": 190465168, "step": 88175 }, { "epoch": 14.384991843393149, "grad_norm": 0.0019818132277578115, "learning_rate": 0.00022150212705305118, "loss": 0.0026, "num_input_tokens_seen": 190476720, "step": 88180 }, { "epoch": 14.385807504078304, "grad_norm": 0.004764073994010687, "learning_rate": 0.00022144301394028793, "loss": 0.0399, "num_input_tokens_seen": 190488048, "step": 88185 }, { "epoch": 14.386623164763458, "grad_norm": 0.005842797923833132, "learning_rate": 0.0002213839064728353, "loss": 0.001, "num_input_tokens_seen": 190497488, "step": 88190 }, { "epoch": 14.387438825448614, "grad_norm": 0.06038953736424446, "learning_rate": 0.0002213248046518913, "loss": 0.0592, "num_input_tokens_seen": 190506640, "step": 88195 }, { "epoch": 14.388254486133768, "grad_norm": 0.0026502537075430155, "learning_rate": 0.00022126570847865368, "loss": 0.002, "num_input_tokens_seen": 190516656, "step": 88200 }, { "epoch": 14.389070146818923, "grad_norm": 0.04889017343521118, "learning_rate": 0.00022120661795432, "loss": 0.0083, "num_input_tokens_seen": 190528464, "step": 88205 }, { "epoch": 14.38988580750408, "grad_norm": 0.030095215886831284, "learning_rate": 0.00022114753308008795, "loss": 0.0019, "num_input_tokens_seen": 190539536, "step": 88210 }, { "epoch": 14.390701468189233, "grad_norm": 0.6848757863044739, "learning_rate": 0.00022108845385715488, "loss": 0.0973, "num_input_tokens_seen": 190551376, "step": 88215 }, { "epoch": 14.391517128874389, "grad_norm": 0.3863895833492279, "learning_rate": 0.00022102938028671816, "loss": 0.0152, "num_input_tokens_seen": 190562320, "step": 88220 }, { "epoch": 14.392332789559543, "grad_norm": 0.0029419090133160353, "learning_rate": 0.00022097031236997488, "loss": 0.0058, "num_input_tokens_seen": 190572432, "step": 88225 }, { "epoch": 14.393148450244698, "grad_norm": 0.001146205817349255, "learning_rate": 0.00022091125010812202, "loss": 0.001, "num_input_tokens_seen": 190583184, "step": 88230 }, { "epoch": 14.393964110929852, "grad_norm": 0.0018329378217458725, "learning_rate": 0.00022085219350235707, "loss": 0.0012, "num_input_tokens_seen": 190593712, "step": 88235 }, { "epoch": 14.394779771615008, "grad_norm": 0.0034815967082977295, "learning_rate": 0.00022079314255387623, "loss": 0.0024, "num_input_tokens_seen": 190604464, "step": 88240 }, { "epoch": 14.395595432300164, "grad_norm": 0.048917580395936966, "learning_rate": 0.00022073409726387688, "loss": 0.0023, "num_input_tokens_seen": 190615952, "step": 88245 }, { "epoch": 14.396411092985318, "grad_norm": 0.00788689311593771, "learning_rate": 0.000220675057633555, "loss": 0.002, "num_input_tokens_seen": 190627216, "step": 88250 }, { "epoch": 14.397226753670473, "grad_norm": 0.003668892662972212, "learning_rate": 0.00022061602366410776, "loss": 0.0803, "num_input_tokens_seen": 190637968, "step": 88255 }, { "epoch": 14.398042414355627, "grad_norm": 0.0010389218805357814, "learning_rate": 0.0002205569953567309, "loss": 0.0042, "num_input_tokens_seen": 190649168, "step": 88260 }, { "epoch": 14.398858075040783, "grad_norm": 0.2328641712665558, "learning_rate": 0.00022049797271262133, "loss": 0.0175, "num_input_tokens_seen": 190660240, "step": 88265 }, { "epoch": 14.399673735725939, "grad_norm": 0.14842480421066284, "learning_rate": 0.00022043895573297463, "loss": 0.0109, "num_input_tokens_seen": 190670640, "step": 88270 }, { "epoch": 14.400489396411093, "grad_norm": 0.02398735098540783, "learning_rate": 0.0002203799444189874, "loss": 0.0017, "num_input_tokens_seen": 190679984, "step": 88275 }, { "epoch": 14.401305057096248, "grad_norm": 0.0224875770509243, "learning_rate": 0.00022032093877185504, "loss": 0.0029, "num_input_tokens_seen": 190690608, "step": 88280 }, { "epoch": 14.402120717781402, "grad_norm": 0.020505795255303383, "learning_rate": 0.000220261938792774, "loss": 0.0039, "num_input_tokens_seen": 190701840, "step": 88285 }, { "epoch": 14.402936378466558, "grad_norm": 0.0015917181735858321, "learning_rate": 0.00022020294448293925, "loss": 0.0042, "num_input_tokens_seen": 190714096, "step": 88290 }, { "epoch": 14.403752039151712, "grad_norm": 0.003339561866596341, "learning_rate": 0.00022014395584354717, "loss": 0.0016, "num_input_tokens_seen": 190725296, "step": 88295 }, { "epoch": 14.404567699836868, "grad_norm": 0.007421552669256926, "learning_rate": 0.0002200849728757925, "loss": 0.0087, "num_input_tokens_seen": 190736016, "step": 88300 }, { "epoch": 14.405383360522023, "grad_norm": 0.0025911256670951843, "learning_rate": 0.00022002599558087126, "loss": 0.0018, "num_input_tokens_seen": 190748304, "step": 88305 }, { "epoch": 14.406199021207177, "grad_norm": 0.007793547119945288, "learning_rate": 0.00021996702395997807, "loss": 0.002, "num_input_tokens_seen": 190759824, "step": 88310 }, { "epoch": 14.407014681892333, "grad_norm": 0.035313066095113754, "learning_rate": 0.00021990805801430874, "loss": 0.0045, "num_input_tokens_seen": 190771056, "step": 88315 }, { "epoch": 14.407830342577487, "grad_norm": 0.0019238482927903533, "learning_rate": 0.00021984909774505756, "loss": 0.0063, "num_input_tokens_seen": 190782608, "step": 88320 }, { "epoch": 14.408646003262643, "grad_norm": 0.019743841141462326, "learning_rate": 0.00021979014315342, "loss": 0.0033, "num_input_tokens_seen": 190794352, "step": 88325 }, { "epoch": 14.409461663947798, "grad_norm": 0.0016499466728419065, "learning_rate": 0.00021973119424059068, "loss": 0.0689, "num_input_tokens_seen": 190805360, "step": 88330 }, { "epoch": 14.410277324632952, "grad_norm": 0.008232859894633293, "learning_rate": 0.00021967225100776424, "loss": 0.0011, "num_input_tokens_seen": 190815408, "step": 88335 }, { "epoch": 14.411092985318108, "grad_norm": 0.3779599070549011, "learning_rate": 0.00021961331345613522, "loss": 0.0097, "num_input_tokens_seen": 190826576, "step": 88340 }, { "epoch": 14.411908646003262, "grad_norm": 0.0002817259228322655, "learning_rate": 0.00021955438158689818, "loss": 0.0024, "num_input_tokens_seen": 190836688, "step": 88345 }, { "epoch": 14.412724306688418, "grad_norm": 0.5337850451469421, "learning_rate": 0.00021949545540124734, "loss": 0.0211, "num_input_tokens_seen": 190846800, "step": 88350 }, { "epoch": 14.413539967373573, "grad_norm": 0.08282860368490219, "learning_rate": 0.0002194365349003769, "loss": 0.1119, "num_input_tokens_seen": 190856240, "step": 88355 }, { "epoch": 14.414355628058727, "grad_norm": 0.006659589242190123, "learning_rate": 0.00021937762008548102, "loss": 0.0019, "num_input_tokens_seen": 190867024, "step": 88360 }, { "epoch": 14.415171288743883, "grad_norm": 0.005563710350543261, "learning_rate": 0.00021931871095775364, "loss": 0.0027, "num_input_tokens_seen": 190877200, "step": 88365 }, { "epoch": 14.415986949429037, "grad_norm": 0.00834878534078598, "learning_rate": 0.0002192598075183887, "loss": 0.0017, "num_input_tokens_seen": 190887536, "step": 88370 }, { "epoch": 14.416802610114193, "grad_norm": 0.37227877974510193, "learning_rate": 0.00021920090976857971, "loss": 0.0896, "num_input_tokens_seen": 190897872, "step": 88375 }, { "epoch": 14.417618270799348, "grad_norm": 0.00478452630341053, "learning_rate": 0.00021914201770952086, "loss": 0.0009, "num_input_tokens_seen": 190909072, "step": 88380 }, { "epoch": 14.418433931484502, "grad_norm": 0.6292222738265991, "learning_rate": 0.00021908313134240493, "loss": 0.0597, "num_input_tokens_seen": 190920080, "step": 88385 }, { "epoch": 14.419249592169658, "grad_norm": 0.0013072388246655464, "learning_rate": 0.00021902425066842608, "loss": 0.0168, "num_input_tokens_seen": 190931600, "step": 88390 }, { "epoch": 14.420065252854812, "grad_norm": 0.0008924621506594121, "learning_rate": 0.00021896537568877688, "loss": 0.0548, "num_input_tokens_seen": 190943312, "step": 88395 }, { "epoch": 14.420880913539968, "grad_norm": 0.004021234344691038, "learning_rate": 0.00021890650640465125, "loss": 0.0025, "num_input_tokens_seen": 190954064, "step": 88400 }, { "epoch": 14.421696574225122, "grad_norm": 0.004071609117090702, "learning_rate": 0.00021884764281724145, "loss": 0.0019, "num_input_tokens_seen": 190964848, "step": 88405 }, { "epoch": 14.422512234910277, "grad_norm": 0.00245073065161705, "learning_rate": 0.00021878878492774125, "loss": 0.0021, "num_input_tokens_seen": 190975696, "step": 88410 }, { "epoch": 14.423327895595433, "grad_norm": 0.006399341858923435, "learning_rate": 0.00021872993273734266, "loss": 0.0071, "num_input_tokens_seen": 190986448, "step": 88415 }, { "epoch": 14.424143556280587, "grad_norm": 0.0016032133717089891, "learning_rate": 0.0002186710862472392, "loss": 0.0032, "num_input_tokens_seen": 190996912, "step": 88420 }, { "epoch": 14.424959216965743, "grad_norm": 0.0015737857902422547, "learning_rate": 0.00021861224545862264, "loss": 0.006, "num_input_tokens_seen": 191007440, "step": 88425 }, { "epoch": 14.425774877650896, "grad_norm": 0.08014027774333954, "learning_rate": 0.0002185534103726863, "loss": 0.0076, "num_input_tokens_seen": 191019344, "step": 88430 }, { "epoch": 14.426590538336052, "grad_norm": 0.48051971197128296, "learning_rate": 0.00021849458099062175, "loss": 0.105, "num_input_tokens_seen": 191030000, "step": 88435 }, { "epoch": 14.427406199021208, "grad_norm": 0.003187313210219145, "learning_rate": 0.00021843575731362187, "loss": 0.1245, "num_input_tokens_seen": 191040656, "step": 88440 }, { "epoch": 14.428221859706362, "grad_norm": 0.6462088823318481, "learning_rate": 0.0002183769393428785, "loss": 0.0935, "num_input_tokens_seen": 191052240, "step": 88445 }, { "epoch": 14.429037520391518, "grad_norm": 0.04802269861102104, "learning_rate": 0.00021831812707958376, "loss": 0.0054, "num_input_tokens_seen": 191063408, "step": 88450 }, { "epoch": 14.429853181076671, "grad_norm": 0.03946414217352867, "learning_rate": 0.00021825932052492946, "loss": 0.1331, "num_input_tokens_seen": 191074352, "step": 88455 }, { "epoch": 14.430668841761827, "grad_norm": 0.01283244974911213, "learning_rate": 0.0002182005196801075, "loss": 0.171, "num_input_tokens_seen": 191084496, "step": 88460 }, { "epoch": 14.431484502446983, "grad_norm": 0.11222032457590103, "learning_rate": 0.0002181417245463095, "loss": 0.0201, "num_input_tokens_seen": 191095696, "step": 88465 }, { "epoch": 14.432300163132137, "grad_norm": 0.013230902142822742, "learning_rate": 0.00021808293512472698, "loss": 0.0154, "num_input_tokens_seen": 191106480, "step": 88470 }, { "epoch": 14.433115823817293, "grad_norm": 0.004127667285501957, "learning_rate": 0.0002180241514165514, "loss": 0.0075, "num_input_tokens_seen": 191118416, "step": 88475 }, { "epoch": 14.433931484502446, "grad_norm": 0.08697032928466797, "learning_rate": 0.00021796537342297413, "loss": 0.0057, "num_input_tokens_seen": 191129232, "step": 88480 }, { "epoch": 14.434747145187602, "grad_norm": 0.08197087794542313, "learning_rate": 0.00021790660114518633, "loss": 0.0069, "num_input_tokens_seen": 191139184, "step": 88485 }, { "epoch": 14.435562805872756, "grad_norm": 0.37481895089149475, "learning_rate": 0.0002178478345843792, "loss": 0.011, "num_input_tokens_seen": 191149456, "step": 88490 }, { "epoch": 14.436378466557912, "grad_norm": 0.011596872471272945, "learning_rate": 0.00021778907374174356, "loss": 0.0041, "num_input_tokens_seen": 191160656, "step": 88495 }, { "epoch": 14.437194127243067, "grad_norm": 0.010168236680328846, "learning_rate": 0.00021773031861847036, "loss": 0.0052, "num_input_tokens_seen": 191172336, "step": 88500 }, { "epoch": 14.438009787928221, "grad_norm": 0.12285585701465607, "learning_rate": 0.0002176715692157503, "loss": 0.0087, "num_input_tokens_seen": 191184176, "step": 88505 }, { "epoch": 14.438825448613377, "grad_norm": 0.049924153834581375, "learning_rate": 0.00021761282553477412, "loss": 0.0053, "num_input_tokens_seen": 191194256, "step": 88510 }, { "epoch": 14.439641109298531, "grad_norm": 0.5651575326919556, "learning_rate": 0.00021755408757673228, "loss": 0.0149, "num_input_tokens_seen": 191205584, "step": 88515 }, { "epoch": 14.440456769983687, "grad_norm": 0.0005408431752584875, "learning_rate": 0.00021749535534281488, "loss": 0.0041, "num_input_tokens_seen": 191217104, "step": 88520 }, { "epoch": 14.441272430668842, "grad_norm": 0.007131251972168684, "learning_rate": 0.00021743662883421294, "loss": 0.0016, "num_input_tokens_seen": 191228176, "step": 88525 }, { "epoch": 14.442088091353996, "grad_norm": 0.0007430678233504295, "learning_rate": 0.00021737790805211578, "loss": 0.0025, "num_input_tokens_seen": 191237872, "step": 88530 }, { "epoch": 14.442903752039152, "grad_norm": 0.0031173175666481256, "learning_rate": 0.00021731919299771424, "loss": 0.007, "num_input_tokens_seen": 191249040, "step": 88535 }, { "epoch": 14.443719412724306, "grad_norm": 0.0021781879477202892, "learning_rate": 0.00021726048367219747, "loss": 0.0029, "num_input_tokens_seen": 191260848, "step": 88540 }, { "epoch": 14.444535073409462, "grad_norm": 0.0342792384326458, "learning_rate": 0.00021720178007675583, "loss": 0.0022, "num_input_tokens_seen": 191272304, "step": 88545 }, { "epoch": 14.445350734094617, "grad_norm": 0.14234837889671326, "learning_rate": 0.00021714308221257889, "loss": 0.0083, "num_input_tokens_seen": 191281904, "step": 88550 }, { "epoch": 14.446166394779771, "grad_norm": 0.03180849552154541, "learning_rate": 0.00021708439008085624, "loss": 0.0165, "num_input_tokens_seen": 191292304, "step": 88555 }, { "epoch": 14.446982055464927, "grad_norm": 0.020385757088661194, "learning_rate": 0.0002170257036827773, "loss": 0.0034, "num_input_tokens_seen": 191303696, "step": 88560 }, { "epoch": 14.447797716150081, "grad_norm": 0.0029727816581726074, "learning_rate": 0.00021696702301953147, "loss": 0.0619, "num_input_tokens_seen": 191313648, "step": 88565 }, { "epoch": 14.448613376835237, "grad_norm": 0.007808151189237833, "learning_rate": 0.00021690834809230797, "loss": 0.0053, "num_input_tokens_seen": 191323856, "step": 88570 }, { "epoch": 14.449429037520392, "grad_norm": 0.005912197753787041, "learning_rate": 0.00021684967890229595, "loss": 0.0058, "num_input_tokens_seen": 191334512, "step": 88575 }, { "epoch": 14.450244698205546, "grad_norm": 0.5385004878044128, "learning_rate": 0.00021679101545068436, "loss": 0.2575, "num_input_tokens_seen": 191345616, "step": 88580 }, { "epoch": 14.451060358890702, "grad_norm": 0.004685994237661362, "learning_rate": 0.00021673235773866212, "loss": 0.0026, "num_input_tokens_seen": 191356592, "step": 88585 }, { "epoch": 14.451876019575856, "grad_norm": 0.0021670262794941664, "learning_rate": 0.00021667370576741802, "loss": 0.0019, "num_input_tokens_seen": 191367792, "step": 88590 }, { "epoch": 14.452691680261012, "grad_norm": 0.02692868933081627, "learning_rate": 0.00021661505953814064, "loss": 0.0081, "num_input_tokens_seen": 191378928, "step": 88595 }, { "epoch": 14.453507340946166, "grad_norm": 0.005074275657534599, "learning_rate": 0.0002165564190520186, "loss": 0.0835, "num_input_tokens_seen": 191390224, "step": 88600 }, { "epoch": 14.454323001631321, "grad_norm": 0.024734511971473694, "learning_rate": 0.00021649778431024035, "loss": 0.0061, "num_input_tokens_seen": 191401232, "step": 88605 }, { "epoch": 14.455138662316477, "grad_norm": 0.0013288380578160286, "learning_rate": 0.0002164391553139941, "loss": 0.0907, "num_input_tokens_seen": 191411248, "step": 88610 }, { "epoch": 14.455954323001631, "grad_norm": 0.002668878063559532, "learning_rate": 0.00021638053206446813, "loss": 0.0023, "num_input_tokens_seen": 191421840, "step": 88615 }, { "epoch": 14.456769983686787, "grad_norm": 0.01918703131377697, "learning_rate": 0.00021632191456285045, "loss": 0.0232, "num_input_tokens_seen": 191432688, "step": 88620 }, { "epoch": 14.45758564437194, "grad_norm": 0.28013843297958374, "learning_rate": 0.00021626330281032902, "loss": 0.027, "num_input_tokens_seen": 191443312, "step": 88625 }, { "epoch": 14.458401305057096, "grad_norm": 0.026643967255949974, "learning_rate": 0.00021620469680809173, "loss": 0.0042, "num_input_tokens_seen": 191453104, "step": 88630 }, { "epoch": 14.459216965742252, "grad_norm": 0.0020155615638941526, "learning_rate": 0.0002161460965573263, "loss": 0.0111, "num_input_tokens_seen": 191464144, "step": 88635 }, { "epoch": 14.460032626427406, "grad_norm": 0.012268748134374619, "learning_rate": 0.0002160875020592203, "loss": 0.0107, "num_input_tokens_seen": 191475088, "step": 88640 }, { "epoch": 14.460848287112562, "grad_norm": 0.002203174866735935, "learning_rate": 0.00021602891331496123, "loss": 0.0217, "num_input_tokens_seen": 191485840, "step": 88645 }, { "epoch": 14.461663947797716, "grad_norm": 0.0011056186631321907, "learning_rate": 0.0002159703303257363, "loss": 0.0051, "num_input_tokens_seen": 191496720, "step": 88650 }, { "epoch": 14.462479608482871, "grad_norm": 0.05333087965846062, "learning_rate": 0.00021591175309273314, "loss": 0.0722, "num_input_tokens_seen": 191507728, "step": 88655 }, { "epoch": 14.463295269168025, "grad_norm": 0.013012475334107876, "learning_rate": 0.00021585318161713868, "loss": 0.0041, "num_input_tokens_seen": 191518096, "step": 88660 }, { "epoch": 14.464110929853181, "grad_norm": 0.013713795691728592, "learning_rate": 0.00021579461590013994, "loss": 0.0039, "num_input_tokens_seen": 191528560, "step": 88665 }, { "epoch": 14.464926590538337, "grad_norm": 0.0031545383390039206, "learning_rate": 0.0002157360559429239, "loss": 0.0022, "num_input_tokens_seen": 191539024, "step": 88670 }, { "epoch": 14.46574225122349, "grad_norm": 0.03223045915365219, "learning_rate": 0.00021567750174667722, "loss": 0.1562, "num_input_tokens_seen": 191550672, "step": 88675 }, { "epoch": 14.466557911908646, "grad_norm": 0.000889264396391809, "learning_rate": 0.00021561895331258674, "loss": 0.0017, "num_input_tokens_seen": 191561456, "step": 88680 }, { "epoch": 14.4673735725938, "grad_norm": 0.004097413271665573, "learning_rate": 0.0002155604106418389, "loss": 0.0053, "num_input_tokens_seen": 191573104, "step": 88685 }, { "epoch": 14.468189233278956, "grad_norm": 0.023272814229130745, "learning_rate": 0.00021550187373562015, "loss": 0.0043, "num_input_tokens_seen": 191582832, "step": 88690 }, { "epoch": 14.469004893964112, "grad_norm": 0.16478866338729858, "learning_rate": 0.00021544334259511688, "loss": 0.0101, "num_input_tokens_seen": 191592784, "step": 88695 }, { "epoch": 14.469820554649266, "grad_norm": 0.33433669805526733, "learning_rate": 0.0002153848172215152, "loss": 0.0139, "num_input_tokens_seen": 191603344, "step": 88700 }, { "epoch": 14.470636215334421, "grad_norm": 0.0031498237513005733, "learning_rate": 0.00021532629761600132, "loss": 0.1815, "num_input_tokens_seen": 191613744, "step": 88705 }, { "epoch": 14.471451876019575, "grad_norm": 0.07845421880483627, "learning_rate": 0.00021526778377976114, "loss": 0.0034, "num_input_tokens_seen": 191624784, "step": 88710 }, { "epoch": 14.47226753670473, "grad_norm": 0.4229518473148346, "learning_rate": 0.00021520927571398052, "loss": 0.1305, "num_input_tokens_seen": 191635440, "step": 88715 }, { "epoch": 14.473083197389887, "grad_norm": 0.012796735391020775, "learning_rate": 0.00021515077341984523, "loss": 0.0038, "num_input_tokens_seen": 191644880, "step": 88720 }, { "epoch": 14.47389885807504, "grad_norm": 0.07664711028337479, "learning_rate": 0.00021509227689854083, "loss": 0.0047, "num_input_tokens_seen": 191655120, "step": 88725 }, { "epoch": 14.474714518760196, "grad_norm": 0.013845582492649555, "learning_rate": 0.0002150337861512529, "loss": 0.0076, "num_input_tokens_seen": 191666032, "step": 88730 }, { "epoch": 14.47553017944535, "grad_norm": 0.019843915477395058, "learning_rate": 0.0002149753011791668, "loss": 0.0052, "num_input_tokens_seen": 191675760, "step": 88735 }, { "epoch": 14.476345840130506, "grad_norm": 0.033653657883405685, "learning_rate": 0.00021491682198346778, "loss": 0.2274, "num_input_tokens_seen": 191685584, "step": 88740 }, { "epoch": 14.477161500815662, "grad_norm": 0.004855956416577101, "learning_rate": 0.00021485834856534104, "loss": 0.0764, "num_input_tokens_seen": 191695632, "step": 88745 }, { "epoch": 14.477977161500815, "grad_norm": 0.026927856728434563, "learning_rate": 0.00021479988092597157, "loss": 0.0287, "num_input_tokens_seen": 191706416, "step": 88750 }, { "epoch": 14.478792822185971, "grad_norm": 0.0082674166187644, "learning_rate": 0.00021474141906654414, "loss": 0.0029, "num_input_tokens_seen": 191717936, "step": 88755 }, { "epoch": 14.479608482871125, "grad_norm": 0.013341606594622135, "learning_rate": 0.00021468296298824413, "loss": 0.0058, "num_input_tokens_seen": 191728976, "step": 88760 }, { "epoch": 14.48042414355628, "grad_norm": 0.008929502218961716, "learning_rate": 0.00021462451269225547, "loss": 0.0033, "num_input_tokens_seen": 191740272, "step": 88765 }, { "epoch": 14.481239804241435, "grad_norm": 0.007938308641314507, "learning_rate": 0.00021456606817976337, "loss": 0.003, "num_input_tokens_seen": 191751888, "step": 88770 }, { "epoch": 14.48205546492659, "grad_norm": 0.015212338417768478, "learning_rate": 0.00021450762945195167, "loss": 0.0076, "num_input_tokens_seen": 191763728, "step": 88775 }, { "epoch": 14.482871125611746, "grad_norm": 1.0990099906921387, "learning_rate": 0.00021444919651000544, "loss": 0.0907, "num_input_tokens_seen": 191774640, "step": 88780 }, { "epoch": 14.4836867862969, "grad_norm": 0.03344845771789551, "learning_rate": 0.0002143907693551081, "loss": 0.0256, "num_input_tokens_seen": 191784656, "step": 88785 }, { "epoch": 14.484502446982056, "grad_norm": 0.019984211772680283, "learning_rate": 0.00021433234798844448, "loss": 0.0044, "num_input_tokens_seen": 191796848, "step": 88790 }, { "epoch": 14.48531810766721, "grad_norm": 0.02110087312757969, "learning_rate": 0.00021427393241119785, "loss": 0.0102, "num_input_tokens_seen": 191806032, "step": 88795 }, { "epoch": 14.486133768352365, "grad_norm": 0.0715024396777153, "learning_rate": 0.00021421552262455268, "loss": 0.0311, "num_input_tokens_seen": 191817328, "step": 88800 }, { "epoch": 14.486949429037521, "grad_norm": 0.008351379074156284, "learning_rate": 0.00021415711862969244, "loss": 0.0112, "num_input_tokens_seen": 191827888, "step": 88805 }, { "epoch": 14.487765089722675, "grad_norm": 0.01090281642973423, "learning_rate": 0.00021409872042780083, "loss": 0.0766, "num_input_tokens_seen": 191839280, "step": 88810 }, { "epoch": 14.48858075040783, "grad_norm": 0.01043806690722704, "learning_rate": 0.00021404032802006134, "loss": 0.0061, "num_input_tokens_seen": 191848784, "step": 88815 }, { "epoch": 14.489396411092985, "grad_norm": 0.0038927318528294563, "learning_rate": 0.00021398194140765736, "loss": 0.0051, "num_input_tokens_seen": 191858800, "step": 88820 }, { "epoch": 14.49021207177814, "grad_norm": 0.005791252013295889, "learning_rate": 0.0002139235605917722, "loss": 0.0062, "num_input_tokens_seen": 191870608, "step": 88825 }, { "epoch": 14.491027732463296, "grad_norm": 0.003040252486243844, "learning_rate": 0.00021386518557358898, "loss": 0.0671, "num_input_tokens_seen": 191881296, "step": 88830 }, { "epoch": 14.49184339314845, "grad_norm": 0.029066117480397224, "learning_rate": 0.00021380681635429079, "loss": 0.0905, "num_input_tokens_seen": 191892400, "step": 88835 }, { "epoch": 14.492659053833606, "grad_norm": 0.028188761323690414, "learning_rate": 0.00021374845293506046, "loss": 0.0201, "num_input_tokens_seen": 191901680, "step": 88840 }, { "epoch": 14.49347471451876, "grad_norm": 0.011652766726911068, "learning_rate": 0.00021369009531708094, "loss": 0.0104, "num_input_tokens_seen": 191911824, "step": 88845 }, { "epoch": 14.494290375203915, "grad_norm": 0.006433951668441296, "learning_rate": 0.0002136317435015348, "loss": 0.0065, "num_input_tokens_seen": 191922992, "step": 88850 }, { "epoch": 14.49510603588907, "grad_norm": 0.02798541635274887, "learning_rate": 0.0002135733974896047, "loss": 0.0054, "num_input_tokens_seen": 191932496, "step": 88855 }, { "epoch": 14.495921696574225, "grad_norm": 0.007322490680962801, "learning_rate": 0.00021351505728247282, "loss": 0.0075, "num_input_tokens_seen": 191942672, "step": 88860 }, { "epoch": 14.49673735725938, "grad_norm": 0.015419857576489449, "learning_rate": 0.00021345672288132218, "loss": 0.0126, "num_input_tokens_seen": 191953936, "step": 88865 }, { "epoch": 14.497553017944535, "grad_norm": 0.05873335525393486, "learning_rate": 0.00021339839428733415, "loss": 0.0052, "num_input_tokens_seen": 191963504, "step": 88870 }, { "epoch": 14.49836867862969, "grad_norm": 0.08444296568632126, "learning_rate": 0.0002133400715016916, "loss": 0.0254, "num_input_tokens_seen": 191973776, "step": 88875 }, { "epoch": 14.499184339314844, "grad_norm": 0.01983826793730259, "learning_rate": 0.0002132817545255758, "loss": 0.0081, "num_input_tokens_seen": 191982800, "step": 88880 }, { "epoch": 14.5, "grad_norm": 0.020987823605537415, "learning_rate": 0.0002132234433601693, "loss": 0.0055, "num_input_tokens_seen": 191994608, "step": 88885 }, { "epoch": 14.500815660685156, "grad_norm": 0.05628190189599991, "learning_rate": 0.00021316513800665322, "loss": 0.0155, "num_input_tokens_seen": 192004112, "step": 88890 }, { "epoch": 14.50163132137031, "grad_norm": 0.013542087748646736, "learning_rate": 0.0002131068384662098, "loss": 0.0026, "num_input_tokens_seen": 192016720, "step": 88895 }, { "epoch": 14.502446982055465, "grad_norm": 0.01771514117717743, "learning_rate": 0.00021304854474001993, "loss": 0.003, "num_input_tokens_seen": 192027760, "step": 88900 }, { "epoch": 14.50326264274062, "grad_norm": 0.021325405687093735, "learning_rate": 0.00021299025682926565, "loss": 0.0015, "num_input_tokens_seen": 192038992, "step": 88905 }, { "epoch": 14.504078303425775, "grad_norm": 0.002273843390867114, "learning_rate": 0.0002129319747351276, "loss": 0.0025, "num_input_tokens_seen": 192050032, "step": 88910 }, { "epoch": 14.50489396411093, "grad_norm": 0.20562784373760223, "learning_rate": 0.00021287369845878756, "loss": 0.0154, "num_input_tokens_seen": 192060400, "step": 88915 }, { "epoch": 14.505709624796085, "grad_norm": 0.002092252252623439, "learning_rate": 0.00021281542800142595, "loss": 0.0157, "num_input_tokens_seen": 192070448, "step": 88920 }, { "epoch": 14.50652528548124, "grad_norm": 0.025624606758356094, "learning_rate": 0.00021275716336422435, "loss": 0.0052, "num_input_tokens_seen": 192081712, "step": 88925 }, { "epoch": 14.507340946166394, "grad_norm": 0.03953010216355324, "learning_rate": 0.00021269890454836288, "loss": 0.0084, "num_input_tokens_seen": 192092752, "step": 88930 }, { "epoch": 14.50815660685155, "grad_norm": 0.010210723616182804, "learning_rate": 0.00021264065155502293, "loss": 0.0042, "num_input_tokens_seen": 192104112, "step": 88935 }, { "epoch": 14.508972267536706, "grad_norm": 0.00433401670306921, "learning_rate": 0.00021258240438538434, "loss": 0.0158, "num_input_tokens_seen": 192114448, "step": 88940 }, { "epoch": 14.50978792822186, "grad_norm": 0.006997460499405861, "learning_rate": 0.0002125241630406281, "loss": 0.005, "num_input_tokens_seen": 192124176, "step": 88945 }, { "epoch": 14.510603588907015, "grad_norm": 0.004508296027779579, "learning_rate": 0.00021246592752193445, "loss": 0.1081, "num_input_tokens_seen": 192134960, "step": 88950 }, { "epoch": 14.51141924959217, "grad_norm": 0.0038452772423624992, "learning_rate": 0.00021240769783048352, "loss": 0.0026, "num_input_tokens_seen": 192145648, "step": 88955 }, { "epoch": 14.512234910277325, "grad_norm": 0.009463680908083916, "learning_rate": 0.00021234947396745542, "loss": 0.0458, "num_input_tokens_seen": 192156560, "step": 88960 }, { "epoch": 14.513050570962479, "grad_norm": 0.005169307813048363, "learning_rate": 0.00021229125593403016, "loss": 0.0136, "num_input_tokens_seen": 192167184, "step": 88965 }, { "epoch": 14.513866231647635, "grad_norm": 0.0034116103779524565, "learning_rate": 0.00021223304373138753, "loss": 0.0045, "num_input_tokens_seen": 192178608, "step": 88970 }, { "epoch": 14.51468189233279, "grad_norm": 0.014676249586045742, "learning_rate": 0.00021217483736070736, "loss": 0.0055, "num_input_tokens_seen": 192189328, "step": 88975 }, { "epoch": 14.515497553017944, "grad_norm": 0.039021145552396774, "learning_rate": 0.00021211663682316922, "loss": 0.025, "num_input_tokens_seen": 192200880, "step": 88980 }, { "epoch": 14.5163132137031, "grad_norm": 0.026364067569375038, "learning_rate": 0.00021205844211995268, "loss": 0.0033, "num_input_tokens_seen": 192211504, "step": 88985 }, { "epoch": 14.517128874388254, "grad_norm": 0.051095642149448395, "learning_rate": 0.0002120002532522371, "loss": 0.0083, "num_input_tokens_seen": 192223216, "step": 88990 }, { "epoch": 14.51794453507341, "grad_norm": 0.009368719533085823, "learning_rate": 0.00021194207022120153, "loss": 0.0031, "num_input_tokens_seen": 192234640, "step": 88995 }, { "epoch": 14.518760195758565, "grad_norm": 0.005646569188684225, "learning_rate": 0.0002118838930280257, "loss": 0.0032, "num_input_tokens_seen": 192244752, "step": 89000 }, { "epoch": 14.51957585644372, "grad_norm": 0.0006784353754483163, "learning_rate": 0.00021182572167388792, "loss": 0.0012, "num_input_tokens_seen": 192254448, "step": 89005 }, { "epoch": 14.520391517128875, "grad_norm": 0.013773053884506226, "learning_rate": 0.00021176755615996785, "loss": 0.0016, "num_input_tokens_seen": 192265584, "step": 89010 }, { "epoch": 14.521207177814029, "grad_norm": 0.014610863290727139, "learning_rate": 0.00021170939648744346, "loss": 0.0882, "num_input_tokens_seen": 192277200, "step": 89015 }, { "epoch": 14.522022838499185, "grad_norm": 0.06779501587152481, "learning_rate": 0.00021165124265749431, "loss": 0.0883, "num_input_tokens_seen": 192288848, "step": 89020 }, { "epoch": 14.522838499184338, "grad_norm": 0.0008561646100133657, "learning_rate": 0.00021159309467129816, "loss": 0.0024, "num_input_tokens_seen": 192299056, "step": 89025 }, { "epoch": 14.523654159869494, "grad_norm": 0.08283943682909012, "learning_rate": 0.0002115349525300342, "loss": 0.006, "num_input_tokens_seen": 192309232, "step": 89030 }, { "epoch": 14.52446982055465, "grad_norm": 0.037029191851615906, "learning_rate": 0.00021147681623487997, "loss": 0.0038, "num_input_tokens_seen": 192320112, "step": 89035 }, { "epoch": 14.525285481239804, "grad_norm": 0.01584443263709545, "learning_rate": 0.0002114186857870144, "loss": 0.0016, "num_input_tokens_seen": 192330096, "step": 89040 }, { "epoch": 14.52610114192496, "grad_norm": 0.10031913965940475, "learning_rate": 0.00021136056118761494, "loss": 0.0497, "num_input_tokens_seen": 192341744, "step": 89045 }, { "epoch": 14.526916802610113, "grad_norm": 0.04384492337703705, "learning_rate": 0.00021130244243786024, "loss": 0.0035, "num_input_tokens_seen": 192353200, "step": 89050 }, { "epoch": 14.52773246329527, "grad_norm": 0.006799424532800913, "learning_rate": 0.00021124432953892742, "loss": 0.0105, "num_input_tokens_seen": 192364496, "step": 89055 }, { "epoch": 14.528548123980425, "grad_norm": 0.004936617333441973, "learning_rate": 0.00021118622249199494, "loss": 0.0723, "num_input_tokens_seen": 192373680, "step": 89060 }, { "epoch": 14.529363784665579, "grad_norm": 0.003013910725712776, "learning_rate": 0.00021112812129823967, "loss": 0.0061, "num_input_tokens_seen": 192384496, "step": 89065 }, { "epoch": 14.530179445350734, "grad_norm": 0.5813406109809875, "learning_rate": 0.00021107002595883978, "loss": 0.0227, "num_input_tokens_seen": 192394768, "step": 89070 }, { "epoch": 14.530995106035888, "grad_norm": 0.00857462827116251, "learning_rate": 0.00021101193647497208, "loss": 0.0353, "num_input_tokens_seen": 192406736, "step": 89075 }, { "epoch": 14.531810766721044, "grad_norm": 0.013798504136502743, "learning_rate": 0.00021095385284781426, "loss": 0.0096, "num_input_tokens_seen": 192418192, "step": 89080 }, { "epoch": 14.5326264274062, "grad_norm": 0.005595772061496973, "learning_rate": 0.00021089577507854324, "loss": 0.0981, "num_input_tokens_seen": 192429424, "step": 89085 }, { "epoch": 14.533442088091354, "grad_norm": 0.035856693983078, "learning_rate": 0.00021083770316833618, "loss": 0.0431, "num_input_tokens_seen": 192439952, "step": 89090 }, { "epoch": 14.53425774877651, "grad_norm": 0.0023227352648973465, "learning_rate": 0.00021077963711836983, "loss": 0.0029, "num_input_tokens_seen": 192450096, "step": 89095 }, { "epoch": 14.535073409461663, "grad_norm": 0.3182942569255829, "learning_rate": 0.00021072157692982103, "loss": 0.0081, "num_input_tokens_seen": 192461456, "step": 89100 }, { "epoch": 14.535889070146819, "grad_norm": 0.00510216411203146, "learning_rate": 0.00021066352260386644, "loss": 0.0035, "num_input_tokens_seen": 192471824, "step": 89105 }, { "epoch": 14.536704730831975, "grad_norm": 0.07574431598186493, "learning_rate": 0.0002106054741416827, "loss": 0.0036, "num_input_tokens_seen": 192482320, "step": 89110 }, { "epoch": 14.537520391517129, "grad_norm": 0.004888062831014395, "learning_rate": 0.00021054743154444607, "loss": 0.0048, "num_input_tokens_seen": 192492816, "step": 89115 }, { "epoch": 14.538336052202284, "grad_norm": 0.016683122143149376, "learning_rate": 0.00021048939481333297, "loss": 0.0033, "num_input_tokens_seen": 192504016, "step": 89120 }, { "epoch": 14.539151712887438, "grad_norm": 0.006947814021259546, "learning_rate": 0.00021043136394951955, "loss": 0.125, "num_input_tokens_seen": 192514416, "step": 89125 }, { "epoch": 14.539967373572594, "grad_norm": 0.01835622824728489, "learning_rate": 0.00021037333895418186, "loss": 0.0437, "num_input_tokens_seen": 192524336, "step": 89130 }, { "epoch": 14.540783034257748, "grad_norm": 0.0712086632847786, "learning_rate": 0.0002103153198284959, "loss": 0.0035, "num_input_tokens_seen": 192533200, "step": 89135 }, { "epoch": 14.541598694942904, "grad_norm": 0.002161943819373846, "learning_rate": 0.0002102573065736373, "loss": 0.0038, "num_input_tokens_seen": 192545104, "step": 89140 }, { "epoch": 14.54241435562806, "grad_norm": 0.0018386748852208257, "learning_rate": 0.00021019929919078228, "loss": 0.009, "num_input_tokens_seen": 192555856, "step": 89145 }, { "epoch": 14.543230016313213, "grad_norm": 0.8079872727394104, "learning_rate": 0.00021014129768110574, "loss": 0.098, "num_input_tokens_seen": 192566192, "step": 89150 }, { "epoch": 14.544045676998369, "grad_norm": 0.020592303946614265, "learning_rate": 0.0002100833020457839, "loss": 0.0136, "num_input_tokens_seen": 192577680, "step": 89155 }, { "epoch": 14.544861337683523, "grad_norm": 0.5749585628509521, "learning_rate": 0.00021002531228599136, "loss": 0.0973, "num_input_tokens_seen": 192588400, "step": 89160 }, { "epoch": 14.545676998368679, "grad_norm": 0.017069535329937935, "learning_rate": 0.00020996732840290405, "loss": 0.0057, "num_input_tokens_seen": 192599376, "step": 89165 }, { "epoch": 14.546492659053834, "grad_norm": 0.009924108162522316, "learning_rate": 0.0002099093503976965, "loss": 0.0044, "num_input_tokens_seen": 192610960, "step": 89170 }, { "epoch": 14.547308319738988, "grad_norm": 0.24204865097999573, "learning_rate": 0.0002098513782715442, "loss": 0.0177, "num_input_tokens_seen": 192621680, "step": 89175 }, { "epoch": 14.548123980424144, "grad_norm": 0.05922012776136398, "learning_rate": 0.00020979341202562152, "loss": 0.0034, "num_input_tokens_seen": 192633328, "step": 89180 }, { "epoch": 14.548939641109298, "grad_norm": 0.05222422257065773, "learning_rate": 0.00020973545166110368, "loss": 0.0051, "num_input_tokens_seen": 192644816, "step": 89185 }, { "epoch": 14.549755301794454, "grad_norm": 0.015433356165885925, "learning_rate": 0.00020967749717916513, "loss": 0.006, "num_input_tokens_seen": 192655760, "step": 89190 }, { "epoch": 14.550570962479608, "grad_norm": 0.10926583409309387, "learning_rate": 0.00020961954858098037, "loss": 0.009, "num_input_tokens_seen": 192666096, "step": 89195 }, { "epoch": 14.551386623164763, "grad_norm": 0.0032105380669236183, "learning_rate": 0.0002095616058677239, "loss": 0.0081, "num_input_tokens_seen": 192675440, "step": 89200 }, { "epoch": 14.552202283849919, "grad_norm": 0.14845839142799377, "learning_rate": 0.00020950366904056984, "loss": 0.0055, "num_input_tokens_seen": 192686416, "step": 89205 }, { "epoch": 14.553017944535073, "grad_norm": 0.008143202401697636, "learning_rate": 0.00020944573810069252, "loss": 0.005, "num_input_tokens_seen": 192697936, "step": 89210 }, { "epoch": 14.553833605220229, "grad_norm": 0.0077461740002036095, "learning_rate": 0.00020938781304926586, "loss": 0.0032, "num_input_tokens_seen": 192709008, "step": 89215 }, { "epoch": 14.554649265905383, "grad_norm": 0.0006443687016144395, "learning_rate": 0.00020932989388746387, "loss": 0.0048, "num_input_tokens_seen": 192717552, "step": 89220 }, { "epoch": 14.555464926590538, "grad_norm": 0.0020855157636106014, "learning_rate": 0.0002092719806164603, "loss": 0.0122, "num_input_tokens_seen": 192727888, "step": 89225 }, { "epoch": 14.556280587275694, "grad_norm": 0.009736152365803719, "learning_rate": 0.00020921407323742892, "loss": 0.0016, "num_input_tokens_seen": 192739984, "step": 89230 }, { "epoch": 14.557096247960848, "grad_norm": 0.006911523174494505, "learning_rate": 0.00020915617175154316, "loss": 0.0034, "num_input_tokens_seen": 192751312, "step": 89235 }, { "epoch": 14.557911908646004, "grad_norm": 0.009532546624541283, "learning_rate": 0.00020909827615997657, "loss": 0.0135, "num_input_tokens_seen": 192763248, "step": 89240 }, { "epoch": 14.558727569331158, "grad_norm": 0.020412901416420937, "learning_rate": 0.00020904038646390246, "loss": 0.0175, "num_input_tokens_seen": 192774672, "step": 89245 }, { "epoch": 14.559543230016313, "grad_norm": 0.050955090671777725, "learning_rate": 0.00020898250266449399, "loss": 0.0121, "num_input_tokens_seen": 192785552, "step": 89250 }, { "epoch": 14.560358890701469, "grad_norm": 0.004365491680800915, "learning_rate": 0.0002089246247629243, "loss": 0.0032, "num_input_tokens_seen": 192795952, "step": 89255 }, { "epoch": 14.561174551386623, "grad_norm": 0.009529031813144684, "learning_rate": 0.00020886675276036637, "loss": 0.0023, "num_input_tokens_seen": 192806544, "step": 89260 }, { "epoch": 14.561990212071779, "grad_norm": 0.01040890347212553, "learning_rate": 0.00020880888665799304, "loss": 0.0029, "num_input_tokens_seen": 192817520, "step": 89265 }, { "epoch": 14.562805872756933, "grad_norm": 0.00223185820505023, "learning_rate": 0.00020875102645697696, "loss": 0.0021, "num_input_tokens_seen": 192827920, "step": 89270 }, { "epoch": 14.563621533442088, "grad_norm": 0.0598750114440918, "learning_rate": 0.0002086931721584908, "loss": 0.0057, "num_input_tokens_seen": 192838736, "step": 89275 }, { "epoch": 14.564437194127244, "grad_norm": 0.10287459939718246, "learning_rate": 0.00020863532376370715, "loss": 0.0063, "num_input_tokens_seen": 192849776, "step": 89280 }, { "epoch": 14.565252854812398, "grad_norm": 0.007824460975825787, "learning_rate": 0.000208577481273798, "loss": 0.0166, "num_input_tokens_seen": 192858800, "step": 89285 }, { "epoch": 14.566068515497554, "grad_norm": 0.026574891060590744, "learning_rate": 0.00020851964468993612, "loss": 0.1111, "num_input_tokens_seen": 192870064, "step": 89290 }, { "epoch": 14.566884176182707, "grad_norm": 0.0022794893011450768, "learning_rate": 0.00020846181401329338, "loss": 0.0026, "num_input_tokens_seen": 192881872, "step": 89295 }, { "epoch": 14.567699836867863, "grad_norm": 0.0017811759607866406, "learning_rate": 0.00020840398924504188, "loss": 0.0127, "num_input_tokens_seen": 192892464, "step": 89300 }, { "epoch": 14.568515497553017, "grad_norm": 0.043051186949014664, "learning_rate": 0.0002083461703863534, "loss": 0.0053, "num_input_tokens_seen": 192903792, "step": 89305 }, { "epoch": 14.569331158238173, "grad_norm": 0.009227285161614418, "learning_rate": 0.0002082883574383998, "loss": 0.0021, "num_input_tokens_seen": 192914704, "step": 89310 }, { "epoch": 14.570146818923329, "grad_norm": 0.12363433092832565, "learning_rate": 0.00020823055040235266, "loss": 0.0049, "num_input_tokens_seen": 192925776, "step": 89315 }, { "epoch": 14.570962479608482, "grad_norm": 0.039080094546079636, "learning_rate": 0.0002081727492793836, "loss": 0.0095, "num_input_tokens_seen": 192936656, "step": 89320 }, { "epoch": 14.571778140293638, "grad_norm": 0.0033353553153574467, "learning_rate": 0.00020811495407066394, "loss": 0.0063, "num_input_tokens_seen": 192948048, "step": 89325 }, { "epoch": 14.572593800978792, "grad_norm": 0.056662097573280334, "learning_rate": 0.00020805716477736508, "loss": 0.0373, "num_input_tokens_seen": 192957840, "step": 89330 }, { "epoch": 14.573409461663948, "grad_norm": 0.0010778683936223388, "learning_rate": 0.00020799938140065804, "loss": 0.0079, "num_input_tokens_seen": 192968528, "step": 89335 }, { "epoch": 14.574225122349104, "grad_norm": 0.0030743195675313473, "learning_rate": 0.00020794160394171403, "loss": 0.0025, "num_input_tokens_seen": 192979088, "step": 89340 }, { "epoch": 14.575040783034257, "grad_norm": 0.007394440937787294, "learning_rate": 0.00020788383240170395, "loss": 0.0124, "num_input_tokens_seen": 192990640, "step": 89345 }, { "epoch": 14.575856443719413, "grad_norm": 0.001514918520115316, "learning_rate": 0.0002078260667817985, "loss": 0.0017, "num_input_tokens_seen": 193001840, "step": 89350 }, { "epoch": 14.576672104404567, "grad_norm": 0.002615422708913684, "learning_rate": 0.0002077683070831685, "loss": 0.0697, "num_input_tokens_seen": 193013232, "step": 89355 }, { "epoch": 14.577487765089723, "grad_norm": 0.027480650693178177, "learning_rate": 0.00020771055330698446, "loss": 0.0028, "num_input_tokens_seen": 193025040, "step": 89360 }, { "epoch": 14.578303425774878, "grad_norm": 0.43630602955818176, "learning_rate": 0.0002076528054544169, "loss": 0.0143, "num_input_tokens_seen": 193036464, "step": 89365 }, { "epoch": 14.579119086460032, "grad_norm": 0.009855729527771473, "learning_rate": 0.00020759506352663605, "loss": 0.0073, "num_input_tokens_seen": 193046896, "step": 89370 }, { "epoch": 14.579934747145188, "grad_norm": 0.46799859404563904, "learning_rate": 0.0002075373275248122, "loss": 0.1245, "num_input_tokens_seen": 193058192, "step": 89375 }, { "epoch": 14.580750407830342, "grad_norm": 0.010498947463929653, "learning_rate": 0.00020747959745011542, "loss": 0.0054, "num_input_tokens_seen": 193069808, "step": 89380 }, { "epoch": 14.581566068515498, "grad_norm": 0.07971848547458649, "learning_rate": 0.0002074218733037157, "loss": 0.0047, "num_input_tokens_seen": 193081392, "step": 89385 }, { "epoch": 14.582381729200652, "grad_norm": 0.0011771656572818756, "learning_rate": 0.00020736415508678285, "loss": 0.0299, "num_input_tokens_seen": 193092144, "step": 89390 }, { "epoch": 14.583197389885807, "grad_norm": 0.07137224078178406, "learning_rate": 0.0002073064428004865, "loss": 0.0033, "num_input_tokens_seen": 193103312, "step": 89395 }, { "epoch": 14.584013050570963, "grad_norm": 0.004249855410307646, "learning_rate": 0.00020724873644599668, "loss": 0.0054, "num_input_tokens_seen": 193115152, "step": 89400 }, { "epoch": 14.584828711256117, "grad_norm": 0.08202840387821198, "learning_rate": 0.0002071910360244823, "loss": 0.0753, "num_input_tokens_seen": 193125776, "step": 89405 }, { "epoch": 14.585644371941273, "grad_norm": 0.017344074323773384, "learning_rate": 0.0002071333415371134, "loss": 0.0098, "num_input_tokens_seen": 193136208, "step": 89410 }, { "epoch": 14.586460032626427, "grad_norm": 0.013641850091516972, "learning_rate": 0.00020707565298505842, "loss": 0.0018, "num_input_tokens_seen": 193148208, "step": 89415 }, { "epoch": 14.587275693311582, "grad_norm": 0.002559355227276683, "learning_rate": 0.00020701797036948739, "loss": 0.0024, "num_input_tokens_seen": 193159088, "step": 89420 }, { "epoch": 14.588091353996738, "grad_norm": 0.003523677121847868, "learning_rate": 0.00020696029369156844, "loss": 0.0019, "num_input_tokens_seen": 193169648, "step": 89425 }, { "epoch": 14.588907014681892, "grad_norm": 0.0047660889104008675, "learning_rate": 0.0002069026229524711, "loss": 0.0038, "num_input_tokens_seen": 193180624, "step": 89430 }, { "epoch": 14.589722675367048, "grad_norm": 0.0020290075335651636, "learning_rate": 0.00020684495815336392, "loss": 0.0009, "num_input_tokens_seen": 193191408, "step": 89435 }, { "epoch": 14.590538336052202, "grad_norm": 0.0014430329902097583, "learning_rate": 0.00020678729929541552, "loss": 0.0032, "num_input_tokens_seen": 193201008, "step": 89440 }, { "epoch": 14.591353996737357, "grad_norm": 0.0040334672667086124, "learning_rate": 0.00020672964637979453, "loss": 0.0055, "num_input_tokens_seen": 193211120, "step": 89445 }, { "epoch": 14.592169657422513, "grad_norm": 0.0017633815295994282, "learning_rate": 0.00020667199940766924, "loss": 0.0084, "num_input_tokens_seen": 193221936, "step": 89450 }, { "epoch": 14.592985318107667, "grad_norm": 0.7666230201721191, "learning_rate": 0.00020661435838020798, "loss": 0.0843, "num_input_tokens_seen": 193233616, "step": 89455 }, { "epoch": 14.593800978792823, "grad_norm": 0.005272711627185345, "learning_rate": 0.000206556723298579, "loss": 0.0025, "num_input_tokens_seen": 193242992, "step": 89460 }, { "epoch": 14.594616639477977, "grad_norm": 0.0023867280688136816, "learning_rate": 0.00020649909416395025, "loss": 0.0038, "num_input_tokens_seen": 193252912, "step": 89465 }, { "epoch": 14.595432300163132, "grad_norm": 0.016159815713763237, "learning_rate": 0.00020644147097748967, "loss": 0.0012, "num_input_tokens_seen": 193264304, "step": 89470 }, { "epoch": 14.596247960848288, "grad_norm": 0.0027893621008843184, "learning_rate": 0.0002063838537403651, "loss": 0.0108, "num_input_tokens_seen": 193275632, "step": 89475 }, { "epoch": 14.597063621533442, "grad_norm": 0.00795994233340025, "learning_rate": 0.00020632624245374426, "loss": 0.0085, "num_input_tokens_seen": 193285680, "step": 89480 }, { "epoch": 14.597879282218598, "grad_norm": 0.014036700129508972, "learning_rate": 0.0002062686371187946, "loss": 0.0968, "num_input_tokens_seen": 193295792, "step": 89485 }, { "epoch": 14.598694942903752, "grad_norm": 0.0036570588126778603, "learning_rate": 0.00020621103773668366, "loss": 0.0009, "num_input_tokens_seen": 193306320, "step": 89490 }, { "epoch": 14.599510603588907, "grad_norm": 0.004171954933553934, "learning_rate": 0.00020615344430857874, "loss": 0.1716, "num_input_tokens_seen": 193317616, "step": 89495 }, { "epoch": 14.600326264274061, "grad_norm": 0.005262458231300116, "learning_rate": 0.00020609585683564687, "loss": 0.0025, "num_input_tokens_seen": 193328784, "step": 89500 }, { "epoch": 14.601141924959217, "grad_norm": 0.0018460382707417011, "learning_rate": 0.00020603827531905566, "loss": 0.0877, "num_input_tokens_seen": 193339408, "step": 89505 }, { "epoch": 14.601957585644373, "grad_norm": 0.0008219721494242549, "learning_rate": 0.00020598069975997135, "loss": 0.0909, "num_input_tokens_seen": 193349744, "step": 89510 }, { "epoch": 14.602773246329527, "grad_norm": 0.008539113216102123, "learning_rate": 0.0002059231301595615, "loss": 0.0013, "num_input_tokens_seen": 193360528, "step": 89515 }, { "epoch": 14.603588907014682, "grad_norm": 0.008514752611517906, "learning_rate": 0.00020586556651899213, "loss": 0.0024, "num_input_tokens_seen": 193371792, "step": 89520 }, { "epoch": 14.604404567699836, "grad_norm": 0.138560950756073, "learning_rate": 0.00020580800883943058, "loss": 0.0106, "num_input_tokens_seen": 193381392, "step": 89525 }, { "epoch": 14.605220228384992, "grad_norm": 0.015043669380247593, "learning_rate": 0.00020575045712204254, "loss": 0.0061, "num_input_tokens_seen": 193392272, "step": 89530 }, { "epoch": 14.606035889070148, "grad_norm": 0.1300588846206665, "learning_rate": 0.00020569291136799512, "loss": 0.0147, "num_input_tokens_seen": 193402800, "step": 89535 }, { "epoch": 14.606851549755302, "grad_norm": 0.02232443168759346, "learning_rate": 0.00020563537157845392, "loss": 0.0043, "num_input_tokens_seen": 193413008, "step": 89540 }, { "epoch": 14.607667210440457, "grad_norm": 0.33156269788742065, "learning_rate": 0.0002055778377545856, "loss": 0.012, "num_input_tokens_seen": 193424272, "step": 89545 }, { "epoch": 14.608482871125611, "grad_norm": 0.4333055317401886, "learning_rate": 0.0002055203098975556, "loss": 0.1049, "num_input_tokens_seen": 193434928, "step": 89550 }, { "epoch": 14.609298531810767, "grad_norm": 0.00710340915247798, "learning_rate": 0.00020546278800853048, "loss": 0.0012, "num_input_tokens_seen": 193447184, "step": 89555 }, { "epoch": 14.61011419249592, "grad_norm": 1.8786109685897827, "learning_rate": 0.00020540527208867522, "loss": 0.2019, "num_input_tokens_seen": 193458448, "step": 89560 }, { "epoch": 14.610929853181077, "grad_norm": 0.00856061466038227, "learning_rate": 0.00020534776213915619, "loss": 0.0035, "num_input_tokens_seen": 193469680, "step": 89565 }, { "epoch": 14.611745513866232, "grad_norm": 2.7863962650299072, "learning_rate": 0.00020529025816113817, "loss": 0.3, "num_input_tokens_seen": 193480848, "step": 89570 }, { "epoch": 14.612561174551386, "grad_norm": 0.006839493755251169, "learning_rate": 0.00020523276015578713, "loss": 0.0012, "num_input_tokens_seen": 193491280, "step": 89575 }, { "epoch": 14.613376835236542, "grad_norm": 0.0007321059238165617, "learning_rate": 0.0002051752681242682, "loss": 0.0072, "num_input_tokens_seen": 193502384, "step": 89580 }, { "epoch": 14.614192495921696, "grad_norm": 0.005245604086667299, "learning_rate": 0.0002051177820677464, "loss": 0.0025, "num_input_tokens_seen": 193510896, "step": 89585 }, { "epoch": 14.615008156606851, "grad_norm": 0.0040390766225755215, "learning_rate": 0.00020506030198738683, "loss": 0.0075, "num_input_tokens_seen": 193521712, "step": 89590 }, { "epoch": 14.615823817292007, "grad_norm": 0.017503926530480385, "learning_rate": 0.00020500282788435441, "loss": 0.0035, "num_input_tokens_seen": 193533008, "step": 89595 }, { "epoch": 14.616639477977161, "grad_norm": 0.03211967274546623, "learning_rate": 0.00020494535975981398, "loss": 0.0097, "num_input_tokens_seen": 193543472, "step": 89600 }, { "epoch": 14.617455138662317, "grad_norm": 0.397350013256073, "learning_rate": 0.0002048878976149301, "loss": 0.0192, "num_input_tokens_seen": 193553392, "step": 89605 }, { "epoch": 14.61827079934747, "grad_norm": 0.008442388847470284, "learning_rate": 0.00020483044145086732, "loss": 0.0032, "num_input_tokens_seen": 193564912, "step": 89610 }, { "epoch": 14.619086460032626, "grad_norm": 0.0035397973842918873, "learning_rate": 0.00020477299126879013, "loss": 0.0635, "num_input_tokens_seen": 193574512, "step": 89615 }, { "epoch": 14.619902120717782, "grad_norm": 0.004368765279650688, "learning_rate": 0.00020471554706986273, "loss": 0.0045, "num_input_tokens_seen": 193585968, "step": 89620 }, { "epoch": 14.620717781402936, "grad_norm": 0.002169569954276085, "learning_rate": 0.00020465810885524928, "loss": 0.0135, "num_input_tokens_seen": 193596304, "step": 89625 }, { "epoch": 14.621533442088092, "grad_norm": 0.015089782886207104, "learning_rate": 0.0002046006766261142, "loss": 0.0033, "num_input_tokens_seen": 193606672, "step": 89630 }, { "epoch": 14.622349102773246, "grad_norm": 0.012145834043622017, "learning_rate": 0.00020454325038362083, "loss": 0.0077, "num_input_tokens_seen": 193616912, "step": 89635 }, { "epoch": 14.623164763458401, "grad_norm": 0.004161675926297903, "learning_rate": 0.00020448583012893363, "loss": 0.0056, "num_input_tokens_seen": 193627856, "step": 89640 }, { "epoch": 14.623980424143557, "grad_norm": 0.0038490283768624067, "learning_rate": 0.00020442841586321565, "loss": 0.0627, "num_input_tokens_seen": 193638896, "step": 89645 }, { "epoch": 14.624796084828711, "grad_norm": 0.005434063263237476, "learning_rate": 0.0002043710075876311, "loss": 0.0054, "num_input_tokens_seen": 193648816, "step": 89650 }, { "epoch": 14.625611745513867, "grad_norm": 0.02254394255578518, "learning_rate": 0.00020431360530334282, "loss": 0.003, "num_input_tokens_seen": 193660144, "step": 89655 }, { "epoch": 14.62642740619902, "grad_norm": 0.05815372243523598, "learning_rate": 0.0002042562090115147, "loss": 0.0077, "num_input_tokens_seen": 193671024, "step": 89660 }, { "epoch": 14.627243066884176, "grad_norm": 0.11653152108192444, "learning_rate": 0.0002041988187133094, "loss": 0.0498, "num_input_tokens_seen": 193682960, "step": 89665 }, { "epoch": 14.62805872756933, "grad_norm": 0.0057633547112345695, "learning_rate": 0.00020414143440989062, "loss": 0.126, "num_input_tokens_seen": 193693136, "step": 89670 }, { "epoch": 14.628874388254486, "grad_norm": 0.053961724042892456, "learning_rate": 0.00020408405610242063, "loss": 0.0065, "num_input_tokens_seen": 193704592, "step": 89675 }, { "epoch": 14.629690048939642, "grad_norm": 0.009925310499966145, "learning_rate": 0.000204026683792063, "loss": 0.0026, "num_input_tokens_seen": 193715088, "step": 89680 }, { "epoch": 14.630505709624796, "grad_norm": 0.006178427021950483, "learning_rate": 0.00020396931747997978, "loss": 0.003, "num_input_tokens_seen": 193726160, "step": 89685 }, { "epoch": 14.631321370309951, "grad_norm": 0.12800955772399902, "learning_rate": 0.0002039119571673342, "loss": 0.0242, "num_input_tokens_seen": 193736848, "step": 89690 }, { "epoch": 14.632137030995105, "grad_norm": 0.25726088881492615, "learning_rate": 0.00020385460285528807, "loss": 0.0543, "num_input_tokens_seen": 193748080, "step": 89695 }, { "epoch": 14.632952691680261, "grad_norm": 0.0024385061115026474, "learning_rate": 0.0002037972545450044, "loss": 0.0065, "num_input_tokens_seen": 193759440, "step": 89700 }, { "epoch": 14.633768352365417, "grad_norm": 0.008196687325835228, "learning_rate": 0.0002037399122376449, "loss": 0.0058, "num_input_tokens_seen": 193770768, "step": 89705 }, { "epoch": 14.63458401305057, "grad_norm": 0.024108847603201866, "learning_rate": 0.0002036825759343721, "loss": 0.003, "num_input_tokens_seen": 193781616, "step": 89710 }, { "epoch": 14.635399673735726, "grad_norm": 0.03441760316491127, "learning_rate": 0.0002036252456363476, "loss": 0.022, "num_input_tokens_seen": 193792656, "step": 89715 }, { "epoch": 14.63621533442088, "grad_norm": 0.010302347131073475, "learning_rate": 0.00020356792134473356, "loss": 0.0027, "num_input_tokens_seen": 193802384, "step": 89720 }, { "epoch": 14.637030995106036, "grad_norm": 0.009490042924880981, "learning_rate": 0.0002035106030606917, "loss": 0.0124, "num_input_tokens_seen": 193812400, "step": 89725 }, { "epoch": 14.63784665579119, "grad_norm": 0.017873436212539673, "learning_rate": 0.00020345329078538354, "loss": 0.0055, "num_input_tokens_seen": 193823664, "step": 89730 }, { "epoch": 14.638662316476346, "grad_norm": 0.03383546322584152, "learning_rate": 0.00020339598451997066, "loss": 0.0084, "num_input_tokens_seen": 193835696, "step": 89735 }, { "epoch": 14.639477977161501, "grad_norm": 0.006421535741537809, "learning_rate": 0.00020333868426561448, "loss": 0.002, "num_input_tokens_seen": 193847376, "step": 89740 }, { "epoch": 14.640293637846655, "grad_norm": 0.0031049128156155348, "learning_rate": 0.00020328139002347612, "loss": 0.0032, "num_input_tokens_seen": 193858480, "step": 89745 }, { "epoch": 14.641109298531811, "grad_norm": 0.037416599690914154, "learning_rate": 0.00020322410179471684, "loss": 0.0045, "num_input_tokens_seen": 193871024, "step": 89750 }, { "epoch": 14.641924959216965, "grad_norm": 0.007306250277906656, "learning_rate": 0.00020316681958049758, "loss": 0.0061, "num_input_tokens_seen": 193881104, "step": 89755 }, { "epoch": 14.64274061990212, "grad_norm": 0.007262531202286482, "learning_rate": 0.00020310954338197934, "loss": 0.002, "num_input_tokens_seen": 193892784, "step": 89760 }, { "epoch": 14.643556280587276, "grad_norm": 0.0022280393168330193, "learning_rate": 0.00020305227320032283, "loss": 0.0036, "num_input_tokens_seen": 193904528, "step": 89765 }, { "epoch": 14.64437194127243, "grad_norm": 0.0012667253613471985, "learning_rate": 0.00020299500903668856, "loss": 0.0255, "num_input_tokens_seen": 193914640, "step": 89770 }, { "epoch": 14.645187601957586, "grad_norm": 0.0008011406753212214, "learning_rate": 0.00020293775089223748, "loss": 0.0325, "num_input_tokens_seen": 193926416, "step": 89775 }, { "epoch": 14.64600326264274, "grad_norm": 0.0034976284950971603, "learning_rate": 0.00020288049876812943, "loss": 0.0026, "num_input_tokens_seen": 193937776, "step": 89780 }, { "epoch": 14.646818923327896, "grad_norm": 0.003417432773858309, "learning_rate": 0.00020282325266552536, "loss": 0.1563, "num_input_tokens_seen": 193948976, "step": 89785 }, { "epoch": 14.647634584013051, "grad_norm": 0.014804985374212265, "learning_rate": 0.0002027660125855847, "loss": 0.0036, "num_input_tokens_seen": 193959408, "step": 89790 }, { "epoch": 14.648450244698205, "grad_norm": 0.0008979029953479767, "learning_rate": 0.00020270877852946817, "loss": 0.0023, "num_input_tokens_seen": 193971536, "step": 89795 }, { "epoch": 14.649265905383361, "grad_norm": 0.0022123174276202917, "learning_rate": 0.0002026515504983351, "loss": 0.007, "num_input_tokens_seen": 193981392, "step": 89800 }, { "epoch": 14.650081566068515, "grad_norm": 0.0016149893635883927, "learning_rate": 0.00020259432849334592, "loss": 0.0039, "num_input_tokens_seen": 193992592, "step": 89805 }, { "epoch": 14.65089722675367, "grad_norm": 0.4601063132286072, "learning_rate": 0.00020253711251565953, "loss": 0.1179, "num_input_tokens_seen": 194003280, "step": 89810 }, { "epoch": 14.651712887438826, "grad_norm": 0.0017725087236613035, "learning_rate": 0.00020247990256643634, "loss": 0.0016, "num_input_tokens_seen": 194014160, "step": 89815 }, { "epoch": 14.65252854812398, "grad_norm": 0.0016594589687883854, "learning_rate": 0.000202422698646835, "loss": 0.0603, "num_input_tokens_seen": 194025072, "step": 89820 }, { "epoch": 14.653344208809136, "grad_norm": 0.004949385765939951, "learning_rate": 0.00020236550075801535, "loss": 0.0142, "num_input_tokens_seen": 194034384, "step": 89825 }, { "epoch": 14.65415986949429, "grad_norm": 0.002924638567492366, "learning_rate": 0.0002023083089011364, "loss": 0.0047, "num_input_tokens_seen": 194046064, "step": 89830 }, { "epoch": 14.654975530179446, "grad_norm": 0.5376624464988708, "learning_rate": 0.00020225112307735717, "loss": 0.1602, "num_input_tokens_seen": 194056016, "step": 89835 }, { "epoch": 14.655791190864601, "grad_norm": 0.0024497162085026503, "learning_rate": 0.00020219394328783668, "loss": 0.0051, "num_input_tokens_seen": 194067056, "step": 89840 }, { "epoch": 14.656606851549755, "grad_norm": 0.057129036635160446, "learning_rate": 0.00020213676953373372, "loss": 0.0018, "num_input_tokens_seen": 194077712, "step": 89845 }, { "epoch": 14.65742251223491, "grad_norm": 0.006286917254328728, "learning_rate": 0.00020207960181620706, "loss": 0.003, "num_input_tokens_seen": 194087312, "step": 89850 }, { "epoch": 14.658238172920065, "grad_norm": 0.05066222324967384, "learning_rate": 0.00020202244013641513, "loss": 0.0055, "num_input_tokens_seen": 194098800, "step": 89855 }, { "epoch": 14.65905383360522, "grad_norm": 0.004280565306544304, "learning_rate": 0.0002019652844955165, "loss": 0.002, "num_input_tokens_seen": 194110704, "step": 89860 }, { "epoch": 14.659869494290374, "grad_norm": 0.0028599584475159645, "learning_rate": 0.00020190813489466943, "loss": 0.1685, "num_input_tokens_seen": 194122000, "step": 89865 }, { "epoch": 14.66068515497553, "grad_norm": 0.003662576898932457, "learning_rate": 0.00020185099133503216, "loss": 0.0109, "num_input_tokens_seen": 194132336, "step": 89870 }, { "epoch": 14.661500815660686, "grad_norm": 0.0418967604637146, "learning_rate": 0.00020179385381776283, "loss": 0.0035, "num_input_tokens_seen": 194144080, "step": 89875 }, { "epoch": 14.66231647634584, "grad_norm": 0.009954468347132206, "learning_rate": 0.00020173672234401928, "loss": 0.0032, "num_input_tokens_seen": 194153200, "step": 89880 }, { "epoch": 14.663132137030995, "grad_norm": 0.023417534306645393, "learning_rate": 0.00020167959691495946, "loss": 0.0078, "num_input_tokens_seen": 194165104, "step": 89885 }, { "epoch": 14.66394779771615, "grad_norm": 0.008343107998371124, "learning_rate": 0.00020162247753174105, "loss": 0.0016, "num_input_tokens_seen": 194175600, "step": 89890 }, { "epoch": 14.664763458401305, "grad_norm": 0.006546036805957556, "learning_rate": 0.00020156536419552168, "loss": 0.0035, "num_input_tokens_seen": 194184880, "step": 89895 }, { "epoch": 14.66557911908646, "grad_norm": 0.005429636221379042, "learning_rate": 0.00020150825690745883, "loss": 0.0197, "num_input_tokens_seen": 194195152, "step": 89900 }, { "epoch": 14.666394779771615, "grad_norm": 0.003119829809293151, "learning_rate": 0.00020145115566870975, "loss": 0.0068, "num_input_tokens_seen": 194205744, "step": 89905 }, { "epoch": 14.66721044045677, "grad_norm": 0.14407505095005035, "learning_rate": 0.00020139406048043173, "loss": 0.0078, "num_input_tokens_seen": 194215920, "step": 89910 }, { "epoch": 14.668026101141924, "grad_norm": 0.1059744581580162, "learning_rate": 0.00020133697134378176, "loss": 0.0368, "num_input_tokens_seen": 194226736, "step": 89915 }, { "epoch": 14.66884176182708, "grad_norm": 0.011918950825929642, "learning_rate": 0.0002012798882599173, "loss": 0.0051, "num_input_tokens_seen": 194238320, "step": 89920 }, { "epoch": 14.669657422512234, "grad_norm": 0.05558808147907257, "learning_rate": 0.00020122281122999443, "loss": 0.0112, "num_input_tokens_seen": 194249744, "step": 89925 }, { "epoch": 14.67047308319739, "grad_norm": 0.0026128387544304132, "learning_rate": 0.00020116574025517053, "loss": 0.0203, "num_input_tokens_seen": 194259664, "step": 89930 }, { "epoch": 14.671288743882545, "grad_norm": 0.5910449028015137, "learning_rate": 0.00020110867533660204, "loss": 0.0156, "num_input_tokens_seen": 194271600, "step": 89935 }, { "epoch": 14.6721044045677, "grad_norm": 0.005343761760741472, "learning_rate": 0.00020105161647544534, "loss": 0.0017, "num_input_tokens_seen": 194281840, "step": 89940 }, { "epoch": 14.672920065252855, "grad_norm": 0.016192588955163956, "learning_rate": 0.00020099456367285695, "loss": 0.1246, "num_input_tokens_seen": 194292816, "step": 89945 }, { "epoch": 14.673735725938009, "grad_norm": 0.009091212414205074, "learning_rate": 0.00020093751692999302, "loss": 0.0199, "num_input_tokens_seen": 194304176, "step": 89950 }, { "epoch": 14.674551386623165, "grad_norm": 0.06575567275285721, "learning_rate": 0.00020088047624800966, "loss": 0.0068, "num_input_tokens_seen": 194314128, "step": 89955 }, { "epoch": 14.67536704730832, "grad_norm": 0.07140957564115524, "learning_rate": 0.00020082344162806293, "loss": 0.0035, "num_input_tokens_seen": 194324976, "step": 89960 }, { "epoch": 14.676182707993474, "grad_norm": 0.05011884495615959, "learning_rate": 0.00020076641307130872, "loss": 0.0036, "num_input_tokens_seen": 194336112, "step": 89965 }, { "epoch": 14.67699836867863, "grad_norm": 0.007385524921119213, "learning_rate": 0.00020070939057890275, "loss": 0.0217, "num_input_tokens_seen": 194347728, "step": 89970 }, { "epoch": 14.677814029363784, "grad_norm": 0.01539340615272522, "learning_rate": 0.00020065237415200062, "loss": 0.0039, "num_input_tokens_seen": 194358576, "step": 89975 }, { "epoch": 14.67862969004894, "grad_norm": 0.03188261017203331, "learning_rate": 0.00020059536379175792, "loss": 0.0048, "num_input_tokens_seen": 194368464, "step": 89980 }, { "epoch": 14.679445350734095, "grad_norm": 0.8307998180389404, "learning_rate": 0.0002005383594993299, "loss": 0.0216, "num_input_tokens_seen": 194378352, "step": 89985 }, { "epoch": 14.68026101141925, "grad_norm": 0.02968890219926834, "learning_rate": 0.00020048136127587203, "loss": 0.0483, "num_input_tokens_seen": 194388720, "step": 89990 }, { "epoch": 14.681076672104405, "grad_norm": 0.002068957546725869, "learning_rate": 0.0002004243691225393, "loss": 0.0022, "num_input_tokens_seen": 194400080, "step": 89995 }, { "epoch": 14.681892332789559, "grad_norm": 0.008313358761370182, "learning_rate": 0.00020036738304048674, "loss": 0.0016, "num_input_tokens_seen": 194410832, "step": 90000 }, { "epoch": 14.682707993474715, "grad_norm": 0.013613566756248474, "learning_rate": 0.00020031040303086932, "loss": 0.0134, "num_input_tokens_seen": 194421840, "step": 90005 }, { "epoch": 14.68352365415987, "grad_norm": 0.0009858844568952918, "learning_rate": 0.00020025342909484173, "loss": 0.0019, "num_input_tokens_seen": 194432752, "step": 90010 }, { "epoch": 14.684339314845024, "grad_norm": 0.06172553449869156, "learning_rate": 0.00020019646123355868, "loss": 0.0295, "num_input_tokens_seen": 194443344, "step": 90015 }, { "epoch": 14.68515497553018, "grad_norm": 0.009558094665408134, "learning_rate": 0.00020013949944817466, "loss": 0.0682, "num_input_tokens_seen": 194454544, "step": 90020 }, { "epoch": 14.685970636215334, "grad_norm": 0.021997565403580666, "learning_rate": 0.00020008254373984408, "loss": 0.0024, "num_input_tokens_seen": 194465136, "step": 90025 }, { "epoch": 14.68678629690049, "grad_norm": 0.004823493305593729, "learning_rate": 0.00020002559410972121, "loss": 0.0025, "num_input_tokens_seen": 194476016, "step": 90030 }, { "epoch": 14.687601957585644, "grad_norm": 0.009510107338428497, "learning_rate": 0.00019996865055896008, "loss": 0.0052, "num_input_tokens_seen": 194486320, "step": 90035 }, { "epoch": 14.6884176182708, "grad_norm": 0.0036022786516696215, "learning_rate": 0.0001999117130887152, "loss": 0.0129, "num_input_tokens_seen": 194497232, "step": 90040 }, { "epoch": 14.689233278955955, "grad_norm": 0.01022917777299881, "learning_rate": 0.00019985478170013977, "loss": 0.0053, "num_input_tokens_seen": 194508784, "step": 90045 }, { "epoch": 14.690048939641109, "grad_norm": 0.17686966061592102, "learning_rate": 0.00019979785639438836, "loss": 0.0376, "num_input_tokens_seen": 194519120, "step": 90050 }, { "epoch": 14.690864600326265, "grad_norm": 0.08314191550016403, "learning_rate": 0.00019974093717261383, "loss": 0.004, "num_input_tokens_seen": 194528144, "step": 90055 }, { "epoch": 14.691680261011419, "grad_norm": 0.057678647339344025, "learning_rate": 0.0001996840240359703, "loss": 0.0194, "num_input_tokens_seen": 194539344, "step": 90060 }, { "epoch": 14.692495921696574, "grad_norm": 0.015334702096879482, "learning_rate": 0.00019962711698561097, "loss": 0.0109, "num_input_tokens_seen": 194550160, "step": 90065 }, { "epoch": 14.69331158238173, "grad_norm": 0.011460235342383385, "learning_rate": 0.0001995702160226892, "loss": 0.0034, "num_input_tokens_seen": 194561936, "step": 90070 }, { "epoch": 14.694127243066884, "grad_norm": 0.0077322013676166534, "learning_rate": 0.00019951332114835808, "loss": 0.0071, "num_input_tokens_seen": 194572368, "step": 90075 }, { "epoch": 14.69494290375204, "grad_norm": 0.008056914433836937, "learning_rate": 0.00019945643236377074, "loss": 0.002, "num_input_tokens_seen": 194583152, "step": 90080 }, { "epoch": 14.695758564437194, "grad_norm": 0.014877298846840858, "learning_rate": 0.00019939954967008005, "loss": 0.1314, "num_input_tokens_seen": 194594768, "step": 90085 }, { "epoch": 14.69657422512235, "grad_norm": 0.015234891325235367, "learning_rate": 0.00019934267306843885, "loss": 0.0028, "num_input_tokens_seen": 194606032, "step": 90090 }, { "epoch": 14.697389885807503, "grad_norm": 0.02361419051885605, "learning_rate": 0.0001992858025599998, "loss": 0.0027, "num_input_tokens_seen": 194617328, "step": 90095 }, { "epoch": 14.698205546492659, "grad_norm": 0.006071890238672495, "learning_rate": 0.00019922893814591541, "loss": 0.005, "num_input_tokens_seen": 194627920, "step": 90100 }, { "epoch": 14.699021207177815, "grad_norm": 0.0035932869650423527, "learning_rate": 0.00019917207982733814, "loss": 0.0022, "num_input_tokens_seen": 194637936, "step": 90105 }, { "epoch": 14.699836867862969, "grad_norm": 0.0846271961927414, "learning_rate": 0.00019911522760542028, "loss": 0.0045, "num_input_tokens_seen": 194648688, "step": 90110 }, { "epoch": 14.700652528548124, "grad_norm": 0.0042757634073495865, "learning_rate": 0.0001990583814813141, "loss": 0.003, "num_input_tokens_seen": 194659184, "step": 90115 }, { "epoch": 14.701468189233278, "grad_norm": 0.0037720445543527603, "learning_rate": 0.00019900154145617157, "loss": 0.0032, "num_input_tokens_seen": 194670384, "step": 90120 }, { "epoch": 14.702283849918434, "grad_norm": 0.002661908743903041, "learning_rate": 0.00019894470753114456, "loss": 0.0047, "num_input_tokens_seen": 194680400, "step": 90125 }, { "epoch": 14.70309951060359, "grad_norm": 0.31255388259887695, "learning_rate": 0.00019888787970738508, "loss": 0.0032, "num_input_tokens_seen": 194691152, "step": 90130 }, { "epoch": 14.703915171288743, "grad_norm": 0.0013531736331060529, "learning_rate": 0.00019883105798604468, "loss": 0.0049, "num_input_tokens_seen": 194701744, "step": 90135 }, { "epoch": 14.7047308319739, "grad_norm": 0.07724149525165558, "learning_rate": 0.00019877424236827473, "loss": 0.0075, "num_input_tokens_seen": 194713488, "step": 90140 }, { "epoch": 14.705546492659053, "grad_norm": 0.00285876146517694, "learning_rate": 0.00019871743285522725, "loss": 0.0008, "num_input_tokens_seen": 194723664, "step": 90145 }, { "epoch": 14.706362153344209, "grad_norm": 0.5335696339607239, "learning_rate": 0.0001986606294480529, "loss": 0.0215, "num_input_tokens_seen": 194733712, "step": 90150 }, { "epoch": 14.707177814029365, "grad_norm": 0.007888701744377613, "learning_rate": 0.00019860383214790345, "loss": 0.0039, "num_input_tokens_seen": 194743504, "step": 90155 }, { "epoch": 14.707993474714518, "grad_norm": 0.005462154280394316, "learning_rate": 0.0001985470409559294, "loss": 0.0038, "num_input_tokens_seen": 194755248, "step": 90160 }, { "epoch": 14.708809135399674, "grad_norm": 0.0036751835141330957, "learning_rate": 0.00019849025587328228, "loss": 0.0024, "num_input_tokens_seen": 194766128, "step": 90165 }, { "epoch": 14.709624796084828, "grad_norm": 0.22061067819595337, "learning_rate": 0.00019843347690111235, "loss": 0.0081, "num_input_tokens_seen": 194777360, "step": 90170 }, { "epoch": 14.710440456769984, "grad_norm": 0.032692890614271164, "learning_rate": 0.00019837670404057085, "loss": 0.0117, "num_input_tokens_seen": 194788656, "step": 90175 }, { "epoch": 14.71125611745514, "grad_norm": 0.004137896467000246, "learning_rate": 0.00019831993729280774, "loss": 0.0007, "num_input_tokens_seen": 194798096, "step": 90180 }, { "epoch": 14.712071778140293, "grad_norm": 0.0014908617595210671, "learning_rate": 0.0001982631766589742, "loss": 0.0024, "num_input_tokens_seen": 194809456, "step": 90185 }, { "epoch": 14.71288743882545, "grad_norm": 0.01189829409122467, "learning_rate": 0.00019820642214021979, "loss": 0.0035, "num_input_tokens_seen": 194820080, "step": 90190 }, { "epoch": 14.713703099510603, "grad_norm": 0.47292107343673706, "learning_rate": 0.00019814967373769544, "loss": 0.0814, "num_input_tokens_seen": 194830320, "step": 90195 }, { "epoch": 14.714518760195759, "grad_norm": 0.001293840236030519, "learning_rate": 0.00019809293145255048, "loss": 0.079, "num_input_tokens_seen": 194841776, "step": 90200 }, { "epoch": 14.715334420880914, "grad_norm": 0.14460763335227966, "learning_rate": 0.00019803619528593547, "loss": 0.0077, "num_input_tokens_seen": 194851408, "step": 90205 }, { "epoch": 14.716150081566068, "grad_norm": 0.03119935840368271, "learning_rate": 0.00019797946523900006, "loss": 0.0019, "num_input_tokens_seen": 194861200, "step": 90210 }, { "epoch": 14.716965742251224, "grad_norm": 0.020736441016197205, "learning_rate": 0.0001979227413128939, "loss": 0.0027, "num_input_tokens_seen": 194871248, "step": 90215 }, { "epoch": 14.717781402936378, "grad_norm": 0.8582524657249451, "learning_rate": 0.0001978660235087666, "loss": 0.1751, "num_input_tokens_seen": 194882416, "step": 90220 }, { "epoch": 14.718597063621534, "grad_norm": 0.006416116375476122, "learning_rate": 0.00019780931182776762, "loss": 0.0031, "num_input_tokens_seen": 194892816, "step": 90225 }, { "epoch": 14.719412724306688, "grad_norm": 0.10771431028842926, "learning_rate": 0.0001977526062710463, "loss": 0.0054, "num_input_tokens_seen": 194904176, "step": 90230 }, { "epoch": 14.720228384991843, "grad_norm": 0.3856275677680969, "learning_rate": 0.0001976959068397518, "loss": 0.0062, "num_input_tokens_seen": 194916112, "step": 90235 }, { "epoch": 14.721044045676999, "grad_norm": 0.0013380798045545816, "learning_rate": 0.00019763921353503335, "loss": 0.0036, "num_input_tokens_seen": 194926416, "step": 90240 }, { "epoch": 14.721859706362153, "grad_norm": 0.031052274629473686, "learning_rate": 0.0001975825263580397, "loss": 0.003, "num_input_tokens_seen": 194936464, "step": 90245 }, { "epoch": 14.722675367047309, "grad_norm": 0.1070781797170639, "learning_rate": 0.00019752584530991984, "loss": 0.0328, "num_input_tokens_seen": 194947376, "step": 90250 }, { "epoch": 14.723491027732463, "grad_norm": 0.016105569899082184, "learning_rate": 0.00019746917039182226, "loss": 0.0081, "num_input_tokens_seen": 194957456, "step": 90255 }, { "epoch": 14.724306688417618, "grad_norm": 0.6800005435943604, "learning_rate": 0.0001974125016048961, "loss": 0.0457, "num_input_tokens_seen": 194969200, "step": 90260 }, { "epoch": 14.725122349102774, "grad_norm": 0.00817457027733326, "learning_rate": 0.0001973558389502891, "loss": 0.005, "num_input_tokens_seen": 194980560, "step": 90265 }, { "epoch": 14.725938009787928, "grad_norm": 0.02452375739812851, "learning_rate": 0.0001972991824291503, "loss": 0.0254, "num_input_tokens_seen": 194989968, "step": 90270 }, { "epoch": 14.726753670473084, "grad_norm": 0.018420519307255745, "learning_rate": 0.00019724253204262717, "loss": 0.0177, "num_input_tokens_seen": 195001264, "step": 90275 }, { "epoch": 14.727569331158238, "grad_norm": 0.005502955988049507, "learning_rate": 0.00019718588779186864, "loss": 0.0017, "num_input_tokens_seen": 195014000, "step": 90280 }, { "epoch": 14.728384991843393, "grad_norm": 0.0030444420408457518, "learning_rate": 0.00019712924967802182, "loss": 0.0044, "num_input_tokens_seen": 195025712, "step": 90285 }, { "epoch": 14.729200652528547, "grad_norm": 0.004858762491494417, "learning_rate": 0.00019707261770223532, "loss": 0.0019, "num_input_tokens_seen": 195036912, "step": 90290 }, { "epoch": 14.730016313213703, "grad_norm": 0.0944218784570694, "learning_rate": 0.00019701599186565621, "loss": 0.0063, "num_input_tokens_seen": 195048112, "step": 90295 }, { "epoch": 14.730831973898859, "grad_norm": 0.004670348484069109, "learning_rate": 0.00019695937216943272, "loss": 0.0027, "num_input_tokens_seen": 195058736, "step": 90300 }, { "epoch": 14.731647634584013, "grad_norm": 0.05685482174158096, "learning_rate": 0.00019690275861471168, "loss": 0.004, "num_input_tokens_seen": 195068144, "step": 90305 }, { "epoch": 14.732463295269168, "grad_norm": 0.004699467681348324, "learning_rate": 0.00019684615120264104, "loss": 0.0027, "num_input_tokens_seen": 195078608, "step": 90310 }, { "epoch": 14.733278955954322, "grad_norm": 0.011271055787801743, "learning_rate": 0.00019678954993436736, "loss": 0.0064, "num_input_tokens_seen": 195089072, "step": 90315 }, { "epoch": 14.734094616639478, "grad_norm": 0.4532552659511566, "learning_rate": 0.00019673295481103847, "loss": 0.0568, "num_input_tokens_seen": 195101648, "step": 90320 }, { "epoch": 14.734910277324634, "grad_norm": 0.017157189548015594, "learning_rate": 0.00019667636583380066, "loss": 0.0249, "num_input_tokens_seen": 195113520, "step": 90325 }, { "epoch": 14.735725938009788, "grad_norm": 0.0010682785650715232, "learning_rate": 0.0001966197830038014, "loss": 0.0061, "num_input_tokens_seen": 195125008, "step": 90330 }, { "epoch": 14.736541598694943, "grad_norm": 0.007584839593619108, "learning_rate": 0.00019656320632218676, "loss": 0.0029, "num_input_tokens_seen": 195136208, "step": 90335 }, { "epoch": 14.737357259380097, "grad_norm": 0.018214622512459755, "learning_rate": 0.00019650663579010401, "loss": 0.0089, "num_input_tokens_seen": 195145264, "step": 90340 }, { "epoch": 14.738172920065253, "grad_norm": 0.031968094408512115, "learning_rate": 0.00019645007140869897, "loss": 0.0028, "num_input_tokens_seen": 195156912, "step": 90345 }, { "epoch": 14.738988580750409, "grad_norm": 0.009707508608698845, "learning_rate": 0.00019639351317911853, "loss": 0.0061, "num_input_tokens_seen": 195168464, "step": 90350 }, { "epoch": 14.739804241435563, "grad_norm": 0.5486598610877991, "learning_rate": 0.00019633696110250864, "loss": 0.0153, "num_input_tokens_seen": 195179888, "step": 90355 }, { "epoch": 14.740619902120718, "grad_norm": 0.002595171332359314, "learning_rate": 0.0001962804151800155, "loss": 0.0026, "num_input_tokens_seen": 195190352, "step": 90360 }, { "epoch": 14.741435562805872, "grad_norm": 0.04814111813902855, "learning_rate": 0.00019622387541278497, "loss": 0.002, "num_input_tokens_seen": 195200880, "step": 90365 }, { "epoch": 14.742251223491028, "grad_norm": 0.00787262711673975, "learning_rate": 0.000196167341801963, "loss": 0.0028, "num_input_tokens_seen": 195211600, "step": 90370 }, { "epoch": 14.743066884176184, "grad_norm": 0.001716782571747899, "learning_rate": 0.00019611081434869532, "loss": 0.0017, "num_input_tokens_seen": 195222480, "step": 90375 }, { "epoch": 14.743882544861338, "grad_norm": 0.004030111711472273, "learning_rate": 0.00019605429305412746, "loss": 0.0051, "num_input_tokens_seen": 195232976, "step": 90380 }, { "epoch": 14.744698205546493, "grad_norm": 0.0008914527716115117, "learning_rate": 0.00019599777791940497, "loss": 0.0011, "num_input_tokens_seen": 195243856, "step": 90385 }, { "epoch": 14.745513866231647, "grad_norm": 0.016651881858706474, "learning_rate": 0.00019594126894567315, "loss": 0.0102, "num_input_tokens_seen": 195255216, "step": 90390 }, { "epoch": 14.746329526916803, "grad_norm": 0.009090129286050797, "learning_rate": 0.00019588476613407725, "loss": 0.0296, "num_input_tokens_seen": 195265712, "step": 90395 }, { "epoch": 14.747145187601957, "grad_norm": 0.000620881502982229, "learning_rate": 0.00019582826948576215, "loss": 0.001, "num_input_tokens_seen": 195276944, "step": 90400 }, { "epoch": 14.747960848287113, "grad_norm": 0.004584138281643391, "learning_rate": 0.00019577177900187342, "loss": 0.0025, "num_input_tokens_seen": 195285136, "step": 90405 }, { "epoch": 14.748776508972268, "grad_norm": 0.0013352871173992753, "learning_rate": 0.0001957152946835552, "loss": 0.1063, "num_input_tokens_seen": 195294832, "step": 90410 }, { "epoch": 14.749592169657422, "grad_norm": 0.013338768854737282, "learning_rate": 0.00019565881653195284, "loss": 0.0036, "num_input_tokens_seen": 195306384, "step": 90415 }, { "epoch": 14.750407830342578, "grad_norm": 0.009870841167867184, "learning_rate": 0.00019560234454821034, "loss": 0.0019, "num_input_tokens_seen": 195317488, "step": 90420 }, { "epoch": 14.751223491027732, "grad_norm": 0.007064263802021742, "learning_rate": 0.0001955458787334728, "loss": 0.1473, "num_input_tokens_seen": 195327824, "step": 90425 }, { "epoch": 14.752039151712887, "grad_norm": 0.0038319004233926535, "learning_rate": 0.00019548941908888396, "loss": 0.0048, "num_input_tokens_seen": 195338288, "step": 90430 }, { "epoch": 14.752854812398043, "grad_norm": 0.011020504869520664, "learning_rate": 0.00019543296561558865, "loss": 0.0028, "num_input_tokens_seen": 195347760, "step": 90435 }, { "epoch": 14.753670473083197, "grad_norm": 0.10672824084758759, "learning_rate": 0.0001953765183147303, "loss": 0.0732, "num_input_tokens_seen": 195359120, "step": 90440 }, { "epoch": 14.754486133768353, "grad_norm": 0.005035056732594967, "learning_rate": 0.00019532007718745366, "loss": 0.0029, "num_input_tokens_seen": 195369872, "step": 90445 }, { "epoch": 14.755301794453507, "grad_norm": 0.027038784697651863, "learning_rate": 0.00019526364223490172, "loss": 0.0021, "num_input_tokens_seen": 195380816, "step": 90450 }, { "epoch": 14.756117455138662, "grad_norm": 0.009903905913233757, "learning_rate": 0.00019520721345821907, "loss": 0.0022, "num_input_tokens_seen": 195392080, "step": 90455 }, { "epoch": 14.756933115823816, "grad_norm": 0.013743668794631958, "learning_rate": 0.00019515079085854854, "loss": 0.0027, "num_input_tokens_seen": 195403408, "step": 90460 }, { "epoch": 14.757748776508972, "grad_norm": 0.002215584507212043, "learning_rate": 0.00019509437443703415, "loss": 0.0148, "num_input_tokens_seen": 195414480, "step": 90465 }, { "epoch": 14.758564437194128, "grad_norm": 0.021999172866344452, "learning_rate": 0.00019503796419481908, "loss": 0.0038, "num_input_tokens_seen": 195425520, "step": 90470 }, { "epoch": 14.759380097879282, "grad_norm": 0.01619911380112171, "learning_rate": 0.00019498156013304647, "loss": 0.0028, "num_input_tokens_seen": 195435792, "step": 90475 }, { "epoch": 14.760195758564437, "grad_norm": 0.022966833785176277, "learning_rate": 0.0001949251622528595, "loss": 0.0046, "num_input_tokens_seen": 195446512, "step": 90480 }, { "epoch": 14.761011419249591, "grad_norm": 0.06447285413742065, "learning_rate": 0.0001948687705554012, "loss": 0.0033, "num_input_tokens_seen": 195456656, "step": 90485 }, { "epoch": 14.761827079934747, "grad_norm": 0.005138483829796314, "learning_rate": 0.00019481238504181431, "loss": 0.0013, "num_input_tokens_seen": 195468336, "step": 90490 }, { "epoch": 14.762642740619903, "grad_norm": 0.0013741077855229378, "learning_rate": 0.0001947560057132416, "loss": 0.156, "num_input_tokens_seen": 195479056, "step": 90495 }, { "epoch": 14.763458401305057, "grad_norm": 0.0013339656870812178, "learning_rate": 0.00019469963257082564, "loss": 0.0012, "num_input_tokens_seen": 195490512, "step": 90500 }, { "epoch": 14.764274061990212, "grad_norm": 0.4919218122959137, "learning_rate": 0.00019464326561570894, "loss": 0.0458, "num_input_tokens_seen": 195502704, "step": 90505 }, { "epoch": 14.765089722675366, "grad_norm": 0.0020044157281517982, "learning_rate": 0.0001945869048490338, "loss": 0.0013, "num_input_tokens_seen": 195512816, "step": 90510 }, { "epoch": 14.765905383360522, "grad_norm": 0.010413050651550293, "learning_rate": 0.00019453055027194256, "loss": 0.0029, "num_input_tokens_seen": 195524592, "step": 90515 }, { "epoch": 14.766721044045678, "grad_norm": 0.12600289285182953, "learning_rate": 0.00019447420188557714, "loss": 0.0035, "num_input_tokens_seen": 195535024, "step": 90520 }, { "epoch": 14.767536704730832, "grad_norm": 0.0043631913140416145, "learning_rate": 0.00019441785969107967, "loss": 0.0052, "num_input_tokens_seen": 195546384, "step": 90525 }, { "epoch": 14.768352365415987, "grad_norm": 0.0018164021894335747, "learning_rate": 0.00019436152368959193, "loss": 0.0518, "num_input_tokens_seen": 195556240, "step": 90530 }, { "epoch": 14.769168026101141, "grad_norm": 0.005169565323740244, "learning_rate": 0.0001943051938822556, "loss": 0.016, "num_input_tokens_seen": 195566864, "step": 90535 }, { "epoch": 14.769983686786297, "grad_norm": 0.0052770026959478855, "learning_rate": 0.00019424887027021237, "loss": 0.0055, "num_input_tokens_seen": 195578672, "step": 90540 }, { "epoch": 14.770799347471453, "grad_norm": 0.004595869220793247, "learning_rate": 0.00019419255285460347, "loss": 0.0011, "num_input_tokens_seen": 195588496, "step": 90545 }, { "epoch": 14.771615008156607, "grad_norm": 0.3980657756328583, "learning_rate": 0.00019413624163657072, "loss": 0.0161, "num_input_tokens_seen": 195598160, "step": 90550 }, { "epoch": 14.772430668841762, "grad_norm": 0.01790624111890793, "learning_rate": 0.00019407993661725475, "loss": 0.0038, "num_input_tokens_seen": 195608432, "step": 90555 }, { "epoch": 14.773246329526916, "grad_norm": 0.1566634178161621, "learning_rate": 0.0001940236377977973, "loss": 0.137, "num_input_tokens_seen": 195619504, "step": 90560 }, { "epoch": 14.774061990212072, "grad_norm": 1.9931586980819702, "learning_rate": 0.00019396734517933867, "loss": 0.1028, "num_input_tokens_seen": 195629968, "step": 90565 }, { "epoch": 14.774877650897226, "grad_norm": 0.013670021668076515, "learning_rate": 0.00019391105876302012, "loss": 0.1662, "num_input_tokens_seen": 195642320, "step": 90570 }, { "epoch": 14.775693311582382, "grad_norm": 0.00712958350777626, "learning_rate": 0.00019385477854998235, "loss": 0.0095, "num_input_tokens_seen": 195651984, "step": 90575 }, { "epoch": 14.776508972267537, "grad_norm": 0.45859119296073914, "learning_rate": 0.00019379850454136582, "loss": 0.125, "num_input_tokens_seen": 195662672, "step": 90580 }, { "epoch": 14.777324632952691, "grad_norm": 0.09679024666547775, "learning_rate": 0.00019374223673831103, "loss": 0.0069, "num_input_tokens_seen": 195674640, "step": 90585 }, { "epoch": 14.778140293637847, "grad_norm": 0.01906924694776535, "learning_rate": 0.00019368597514195834, "loss": 0.0382, "num_input_tokens_seen": 195685424, "step": 90590 }, { "epoch": 14.778955954323001, "grad_norm": 0.005224125925451517, "learning_rate": 0.00019362971975344796, "loss": 0.0224, "num_input_tokens_seen": 195696944, "step": 90595 }, { "epoch": 14.779771615008157, "grad_norm": 0.004919872619211674, "learning_rate": 0.00019357347057391994, "loss": 0.005, "num_input_tokens_seen": 195707920, "step": 90600 }, { "epoch": 14.780587275693312, "grad_norm": 0.014207074418663979, "learning_rate": 0.0001935172276045143, "loss": 0.0041, "num_input_tokens_seen": 195718544, "step": 90605 }, { "epoch": 14.781402936378466, "grad_norm": 0.09853319078683853, "learning_rate": 0.0001934609908463708, "loss": 0.1058, "num_input_tokens_seen": 195728304, "step": 90610 }, { "epoch": 14.782218597063622, "grad_norm": 0.011572792194783688, "learning_rate": 0.00019340476030062925, "loss": 0.0095, "num_input_tokens_seen": 195739344, "step": 90615 }, { "epoch": 14.783034257748776, "grad_norm": 0.003625387093052268, "learning_rate": 0.00019334853596842915, "loss": 0.0042, "num_input_tokens_seen": 195749936, "step": 90620 }, { "epoch": 14.783849918433932, "grad_norm": 0.0010916964383795857, "learning_rate": 0.00019329231785090994, "loss": 0.0017, "num_input_tokens_seen": 195760048, "step": 90625 }, { "epoch": 14.784665579119086, "grad_norm": 0.002343985252082348, "learning_rate": 0.0001932361059492111, "loss": 0.0037, "num_input_tokens_seen": 195771632, "step": 90630 }, { "epoch": 14.785481239804241, "grad_norm": 0.009995860978960991, "learning_rate": 0.00019317990026447164, "loss": 0.0018, "num_input_tokens_seen": 195781520, "step": 90635 }, { "epoch": 14.786296900489397, "grad_norm": 0.0010385174537077546, "learning_rate": 0.00019312370079783075, "loss": 0.0154, "num_input_tokens_seen": 195793648, "step": 90640 }, { "epoch": 14.78711256117455, "grad_norm": 0.055788811296224594, "learning_rate": 0.0001930675075504274, "loss": 0.0134, "num_input_tokens_seen": 195802928, "step": 90645 }, { "epoch": 14.787928221859707, "grad_norm": 0.010686542838811874, "learning_rate": 0.00019301132052340031, "loss": 0.0212, "num_input_tokens_seen": 195814352, "step": 90650 }, { "epoch": 14.78874388254486, "grad_norm": 0.014930814504623413, "learning_rate": 0.0001929551397178883, "loss": 0.0014, "num_input_tokens_seen": 195825232, "step": 90655 }, { "epoch": 14.789559543230016, "grad_norm": 0.00482224440202117, "learning_rate": 0.00019289896513502991, "loss": 0.0022, "num_input_tokens_seen": 195836880, "step": 90660 }, { "epoch": 14.790375203915172, "grad_norm": 0.7234705090522766, "learning_rate": 0.00019284279677596355, "loss": 0.0291, "num_input_tokens_seen": 195847952, "step": 90665 }, { "epoch": 14.791190864600326, "grad_norm": 0.005445053800940514, "learning_rate": 0.0001927866346418276, "loss": 0.0556, "num_input_tokens_seen": 195858960, "step": 90670 }, { "epoch": 14.792006525285482, "grad_norm": 0.8060768842697144, "learning_rate": 0.00019273047873376005, "loss": 0.0477, "num_input_tokens_seen": 195869264, "step": 90675 }, { "epoch": 14.792822185970635, "grad_norm": 0.3666798770427704, "learning_rate": 0.00019267432905289945, "loss": 0.0736, "num_input_tokens_seen": 195880368, "step": 90680 }, { "epoch": 14.793637846655791, "grad_norm": 0.03639459237456322, "learning_rate": 0.00019261818560038313, "loss": 0.0041, "num_input_tokens_seen": 195890416, "step": 90685 }, { "epoch": 14.794453507340947, "grad_norm": 0.010454765520989895, "learning_rate": 0.00019256204837734937, "loss": 0.0109, "num_input_tokens_seen": 195900464, "step": 90690 }, { "epoch": 14.7952691680261, "grad_norm": 0.004342780914157629, "learning_rate": 0.00019250591738493572, "loss": 0.0071, "num_input_tokens_seen": 195911536, "step": 90695 }, { "epoch": 14.796084828711257, "grad_norm": 0.055346082895994186, "learning_rate": 0.00019244979262427974, "loss": 0.0048, "num_input_tokens_seen": 195922704, "step": 90700 }, { "epoch": 14.79690048939641, "grad_norm": 0.015100730583071709, "learning_rate": 0.00019239367409651893, "loss": 0.0173, "num_input_tokens_seen": 195933296, "step": 90705 }, { "epoch": 14.797716150081566, "grad_norm": 0.015843048691749573, "learning_rate": 0.00019233756180279043, "loss": 0.0058, "num_input_tokens_seen": 195942768, "step": 90710 }, { "epoch": 14.798531810766722, "grad_norm": 0.012738611549139023, "learning_rate": 0.00019228145574423162, "loss": 0.0016, "num_input_tokens_seen": 195953808, "step": 90715 }, { "epoch": 14.799347471451876, "grad_norm": 0.6077333688735962, "learning_rate": 0.00019222535592197944, "loss": 0.1671, "num_input_tokens_seen": 195964464, "step": 90720 }, { "epoch": 14.800163132137031, "grad_norm": 0.30826112627983093, "learning_rate": 0.00019216926233717085, "loss": 0.0347, "num_input_tokens_seen": 195975664, "step": 90725 }, { "epoch": 14.800978792822185, "grad_norm": 0.13063177466392517, "learning_rate": 0.0001921131749909427, "loss": 0.0084, "num_input_tokens_seen": 195986320, "step": 90730 }, { "epoch": 14.801794453507341, "grad_norm": 0.014707125723361969, "learning_rate": 0.00019205709388443165, "loss": 0.0025, "num_input_tokens_seen": 195996784, "step": 90735 }, { "epoch": 14.802610114192497, "grad_norm": 0.003288878360763192, "learning_rate": 0.00019200101901877426, "loss": 0.0039, "num_input_tokens_seen": 196008304, "step": 90740 }, { "epoch": 14.80342577487765, "grad_norm": 0.011088044382631779, "learning_rate": 0.0001919449503951069, "loss": 0.0785, "num_input_tokens_seen": 196018864, "step": 90745 }, { "epoch": 14.804241435562806, "grad_norm": 0.0074303289875388145, "learning_rate": 0.00019188888801456594, "loss": 0.0039, "num_input_tokens_seen": 196029648, "step": 90750 }, { "epoch": 14.80505709624796, "grad_norm": 0.0014132543001323938, "learning_rate": 0.0001918328318782875, "loss": 0.0068, "num_input_tokens_seen": 196039568, "step": 90755 }, { "epoch": 14.805872756933116, "grad_norm": 0.013476034626364708, "learning_rate": 0.00019177678198740766, "loss": 0.0211, "num_input_tokens_seen": 196051280, "step": 90760 }, { "epoch": 14.80668841761827, "grad_norm": 0.03059094212949276, "learning_rate": 0.00019172073834306235, "loss": 0.0076, "num_input_tokens_seen": 196062928, "step": 90765 }, { "epoch": 14.807504078303426, "grad_norm": 0.04523979872465134, "learning_rate": 0.00019166470094638739, "loss": 0.0035, "num_input_tokens_seen": 196073680, "step": 90770 }, { "epoch": 14.808319738988581, "grad_norm": 0.005500610917806625, "learning_rate": 0.00019160866979851842, "loss": 0.0064, "num_input_tokens_seen": 196083984, "step": 90775 }, { "epoch": 14.809135399673735, "grad_norm": 0.010868792422115803, "learning_rate": 0.00019155264490059077, "loss": 0.0038, "num_input_tokens_seen": 196093968, "step": 90780 }, { "epoch": 14.809951060358891, "grad_norm": 0.022862639278173447, "learning_rate": 0.00019149662625374042, "loss": 0.0029, "num_input_tokens_seen": 196103824, "step": 90785 }, { "epoch": 14.810766721044045, "grad_norm": 0.00445165578275919, "learning_rate": 0.00019144061385910195, "loss": 0.0017, "num_input_tokens_seen": 196113104, "step": 90790 }, { "epoch": 14.8115823817292, "grad_norm": 0.0044853598810732365, "learning_rate": 0.00019138460771781125, "loss": 0.0017, "num_input_tokens_seen": 196124560, "step": 90795 }, { "epoch": 14.812398042414356, "grad_norm": 0.0008106532623060048, "learning_rate": 0.0001913286078310026, "loss": 0.0019, "num_input_tokens_seen": 196134928, "step": 90800 }, { "epoch": 14.81321370309951, "grad_norm": 0.0041127754375338554, "learning_rate": 0.00019127261419981168, "loss": 0.0035, "num_input_tokens_seen": 196146352, "step": 90805 }, { "epoch": 14.814029363784666, "grad_norm": 0.011095692403614521, "learning_rate": 0.0001912166268253725, "loss": 0.0121, "num_input_tokens_seen": 196156368, "step": 90810 }, { "epoch": 14.81484502446982, "grad_norm": 0.005023022647947073, "learning_rate": 0.0001911606457088204, "loss": 0.0019, "num_input_tokens_seen": 196167696, "step": 90815 }, { "epoch": 14.815660685154976, "grad_norm": 0.008457830175757408, "learning_rate": 0.00019110467085128936, "loss": 0.0026, "num_input_tokens_seen": 196178512, "step": 90820 }, { "epoch": 14.81647634584013, "grad_norm": 0.011036441661417484, "learning_rate": 0.00019104870225391412, "loss": 0.0028, "num_input_tokens_seen": 196188432, "step": 90825 }, { "epoch": 14.817292006525285, "grad_norm": 0.036544330418109894, "learning_rate": 0.0001909927399178289, "loss": 0.0047, "num_input_tokens_seen": 196199184, "step": 90830 }, { "epoch": 14.818107667210441, "grad_norm": 0.026460448279976845, "learning_rate": 0.0001909367838441678, "loss": 0.0035, "num_input_tokens_seen": 196209840, "step": 90835 }, { "epoch": 14.818923327895595, "grad_norm": 0.3815443813800812, "learning_rate": 0.00019088083403406486, "loss": 0.0906, "num_input_tokens_seen": 196221584, "step": 90840 }, { "epoch": 14.81973898858075, "grad_norm": 0.009295003488659859, "learning_rate": 0.00019082489048865393, "loss": 0.0016, "num_input_tokens_seen": 196233328, "step": 90845 }, { "epoch": 14.820554649265905, "grad_norm": 1.4162017107009888, "learning_rate": 0.00019076895320906885, "loss": 0.0921, "num_input_tokens_seen": 196244144, "step": 90850 }, { "epoch": 14.82137030995106, "grad_norm": 0.003560206387192011, "learning_rate": 0.0001907130221964432, "loss": 0.0157, "num_input_tokens_seen": 196254704, "step": 90855 }, { "epoch": 14.822185970636216, "grad_norm": 0.006497807335108519, "learning_rate": 0.0001906570974519105, "loss": 0.0937, "num_input_tokens_seen": 196265648, "step": 90860 }, { "epoch": 14.82300163132137, "grad_norm": 0.0016851173713803291, "learning_rate": 0.00019060117897660417, "loss": 0.0042, "num_input_tokens_seen": 196276368, "step": 90865 }, { "epoch": 14.823817292006526, "grad_norm": 0.4738226532936096, "learning_rate": 0.00019054526677165744, "loss": 0.142, "num_input_tokens_seen": 196287696, "step": 90870 }, { "epoch": 14.82463295269168, "grad_norm": 0.00117388810031116, "learning_rate": 0.00019048936083820346, "loss": 0.0048, "num_input_tokens_seen": 196298352, "step": 90875 }, { "epoch": 14.825448613376835, "grad_norm": 0.0461152046918869, "learning_rate": 0.00019043346117737526, "loss": 0.0053, "num_input_tokens_seen": 196309296, "step": 90880 }, { "epoch": 14.826264274061991, "grad_norm": 0.0075841969810426235, "learning_rate": 0.00019037756779030545, "loss": 0.0027, "num_input_tokens_seen": 196319568, "step": 90885 }, { "epoch": 14.827079934747145, "grad_norm": 0.013668366707861423, "learning_rate": 0.00019032168067812738, "loss": 0.0026, "num_input_tokens_seen": 196330352, "step": 90890 }, { "epoch": 14.8278955954323, "grad_norm": 0.0021600218024104834, "learning_rate": 0.00019026579984197296, "loss": 0.0052, "num_input_tokens_seen": 196341296, "step": 90895 }, { "epoch": 14.828711256117455, "grad_norm": 0.010189131833612919, "learning_rate": 0.00019020992528297537, "loss": 0.0042, "num_input_tokens_seen": 196352560, "step": 90900 }, { "epoch": 14.82952691680261, "grad_norm": 0.043539393693208694, "learning_rate": 0.0001901540570022663, "loss": 0.0047, "num_input_tokens_seen": 196363312, "step": 90905 }, { "epoch": 14.830342577487766, "grad_norm": 0.022573234513401985, "learning_rate": 0.0001900981950009787, "loss": 0.0865, "num_input_tokens_seen": 196374352, "step": 90910 }, { "epoch": 14.83115823817292, "grad_norm": 0.004776300862431526, "learning_rate": 0.00019004233928024395, "loss": 0.0066, "num_input_tokens_seen": 196384656, "step": 90915 }, { "epoch": 14.831973898858076, "grad_norm": 0.02242133766412735, "learning_rate": 0.0001899864898411947, "loss": 0.002, "num_input_tokens_seen": 196396240, "step": 90920 }, { "epoch": 14.83278955954323, "grad_norm": 0.003272157395258546, "learning_rate": 0.00018993064668496225, "loss": 0.0036, "num_input_tokens_seen": 196405808, "step": 90925 }, { "epoch": 14.833605220228385, "grad_norm": 0.004656730219721794, "learning_rate": 0.00018987480981267892, "loss": 0.002, "num_input_tokens_seen": 196417104, "step": 90930 }, { "epoch": 14.83442088091354, "grad_norm": 0.0012749811867251992, "learning_rate": 0.00018981897922547565, "loss": 0.0258, "num_input_tokens_seen": 196427952, "step": 90935 }, { "epoch": 14.835236541598695, "grad_norm": 0.15742255747318268, "learning_rate": 0.00018976315492448453, "loss": 0.0064, "num_input_tokens_seen": 196439024, "step": 90940 }, { "epoch": 14.83605220228385, "grad_norm": 0.013149394653737545, "learning_rate": 0.00018970733691083637, "loss": 0.0031, "num_input_tokens_seen": 196449584, "step": 90945 }, { "epoch": 14.836867862969005, "grad_norm": 0.057830024510622025, "learning_rate": 0.000189651525185663, "loss": 0.0076, "num_input_tokens_seen": 196460528, "step": 90950 }, { "epoch": 14.83768352365416, "grad_norm": 0.001897350768558681, "learning_rate": 0.00018959571975009481, "loss": 0.0017, "num_input_tokens_seen": 196471856, "step": 90955 }, { "epoch": 14.838499184339314, "grad_norm": 0.6547830104827881, "learning_rate": 0.00018953992060526348, "loss": 0.0512, "num_input_tokens_seen": 196482096, "step": 90960 }, { "epoch": 14.83931484502447, "grad_norm": 0.07979469001293182, "learning_rate": 0.00018948412775229918, "loss": 0.0674, "num_input_tokens_seen": 196492272, "step": 90965 }, { "epoch": 14.840130505709626, "grad_norm": 0.00814758613705635, "learning_rate": 0.0001894283411923331, "loss": 0.0046, "num_input_tokens_seen": 196501872, "step": 90970 }, { "epoch": 14.84094616639478, "grad_norm": 0.001418950268998742, "learning_rate": 0.0001893725609264957, "loss": 0.0048, "num_input_tokens_seen": 196514000, "step": 90975 }, { "epoch": 14.841761827079935, "grad_norm": 0.0042955600656569, "learning_rate": 0.00018931678695591742, "loss": 0.0545, "num_input_tokens_seen": 196524400, "step": 90980 }, { "epoch": 14.84257748776509, "grad_norm": 0.0012596363667398691, "learning_rate": 0.00018926101928172856, "loss": 0.0149, "num_input_tokens_seen": 196535472, "step": 90985 }, { "epoch": 14.843393148450245, "grad_norm": 0.007855188101530075, "learning_rate": 0.00018920525790505933, "loss": 0.0038, "num_input_tokens_seen": 196546576, "step": 90990 }, { "epoch": 14.844208809135399, "grad_norm": 0.016350040212273598, "learning_rate": 0.00018914950282703985, "loss": 0.0028, "num_input_tokens_seen": 196557808, "step": 90995 }, { "epoch": 14.845024469820554, "grad_norm": 0.06966894865036011, "learning_rate": 0.00018909375404879998, "loss": 0.0054, "num_input_tokens_seen": 196568112, "step": 91000 }, { "epoch": 14.84584013050571, "grad_norm": 0.01267112884670496, "learning_rate": 0.00018903801157146965, "loss": 0.0114, "num_input_tokens_seen": 196579856, "step": 91005 }, { "epoch": 14.846655791190864, "grad_norm": 0.012737995944917202, "learning_rate": 0.00018898227539617852, "loss": 0.0022, "num_input_tokens_seen": 196591152, "step": 91010 }, { "epoch": 14.84747145187602, "grad_norm": 0.07722505927085876, "learning_rate": 0.0001889265455240561, "loss": 0.0092, "num_input_tokens_seen": 196601136, "step": 91015 }, { "epoch": 14.848287112561174, "grad_norm": 0.18848595023155212, "learning_rate": 0.00018887082195623167, "loss": 0.0077, "num_input_tokens_seen": 196612112, "step": 91020 }, { "epoch": 14.84910277324633, "grad_norm": 0.002311618998646736, "learning_rate": 0.00018881510469383506, "loss": 0.0181, "num_input_tokens_seen": 196621040, "step": 91025 }, { "epoch": 14.849918433931485, "grad_norm": 0.019284280017018318, "learning_rate": 0.00018875939373799483, "loss": 0.0037, "num_input_tokens_seen": 196631632, "step": 91030 }, { "epoch": 14.850734094616639, "grad_norm": 0.25687074661254883, "learning_rate": 0.00018870368908984063, "loss": 0.0112, "num_input_tokens_seen": 196641904, "step": 91035 }, { "epoch": 14.851549755301795, "grad_norm": 0.009678049944341183, "learning_rate": 0.00018864799075050078, "loss": 0.0269, "num_input_tokens_seen": 196652624, "step": 91040 }, { "epoch": 14.852365415986949, "grad_norm": 0.1337086260318756, "learning_rate": 0.00018859229872110467, "loss": 0.1025, "num_input_tokens_seen": 196663152, "step": 91045 }, { "epoch": 14.853181076672104, "grad_norm": 0.6179363131523132, "learning_rate": 0.00018853661300278034, "loss": 0.0868, "num_input_tokens_seen": 196674384, "step": 91050 }, { "epoch": 14.85399673735726, "grad_norm": 0.006021140608936548, "learning_rate": 0.00018848093359665703, "loss": 0.0043, "num_input_tokens_seen": 196684528, "step": 91055 }, { "epoch": 14.854812398042414, "grad_norm": 0.44819143414497375, "learning_rate": 0.0001884252605038624, "loss": 0.0606, "num_input_tokens_seen": 196695600, "step": 91060 }, { "epoch": 14.85562805872757, "grad_norm": 0.001804789761081338, "learning_rate": 0.00018836959372552553, "loss": 0.0017, "num_input_tokens_seen": 196706224, "step": 91065 }, { "epoch": 14.856443719412724, "grad_norm": 0.01530960202217102, "learning_rate": 0.0001883139332627738, "loss": 0.003, "num_input_tokens_seen": 196716432, "step": 91070 }, { "epoch": 14.85725938009788, "grad_norm": 0.4987078607082367, "learning_rate": 0.00018825827911673592, "loss": 0.1173, "num_input_tokens_seen": 196728016, "step": 91075 }, { "epoch": 14.858075040783035, "grad_norm": 0.008473776280879974, "learning_rate": 0.0001882026312885392, "loss": 0.0012, "num_input_tokens_seen": 196738544, "step": 91080 }, { "epoch": 14.858890701468189, "grad_norm": 0.49949583411216736, "learning_rate": 0.00018814698977931204, "loss": 0.0146, "num_input_tokens_seen": 196749072, "step": 91085 }, { "epoch": 14.859706362153345, "grad_norm": 0.03785784915089607, "learning_rate": 0.0001880913545901814, "loss": 0.0025, "num_input_tokens_seen": 196759280, "step": 91090 }, { "epoch": 14.860522022838499, "grad_norm": 0.013276136480271816, "learning_rate": 0.00018803572572227546, "loss": 0.0053, "num_input_tokens_seen": 196770832, "step": 91095 }, { "epoch": 14.861337683523654, "grad_norm": 0.00885779783129692, "learning_rate": 0.000187980103176721, "loss": 0.0979, "num_input_tokens_seen": 196780944, "step": 91100 }, { "epoch": 14.86215334420881, "grad_norm": 0.07836762815713882, "learning_rate": 0.0001879244869546457, "loss": 0.018, "num_input_tokens_seen": 196792464, "step": 91105 }, { "epoch": 14.862969004893964, "grad_norm": 0.0056850542314350605, "learning_rate": 0.00018786887705717658, "loss": 0.0039, "num_input_tokens_seen": 196803216, "step": 91110 }, { "epoch": 14.86378466557912, "grad_norm": 0.003115960629656911, "learning_rate": 0.00018781327348544065, "loss": 0.0021, "num_input_tokens_seen": 196814032, "step": 91115 }, { "epoch": 14.864600326264274, "grad_norm": 0.005707794800400734, "learning_rate": 0.00018775767624056472, "loss": 0.0128, "num_input_tokens_seen": 196825072, "step": 91120 }, { "epoch": 14.86541598694943, "grad_norm": 0.10761536657810211, "learning_rate": 0.0001877020853236756, "loss": 0.0104, "num_input_tokens_seen": 196835888, "step": 91125 }, { "epoch": 14.866231647634583, "grad_norm": 0.0006443029851652682, "learning_rate": 0.00018764650073589995, "loss": 0.005, "num_input_tokens_seen": 196846544, "step": 91130 }, { "epoch": 14.867047308319739, "grad_norm": 0.004941369406878948, "learning_rate": 0.0001875909224783642, "loss": 0.0057, "num_input_tokens_seen": 196857168, "step": 91135 }, { "epoch": 14.867862969004895, "grad_norm": 0.031196700409054756, "learning_rate": 0.00018753535055219468, "loss": 0.0199, "num_input_tokens_seen": 196867760, "step": 91140 }, { "epoch": 14.868678629690049, "grad_norm": 0.011366274207830429, "learning_rate": 0.0001874797849585177, "loss": 0.1045, "num_input_tokens_seen": 196878928, "step": 91145 }, { "epoch": 14.869494290375204, "grad_norm": 1.00128972530365, "learning_rate": 0.00018742422569845935, "loss": 0.0436, "num_input_tokens_seen": 196888944, "step": 91150 }, { "epoch": 14.870309951060358, "grad_norm": 0.45802080631256104, "learning_rate": 0.00018736867277314556, "loss": 0.0808, "num_input_tokens_seen": 196898960, "step": 91155 }, { "epoch": 14.871125611745514, "grad_norm": 0.016989484429359436, "learning_rate": 0.00018731312618370228, "loss": 0.0442, "num_input_tokens_seen": 196909808, "step": 91160 }, { "epoch": 14.87194127243067, "grad_norm": 0.002204073593020439, "learning_rate": 0.0001872575859312549, "loss": 0.007, "num_input_tokens_seen": 196919728, "step": 91165 }, { "epoch": 14.872756933115824, "grad_norm": 0.006088990718126297, "learning_rate": 0.00018720205201692975, "loss": 0.0027, "num_input_tokens_seen": 196929456, "step": 91170 }, { "epoch": 14.87357259380098, "grad_norm": 0.01547847967594862, "learning_rate": 0.00018714652444185137, "loss": 0.0046, "num_input_tokens_seen": 196939248, "step": 91175 }, { "epoch": 14.874388254486133, "grad_norm": 0.02324049361050129, "learning_rate": 0.00018709100320714594, "loss": 0.0148, "num_input_tokens_seen": 196950544, "step": 91180 }, { "epoch": 14.875203915171289, "grad_norm": 0.6144483089447021, "learning_rate": 0.00018703548831393795, "loss": 0.0567, "num_input_tokens_seen": 196961232, "step": 91185 }, { "epoch": 14.876019575856443, "grad_norm": 0.006280634086579084, "learning_rate": 0.00018697997976335317, "loss": 0.0671, "num_input_tokens_seen": 196971536, "step": 91190 }, { "epoch": 14.876835236541599, "grad_norm": 0.015296096913516521, "learning_rate": 0.0001869244775565158, "loss": 0.0144, "num_input_tokens_seen": 196982160, "step": 91195 }, { "epoch": 14.877650897226754, "grad_norm": 0.003120235400274396, "learning_rate": 0.00018686898169455147, "loss": 0.0078, "num_input_tokens_seen": 196992944, "step": 91200 }, { "epoch": 14.878466557911908, "grad_norm": 0.043369755148887634, "learning_rate": 0.00018681349217858408, "loss": 0.019, "num_input_tokens_seen": 197003792, "step": 91205 }, { "epoch": 14.879282218597064, "grad_norm": 0.004237358458340168, "learning_rate": 0.00018675800900973876, "loss": 0.004, "num_input_tokens_seen": 197014960, "step": 91210 }, { "epoch": 14.880097879282218, "grad_norm": 0.0031959593761712313, "learning_rate": 0.00018670253218913975, "loss": 0.0043, "num_input_tokens_seen": 197025552, "step": 91215 }, { "epoch": 14.880913539967374, "grad_norm": 0.008891470730304718, "learning_rate": 0.00018664706171791134, "loss": 0.0025, "num_input_tokens_seen": 197036752, "step": 91220 }, { "epoch": 14.88172920065253, "grad_norm": 0.00029773113783448935, "learning_rate": 0.0001865915975971778, "loss": 0.0059, "num_input_tokens_seen": 197047024, "step": 91225 }, { "epoch": 14.882544861337683, "grad_norm": 0.005044011864811182, "learning_rate": 0.00018653613982806311, "loss": 0.0023, "num_input_tokens_seen": 197057392, "step": 91230 }, { "epoch": 14.883360522022839, "grad_norm": 0.009152603335678577, "learning_rate": 0.0001864806884116912, "loss": 0.0021, "num_input_tokens_seen": 197066896, "step": 91235 }, { "epoch": 14.884176182707993, "grad_norm": 0.004073834978044033, "learning_rate": 0.00018642524334918582, "loss": 0.002, "num_input_tokens_seen": 197079248, "step": 91240 }, { "epoch": 14.884991843393149, "grad_norm": 0.0020526114385575056, "learning_rate": 0.00018636980464167076, "loss": 0.0039, "num_input_tokens_seen": 197090288, "step": 91245 }, { "epoch": 14.885807504078304, "grad_norm": 0.0018403942231088877, "learning_rate": 0.00018631437229026942, "loss": 0.002, "num_input_tokens_seen": 197100784, "step": 91250 }, { "epoch": 14.886623164763458, "grad_norm": 0.0024247504770755768, "learning_rate": 0.0001862589462961053, "loss": 0.0106, "num_input_tokens_seen": 197112752, "step": 91255 }, { "epoch": 14.887438825448614, "grad_norm": 0.049593936651945114, "learning_rate": 0.0001862035266603016, "loss": 0.0054, "num_input_tokens_seen": 197123920, "step": 91260 }, { "epoch": 14.888254486133768, "grad_norm": 0.009330598637461662, "learning_rate": 0.00018614811338398153, "loss": 0.0018, "num_input_tokens_seen": 197133584, "step": 91265 }, { "epoch": 14.889070146818923, "grad_norm": 0.012051105499267578, "learning_rate": 0.0001860927064682681, "loss": 0.0025, "num_input_tokens_seen": 197143856, "step": 91270 }, { "epoch": 14.88988580750408, "grad_norm": 0.06999044865369797, "learning_rate": 0.0001860373059142842, "loss": 0.0062, "num_input_tokens_seen": 197154640, "step": 91275 }, { "epoch": 14.890701468189233, "grad_norm": 0.0663766860961914, "learning_rate": 0.00018598191172315253, "loss": 0.0036, "num_input_tokens_seen": 197165648, "step": 91280 }, { "epoch": 14.891517128874389, "grad_norm": 0.07517898827791214, "learning_rate": 0.00018592652389599583, "loss": 0.0964, "num_input_tokens_seen": 197176496, "step": 91285 }, { "epoch": 14.892332789559543, "grad_norm": 0.002310275798663497, "learning_rate": 0.00018587114243393655, "loss": 0.0023, "num_input_tokens_seen": 197186992, "step": 91290 }, { "epoch": 14.893148450244698, "grad_norm": 0.6070193648338318, "learning_rate": 0.00018581576733809707, "loss": 0.0378, "num_input_tokens_seen": 197197680, "step": 91295 }, { "epoch": 14.893964110929852, "grad_norm": 0.004368333145976067, "learning_rate": 0.00018576039860959966, "loss": 0.0024, "num_input_tokens_seen": 197209168, "step": 91300 }, { "epoch": 14.894779771615008, "grad_norm": 0.06271976977586746, "learning_rate": 0.00018570503624956635, "loss": 0.0054, "num_input_tokens_seen": 197219728, "step": 91305 }, { "epoch": 14.895595432300164, "grad_norm": 0.05428668111562729, "learning_rate": 0.00018564968025911905, "loss": 0.0048, "num_input_tokens_seen": 197229744, "step": 91310 }, { "epoch": 14.896411092985318, "grad_norm": 0.005725554656237364, "learning_rate": 0.00018559433063937997, "loss": 0.0023, "num_input_tokens_seen": 197240816, "step": 91315 }, { "epoch": 14.897226753670473, "grad_norm": 0.00881729181855917, "learning_rate": 0.00018553898739147057, "loss": 0.0167, "num_input_tokens_seen": 197252464, "step": 91320 }, { "epoch": 14.898042414355627, "grad_norm": 0.001196861732751131, "learning_rate": 0.00018548365051651255, "loss": 0.0038, "num_input_tokens_seen": 197262352, "step": 91325 }, { "epoch": 14.898858075040783, "grad_norm": 0.031206313520669937, "learning_rate": 0.00018542832001562732, "loss": 0.0029, "num_input_tokens_seen": 197273520, "step": 91330 }, { "epoch": 14.899673735725939, "grad_norm": 0.20482459664344788, "learning_rate": 0.00018537299588993627, "loss": 0.1099, "num_input_tokens_seen": 197283920, "step": 91335 }, { "epoch": 14.900489396411093, "grad_norm": 0.007990190759301186, "learning_rate": 0.0001853176781405606, "loss": 0.0922, "num_input_tokens_seen": 197294448, "step": 91340 }, { "epoch": 14.901305057096248, "grad_norm": 0.0065262895077466965, "learning_rate": 0.00018526236676862134, "loss": 0.0049, "num_input_tokens_seen": 197304016, "step": 91345 }, { "epoch": 14.902120717781402, "grad_norm": 0.004501709248870611, "learning_rate": 0.00018520706177523955, "loss": 0.0022, "num_input_tokens_seen": 197314544, "step": 91350 }, { "epoch": 14.902936378466558, "grad_norm": 0.0008994314703159034, "learning_rate": 0.000185151763161536, "loss": 0.0356, "num_input_tokens_seen": 197325808, "step": 91355 }, { "epoch": 14.903752039151712, "grad_norm": 0.5988969206809998, "learning_rate": 0.0001850964709286313, "loss": 0.0955, "num_input_tokens_seen": 197338352, "step": 91360 }, { "epoch": 14.904567699836868, "grad_norm": 0.015295770950615406, "learning_rate": 0.00018504118507764618, "loss": 0.0065, "num_input_tokens_seen": 197348976, "step": 91365 }, { "epoch": 14.905383360522023, "grad_norm": 0.015201558358967304, "learning_rate": 0.00018498590560970098, "loss": 0.0028, "num_input_tokens_seen": 197359472, "step": 91370 }, { "epoch": 14.906199021207177, "grad_norm": 0.07447752356529236, "learning_rate": 0.00018493063252591596, "loss": 0.0136, "num_input_tokens_seen": 197369776, "step": 91375 }, { "epoch": 14.907014681892333, "grad_norm": 0.004664100240916014, "learning_rate": 0.00018487536582741142, "loss": 0.0034, "num_input_tokens_seen": 197379536, "step": 91380 }, { "epoch": 14.907830342577487, "grad_norm": 0.004202402196824551, "learning_rate": 0.00018482010551530736, "loss": 0.0065, "num_input_tokens_seen": 197390160, "step": 91385 }, { "epoch": 14.908646003262643, "grad_norm": 0.013313495554029942, "learning_rate": 0.00018476485159072371, "loss": 0.0083, "num_input_tokens_seen": 197401008, "step": 91390 }, { "epoch": 14.909461663947798, "grad_norm": 0.0597468763589859, "learning_rate": 0.0001847096040547802, "loss": 0.0085, "num_input_tokens_seen": 197413520, "step": 91395 }, { "epoch": 14.910277324632952, "grad_norm": 0.008388367481529713, "learning_rate": 0.00018465436290859662, "loss": 0.0088, "num_input_tokens_seen": 197424240, "step": 91400 }, { "epoch": 14.911092985318108, "grad_norm": 0.0027642918284982443, "learning_rate": 0.00018459912815329234, "loss": 0.087, "num_input_tokens_seen": 197436816, "step": 91405 }, { "epoch": 14.911908646003262, "grad_norm": 0.0009764356655068696, "learning_rate": 0.00018454389978998686, "loss": 0.0932, "num_input_tokens_seen": 197448016, "step": 91410 }, { "epoch": 14.912724306688418, "grad_norm": 0.0351264625787735, "learning_rate": 0.00018448867781979943, "loss": 0.0526, "num_input_tokens_seen": 197457744, "step": 91415 }, { "epoch": 14.913539967373573, "grad_norm": 0.008587658405303955, "learning_rate": 0.00018443346224384906, "loss": 0.0168, "num_input_tokens_seen": 197469264, "step": 91420 }, { "epoch": 14.914355628058727, "grad_norm": 0.005530293099582195, "learning_rate": 0.00018437825306325524, "loss": 0.0301, "num_input_tokens_seen": 197478960, "step": 91425 }, { "epoch": 14.915171288743883, "grad_norm": 0.030203763395547867, "learning_rate": 0.00018432305027913615, "loss": 0.002, "num_input_tokens_seen": 197489616, "step": 91430 }, { "epoch": 14.915986949429037, "grad_norm": 0.009578707627952099, "learning_rate": 0.00018426785389261124, "loss": 0.0161, "num_input_tokens_seen": 197499376, "step": 91435 }, { "epoch": 14.916802610114193, "grad_norm": 0.07021234184503555, "learning_rate": 0.00018421266390479846, "loss": 0.004, "num_input_tokens_seen": 197509552, "step": 91440 }, { "epoch": 14.917618270799348, "grad_norm": 0.0011171189835295081, "learning_rate": 0.00018415748031681706, "loss": 0.0352, "num_input_tokens_seen": 197520656, "step": 91445 }, { "epoch": 14.918433931484502, "grad_norm": 0.04959210380911827, "learning_rate": 0.0001841023031297846, "loss": 0.0396, "num_input_tokens_seen": 197532432, "step": 91450 }, { "epoch": 14.919249592169658, "grad_norm": 0.0012606215896084905, "learning_rate": 0.0001840471323448199, "loss": 0.0313, "num_input_tokens_seen": 197543056, "step": 91455 }, { "epoch": 14.920065252854812, "grad_norm": 0.0010699069825932384, "learning_rate": 0.00018399196796304085, "loss": 0.0691, "num_input_tokens_seen": 197552880, "step": 91460 }, { "epoch": 14.920880913539968, "grad_norm": 0.020792873576283455, "learning_rate": 0.0001839368099855655, "loss": 0.0429, "num_input_tokens_seen": 197564560, "step": 91465 }, { "epoch": 14.921696574225122, "grad_norm": 0.003776568453758955, "learning_rate": 0.00018388165841351162, "loss": 0.009, "num_input_tokens_seen": 197575312, "step": 91470 }, { "epoch": 14.922512234910277, "grad_norm": 0.09122282266616821, "learning_rate": 0.000183826513247997, "loss": 0.0097, "num_input_tokens_seen": 197585264, "step": 91475 }, { "epoch": 14.923327895595433, "grad_norm": 0.10566361993551254, "learning_rate": 0.0001837713744901391, "loss": 0.0121, "num_input_tokens_seen": 197595728, "step": 91480 }, { "epoch": 14.924143556280587, "grad_norm": 0.5424984693527222, "learning_rate": 0.00018371624214105553, "loss": 0.0201, "num_input_tokens_seen": 197605296, "step": 91485 }, { "epoch": 14.924959216965743, "grad_norm": 0.4760046601295471, "learning_rate": 0.00018366111620186348, "loss": 0.0254, "num_input_tokens_seen": 197615792, "step": 91490 }, { "epoch": 14.925774877650896, "grad_norm": 0.04116629436612129, "learning_rate": 0.0001836059966736803, "loss": 0.0137, "num_input_tokens_seen": 197627312, "step": 91495 }, { "epoch": 14.926590538336052, "grad_norm": 0.008484826423227787, "learning_rate": 0.0001835508835576229, "loss": 0.0026, "num_input_tokens_seen": 197637584, "step": 91500 }, { "epoch": 14.927406199021208, "grad_norm": 0.033912546932697296, "learning_rate": 0.00018349577685480834, "loss": 0.0092, "num_input_tokens_seen": 197648560, "step": 91505 }, { "epoch": 14.928221859706362, "grad_norm": 0.0016596310306340456, "learning_rate": 0.0001834406765663534, "loss": 0.0371, "num_input_tokens_seen": 197659536, "step": 91510 }, { "epoch": 14.929037520391518, "grad_norm": 0.034390322864055634, "learning_rate": 0.00018338558269337464, "loss": 0.0025, "num_input_tokens_seen": 197669904, "step": 91515 }, { "epoch": 14.929853181076671, "grad_norm": 0.2875620722770691, "learning_rate": 0.00018333049523698876, "loss": 0.0116, "num_input_tokens_seen": 197679280, "step": 91520 }, { "epoch": 14.930668841761827, "grad_norm": 0.03757292404770851, "learning_rate": 0.00018327541419831196, "loss": 0.0045, "num_input_tokens_seen": 197690480, "step": 91525 }, { "epoch": 14.931484502446983, "grad_norm": 0.008096279576420784, "learning_rate": 0.00018322033957846097, "loss": 0.0276, "num_input_tokens_seen": 197701712, "step": 91530 }, { "epoch": 14.932300163132137, "grad_norm": 0.06404435634613037, "learning_rate": 0.00018316527137855138, "loss": 0.0048, "num_input_tokens_seen": 197712560, "step": 91535 }, { "epoch": 14.933115823817293, "grad_norm": 0.0034364284947514534, "learning_rate": 0.00018311020959969982, "loss": 0.0017, "num_input_tokens_seen": 197722640, "step": 91540 }, { "epoch": 14.933931484502446, "grad_norm": 0.013405256904661655, "learning_rate": 0.0001830551542430215, "loss": 0.0016, "num_input_tokens_seen": 197733616, "step": 91545 }, { "epoch": 14.934747145187602, "grad_norm": 0.12012229114770889, "learning_rate": 0.0001830001053096329, "loss": 0.0743, "num_input_tokens_seen": 197745552, "step": 91550 }, { "epoch": 14.935562805872756, "grad_norm": 0.6721145510673523, "learning_rate": 0.000182945062800649, "loss": 0.0137, "num_input_tokens_seen": 197755632, "step": 91555 }, { "epoch": 14.936378466557912, "grad_norm": 0.021277163177728653, "learning_rate": 0.0001828900267171859, "loss": 0.0022, "num_input_tokens_seen": 197765552, "step": 91560 }, { "epoch": 14.937194127243067, "grad_norm": 0.14915668964385986, "learning_rate": 0.0001828349970603584, "loss": 0.0112, "num_input_tokens_seen": 197776720, "step": 91565 }, { "epoch": 14.938009787928221, "grad_norm": 0.0062555489130318165, "learning_rate": 0.00018277997383128237, "loss": 0.013, "num_input_tokens_seen": 197787824, "step": 91570 }, { "epoch": 14.938825448613377, "grad_norm": 0.005437719635665417, "learning_rate": 0.00018272495703107222, "loss": 0.0073, "num_input_tokens_seen": 197797808, "step": 91575 }, { "epoch": 14.939641109298531, "grad_norm": 0.0006717185606248677, "learning_rate": 0.00018266994666084368, "loss": 0.0265, "num_input_tokens_seen": 197808624, "step": 91580 }, { "epoch": 14.940456769983687, "grad_norm": 0.2600712776184082, "learning_rate": 0.0001826149427217109, "loss": 0.0136, "num_input_tokens_seen": 197819824, "step": 91585 }, { "epoch": 14.941272430668842, "grad_norm": 0.006795211229473352, "learning_rate": 0.00018255994521478925, "loss": 0.0013, "num_input_tokens_seen": 197830800, "step": 91590 }, { "epoch": 14.942088091353996, "grad_norm": 0.0022412503603845835, "learning_rate": 0.00018250495414119273, "loss": 0.0129, "num_input_tokens_seen": 197841968, "step": 91595 }, { "epoch": 14.942903752039152, "grad_norm": 0.10791927576065063, "learning_rate": 0.0001824499695020362, "loss": 0.0108, "num_input_tokens_seen": 197852368, "step": 91600 }, { "epoch": 14.943719412724306, "grad_norm": 0.005554559174925089, "learning_rate": 0.0001823949912984339, "loss": 0.0261, "num_input_tokens_seen": 197862960, "step": 91605 }, { "epoch": 14.944535073409462, "grad_norm": 0.007660820614546537, "learning_rate": 0.00018234001953149997, "loss": 0.0042, "num_input_tokens_seen": 197874192, "step": 91610 }, { "epoch": 14.945350734094617, "grad_norm": 0.0357113778591156, "learning_rate": 0.00018228505420234858, "loss": 0.0021, "num_input_tokens_seen": 197885680, "step": 91615 }, { "epoch": 14.946166394779771, "grad_norm": 0.0036698810290545225, "learning_rate": 0.00018223009531209355, "loss": 0.0027, "num_input_tokens_seen": 197897040, "step": 91620 }, { "epoch": 14.946982055464927, "grad_norm": 0.4504069685935974, "learning_rate": 0.00018217514286184884, "loss": 0.0453, "num_input_tokens_seen": 197907952, "step": 91625 }, { "epoch": 14.947797716150081, "grad_norm": 0.000682205252815038, "learning_rate": 0.00018212019685272802, "loss": 0.0193, "num_input_tokens_seen": 197920048, "step": 91630 }, { "epoch": 14.948613376835237, "grad_norm": 0.004739740863442421, "learning_rate": 0.00018206525728584462, "loss": 0.0064, "num_input_tokens_seen": 197930672, "step": 91635 }, { "epoch": 14.949429037520392, "grad_norm": 0.004977010656148195, "learning_rate": 0.00018201032416231217, "loss": 0.056, "num_input_tokens_seen": 197942352, "step": 91640 }, { "epoch": 14.950244698205546, "grad_norm": 0.6323051452636719, "learning_rate": 0.00018195539748324386, "loss": 0.0118, "num_input_tokens_seen": 197952784, "step": 91645 }, { "epoch": 14.951060358890702, "grad_norm": 0.02068396657705307, "learning_rate": 0.00018190047724975271, "loss": 0.0027, "num_input_tokens_seen": 197963696, "step": 91650 }, { "epoch": 14.951876019575856, "grad_norm": 0.0008221376338042319, "learning_rate": 0.00018184556346295233, "loss": 0.0018, "num_input_tokens_seen": 197974960, "step": 91655 }, { "epoch": 14.952691680261012, "grad_norm": 0.0007419289904646575, "learning_rate": 0.00018179065612395484, "loss": 0.0033, "num_input_tokens_seen": 197985904, "step": 91660 }, { "epoch": 14.953507340946166, "grad_norm": 0.23194904625415802, "learning_rate": 0.0001817357552338737, "loss": 0.0095, "num_input_tokens_seen": 197996688, "step": 91665 }, { "epoch": 14.954323001631321, "grad_norm": 0.00299538834951818, "learning_rate": 0.0001816808607938209, "loss": 0.0029, "num_input_tokens_seen": 198007888, "step": 91670 }, { "epoch": 14.955138662316477, "grad_norm": 0.01290897186845541, "learning_rate": 0.00018162597280490966, "loss": 0.0474, "num_input_tokens_seen": 198019632, "step": 91675 }, { "epoch": 14.955954323001631, "grad_norm": 0.00599845964461565, "learning_rate": 0.00018157109126825156, "loss": 0.0041, "num_input_tokens_seen": 198029936, "step": 91680 }, { "epoch": 14.956769983686787, "grad_norm": 0.005180804058909416, "learning_rate": 0.0001815162161849596, "loss": 0.0019, "num_input_tokens_seen": 198041264, "step": 91685 }, { "epoch": 14.95758564437194, "grad_norm": 0.01789415255188942, "learning_rate": 0.00018146134755614524, "loss": 0.0017, "num_input_tokens_seen": 198052528, "step": 91690 }, { "epoch": 14.958401305057096, "grad_norm": 0.003401143942028284, "learning_rate": 0.0001814064853829211, "loss": 0.0025, "num_input_tokens_seen": 198063216, "step": 91695 }, { "epoch": 14.959216965742252, "grad_norm": 0.03557276725769043, "learning_rate": 0.00018135162966639835, "loss": 0.0114, "num_input_tokens_seen": 198074608, "step": 91700 }, { "epoch": 14.960032626427406, "grad_norm": 0.0038898277562111616, "learning_rate": 0.00018129678040768938, "loss": 0.0034, "num_input_tokens_seen": 198085680, "step": 91705 }, { "epoch": 14.960848287112562, "grad_norm": 0.010417568497359753, "learning_rate": 0.00018124193760790514, "loss": 0.0051, "num_input_tokens_seen": 198095888, "step": 91710 }, { "epoch": 14.961663947797716, "grad_norm": 0.01605057343840599, "learning_rate": 0.00018118710126815773, "loss": 0.0442, "num_input_tokens_seen": 198106576, "step": 91715 }, { "epoch": 14.962479608482871, "grad_norm": 0.00376236904412508, "learning_rate": 0.00018113227138955785, "loss": 0.0061, "num_input_tokens_seen": 198116368, "step": 91720 }, { "epoch": 14.963295269168025, "grad_norm": 0.4293442368507385, "learning_rate": 0.00018107744797321728, "loss": 0.2416, "num_input_tokens_seen": 198125552, "step": 91725 }, { "epoch": 14.964110929853181, "grad_norm": 0.0034157487098127604, "learning_rate": 0.00018102263102024653, "loss": 0.0025, "num_input_tokens_seen": 198136912, "step": 91730 }, { "epoch": 14.964926590538337, "grad_norm": 0.1372259259223938, "learning_rate": 0.00018096782053175715, "loss": 0.0083, "num_input_tokens_seen": 198146800, "step": 91735 }, { "epoch": 14.96574225122349, "grad_norm": 0.7950195074081421, "learning_rate": 0.00018091301650885922, "loss": 0.1111, "num_input_tokens_seen": 198157648, "step": 91740 }, { "epoch": 14.966557911908646, "grad_norm": 0.012486966326832771, "learning_rate": 0.00018085821895266402, "loss": 0.0917, "num_input_tokens_seen": 198170384, "step": 91745 }, { "epoch": 14.9673735725938, "grad_norm": 0.00040594261372461915, "learning_rate": 0.00018080342786428184, "loss": 0.0339, "num_input_tokens_seen": 198181104, "step": 91750 }, { "epoch": 14.968189233278956, "grad_norm": 0.0035222459118813276, "learning_rate": 0.00018074864324482315, "loss": 0.0014, "num_input_tokens_seen": 198192176, "step": 91755 }, { "epoch": 14.969004893964112, "grad_norm": 0.9425178170204163, "learning_rate": 0.0001806938650953982, "loss": 0.0278, "num_input_tokens_seen": 198203312, "step": 91760 }, { "epoch": 14.969820554649266, "grad_norm": 0.005051793530583382, "learning_rate": 0.00018063909341711716, "loss": 0.0075, "num_input_tokens_seen": 198213904, "step": 91765 }, { "epoch": 14.970636215334421, "grad_norm": 0.0014896744396537542, "learning_rate": 0.00018058432821109, "loss": 0.0026, "num_input_tokens_seen": 198225424, "step": 91770 }, { "epoch": 14.971451876019575, "grad_norm": 0.011105705052614212, "learning_rate": 0.00018052956947842665, "loss": 0.0018, "num_input_tokens_seen": 198236496, "step": 91775 }, { "epoch": 14.97226753670473, "grad_norm": 0.003602163400501013, "learning_rate": 0.0001804748172202368, "loss": 0.0106, "num_input_tokens_seen": 198248208, "step": 91780 }, { "epoch": 14.973083197389887, "grad_norm": 0.002370339585468173, "learning_rate": 0.00018042007143763018, "loss": 0.0085, "num_input_tokens_seen": 198259824, "step": 91785 }, { "epoch": 14.97389885807504, "grad_norm": 0.005036045331507921, "learning_rate": 0.00018036533213171618, "loss": 0.1786, "num_input_tokens_seen": 198271856, "step": 91790 }, { "epoch": 14.974714518760196, "grad_norm": 0.027367407456040382, "learning_rate": 0.0001803105993036041, "loss": 0.0068, "num_input_tokens_seen": 198282672, "step": 91795 }, { "epoch": 14.97553017944535, "grad_norm": 0.0048852418549358845, "learning_rate": 0.0001802558729544036, "loss": 0.0082, "num_input_tokens_seen": 198293552, "step": 91800 }, { "epoch": 14.976345840130506, "grad_norm": 0.1048927903175354, "learning_rate": 0.0001802011530852231, "loss": 0.0703, "num_input_tokens_seen": 198304272, "step": 91805 }, { "epoch": 14.977161500815662, "grad_norm": 0.723358690738678, "learning_rate": 0.00018014643969717231, "loss": 0.1453, "num_input_tokens_seen": 198314992, "step": 91810 }, { "epoch": 14.977977161500815, "grad_norm": 0.0012425847817212343, "learning_rate": 0.0001800917327913593, "loss": 0.0109, "num_input_tokens_seen": 198326352, "step": 91815 }, { "epoch": 14.978792822185971, "grad_norm": 0.03811126574873924, "learning_rate": 0.0001800370323688935, "loss": 0.0044, "num_input_tokens_seen": 198336816, "step": 91820 }, { "epoch": 14.979608482871125, "grad_norm": 0.11717061698436737, "learning_rate": 0.00017998233843088284, "loss": 0.0076, "num_input_tokens_seen": 198347536, "step": 91825 }, { "epoch": 14.98042414355628, "grad_norm": 0.010843515396118164, "learning_rate": 0.00017992765097843639, "loss": 0.0031, "num_input_tokens_seen": 198358576, "step": 91830 }, { "epoch": 14.981239804241435, "grad_norm": 0.0015287426067516208, "learning_rate": 0.00017987297001266172, "loss": 0.002, "num_input_tokens_seen": 198370064, "step": 91835 }, { "epoch": 14.98205546492659, "grad_norm": 0.005267042201012373, "learning_rate": 0.00017981829553466783, "loss": 0.0157, "num_input_tokens_seen": 198380656, "step": 91840 }, { "epoch": 14.982871125611746, "grad_norm": 0.0034920715261250734, "learning_rate": 0.00017976362754556203, "loss": 0.0132, "num_input_tokens_seen": 198392112, "step": 91845 }, { "epoch": 14.9836867862969, "grad_norm": 0.005846341140568256, "learning_rate": 0.0001797089660464527, "loss": 0.0132, "num_input_tokens_seen": 198403440, "step": 91850 }, { "epoch": 14.984502446982056, "grad_norm": 0.004615858197212219, "learning_rate": 0.00017965431103844753, "loss": 0.0016, "num_input_tokens_seen": 198413712, "step": 91855 }, { "epoch": 14.98531810766721, "grad_norm": 0.00345953949727118, "learning_rate": 0.00017959966252265407, "loss": 0.0108, "num_input_tokens_seen": 198425424, "step": 91860 }, { "epoch": 14.986133768352365, "grad_norm": 0.04297134652733803, "learning_rate": 0.00017954502050018, "loss": 0.0034, "num_input_tokens_seen": 198435920, "step": 91865 }, { "epoch": 14.986949429037521, "grad_norm": 0.0170292966067791, "learning_rate": 0.00017949038497213255, "loss": 0.002, "num_input_tokens_seen": 198448304, "step": 91870 }, { "epoch": 14.987765089722675, "grad_norm": 0.0019456454319879413, "learning_rate": 0.0001794357559396191, "loss": 0.0047, "num_input_tokens_seen": 198458512, "step": 91875 }, { "epoch": 14.98858075040783, "grad_norm": 0.0030888323672115803, "learning_rate": 0.00017938113340374662, "loss": 0.0058, "num_input_tokens_seen": 198469936, "step": 91880 }, { "epoch": 14.989396411092985, "grad_norm": 0.008779522962868214, "learning_rate": 0.00017932651736562226, "loss": 0.0049, "num_input_tokens_seen": 198480688, "step": 91885 }, { "epoch": 14.99021207177814, "grad_norm": 0.1324496865272522, "learning_rate": 0.00017927190782635283, "loss": 0.1448, "num_input_tokens_seen": 198492240, "step": 91890 }, { "epoch": 14.991027732463294, "grad_norm": 0.11609335243701935, "learning_rate": 0.00017921730478704506, "loss": 0.0059, "num_input_tokens_seen": 198502352, "step": 91895 }, { "epoch": 14.99184339314845, "grad_norm": 0.004886234644800425, "learning_rate": 0.0001791627082488056, "loss": 0.0674, "num_input_tokens_seen": 198514224, "step": 91900 }, { "epoch": 14.992659053833606, "grad_norm": 0.03304458037018776, "learning_rate": 0.00017910811821274082, "loss": 0.008, "num_input_tokens_seen": 198524720, "step": 91905 }, { "epoch": 14.99347471451876, "grad_norm": 0.004803449381142855, "learning_rate": 0.0001790535346799571, "loss": 0.002, "num_input_tokens_seen": 198536176, "step": 91910 }, { "epoch": 14.994290375203915, "grad_norm": 0.004887313582003117, "learning_rate": 0.00017899895765156065, "loss": 0.0038, "num_input_tokens_seen": 198547952, "step": 91915 }, { "epoch": 14.99510603588907, "grad_norm": 0.0403841994702816, "learning_rate": 0.00017894438712865753, "loss": 0.0041, "num_input_tokens_seen": 198559216, "step": 91920 }, { "epoch": 14.995921696574225, "grad_norm": 0.0004432197310961783, "learning_rate": 0.00017888982311235375, "loss": 0.018, "num_input_tokens_seen": 198570256, "step": 91925 }, { "epoch": 14.99673735725938, "grad_norm": 0.04184507951140404, "learning_rate": 0.00017883526560375502, "loss": 0.0037, "num_input_tokens_seen": 198581808, "step": 91930 }, { "epoch": 14.997553017944535, "grad_norm": 0.01710711419582367, "learning_rate": 0.00017878071460396706, "loss": 0.0048, "num_input_tokens_seen": 198593040, "step": 91935 }, { "epoch": 14.99836867862969, "grad_norm": 0.1359815001487732, "learning_rate": 0.0001787261701140952, "loss": 0.0072, "num_input_tokens_seen": 198601776, "step": 91940 }, { "epoch": 14.999184339314844, "grad_norm": 0.32246536016464233, "learning_rate": 0.00017867163213524545, "loss": 0.0294, "num_input_tokens_seen": 198611472, "step": 91945 }, { "epoch": 15.0, "grad_norm": 0.08324826508760452, "learning_rate": 0.00017861710066852237, "loss": 0.1494, "num_input_tokens_seen": 198621168, "step": 91950 }, { "epoch": 15.0, "eval_loss": 0.2368113100528717, "eval_runtime": 104.2294, "eval_samples_per_second": 26.144, "eval_steps_per_second": 6.543, "num_input_tokens_seen": 198621168, "step": 91950 }, { "epoch": 15.000815660685156, "grad_norm": 0.15005184710025787, "learning_rate": 0.00017856257571503164, "loss": 0.0103, "num_input_tokens_seen": 198632528, "step": 91955 }, { "epoch": 15.00163132137031, "grad_norm": 0.026279544457793236, "learning_rate": 0.00017850805727587804, "loss": 0.0176, "num_input_tokens_seen": 198643888, "step": 91960 }, { "epoch": 15.002446982055465, "grad_norm": 0.0065717375837266445, "learning_rate": 0.00017845354535216658, "loss": 0.004, "num_input_tokens_seen": 198654416, "step": 91965 }, { "epoch": 15.00326264274062, "grad_norm": 0.0031500456389039755, "learning_rate": 0.00017839903994500185, "loss": 0.0419, "num_input_tokens_seen": 198666960, "step": 91970 }, { "epoch": 15.004078303425775, "grad_norm": 0.005922715645283461, "learning_rate": 0.0001783445410554886, "loss": 0.0014, "num_input_tokens_seen": 198677808, "step": 91975 }, { "epoch": 15.00489396411093, "grad_norm": 0.0009123813943006098, "learning_rate": 0.00017829004868473124, "loss": 0.004, "num_input_tokens_seen": 198688272, "step": 91980 }, { "epoch": 15.005709624796085, "grad_norm": 0.009499759413301945, "learning_rate": 0.00017823556283383418, "loss": 0.0097, "num_input_tokens_seen": 198699504, "step": 91985 }, { "epoch": 15.00652528548124, "grad_norm": 0.0025711434427648783, "learning_rate": 0.0001781810835039016, "loss": 0.0026, "num_input_tokens_seen": 198710544, "step": 91990 }, { "epoch": 15.007340946166394, "grad_norm": 0.0020499620586633682, "learning_rate": 0.0001781266106960377, "loss": 0.0034, "num_input_tokens_seen": 198720496, "step": 91995 }, { "epoch": 15.00815660685155, "grad_norm": 0.015157933346927166, "learning_rate": 0.00017807214441134628, "loss": 0.0031, "num_input_tokens_seen": 198729744, "step": 92000 }, { "epoch": 15.008972267536704, "grad_norm": 0.01744169555604458, "learning_rate": 0.00017801768465093126, "loss": 0.0027, "num_input_tokens_seen": 198740400, "step": 92005 }, { "epoch": 15.00978792822186, "grad_norm": 0.0031720127444714308, "learning_rate": 0.00017796323141589638, "loss": 0.0011, "num_input_tokens_seen": 198752368, "step": 92010 }, { "epoch": 15.010603588907015, "grad_norm": 0.001125030335970223, "learning_rate": 0.00017790878470734506, "loss": 0.0023, "num_input_tokens_seen": 198762704, "step": 92015 }, { "epoch": 15.01141924959217, "grad_norm": 0.07347182184457779, "learning_rate": 0.0001778543445263809, "loss": 0.0078, "num_input_tokens_seen": 198772656, "step": 92020 }, { "epoch": 15.012234910277325, "grad_norm": 0.00362205458804965, "learning_rate": 0.00017779991087410707, "loss": 0.0066, "num_input_tokens_seen": 198782864, "step": 92025 }, { "epoch": 15.013050570962479, "grad_norm": 0.00837631057947874, "learning_rate": 0.0001777454837516268, "loss": 0.0309, "num_input_tokens_seen": 198793776, "step": 92030 }, { "epoch": 15.013866231647635, "grad_norm": 1.850484848022461, "learning_rate": 0.00017769106316004314, "loss": 0.0401, "num_input_tokens_seen": 198804496, "step": 92035 }, { "epoch": 15.01468189233279, "grad_norm": 0.009433651342988014, "learning_rate": 0.0001776366491004589, "loss": 0.1475, "num_input_tokens_seen": 198815216, "step": 92040 }, { "epoch": 15.015497553017944, "grad_norm": 0.004046480171382427, "learning_rate": 0.00017758224157397696, "loss": 0.0097, "num_input_tokens_seen": 198825296, "step": 92045 }, { "epoch": 15.0163132137031, "grad_norm": 0.06106355041265488, "learning_rate": 0.00017752784058169992, "loss": 0.0051, "num_input_tokens_seen": 198836080, "step": 92050 }, { "epoch": 15.017128874388254, "grad_norm": 0.029603945091366768, "learning_rate": 0.00017747344612473022, "loss": 0.0028, "num_input_tokens_seen": 198847696, "step": 92055 }, { "epoch": 15.01794453507341, "grad_norm": 0.0066187456250190735, "learning_rate": 0.00017741905820417014, "loss": 0.0053, "num_input_tokens_seen": 198860080, "step": 92060 }, { "epoch": 15.018760195758565, "grad_norm": 0.009210659191012383, "learning_rate": 0.00017736467682112245, "loss": 0.0023, "num_input_tokens_seen": 198870640, "step": 92065 }, { "epoch": 15.01957585644372, "grad_norm": 0.0005773415905423462, "learning_rate": 0.00017731030197668847, "loss": 0.0031, "num_input_tokens_seen": 198881488, "step": 92070 }, { "epoch": 15.020391517128875, "grad_norm": 0.02137085609138012, "learning_rate": 0.00017725593367197095, "loss": 0.0033, "num_input_tokens_seen": 198892944, "step": 92075 }, { "epoch": 15.021207177814029, "grad_norm": 0.014607875607907772, "learning_rate": 0.00017720157190807107, "loss": 0.0027, "num_input_tokens_seen": 198903408, "step": 92080 }, { "epoch": 15.022022838499185, "grad_norm": 0.003844099584966898, "learning_rate": 0.00017714721668609095, "loss": 0.0017, "num_input_tokens_seen": 198914160, "step": 92085 }, { "epoch": 15.022838499184338, "grad_norm": 0.003906742203980684, "learning_rate": 0.00017709286800713202, "loss": 0.025, "num_input_tokens_seen": 198924304, "step": 92090 }, { "epoch": 15.023654159869494, "grad_norm": 0.10455742478370667, "learning_rate": 0.00017703852587229584, "loss": 0.009, "num_input_tokens_seen": 198935312, "step": 92095 }, { "epoch": 15.02446982055465, "grad_norm": 0.00283225835300982, "learning_rate": 0.00017698419028268358, "loss": 0.001, "num_input_tokens_seen": 198946640, "step": 92100 }, { "epoch": 15.025285481239804, "grad_norm": 0.025962086394429207, "learning_rate": 0.00017692986123939652, "loss": 0.0076, "num_input_tokens_seen": 198956656, "step": 92105 }, { "epoch": 15.02610114192496, "grad_norm": 0.008207517676055431, "learning_rate": 0.00017687553874353563, "loss": 0.0013, "num_input_tokens_seen": 198968304, "step": 92110 }, { "epoch": 15.026916802610113, "grad_norm": 0.019631782546639442, "learning_rate": 0.0001768212227962019, "loss": 0.0019, "num_input_tokens_seen": 198979632, "step": 92115 }, { "epoch": 15.02773246329527, "grad_norm": 0.018773145973682404, "learning_rate": 0.00017676691339849605, "loss": 0.0069, "num_input_tokens_seen": 198989808, "step": 92120 }, { "epoch": 15.028548123980425, "grad_norm": 0.021854877471923828, "learning_rate": 0.00017671261055151872, "loss": 0.0047, "num_input_tokens_seen": 199001040, "step": 92125 }, { "epoch": 15.029363784665579, "grad_norm": 0.05074403062462807, "learning_rate": 0.00017665831425637052, "loss": 0.0109, "num_input_tokens_seen": 199011568, "step": 92130 }, { "epoch": 15.030179445350734, "grad_norm": 0.0030274391174316406, "learning_rate": 0.0001766040245141517, "loss": 0.0016, "num_input_tokens_seen": 199023312, "step": 92135 }, { "epoch": 15.030995106035888, "grad_norm": 0.0029113576747477055, "learning_rate": 0.00017654974132596263, "loss": 0.0733, "num_input_tokens_seen": 199033296, "step": 92140 }, { "epoch": 15.031810766721044, "grad_norm": 0.010478825308382511, "learning_rate": 0.00017649546469290333, "loss": 0.0025, "num_input_tokens_seen": 199043856, "step": 92145 }, { "epoch": 15.0326264274062, "grad_norm": 0.0034063730854541063, "learning_rate": 0.00017644119461607388, "loss": 0.0018, "num_input_tokens_seen": 199055056, "step": 92150 }, { "epoch": 15.033442088091354, "grad_norm": 0.0036105539184063673, "learning_rate": 0.0001763869310965741, "loss": 0.2094, "num_input_tokens_seen": 199065520, "step": 92155 }, { "epoch": 15.03425774877651, "grad_norm": 0.0027978713624179363, "learning_rate": 0.00017633267413550362, "loss": 0.0041, "num_input_tokens_seen": 199076496, "step": 92160 }, { "epoch": 15.035073409461663, "grad_norm": 0.0005292710848152637, "learning_rate": 0.00017627842373396202, "loss": 0.0058, "num_input_tokens_seen": 199086096, "step": 92165 }, { "epoch": 15.035889070146819, "grad_norm": 0.014616936445236206, "learning_rate": 0.00017622417989304913, "loss": 0.0051, "num_input_tokens_seen": 199096016, "step": 92170 }, { "epoch": 15.036704730831975, "grad_norm": 0.010652135126292706, "learning_rate": 0.0001761699426138636, "loss": 0.0075, "num_input_tokens_seen": 199106832, "step": 92175 }, { "epoch": 15.037520391517129, "grad_norm": 0.03607923537492752, "learning_rate": 0.00017611571189750537, "loss": 0.0137, "num_input_tokens_seen": 199117680, "step": 92180 }, { "epoch": 15.038336052202284, "grad_norm": 0.0021634928416460752, "learning_rate": 0.00017606148774507274, "loss": 0.0028, "num_input_tokens_seen": 199128368, "step": 92185 }, { "epoch": 15.039151712887438, "grad_norm": 0.004082173574715853, "learning_rate": 0.0001760072701576654, "loss": 0.0655, "num_input_tokens_seen": 199137872, "step": 92190 }, { "epoch": 15.039967373572594, "grad_norm": 0.003854207694530487, "learning_rate": 0.00017595305913638138, "loss": 0.0028, "num_input_tokens_seen": 199148912, "step": 92195 }, { "epoch": 15.040783034257748, "grad_norm": 0.027118144556879997, "learning_rate": 0.00017589885468232002, "loss": 0.0035, "num_input_tokens_seen": 199159888, "step": 92200 }, { "epoch": 15.041598694942904, "grad_norm": 0.07028216123580933, "learning_rate": 0.00017584465679657918, "loss": 0.0059, "num_input_tokens_seen": 199170768, "step": 92205 }, { "epoch": 15.04241435562806, "grad_norm": 0.001322569907642901, "learning_rate": 0.00017579046548025796, "loss": 0.0019, "num_input_tokens_seen": 199182800, "step": 92210 }, { "epoch": 15.043230016313213, "grad_norm": 0.008521920070052147, "learning_rate": 0.00017573628073445393, "loss": 0.0022, "num_input_tokens_seen": 199194000, "step": 92215 }, { "epoch": 15.044045676998369, "grad_norm": 0.007180997170507908, "learning_rate": 0.00017568210256026578, "loss": 0.0328, "num_input_tokens_seen": 199203888, "step": 92220 }, { "epoch": 15.044861337683523, "grad_norm": 0.005620477721095085, "learning_rate": 0.000175627930958791, "loss": 0.008, "num_input_tokens_seen": 199215184, "step": 92225 }, { "epoch": 15.045676998368679, "grad_norm": 0.0026418042834848166, "learning_rate": 0.0001755737659311278, "loss": 0.0286, "num_input_tokens_seen": 199225616, "step": 92230 }, { "epoch": 15.046492659053834, "grad_norm": 0.004119323566555977, "learning_rate": 0.00017551960747837382, "loss": 0.0017, "num_input_tokens_seen": 199236304, "step": 92235 }, { "epoch": 15.047308319738988, "grad_norm": 0.10706538707017899, "learning_rate": 0.00017546545560162663, "loss": 0.0644, "num_input_tokens_seen": 199248496, "step": 92240 }, { "epoch": 15.048123980424144, "grad_norm": 0.02891051024198532, "learning_rate": 0.00017541131030198364, "loss": 0.0026, "num_input_tokens_seen": 199258608, "step": 92245 }, { "epoch": 15.048939641109298, "grad_norm": 0.00028251283220015466, "learning_rate": 0.00017535717158054226, "loss": 0.004, "num_input_tokens_seen": 199268080, "step": 92250 }, { "epoch": 15.049755301794454, "grad_norm": 0.001709071220830083, "learning_rate": 0.00017530303943839965, "loss": 0.0129, "num_input_tokens_seen": 199278800, "step": 92255 }, { "epoch": 15.05057096247961, "grad_norm": 0.00671932240948081, "learning_rate": 0.00017524891387665282, "loss": 0.1309, "num_input_tokens_seen": 199289360, "step": 92260 }, { "epoch": 15.051386623164763, "grad_norm": 0.05627777799963951, "learning_rate": 0.00017519479489639877, "loss": 0.0045, "num_input_tokens_seen": 199300112, "step": 92265 }, { "epoch": 15.052202283849919, "grad_norm": 0.008651613257825375, "learning_rate": 0.0001751406824987342, "loss": 0.0092, "num_input_tokens_seen": 199310768, "step": 92270 }, { "epoch": 15.053017944535073, "grad_norm": 0.013139888644218445, "learning_rate": 0.00017508657668475585, "loss": 0.0018, "num_input_tokens_seen": 199320976, "step": 92275 }, { "epoch": 15.053833605220229, "grad_norm": 0.015267265029251575, "learning_rate": 0.00017503247745556, "loss": 0.003, "num_input_tokens_seen": 199331984, "step": 92280 }, { "epoch": 15.054649265905383, "grad_norm": 0.008942226879298687, "learning_rate": 0.0001749783848122436, "loss": 0.0028, "num_input_tokens_seen": 199343568, "step": 92285 }, { "epoch": 15.055464926590538, "grad_norm": 0.003952791448682547, "learning_rate": 0.0001749242987559022, "loss": 0.0265, "num_input_tokens_seen": 199353168, "step": 92290 }, { "epoch": 15.056280587275694, "grad_norm": 0.014960017055273056, "learning_rate": 0.00017487021928763263, "loss": 0.0047, "num_input_tokens_seen": 199363728, "step": 92295 }, { "epoch": 15.057096247960848, "grad_norm": 0.01710602268576622, "learning_rate": 0.0001748161464085302, "loss": 0.0973, "num_input_tokens_seen": 199374192, "step": 92300 }, { "epoch": 15.057911908646004, "grad_norm": 0.07812569290399551, "learning_rate": 0.00017476208011969142, "loss": 0.0051, "num_input_tokens_seen": 199386000, "step": 92305 }, { "epoch": 15.058727569331158, "grad_norm": 0.0018170730909332633, "learning_rate": 0.0001747080204222113, "loss": 0.0033, "num_input_tokens_seen": 199396464, "step": 92310 }, { "epoch": 15.059543230016313, "grad_norm": 0.021724211052060127, "learning_rate": 0.00017465396731718619, "loss": 0.0033, "num_input_tokens_seen": 199408176, "step": 92315 }, { "epoch": 15.060358890701469, "grad_norm": 0.005055665969848633, "learning_rate": 0.0001745999208057108, "loss": 0.0024, "num_input_tokens_seen": 199419088, "step": 92320 }, { "epoch": 15.061174551386623, "grad_norm": 0.05920695886015892, "learning_rate": 0.00017454588088888117, "loss": 0.0045, "num_input_tokens_seen": 199430736, "step": 92325 }, { "epoch": 15.061990212071779, "grad_norm": 0.0020981167908757925, "learning_rate": 0.00017449184756779178, "loss": 0.0041, "num_input_tokens_seen": 199441456, "step": 92330 }, { "epoch": 15.062805872756933, "grad_norm": 0.004952155519276857, "learning_rate": 0.00017443782084353837, "loss": 0.002, "num_input_tokens_seen": 199451664, "step": 92335 }, { "epoch": 15.063621533442088, "grad_norm": 0.04457241669297218, "learning_rate": 0.0001743838007172152, "loss": 0.004, "num_input_tokens_seen": 199462800, "step": 92340 }, { "epoch": 15.064437194127244, "grad_norm": 0.028170321136713028, "learning_rate": 0.00017432978718991772, "loss": 0.0059, "num_input_tokens_seen": 199473200, "step": 92345 }, { "epoch": 15.065252854812398, "grad_norm": 0.015449753031134605, "learning_rate": 0.00017427578026273988, "loss": 0.046, "num_input_tokens_seen": 199484144, "step": 92350 }, { "epoch": 15.066068515497554, "grad_norm": 0.0020990390330553055, "learning_rate": 0.00017422177993677696, "loss": 0.0032, "num_input_tokens_seen": 199494800, "step": 92355 }, { "epoch": 15.066884176182707, "grad_norm": 0.010104361921548843, "learning_rate": 0.00017416778621312257, "loss": 0.01, "num_input_tokens_seen": 199505776, "step": 92360 }, { "epoch": 15.067699836867863, "grad_norm": 0.010933980345726013, "learning_rate": 0.00017411379909287167, "loss": 0.0047, "num_input_tokens_seen": 199516784, "step": 92365 }, { "epoch": 15.068515497553017, "grad_norm": 0.18525753915309906, "learning_rate": 0.00017405981857711772, "loss": 0.0094, "num_input_tokens_seen": 199528400, "step": 92370 }, { "epoch": 15.069331158238173, "grad_norm": 0.0010363530600443482, "learning_rate": 0.0001740058446669552, "loss": 0.002, "num_input_tokens_seen": 199540400, "step": 92375 }, { "epoch": 15.070146818923329, "grad_norm": 0.2711713910102844, "learning_rate": 0.00017395187736347778, "loss": 0.01, "num_input_tokens_seen": 199551408, "step": 92380 }, { "epoch": 15.070962479608482, "grad_norm": 0.02081671543419361, "learning_rate": 0.0001738979166677792, "loss": 0.0022, "num_input_tokens_seen": 199560880, "step": 92385 }, { "epoch": 15.071778140293638, "grad_norm": 0.001128299511037767, "learning_rate": 0.00017384396258095304, "loss": 0.0006, "num_input_tokens_seen": 199572144, "step": 92390 }, { "epoch": 15.072593800978792, "grad_norm": 0.02086414210498333, "learning_rate": 0.0001737900151040927, "loss": 0.0079, "num_input_tokens_seen": 199584048, "step": 92395 }, { "epoch": 15.073409461663948, "grad_norm": 0.16544772684574127, "learning_rate": 0.00017373607423829159, "loss": 0.0068, "num_input_tokens_seen": 199595824, "step": 92400 }, { "epoch": 15.074225122349104, "grad_norm": 0.04024311900138855, "learning_rate": 0.00017368213998464278, "loss": 0.007, "num_input_tokens_seen": 199607216, "step": 92405 }, { "epoch": 15.075040783034257, "grad_norm": 0.000666849547997117, "learning_rate": 0.00017362821234423936, "loss": 0.0014, "num_input_tokens_seen": 199618736, "step": 92410 }, { "epoch": 15.075856443719413, "grad_norm": 0.007606880739331245, "learning_rate": 0.00017357429131817432, "loss": 0.0337, "num_input_tokens_seen": 199629648, "step": 92415 }, { "epoch": 15.076672104404567, "grad_norm": 0.001018756302073598, "learning_rate": 0.0001735203769075403, "loss": 0.0023, "num_input_tokens_seen": 199639888, "step": 92420 }, { "epoch": 15.077487765089723, "grad_norm": 0.00730155361816287, "learning_rate": 0.00017346646911342985, "loss": 0.0032, "num_input_tokens_seen": 199651600, "step": 92425 }, { "epoch": 15.078303425774878, "grad_norm": 0.0009757946827448905, "learning_rate": 0.000173412567936936, "loss": 0.002, "num_input_tokens_seen": 199660848, "step": 92430 }, { "epoch": 15.079119086460032, "grad_norm": 0.014223473146557808, "learning_rate": 0.0001733586733791504, "loss": 0.0021, "num_input_tokens_seen": 199671184, "step": 92435 }, { "epoch": 15.079934747145188, "grad_norm": 0.0027299304492771626, "learning_rate": 0.000173304785441166, "loss": 0.0022, "num_input_tokens_seen": 199681136, "step": 92440 }, { "epoch": 15.080750407830342, "grad_norm": 0.0019904670771211386, "learning_rate": 0.00017325090412407423, "loss": 0.0115, "num_input_tokens_seen": 199691344, "step": 92445 }, { "epoch": 15.081566068515498, "grad_norm": 0.004478312563151121, "learning_rate": 0.00017319702942896777, "loss": 0.005, "num_input_tokens_seen": 199701456, "step": 92450 }, { "epoch": 15.082381729200652, "grad_norm": 0.014595243148505688, "learning_rate": 0.00017314316135693775, "loss": 0.0165, "num_input_tokens_seen": 199710800, "step": 92455 }, { "epoch": 15.083197389885807, "grad_norm": 0.0018416885286569595, "learning_rate": 0.00017308929990907652, "loss": 0.0033, "num_input_tokens_seen": 199721072, "step": 92460 }, { "epoch": 15.084013050570963, "grad_norm": 0.005210902541875839, "learning_rate": 0.000173035445086475, "loss": 0.0015, "num_input_tokens_seen": 199732816, "step": 92465 }, { "epoch": 15.084828711256117, "grad_norm": 0.013290762901306152, "learning_rate": 0.0001729815968902253, "loss": 0.0069, "num_input_tokens_seen": 199743152, "step": 92470 }, { "epoch": 15.085644371941273, "grad_norm": 0.0032980344258248806, "learning_rate": 0.0001729277553214181, "loss": 0.0101, "num_input_tokens_seen": 199753680, "step": 92475 }, { "epoch": 15.086460032626427, "grad_norm": 0.006125927437096834, "learning_rate": 0.00017287392038114514, "loss": 0.0026, "num_input_tokens_seen": 199764400, "step": 92480 }, { "epoch": 15.087275693311582, "grad_norm": 0.00176598085090518, "learning_rate": 0.00017282009207049686, "loss": 0.0029, "num_input_tokens_seen": 199774608, "step": 92485 }, { "epoch": 15.088091353996738, "grad_norm": 0.0010756379924714565, "learning_rate": 0.00017276627039056463, "loss": 0.0109, "num_input_tokens_seen": 199784912, "step": 92490 }, { "epoch": 15.088907014681892, "grad_norm": 0.020969685167074203, "learning_rate": 0.00017271245534243912, "loss": 0.1436, "num_input_tokens_seen": 199795024, "step": 92495 }, { "epoch": 15.089722675367048, "grad_norm": 0.010281615890562534, "learning_rate": 0.00017265864692721084, "loss": 0.0009, "num_input_tokens_seen": 199805904, "step": 92500 }, { "epoch": 15.090538336052202, "grad_norm": 0.0017593882512301207, "learning_rate": 0.00017260484514597035, "loss": 0.0019, "num_input_tokens_seen": 199816208, "step": 92505 }, { "epoch": 15.091353996737357, "grad_norm": 0.001155554549768567, "learning_rate": 0.00017255104999980799, "loss": 0.0048, "num_input_tokens_seen": 199826544, "step": 92510 }, { "epoch": 15.092169657422513, "grad_norm": 0.010744703002274036, "learning_rate": 0.00017249726148981399, "loss": 0.0063, "num_input_tokens_seen": 199837360, "step": 92515 }, { "epoch": 15.092985318107667, "grad_norm": 0.011290965601801872, "learning_rate": 0.00017244347961707852, "loss": 0.0015, "num_input_tokens_seen": 199848176, "step": 92520 }, { "epoch": 15.093800978792823, "grad_norm": 0.006745223421603441, "learning_rate": 0.00017238970438269142, "loss": 0.0046, "num_input_tokens_seen": 199859344, "step": 92525 }, { "epoch": 15.094616639477977, "grad_norm": 0.003182594198733568, "learning_rate": 0.00017233593578774254, "loss": 0.0012, "num_input_tokens_seen": 199869520, "step": 92530 }, { "epoch": 15.095432300163132, "grad_norm": 0.1688506007194519, "learning_rate": 0.00017228217383332163, "loss": 0.008, "num_input_tokens_seen": 199880592, "step": 92535 }, { "epoch": 15.096247960848286, "grad_norm": 0.0019982391968369484, "learning_rate": 0.00017222841852051817, "loss": 0.0033, "num_input_tokens_seen": 199889936, "step": 92540 }, { "epoch": 15.097063621533442, "grad_norm": 0.01286247931420803, "learning_rate": 0.0001721746698504217, "loss": 0.0034, "num_input_tokens_seen": 199899376, "step": 92545 }, { "epoch": 15.097879282218598, "grad_norm": 0.0021901631262153387, "learning_rate": 0.0001721209278241213, "loss": 0.0013, "num_input_tokens_seen": 199910640, "step": 92550 }, { "epoch": 15.098694942903752, "grad_norm": 0.31956011056900024, "learning_rate": 0.00017206719244270636, "loss": 0.0241, "num_input_tokens_seen": 199921872, "step": 92555 }, { "epoch": 15.099510603588907, "grad_norm": 0.024079471826553345, "learning_rate": 0.00017201346370726572, "loss": 0.0425, "num_input_tokens_seen": 199932624, "step": 92560 }, { "epoch": 15.100326264274061, "grad_norm": 0.00888835173100233, "learning_rate": 0.00017195974161888833, "loss": 0.0023, "num_input_tokens_seen": 199943664, "step": 92565 }, { "epoch": 15.101141924959217, "grad_norm": 0.13645078241825104, "learning_rate": 0.00017190602617866274, "loss": 0.0059, "num_input_tokens_seen": 199953488, "step": 92570 }, { "epoch": 15.101957585644373, "grad_norm": 0.004840241279453039, "learning_rate": 0.0001718523173876781, "loss": 0.0028, "num_input_tokens_seen": 199962768, "step": 92575 }, { "epoch": 15.102773246329527, "grad_norm": 0.0016001993790268898, "learning_rate": 0.00017179861524702216, "loss": 0.0051, "num_input_tokens_seen": 199974352, "step": 92580 }, { "epoch": 15.103588907014682, "grad_norm": 0.004659554921090603, "learning_rate": 0.000171744919757784, "loss": 0.0037, "num_input_tokens_seen": 199983824, "step": 92585 }, { "epoch": 15.104404567699836, "grad_norm": 0.010251539759337902, "learning_rate": 0.00017169123092105115, "loss": 0.0019, "num_input_tokens_seen": 199994704, "step": 92590 }, { "epoch": 15.105220228384992, "grad_norm": 0.0009150686673820019, "learning_rate": 0.0001716375487379121, "loss": 0.0343, "num_input_tokens_seen": 200007824, "step": 92595 }, { "epoch": 15.106035889070148, "grad_norm": 0.0012699142098426819, "learning_rate": 0.00017158387320945472, "loss": 0.0291, "num_input_tokens_seen": 200016880, "step": 92600 }, { "epoch": 15.106851549755302, "grad_norm": 0.030986489728093147, "learning_rate": 0.0001715302043367668, "loss": 0.0025, "num_input_tokens_seen": 200028464, "step": 92605 }, { "epoch": 15.107667210440457, "grad_norm": 0.007337637711316347, "learning_rate": 0.00017147654212093595, "loss": 0.0026, "num_input_tokens_seen": 200038480, "step": 92610 }, { "epoch": 15.108482871125611, "grad_norm": 0.0179149117320776, "learning_rate": 0.00017142288656304977, "loss": 0.003, "num_input_tokens_seen": 200049328, "step": 92615 }, { "epoch": 15.109298531810767, "grad_norm": 0.010523026809096336, "learning_rate": 0.0001713692376641956, "loss": 0.1059, "num_input_tokens_seen": 200059728, "step": 92620 }, { "epoch": 15.11011419249592, "grad_norm": 0.006119042169302702, "learning_rate": 0.0001713155954254607, "loss": 0.0068, "num_input_tokens_seen": 200069552, "step": 92625 }, { "epoch": 15.110929853181077, "grad_norm": 0.01386276911944151, "learning_rate": 0.00017126195984793225, "loss": 0.0014, "num_input_tokens_seen": 200079568, "step": 92630 }, { "epoch": 15.111745513866232, "grad_norm": 0.011046777479350567, "learning_rate": 0.0001712083309326972, "loss": 0.005, "num_input_tokens_seen": 200089776, "step": 92635 }, { "epoch": 15.112561174551386, "grad_norm": 0.0011883076513186097, "learning_rate": 0.0001711547086808425, "loss": 0.0008, "num_input_tokens_seen": 200101200, "step": 92640 }, { "epoch": 15.113376835236542, "grad_norm": 0.01813393086194992, "learning_rate": 0.00017110109309345468, "loss": 0.0037, "num_input_tokens_seen": 200111952, "step": 92645 }, { "epoch": 15.114192495921696, "grad_norm": 0.003187261987477541, "learning_rate": 0.00017104748417162054, "loss": 0.0046, "num_input_tokens_seen": 200123184, "step": 92650 }, { "epoch": 15.115008156606851, "grad_norm": 0.06230101361870766, "learning_rate": 0.0001709938819164264, "loss": 0.0036, "num_input_tokens_seen": 200134000, "step": 92655 }, { "epoch": 15.115823817292007, "grad_norm": 0.08341676741838455, "learning_rate": 0.00017094028632895863, "loss": 0.0028, "num_input_tokens_seen": 200144976, "step": 92660 }, { "epoch": 15.116639477977161, "grad_norm": 0.007194597739726305, "learning_rate": 0.0001708866974103034, "loss": 0.0021, "num_input_tokens_seen": 200157008, "step": 92665 }, { "epoch": 15.117455138662317, "grad_norm": 0.08438636362552643, "learning_rate": 0.0001708331151615467, "loss": 0.124, "num_input_tokens_seen": 200168656, "step": 92670 }, { "epoch": 15.11827079934747, "grad_norm": 0.001630541984923184, "learning_rate": 0.00017077953958377458, "loss": 0.0029, "num_input_tokens_seen": 200180368, "step": 92675 }, { "epoch": 15.119086460032626, "grad_norm": 0.001464636530727148, "learning_rate": 0.0001707259706780727, "loss": 0.0052, "num_input_tokens_seen": 200190864, "step": 92680 }, { "epoch": 15.119902120717782, "grad_norm": 0.005892497021704912, "learning_rate": 0.00017067240844552672, "loss": 0.0014, "num_input_tokens_seen": 200200528, "step": 92685 }, { "epoch": 15.120717781402936, "grad_norm": 0.002837817883118987, "learning_rate": 0.00017061885288722218, "loss": 0.0073, "num_input_tokens_seen": 200212144, "step": 92690 }, { "epoch": 15.121533442088092, "grad_norm": 0.014140215702354908, "learning_rate": 0.00017056530400424446, "loss": 0.0016, "num_input_tokens_seen": 200222864, "step": 92695 }, { "epoch": 15.122349102773246, "grad_norm": 0.003915575798600912, "learning_rate": 0.00017051176179767858, "loss": 0.0063, "num_input_tokens_seen": 200234320, "step": 92700 }, { "epoch": 15.123164763458401, "grad_norm": 0.007146508898586035, "learning_rate": 0.00017045822626861017, "loss": 0.0653, "num_input_tokens_seen": 200244912, "step": 92705 }, { "epoch": 15.123980424143557, "grad_norm": 0.06835640966892242, "learning_rate": 0.00017040469741812353, "loss": 0.0118, "num_input_tokens_seen": 200256784, "step": 92710 }, { "epoch": 15.124796084828711, "grad_norm": 0.0016879525501281023, "learning_rate": 0.00017035117524730398, "loss": 0.0016, "num_input_tokens_seen": 200267472, "step": 92715 }, { "epoch": 15.125611745513867, "grad_norm": 0.005772008560597897, "learning_rate": 0.00017029765975723604, "loss": 0.0064, "num_input_tokens_seen": 200278288, "step": 92720 }, { "epoch": 15.12642740619902, "grad_norm": 0.0018967565847560763, "learning_rate": 0.0001702441509490043, "loss": 0.0148, "num_input_tokens_seen": 200288976, "step": 92725 }, { "epoch": 15.127243066884176, "grad_norm": 0.02684551104903221, "learning_rate": 0.00017019064882369317, "loss": 0.1522, "num_input_tokens_seen": 200299376, "step": 92730 }, { "epoch": 15.12805872756933, "grad_norm": 0.018632011488080025, "learning_rate": 0.00017013715338238695, "loss": 0.0055, "num_input_tokens_seen": 200309680, "step": 92735 }, { "epoch": 15.128874388254486, "grad_norm": 0.0026431684382259846, "learning_rate": 0.00017008366462616976, "loss": 0.0031, "num_input_tokens_seen": 200319696, "step": 92740 }, { "epoch": 15.129690048939642, "grad_norm": 0.014334792271256447, "learning_rate": 0.00017003018255612562, "loss": 0.0027, "num_input_tokens_seen": 200330864, "step": 92745 }, { "epoch": 15.130505709624796, "grad_norm": 0.0024962888564914465, "learning_rate": 0.00016997670717333846, "loss": 0.0063, "num_input_tokens_seen": 200341648, "step": 92750 }, { "epoch": 15.131321370309951, "grad_norm": 0.03874950855970383, "learning_rate": 0.00016992323847889195, "loss": 0.0116, "num_input_tokens_seen": 200352816, "step": 92755 }, { "epoch": 15.132137030995105, "grad_norm": 0.001166831818409264, "learning_rate": 0.00016986977647386975, "loss": 0.0049, "num_input_tokens_seen": 200364208, "step": 92760 }, { "epoch": 15.132952691680261, "grad_norm": 0.11291606724262238, "learning_rate": 0.00016981632115935536, "loss": 0.0046, "num_input_tokens_seen": 200374224, "step": 92765 }, { "epoch": 15.133768352365417, "grad_norm": 0.0012088071089237928, "learning_rate": 0.00016976287253643208, "loss": 0.0011, "num_input_tokens_seen": 200386064, "step": 92770 }, { "epoch": 15.13458401305057, "grad_norm": 0.0039956653490662575, "learning_rate": 0.0001697094306061831, "loss": 0.0059, "num_input_tokens_seen": 200396240, "step": 92775 }, { "epoch": 15.135399673735726, "grad_norm": 0.036707255989313126, "learning_rate": 0.00016965599536969156, "loss": 0.0064, "num_input_tokens_seen": 200406672, "step": 92780 }, { "epoch": 15.13621533442088, "grad_norm": 0.016374798491597176, "learning_rate": 0.00016960256682804032, "loss": 0.0026, "num_input_tokens_seen": 200418320, "step": 92785 }, { "epoch": 15.137030995106036, "grad_norm": 0.008576109074056149, "learning_rate": 0.00016954914498231217, "loss": 0.005, "num_input_tokens_seen": 200429840, "step": 92790 }, { "epoch": 15.137846655791192, "grad_norm": 0.0024898925330489874, "learning_rate": 0.00016949572983358986, "loss": 0.0056, "num_input_tokens_seen": 200440784, "step": 92795 }, { "epoch": 15.138662316476346, "grad_norm": 0.014025689102709293, "learning_rate": 0.0001694423213829558, "loss": 0.0013, "num_input_tokens_seen": 200452048, "step": 92800 }, { "epoch": 15.139477977161501, "grad_norm": 0.06928585469722748, "learning_rate": 0.00016938891963149232, "loss": 0.0084, "num_input_tokens_seen": 200462000, "step": 92805 }, { "epoch": 15.140293637846655, "grad_norm": 0.0028578825294971466, "learning_rate": 0.00016933552458028213, "loss": 0.0025, "num_input_tokens_seen": 200472176, "step": 92810 }, { "epoch": 15.141109298531811, "grad_norm": 0.04835427552461624, "learning_rate": 0.0001692821362304066, "loss": 0.0047, "num_input_tokens_seen": 200483216, "step": 92815 }, { "epoch": 15.141924959216965, "grad_norm": 0.0028014755807816982, "learning_rate": 0.00016922875458294856, "loss": 0.0016, "num_input_tokens_seen": 200494160, "step": 92820 }, { "epoch": 15.14274061990212, "grad_norm": 0.0011742091737687588, "learning_rate": 0.00016917537963898903, "loss": 0.0031, "num_input_tokens_seen": 200504560, "step": 92825 }, { "epoch": 15.143556280587276, "grad_norm": 0.007004075683653355, "learning_rate": 0.0001691220113996105, "loss": 0.0024, "num_input_tokens_seen": 200515440, "step": 92830 }, { "epoch": 15.14437194127243, "grad_norm": 0.01578759215772152, "learning_rate": 0.00016906864986589377, "loss": 0.0013, "num_input_tokens_seen": 200525712, "step": 92835 }, { "epoch": 15.145187601957586, "grad_norm": 0.0015252482844516635, "learning_rate": 0.00016901529503892098, "loss": 0.0041, "num_input_tokens_seen": 200536784, "step": 92840 }, { "epoch": 15.14600326264274, "grad_norm": 0.006211650092154741, "learning_rate": 0.00016896194691977284, "loss": 0.0032, "num_input_tokens_seen": 200548176, "step": 92845 }, { "epoch": 15.146818923327896, "grad_norm": 0.001195227261632681, "learning_rate": 0.00016890860550953092, "loss": 0.0018, "num_input_tokens_seen": 200559920, "step": 92850 }, { "epoch": 15.147634584013051, "grad_norm": 0.00874117948114872, "learning_rate": 0.00016885527080927616, "loss": 0.0021, "num_input_tokens_seen": 200569616, "step": 92855 }, { "epoch": 15.148450244698205, "grad_norm": 0.1447891891002655, "learning_rate": 0.00016880194282008941, "loss": 0.0041, "num_input_tokens_seen": 200580464, "step": 92860 }, { "epoch": 15.149265905383361, "grad_norm": 0.004833567887544632, "learning_rate": 0.0001687486215430515, "loss": 0.0038, "num_input_tokens_seen": 200592272, "step": 92865 }, { "epoch": 15.150081566068515, "grad_norm": 0.009706101380288601, "learning_rate": 0.0001686953069792429, "loss": 0.0021, "num_input_tokens_seen": 200603824, "step": 92870 }, { "epoch": 15.15089722675367, "grad_norm": 0.002112502697855234, "learning_rate": 0.00016864199912974427, "loss": 0.0032, "num_input_tokens_seen": 200613584, "step": 92875 }, { "epoch": 15.151712887438826, "grad_norm": 0.008575985208153725, "learning_rate": 0.00016858869799563585, "loss": 0.0034, "num_input_tokens_seen": 200624464, "step": 92880 }, { "epoch": 15.15252854812398, "grad_norm": 0.07412799447774887, "learning_rate": 0.0001685354035779979, "loss": 0.0036, "num_input_tokens_seen": 200635280, "step": 92885 }, { "epoch": 15.153344208809136, "grad_norm": 0.00945495069026947, "learning_rate": 0.00016848211587791045, "loss": 0.0007, "num_input_tokens_seen": 200645968, "step": 92890 }, { "epoch": 15.15415986949429, "grad_norm": 0.0040870546363294125, "learning_rate": 0.00016842883489645355, "loss": 0.0012, "num_input_tokens_seen": 200656016, "step": 92895 }, { "epoch": 15.154975530179446, "grad_norm": 0.07097148895263672, "learning_rate": 0.00016837556063470688, "loss": 0.0024, "num_input_tokens_seen": 200665968, "step": 92900 }, { "epoch": 15.1557911908646, "grad_norm": 0.21168480813503265, "learning_rate": 0.0001683222930937502, "loss": 0.0035, "num_input_tokens_seen": 200677296, "step": 92905 }, { "epoch": 15.156606851549755, "grad_norm": 0.009473263286054134, "learning_rate": 0.00016826903227466284, "loss": 0.0458, "num_input_tokens_seen": 200687472, "step": 92910 }, { "epoch": 15.15742251223491, "grad_norm": 0.16500073671340942, "learning_rate": 0.00016821577817852473, "loss": 0.0242, "num_input_tokens_seen": 200698704, "step": 92915 }, { "epoch": 15.158238172920065, "grad_norm": 0.013035625219345093, "learning_rate": 0.00016816253080641441, "loss": 0.0036, "num_input_tokens_seen": 200708272, "step": 92920 }, { "epoch": 15.15905383360522, "grad_norm": 0.08334468305110931, "learning_rate": 0.00016810929015941174, "loss": 0.0014, "num_input_tokens_seen": 200718608, "step": 92925 }, { "epoch": 15.159869494290374, "grad_norm": 0.0007003924110904336, "learning_rate": 0.00016805605623859492, "loss": 0.0013, "num_input_tokens_seen": 200730288, "step": 92930 }, { "epoch": 15.16068515497553, "grad_norm": 0.003557375865057111, "learning_rate": 0.0001680028290450436, "loss": 0.1876, "num_input_tokens_seen": 200741360, "step": 92935 }, { "epoch": 15.161500815660686, "grad_norm": 0.012556234374642372, "learning_rate": 0.00016794960857983583, "loss": 0.2872, "num_input_tokens_seen": 200753968, "step": 92940 }, { "epoch": 15.16231647634584, "grad_norm": 0.03704483434557915, "learning_rate": 0.00016789639484405077, "loss": 0.0028, "num_input_tokens_seen": 200764112, "step": 92945 }, { "epoch": 15.163132137030995, "grad_norm": 0.002027664100751281, "learning_rate": 0.00016784318783876623, "loss": 0.0012, "num_input_tokens_seen": 200776208, "step": 92950 }, { "epoch": 15.16394779771615, "grad_norm": 0.005491977091878653, "learning_rate": 0.0001677899875650612, "loss": 0.0114, "num_input_tokens_seen": 200787152, "step": 92955 }, { "epoch": 15.164763458401305, "grad_norm": 0.0017870229203253984, "learning_rate": 0.00016773679402401321, "loss": 0.0032, "num_input_tokens_seen": 200799024, "step": 92960 }, { "epoch": 15.16557911908646, "grad_norm": 0.038198426365852356, "learning_rate": 0.0001676836072167009, "loss": 0.0023, "num_input_tokens_seen": 200810032, "step": 92965 }, { "epoch": 15.166394779771615, "grad_norm": 0.002829183591529727, "learning_rate": 0.0001676304271442015, "loss": 0.0015, "num_input_tokens_seen": 200820816, "step": 92970 }, { "epoch": 15.16721044045677, "grad_norm": 0.19888825714588165, "learning_rate": 0.00016757725380759354, "loss": 0.1378, "num_input_tokens_seen": 200830128, "step": 92975 }, { "epoch": 15.168026101141924, "grad_norm": 0.01338116079568863, "learning_rate": 0.00016752408720795386, "loss": 0.0066, "num_input_tokens_seen": 200840336, "step": 92980 }, { "epoch": 15.16884176182708, "grad_norm": 0.00568029098212719, "learning_rate": 0.00016747092734636067, "loss": 0.0061, "num_input_tokens_seen": 200851376, "step": 92985 }, { "epoch": 15.169657422512234, "grad_norm": 0.1049458459019661, "learning_rate": 0.0001674177742238906, "loss": 0.0066, "num_input_tokens_seen": 200863280, "step": 92990 }, { "epoch": 15.17047308319739, "grad_norm": 0.002612957265228033, "learning_rate": 0.0001673646278416215, "loss": 0.0015, "num_input_tokens_seen": 200874224, "step": 92995 }, { "epoch": 15.171288743882545, "grad_norm": 0.0045676566660404205, "learning_rate": 0.00016731148820063013, "loss": 0.0707, "num_input_tokens_seen": 200885104, "step": 93000 }, { "epoch": 15.1721044045677, "grad_norm": 0.0016079711494967341, "learning_rate": 0.00016725835530199352, "loss": 0.0041, "num_input_tokens_seen": 200895568, "step": 93005 }, { "epoch": 15.172920065252855, "grad_norm": 0.002138703130185604, "learning_rate": 0.00016720522914678843, "loss": 0.0024, "num_input_tokens_seen": 200907248, "step": 93010 }, { "epoch": 15.173735725938009, "grad_norm": 0.005183606408536434, "learning_rate": 0.00016715210973609158, "loss": 0.0016, "num_input_tokens_seen": 200918064, "step": 93015 }, { "epoch": 15.174551386623165, "grad_norm": 0.026629121974110603, "learning_rate": 0.00016709899707097948, "loss": 0.0105, "num_input_tokens_seen": 200928560, "step": 93020 }, { "epoch": 15.17536704730832, "grad_norm": 0.018558355048298836, "learning_rate": 0.0001670458911525285, "loss": 0.0029, "num_input_tokens_seen": 200938448, "step": 93025 }, { "epoch": 15.176182707993474, "grad_norm": 0.009968779049813747, "learning_rate": 0.00016699279198181493, "loss": 0.0095, "num_input_tokens_seen": 200949488, "step": 93030 }, { "epoch": 15.17699836867863, "grad_norm": 0.0031037803273648024, "learning_rate": 0.00016693969955991483, "loss": 0.0059, "num_input_tokens_seen": 200961072, "step": 93035 }, { "epoch": 15.177814029363784, "grad_norm": 0.029598917812108994, "learning_rate": 0.00016688661388790434, "loss": 0.0806, "num_input_tokens_seen": 200972752, "step": 93040 }, { "epoch": 15.17862969004894, "grad_norm": 0.0018472730880603194, "learning_rate": 0.00016683353496685895, "loss": 0.0046, "num_input_tokens_seen": 200983920, "step": 93045 }, { "epoch": 15.179445350734095, "grad_norm": 0.018483439460396767, "learning_rate": 0.00016678046279785497, "loss": 0.0418, "num_input_tokens_seen": 200995408, "step": 93050 }, { "epoch": 15.18026101141925, "grad_norm": 0.06471030414104462, "learning_rate": 0.00016672739738196734, "loss": 0.0071, "num_input_tokens_seen": 201005616, "step": 93055 }, { "epoch": 15.181076672104405, "grad_norm": 0.000771304068621248, "learning_rate": 0.0001666743387202721, "loss": 0.0037, "num_input_tokens_seen": 201015344, "step": 93060 }, { "epoch": 15.181892332789559, "grad_norm": 0.0015893502859398723, "learning_rate": 0.00016662128681384388, "loss": 0.0027, "num_input_tokens_seen": 201026448, "step": 93065 }, { "epoch": 15.182707993474715, "grad_norm": 0.0014162855222821236, "learning_rate": 0.00016656824166375855, "loss": 0.0047, "num_input_tokens_seen": 201037456, "step": 93070 }, { "epoch": 15.18352365415987, "grad_norm": 0.01789838634431362, "learning_rate": 0.0001665152032710905, "loss": 0.0082, "num_input_tokens_seen": 201048624, "step": 93075 }, { "epoch": 15.184339314845024, "grad_norm": 0.0031051000114530325, "learning_rate": 0.0001664621716369152, "loss": 0.0024, "num_input_tokens_seen": 201059760, "step": 93080 }, { "epoch": 15.18515497553018, "grad_norm": 0.0025171549059450626, "learning_rate": 0.00016640914676230677, "loss": 0.0012, "num_input_tokens_seen": 201072208, "step": 93085 }, { "epoch": 15.185970636215334, "grad_norm": 0.08467067778110504, "learning_rate": 0.00016635612864834048, "loss": 0.0086, "num_input_tokens_seen": 201082416, "step": 93090 }, { "epoch": 15.18678629690049, "grad_norm": 0.012077942490577698, "learning_rate": 0.00016630311729609026, "loss": 0.0029, "num_input_tokens_seen": 201092944, "step": 93095 }, { "epoch": 15.187601957585644, "grad_norm": 0.10698788613080978, "learning_rate": 0.00016625011270663098, "loss": 0.0039, "num_input_tokens_seen": 201103504, "step": 93100 }, { "epoch": 15.1884176182708, "grad_norm": 0.005380884278565645, "learning_rate": 0.00016619711488103622, "loss": 0.0019, "num_input_tokens_seen": 201115568, "step": 93105 }, { "epoch": 15.189233278955955, "grad_norm": 0.025823216885328293, "learning_rate": 0.0001661441238203807, "loss": 0.0037, "num_input_tokens_seen": 201126000, "step": 93110 }, { "epoch": 15.190048939641109, "grad_norm": 0.012832976877689362, "learning_rate": 0.00016609113952573774, "loss": 0.003, "num_input_tokens_seen": 201138064, "step": 93115 }, { "epoch": 15.190864600326265, "grad_norm": 0.005685700569301844, "learning_rate": 0.0001660381619981817, "loss": 0.1171, "num_input_tokens_seen": 201148368, "step": 93120 }, { "epoch": 15.191680261011419, "grad_norm": 0.010949775576591492, "learning_rate": 0.0001659851912387857, "loss": 0.0028, "num_input_tokens_seen": 201159280, "step": 93125 }, { "epoch": 15.192495921696574, "grad_norm": 0.003707254771143198, "learning_rate": 0.00016593222724862366, "loss": 0.0025, "num_input_tokens_seen": 201170032, "step": 93130 }, { "epoch": 15.19331158238173, "grad_norm": 0.002400952624157071, "learning_rate": 0.0001658792700287689, "loss": 0.0016, "num_input_tokens_seen": 201179984, "step": 93135 }, { "epoch": 15.194127243066884, "grad_norm": 0.014919549226760864, "learning_rate": 0.00016582631958029454, "loss": 0.0039, "num_input_tokens_seen": 201189872, "step": 93140 }, { "epoch": 15.19494290375204, "grad_norm": 0.0005131821380928159, "learning_rate": 0.00016577337590427372, "loss": 0.0046, "num_input_tokens_seen": 201201392, "step": 93145 }, { "epoch": 15.195758564437194, "grad_norm": 0.029786646366119385, "learning_rate": 0.00016572043900177946, "loss": 0.0016, "num_input_tokens_seen": 201213104, "step": 93150 }, { "epoch": 15.19657422512235, "grad_norm": 0.004074044059962034, "learning_rate": 0.0001656675088738846, "loss": 0.0108, "num_input_tokens_seen": 201224432, "step": 93155 }, { "epoch": 15.197389885807505, "grad_norm": 0.007668066769838333, "learning_rate": 0.00016561458552166174, "loss": 0.0107, "num_input_tokens_seen": 201235248, "step": 93160 }, { "epoch": 15.198205546492659, "grad_norm": 0.8521307110786438, "learning_rate": 0.00016556166894618352, "loss": 0.1039, "num_input_tokens_seen": 201245168, "step": 93165 }, { "epoch": 15.199021207177815, "grad_norm": 0.004517595283687115, "learning_rate": 0.00016550875914852237, "loss": 0.0046, "num_input_tokens_seen": 201254384, "step": 93170 }, { "epoch": 15.199836867862969, "grad_norm": 0.0450996570289135, "learning_rate": 0.00016545585612975051, "loss": 0.0451, "num_input_tokens_seen": 201265776, "step": 93175 }, { "epoch": 15.200652528548124, "grad_norm": 0.0036346532870084047, "learning_rate": 0.00016540295989094018, "loss": 0.002, "num_input_tokens_seen": 201277136, "step": 93180 }, { "epoch": 15.201468189233278, "grad_norm": 0.008076614700257778, "learning_rate": 0.0001653500704331633, "loss": 0.0085, "num_input_tokens_seen": 201287856, "step": 93185 }, { "epoch": 15.202283849918434, "grad_norm": 0.001289551379159093, "learning_rate": 0.0001652971877574916, "loss": 0.0074, "num_input_tokens_seen": 201297168, "step": 93190 }, { "epoch": 15.20309951060359, "grad_norm": 0.1675768941640854, "learning_rate": 0.00016524431186499733, "loss": 0.0058, "num_input_tokens_seen": 201307920, "step": 93195 }, { "epoch": 15.203915171288743, "grad_norm": 0.00890275463461876, "learning_rate": 0.0001651914427567514, "loss": 0.1466, "num_input_tokens_seen": 201318704, "step": 93200 }, { "epoch": 15.2047308319739, "grad_norm": 0.0075667728669941425, "learning_rate": 0.000165138580433826, "loss": 0.0086, "num_input_tokens_seen": 201329360, "step": 93205 }, { "epoch": 15.205546492659053, "grad_norm": 0.019372614100575447, "learning_rate": 0.00016508572489729172, "loss": 0.0034, "num_input_tokens_seen": 201340528, "step": 93210 }, { "epoch": 15.206362153344209, "grad_norm": 0.013845077715814114, "learning_rate": 0.00016503287614822042, "loss": 0.003, "num_input_tokens_seen": 201351632, "step": 93215 }, { "epoch": 15.207177814029365, "grad_norm": 0.012783776968717575, "learning_rate": 0.00016498003418768248, "loss": 0.0031, "num_input_tokens_seen": 201362704, "step": 93220 }, { "epoch": 15.207993474714518, "grad_norm": 0.044286951422691345, "learning_rate": 0.00016492719901674947, "loss": 0.0031, "num_input_tokens_seen": 201373264, "step": 93225 }, { "epoch": 15.208809135399674, "grad_norm": 0.0009127430967055261, "learning_rate": 0.00016487437063649152, "loss": 0.0031, "num_input_tokens_seen": 201382928, "step": 93230 }, { "epoch": 15.209624796084828, "grad_norm": 0.017166294157505035, "learning_rate": 0.00016482154904797974, "loss": 0.0664, "num_input_tokens_seen": 201394896, "step": 93235 }, { "epoch": 15.210440456769984, "grad_norm": 0.0013074681628495455, "learning_rate": 0.0001647687342522845, "loss": 0.0015, "num_input_tokens_seen": 201403952, "step": 93240 }, { "epoch": 15.21125611745514, "grad_norm": 0.0034039251040667295, "learning_rate": 0.00016471592625047615, "loss": 0.0053, "num_input_tokens_seen": 201415856, "step": 93245 }, { "epoch": 15.212071778140293, "grad_norm": 0.00026853723102249205, "learning_rate": 0.00016466312504362485, "loss": 0.0038, "num_input_tokens_seen": 201426800, "step": 93250 }, { "epoch": 15.21288743882545, "grad_norm": 0.02092001773416996, "learning_rate": 0.00016461033063280074, "loss": 0.035, "num_input_tokens_seen": 201437488, "step": 93255 }, { "epoch": 15.213703099510603, "grad_norm": 0.0905800610780716, "learning_rate": 0.00016455754301907376, "loss": 0.0037, "num_input_tokens_seen": 201448336, "step": 93260 }, { "epoch": 15.214518760195759, "grad_norm": 0.013709750957787037, "learning_rate": 0.00016450476220351368, "loss": 0.0034, "num_input_tokens_seen": 201459536, "step": 93265 }, { "epoch": 15.215334420880913, "grad_norm": 0.017409684136509895, "learning_rate": 0.00016445198818719025, "loss": 0.0073, "num_input_tokens_seen": 201468944, "step": 93270 }, { "epoch": 15.216150081566068, "grad_norm": 0.0011702359188348055, "learning_rate": 0.00016439922097117294, "loss": 0.0016, "num_input_tokens_seen": 201479664, "step": 93275 }, { "epoch": 15.216965742251224, "grad_norm": 0.012537084519863129, "learning_rate": 0.00016434646055653112, "loss": 0.0149, "num_input_tokens_seen": 201490064, "step": 93280 }, { "epoch": 15.217781402936378, "grad_norm": 0.01967203989624977, "learning_rate": 0.0001642937069443341, "loss": 0.0025, "num_input_tokens_seen": 201501776, "step": 93285 }, { "epoch": 15.218597063621534, "grad_norm": 0.05715855583548546, "learning_rate": 0.00016424096013565098, "loss": 0.0031, "num_input_tokens_seen": 201513040, "step": 93290 }, { "epoch": 15.219412724306688, "grad_norm": 0.024029148742556572, "learning_rate": 0.00016418822013155077, "loss": 0.0024, "num_input_tokens_seen": 201523856, "step": 93295 }, { "epoch": 15.220228384991843, "grad_norm": 0.5616940855979919, "learning_rate": 0.00016413548693310225, "loss": 0.1153, "num_input_tokens_seen": 201535376, "step": 93300 }, { "epoch": 15.221044045676999, "grad_norm": 0.005037497729063034, "learning_rate": 0.00016408276054137417, "loss": 0.0673, "num_input_tokens_seen": 201546064, "step": 93305 }, { "epoch": 15.221859706362153, "grad_norm": 0.006846841424703598, "learning_rate": 0.00016403004095743513, "loss": 0.0037, "num_input_tokens_seen": 201557392, "step": 93310 }, { "epoch": 15.222675367047309, "grad_norm": 0.0015953175025060773, "learning_rate": 0.00016397732818235344, "loss": 0.0063, "num_input_tokens_seen": 201568944, "step": 93315 }, { "epoch": 15.223491027732463, "grad_norm": 0.022259226068854332, "learning_rate": 0.0001639246222171975, "loss": 0.0025, "num_input_tokens_seen": 201579376, "step": 93320 }, { "epoch": 15.224306688417618, "grad_norm": 0.01390728447586298, "learning_rate": 0.0001638719230630355, "loss": 0.0029, "num_input_tokens_seen": 201590544, "step": 93325 }, { "epoch": 15.225122349102774, "grad_norm": 0.00581662543118, "learning_rate": 0.0001638192307209353, "loss": 0.0026, "num_input_tokens_seen": 201602064, "step": 93330 }, { "epoch": 15.225938009787928, "grad_norm": 0.022805117070674896, "learning_rate": 0.00016376654519196477, "loss": 0.0054, "num_input_tokens_seen": 201612944, "step": 93335 }, { "epoch": 15.226753670473084, "grad_norm": 0.520837664604187, "learning_rate": 0.00016371386647719182, "loss": 0.0559, "num_input_tokens_seen": 201622192, "step": 93340 }, { "epoch": 15.227569331158238, "grad_norm": 0.003279214957728982, "learning_rate": 0.00016366119457768407, "loss": 0.0019, "num_input_tokens_seen": 201632560, "step": 93345 }, { "epoch": 15.228384991843393, "grad_norm": 0.008738663047552109, "learning_rate": 0.00016360852949450882, "loss": 0.0631, "num_input_tokens_seen": 201643792, "step": 93350 }, { "epoch": 15.229200652528547, "grad_norm": 0.014815381728112698, "learning_rate": 0.00016355587122873349, "loss": 0.0396, "num_input_tokens_seen": 201654416, "step": 93355 }, { "epoch": 15.230016313213703, "grad_norm": 0.07490991055965424, "learning_rate": 0.00016350321978142525, "loss": 0.0071, "num_input_tokens_seen": 201665744, "step": 93360 }, { "epoch": 15.230831973898859, "grad_norm": 0.022860554978251457, "learning_rate": 0.00016345057515365115, "loss": 0.0073, "num_input_tokens_seen": 201675216, "step": 93365 }, { "epoch": 15.231647634584013, "grad_norm": 0.011496799066662788, "learning_rate": 0.00016339793734647807, "loss": 0.0903, "num_input_tokens_seen": 201685776, "step": 93370 }, { "epoch": 15.232463295269168, "grad_norm": 0.008875560946762562, "learning_rate": 0.00016334530636097277, "loss": 0.0033, "num_input_tokens_seen": 201697296, "step": 93375 }, { "epoch": 15.233278955954322, "grad_norm": 0.004284188617020845, "learning_rate": 0.00016329268219820192, "loss": 0.1195, "num_input_tokens_seen": 201707440, "step": 93380 }, { "epoch": 15.234094616639478, "grad_norm": 1.4567502737045288, "learning_rate": 0.00016324006485923204, "loss": 0.0522, "num_input_tokens_seen": 201718704, "step": 93385 }, { "epoch": 15.234910277324634, "grad_norm": 0.020433912053704262, "learning_rate": 0.00016318745434512944, "loss": 0.0034, "num_input_tokens_seen": 201728592, "step": 93390 }, { "epoch": 15.235725938009788, "grad_norm": 0.004512346815317869, "learning_rate": 0.00016313485065696037, "loss": 0.006, "num_input_tokens_seen": 201738320, "step": 93395 }, { "epoch": 15.236541598694943, "grad_norm": 0.03213776648044586, "learning_rate": 0.00016308225379579088, "loss": 0.0155, "num_input_tokens_seen": 201750160, "step": 93400 }, { "epoch": 15.237357259380097, "grad_norm": 0.06638370454311371, "learning_rate": 0.0001630296637626869, "loss": 0.0061, "num_input_tokens_seen": 201761360, "step": 93405 }, { "epoch": 15.238172920065253, "grad_norm": 0.08439627289772034, "learning_rate": 0.0001629770805587143, "loss": 0.0067, "num_input_tokens_seen": 201772496, "step": 93410 }, { "epoch": 15.238988580750409, "grad_norm": 0.0017806835239753127, "learning_rate": 0.0001629245041849387, "loss": 0.1468, "num_input_tokens_seen": 201782640, "step": 93415 }, { "epoch": 15.239804241435563, "grad_norm": 0.03064138814806938, "learning_rate": 0.0001628719346424256, "loss": 0.009, "num_input_tokens_seen": 201793968, "step": 93420 }, { "epoch": 15.240619902120718, "grad_norm": 0.03552016243338585, "learning_rate": 0.00016281937193224051, "loss": 0.0065, "num_input_tokens_seen": 201805264, "step": 93425 }, { "epoch": 15.241435562805872, "grad_norm": 0.009931345470249653, "learning_rate": 0.0001627668160554485, "loss": 0.0039, "num_input_tokens_seen": 201816112, "step": 93430 }, { "epoch": 15.242251223491028, "grad_norm": 0.038621384650468826, "learning_rate": 0.00016271426701311483, "loss": 0.0474, "num_input_tokens_seen": 201826064, "step": 93435 }, { "epoch": 15.243066884176184, "grad_norm": 0.0006465500337071717, "learning_rate": 0.00016266172480630436, "loss": 0.0043, "num_input_tokens_seen": 201837200, "step": 93440 }, { "epoch": 15.243882544861338, "grad_norm": 0.007097797933965921, "learning_rate": 0.0001626091894360819, "loss": 0.0304, "num_input_tokens_seen": 201848816, "step": 93445 }, { "epoch": 15.244698205546493, "grad_norm": 0.02344132959842682, "learning_rate": 0.00016255666090351245, "loss": 0.0033, "num_input_tokens_seen": 201858416, "step": 93450 }, { "epoch": 15.245513866231647, "grad_norm": 0.6161487102508545, "learning_rate": 0.00016250413920966013, "loss": 0.0466, "num_input_tokens_seen": 201868720, "step": 93455 }, { "epoch": 15.246329526916803, "grad_norm": 0.010321940295398235, "learning_rate": 0.0001624516243555898, "loss": 0.0383, "num_input_tokens_seen": 201879600, "step": 93460 }, { "epoch": 15.247145187601957, "grad_norm": 0.003955533728003502, "learning_rate": 0.00016239911634236527, "loss": 0.1012, "num_input_tokens_seen": 201892816, "step": 93465 }, { "epoch": 15.247960848287113, "grad_norm": 0.001835820497944951, "learning_rate": 0.00016234661517105115, "loss": 0.0033, "num_input_tokens_seen": 201904816, "step": 93470 }, { "epoch": 15.248776508972268, "grad_norm": 0.00370815210044384, "learning_rate": 0.00016229412084271095, "loss": 0.0134, "num_input_tokens_seen": 201915248, "step": 93475 }, { "epoch": 15.249592169657422, "grad_norm": 0.005513612646609545, "learning_rate": 0.00016224163335840897, "loss": 0.0665, "num_input_tokens_seen": 201926064, "step": 93480 }, { "epoch": 15.250407830342578, "grad_norm": 0.0008234487031586468, "learning_rate": 0.00016218915271920875, "loss": 0.0033, "num_input_tokens_seen": 201938000, "step": 93485 }, { "epoch": 15.251223491027732, "grad_norm": 0.0024828226305544376, "learning_rate": 0.00016213667892617394, "loss": 0.0017, "num_input_tokens_seen": 201948112, "step": 93490 }, { "epoch": 15.252039151712887, "grad_norm": 0.004459597636014223, "learning_rate": 0.00016208421198036789, "loss": 0.0079, "num_input_tokens_seen": 201959216, "step": 93495 }, { "epoch": 15.252854812398043, "grad_norm": 0.32820403575897217, "learning_rate": 0.00016203175188285397, "loss": 0.0275, "num_input_tokens_seen": 201969936, "step": 93500 }, { "epoch": 15.253670473083197, "grad_norm": 0.0012011234648525715, "learning_rate": 0.00016197929863469534, "loss": 0.006, "num_input_tokens_seen": 201981872, "step": 93505 }, { "epoch": 15.254486133768353, "grad_norm": 0.006218986120074987, "learning_rate": 0.0001619268522369551, "loss": 0.0275, "num_input_tokens_seen": 201992048, "step": 93510 }, { "epoch": 15.255301794453507, "grad_norm": 0.004850749392062426, "learning_rate": 0.00016187441269069596, "loss": 0.0029, "num_input_tokens_seen": 202003120, "step": 93515 }, { "epoch": 15.256117455138662, "grad_norm": 0.0029057359788566828, "learning_rate": 0.00016182197999698084, "loss": 0.0017, "num_input_tokens_seen": 202014320, "step": 93520 }, { "epoch": 15.256933115823816, "grad_norm": 0.0069501763209700584, "learning_rate": 0.00016176955415687233, "loss": 0.0052, "num_input_tokens_seen": 202024208, "step": 93525 }, { "epoch": 15.257748776508972, "grad_norm": 0.01931832917034626, "learning_rate": 0.00016171713517143288, "loss": 0.0052, "num_input_tokens_seen": 202034672, "step": 93530 }, { "epoch": 15.258564437194128, "grad_norm": 0.02083902806043625, "learning_rate": 0.0001616647230417248, "loss": 0.012, "num_input_tokens_seen": 202045008, "step": 93535 }, { "epoch": 15.259380097879282, "grad_norm": 0.0012294130865484476, "learning_rate": 0.0001616123177688103, "loss": 0.0015, "num_input_tokens_seen": 202056848, "step": 93540 }, { "epoch": 15.260195758564437, "grad_norm": 0.0032329263631254435, "learning_rate": 0.00016155991935375147, "loss": 0.0272, "num_input_tokens_seen": 202067600, "step": 93545 }, { "epoch": 15.261011419249591, "grad_norm": 0.007929227314889431, "learning_rate": 0.00016150752779761008, "loss": 0.0032, "num_input_tokens_seen": 202078288, "step": 93550 }, { "epoch": 15.261827079934747, "grad_norm": 0.003024327801540494, "learning_rate": 0.00016145514310144838, "loss": 0.0029, "num_input_tokens_seen": 202089104, "step": 93555 }, { "epoch": 15.262642740619903, "grad_norm": 0.09575843811035156, "learning_rate": 0.0001614027652663273, "loss": 0.0046, "num_input_tokens_seen": 202098576, "step": 93560 }, { "epoch": 15.263458401305057, "grad_norm": 0.00582880387082696, "learning_rate": 0.00016135039429330912, "loss": 0.0043, "num_input_tokens_seen": 202108528, "step": 93565 }, { "epoch": 15.264274061990212, "grad_norm": 0.0019978994969278574, "learning_rate": 0.0001612980301834544, "loss": 0.0565, "num_input_tokens_seen": 202119792, "step": 93570 }, { "epoch": 15.265089722675366, "grad_norm": 0.040785159915685654, "learning_rate": 0.00016124567293782517, "loss": 0.0776, "num_input_tokens_seen": 202131280, "step": 93575 }, { "epoch": 15.265905383360522, "grad_norm": 0.012387178838253021, "learning_rate": 0.00016119332255748177, "loss": 0.0076, "num_input_tokens_seen": 202140880, "step": 93580 }, { "epoch": 15.266721044045678, "grad_norm": 0.00521023478358984, "learning_rate": 0.0001611409790434858, "loss": 0.0036, "num_input_tokens_seen": 202152496, "step": 93585 }, { "epoch": 15.267536704730832, "grad_norm": 0.5057440996170044, "learning_rate": 0.00016108864239689746, "loss": 0.0665, "num_input_tokens_seen": 202162544, "step": 93590 }, { "epoch": 15.268352365415987, "grad_norm": 0.23563796281814575, "learning_rate": 0.00016103631261877799, "loss": 0.01, "num_input_tokens_seen": 202173584, "step": 93595 }, { "epoch": 15.269168026101141, "grad_norm": 0.0431128591299057, "learning_rate": 0.0001609839897101874, "loss": 0.0067, "num_input_tokens_seen": 202184176, "step": 93600 }, { "epoch": 15.269983686786297, "grad_norm": 0.01184097956866026, "learning_rate": 0.00016093167367218665, "loss": 0.0169, "num_input_tokens_seen": 202194544, "step": 93605 }, { "epoch": 15.270799347471453, "grad_norm": 0.0048804692924022675, "learning_rate": 0.0001608793645058353, "loss": 0.0025, "num_input_tokens_seen": 202206896, "step": 93610 }, { "epoch": 15.271615008156607, "grad_norm": 0.007331953849643469, "learning_rate": 0.0001608270622121942, "loss": 0.0041, "num_input_tokens_seen": 202216848, "step": 93615 }, { "epoch": 15.272430668841762, "grad_norm": 0.0063942731358110905, "learning_rate": 0.00016077476679232262, "loss": 0.002, "num_input_tokens_seen": 202228112, "step": 93620 }, { "epoch": 15.273246329526916, "grad_norm": 0.003414425067603588, "learning_rate": 0.00016072247824728086, "loss": 0.0022, "num_input_tokens_seen": 202239248, "step": 93625 }, { "epoch": 15.274061990212072, "grad_norm": 0.026459679007530212, "learning_rate": 0.00016067019657812852, "loss": 0.0169, "num_input_tokens_seen": 202251120, "step": 93630 }, { "epoch": 15.274877650897226, "grad_norm": 0.06253305077552795, "learning_rate": 0.0001606179217859251, "loss": 0.0098, "num_input_tokens_seen": 202260560, "step": 93635 }, { "epoch": 15.275693311582382, "grad_norm": 0.029631705954670906, "learning_rate": 0.00016056565387173005, "loss": 0.0459, "num_input_tokens_seen": 202271664, "step": 93640 }, { "epoch": 15.276508972267537, "grad_norm": 0.005431040655821562, "learning_rate": 0.0001605133928366026, "loss": 0.0054, "num_input_tokens_seen": 202282320, "step": 93645 }, { "epoch": 15.277324632952691, "grad_norm": 0.00246419757604599, "learning_rate": 0.00016046113868160194, "loss": 0.01, "num_input_tokens_seen": 202292816, "step": 93650 }, { "epoch": 15.278140293637847, "grad_norm": 0.0032046616543084383, "learning_rate": 0.00016040889140778703, "loss": 0.001, "num_input_tokens_seen": 202302896, "step": 93655 }, { "epoch": 15.278955954323001, "grad_norm": 0.02585562691092491, "learning_rate": 0.00016035665101621672, "loss": 0.1417, "num_input_tokens_seen": 202313840, "step": 93660 }, { "epoch": 15.279771615008157, "grad_norm": 0.09804775565862656, "learning_rate": 0.00016030441750794976, "loss": 0.0678, "num_input_tokens_seen": 202324080, "step": 93665 }, { "epoch": 15.280587275693312, "grad_norm": 0.05777350068092346, "learning_rate": 0.00016025219088404468, "loss": 0.0118, "num_input_tokens_seen": 202334960, "step": 93670 }, { "epoch": 15.281402936378466, "grad_norm": 0.009660948067903519, "learning_rate": 0.00016019997114555983, "loss": 0.0801, "num_input_tokens_seen": 202345648, "step": 93675 }, { "epoch": 15.282218597063622, "grad_norm": 0.0101390415802598, "learning_rate": 0.000160147758293554, "loss": 0.0055, "num_input_tokens_seen": 202357008, "step": 93680 }, { "epoch": 15.283034257748776, "grad_norm": 0.010949664749205112, "learning_rate": 0.00016009555232908456, "loss": 0.0042, "num_input_tokens_seen": 202367344, "step": 93685 }, { "epoch": 15.283849918433932, "grad_norm": 0.19166868925094604, "learning_rate": 0.00016004335325321033, "loss": 0.0067, "num_input_tokens_seen": 202378416, "step": 93690 }, { "epoch": 15.284665579119087, "grad_norm": 0.0036083634477108717, "learning_rate": 0.00015999116106698848, "loss": 0.0073, "num_input_tokens_seen": 202388848, "step": 93695 }, { "epoch": 15.285481239804241, "grad_norm": 0.47052520513534546, "learning_rate": 0.0001599389757714774, "loss": 0.1437, "num_input_tokens_seen": 202400048, "step": 93700 }, { "epoch": 15.286296900489397, "grad_norm": 0.000593140721321106, "learning_rate": 0.0001598867973677341, "loss": 0.0132, "num_input_tokens_seen": 202411216, "step": 93705 }, { "epoch": 15.28711256117455, "grad_norm": 0.05543004721403122, "learning_rate": 0.00015983462585681657, "loss": 0.006, "num_input_tokens_seen": 202421424, "step": 93710 }, { "epoch": 15.287928221859707, "grad_norm": 0.008918811567127705, "learning_rate": 0.00015978246123978158, "loss": 0.0038, "num_input_tokens_seen": 202433040, "step": 93715 }, { "epoch": 15.28874388254486, "grad_norm": 0.15539592504501343, "learning_rate": 0.0001597303035176869, "loss": 0.0061, "num_input_tokens_seen": 202443760, "step": 93720 }, { "epoch": 15.289559543230016, "grad_norm": 0.0010111124720424414, "learning_rate": 0.00015967815269158904, "loss": 0.001, "num_input_tokens_seen": 202454480, "step": 93725 }, { "epoch": 15.290375203915172, "grad_norm": 0.006419785786420107, "learning_rate": 0.0001596260087625454, "loss": 0.0028, "num_input_tokens_seen": 202464592, "step": 93730 }, { "epoch": 15.291190864600326, "grad_norm": 0.08302279561758041, "learning_rate": 0.0001595738717316122, "loss": 0.0084, "num_input_tokens_seen": 202476528, "step": 93735 }, { "epoch": 15.292006525285482, "grad_norm": 0.0035879616625607014, "learning_rate": 0.00015952174159984667, "loss": 0.0317, "num_input_tokens_seen": 202487440, "step": 93740 }, { "epoch": 15.292822185970635, "grad_norm": 0.08168449252843857, "learning_rate": 0.0001594696183683046, "loss": 0.0077, "num_input_tokens_seen": 202498192, "step": 93745 }, { "epoch": 15.293637846655791, "grad_norm": 0.035135045647621155, "learning_rate": 0.00015941750203804305, "loss": 0.0095, "num_input_tokens_seen": 202509616, "step": 93750 }, { "epoch": 15.294453507340947, "grad_norm": 0.08051912486553192, "learning_rate": 0.0001593653926101176, "loss": 0.0068, "num_input_tokens_seen": 202520816, "step": 93755 }, { "epoch": 15.2952691680261, "grad_norm": 0.007569537963718176, "learning_rate": 0.00015931329008558477, "loss": 0.0029, "num_input_tokens_seen": 202530928, "step": 93760 }, { "epoch": 15.296084828711257, "grad_norm": 0.22661983966827393, "learning_rate": 0.00015926119446550024, "loss": 0.0096, "num_input_tokens_seen": 202541648, "step": 93765 }, { "epoch": 15.29690048939641, "grad_norm": 0.0014906668802723289, "learning_rate": 0.0001592091057509199, "loss": 0.0074, "num_input_tokens_seen": 202553424, "step": 93770 }, { "epoch": 15.297716150081566, "grad_norm": 0.00238198135048151, "learning_rate": 0.00015915702394289933, "loss": 0.0035, "num_input_tokens_seen": 202564944, "step": 93775 }, { "epoch": 15.298531810766722, "grad_norm": 0.026480555534362793, "learning_rate": 0.00015910494904249411, "loss": 0.0063, "num_input_tokens_seen": 202575888, "step": 93780 }, { "epoch": 15.299347471451876, "grad_norm": 0.14808295667171478, "learning_rate": 0.0001590528810507595, "loss": 0.0071, "num_input_tokens_seen": 202586800, "step": 93785 }, { "epoch": 15.300163132137031, "grad_norm": 0.011909517459571362, "learning_rate": 0.00015900081996875082, "loss": 0.0037, "num_input_tokens_seen": 202597424, "step": 93790 }, { "epoch": 15.300978792822185, "grad_norm": 0.00829069223254919, "learning_rate": 0.0001589487657975231, "loss": 0.0125, "num_input_tokens_seen": 202608080, "step": 93795 }, { "epoch": 15.301794453507341, "grad_norm": 0.013668195344507694, "learning_rate": 0.00015889671853813126, "loss": 0.0029, "num_input_tokens_seen": 202618640, "step": 93800 }, { "epoch": 15.302610114192497, "grad_norm": 0.6986984610557556, "learning_rate": 0.0001588446781916302, "loss": 0.0505, "num_input_tokens_seen": 202629360, "step": 93805 }, { "epoch": 15.30342577487765, "grad_norm": 0.016121450811624527, "learning_rate": 0.00015879264475907447, "loss": 0.0112, "num_input_tokens_seen": 202641360, "step": 93810 }, { "epoch": 15.304241435562806, "grad_norm": 0.005392593797296286, "learning_rate": 0.00015874061824151865, "loss": 0.0046, "num_input_tokens_seen": 202651216, "step": 93815 }, { "epoch": 15.30505709624796, "grad_norm": 0.020854827016592026, "learning_rate": 0.00015868859864001693, "loss": 0.0045, "num_input_tokens_seen": 202663024, "step": 93820 }, { "epoch": 15.305872756933116, "grad_norm": 0.00112466502469033, "learning_rate": 0.00015863658595562414, "loss": 0.0123, "num_input_tokens_seen": 202675056, "step": 93825 }, { "epoch": 15.30668841761827, "grad_norm": 0.0506683811545372, "learning_rate": 0.00015858458018939365, "loss": 0.0066, "num_input_tokens_seen": 202685488, "step": 93830 }, { "epoch": 15.307504078303426, "grad_norm": 0.029055537655949593, "learning_rate": 0.00015853258134238007, "loss": 0.0053, "num_input_tokens_seen": 202697232, "step": 93835 }, { "epoch": 15.308319738988581, "grad_norm": 0.002884516492486, "learning_rate": 0.0001584805894156366, "loss": 0.0041, "num_input_tokens_seen": 202709008, "step": 93840 }, { "epoch": 15.309135399673735, "grad_norm": 0.0017596816178411245, "learning_rate": 0.0001584286044102175, "loss": 0.0086, "num_input_tokens_seen": 202720368, "step": 93845 }, { "epoch": 15.309951060358891, "grad_norm": 0.021234875544905663, "learning_rate": 0.00015837662632717575, "loss": 0.008, "num_input_tokens_seen": 202730448, "step": 93850 }, { "epoch": 15.310766721044045, "grad_norm": 0.017943846061825752, "learning_rate": 0.00015832465516756538, "loss": 0.0038, "num_input_tokens_seen": 202741552, "step": 93855 }, { "epoch": 15.3115823817292, "grad_norm": 0.2247970998287201, "learning_rate": 0.00015827269093243902, "loss": 0.0067, "num_input_tokens_seen": 202753168, "step": 93860 }, { "epoch": 15.312398042414356, "grad_norm": 0.004060364793986082, "learning_rate": 0.0001582207336228504, "loss": 0.0697, "num_input_tokens_seen": 202763760, "step": 93865 }, { "epoch": 15.31321370309951, "grad_norm": 0.030661238357424736, "learning_rate": 0.00015816878323985184, "loss": 0.0777, "num_input_tokens_seen": 202774448, "step": 93870 }, { "epoch": 15.314029363784666, "grad_norm": 0.004571146331727505, "learning_rate": 0.0001581168397844967, "loss": 0.005, "num_input_tokens_seen": 202785712, "step": 93875 }, { "epoch": 15.31484502446982, "grad_norm": 0.0031279721297323704, "learning_rate": 0.0001580649032578375, "loss": 0.0027, "num_input_tokens_seen": 202795248, "step": 93880 }, { "epoch": 15.315660685154976, "grad_norm": 0.015604222193360329, "learning_rate": 0.00015801297366092689, "loss": 0.0112, "num_input_tokens_seen": 202806288, "step": 93885 }, { "epoch": 15.31647634584013, "grad_norm": 0.09192720055580139, "learning_rate": 0.00015796105099481712, "loss": 0.0516, "num_input_tokens_seen": 202817872, "step": 93890 }, { "epoch": 15.317292006525285, "grad_norm": 0.005644198041409254, "learning_rate": 0.00015790913526056061, "loss": 0.0031, "num_input_tokens_seen": 202828080, "step": 93895 }, { "epoch": 15.318107667210441, "grad_norm": 0.3908234238624573, "learning_rate": 0.00015785722645920942, "loss": 0.0095, "num_input_tokens_seen": 202838192, "step": 93900 }, { "epoch": 15.318923327895595, "grad_norm": 0.7623684406280518, "learning_rate": 0.00015780532459181557, "loss": 0.0279, "num_input_tokens_seen": 202848976, "step": 93905 }, { "epoch": 15.31973898858075, "grad_norm": 0.012597830034792423, "learning_rate": 0.00015775342965943095, "loss": 0.0026, "num_input_tokens_seen": 202860560, "step": 93910 }, { "epoch": 15.320554649265905, "grad_norm": 0.0024378118105232716, "learning_rate": 0.00015770154166310724, "loss": 0.0787, "num_input_tokens_seen": 202872400, "step": 93915 }, { "epoch": 15.32137030995106, "grad_norm": 0.5856313109397888, "learning_rate": 0.00015764966060389602, "loss": 0.0115, "num_input_tokens_seen": 202883024, "step": 93920 }, { "epoch": 15.322185970636216, "grad_norm": 0.0010387571528553963, "learning_rate": 0.00015759778648284873, "loss": 0.0031, "num_input_tokens_seen": 202894416, "step": 93925 }, { "epoch": 15.32300163132137, "grad_norm": 0.3006853461265564, "learning_rate": 0.00015754591930101664, "loss": 0.1149, "num_input_tokens_seen": 202905360, "step": 93930 }, { "epoch": 15.323817292006526, "grad_norm": 0.06582503020763397, "learning_rate": 0.00015749405905945095, "loss": 0.0048, "num_input_tokens_seen": 202916240, "step": 93935 }, { "epoch": 15.32463295269168, "grad_norm": 0.007767208386212587, "learning_rate": 0.00015744220575920266, "loss": 0.0031, "num_input_tokens_seen": 202927920, "step": 93940 }, { "epoch": 15.325448613376835, "grad_norm": 0.014898211695253849, "learning_rate": 0.00015739035940132262, "loss": 0.0024, "num_input_tokens_seen": 202937840, "step": 93945 }, { "epoch": 15.326264274061991, "grad_norm": 0.022433992475271225, "learning_rate": 0.0001573385199868616, "loss": 0.0144, "num_input_tokens_seen": 202948912, "step": 93950 }, { "epoch": 15.327079934747145, "grad_norm": 0.008677488192915916, "learning_rate": 0.00015728668751687015, "loss": 0.0054, "num_input_tokens_seen": 202959344, "step": 93955 }, { "epoch": 15.3278955954323, "grad_norm": 0.0584244430065155, "learning_rate": 0.00015723486199239878, "loss": 0.0034, "num_input_tokens_seen": 202970736, "step": 93960 }, { "epoch": 15.328711256117455, "grad_norm": 0.009163172915577888, "learning_rate": 0.00015718304341449759, "loss": 0.0107, "num_input_tokens_seen": 202981776, "step": 93965 }, { "epoch": 15.32952691680261, "grad_norm": 0.00910011027008295, "learning_rate": 0.00015713123178421717, "loss": 0.0011, "num_input_tokens_seen": 202993328, "step": 93970 }, { "epoch": 15.330342577487766, "grad_norm": 0.029195060953497887, "learning_rate": 0.00015707942710260704, "loss": 0.0023, "num_input_tokens_seen": 203004336, "step": 93975 }, { "epoch": 15.33115823817292, "grad_norm": 0.01511499285697937, "learning_rate": 0.00015702762937071747, "loss": 0.006, "num_input_tokens_seen": 203014576, "step": 93980 }, { "epoch": 15.331973898858076, "grad_norm": 0.008337062783539295, "learning_rate": 0.00015697583858959813, "loss": 0.0034, "num_input_tokens_seen": 203025200, "step": 93985 }, { "epoch": 15.33278955954323, "grad_norm": 0.01428698655217886, "learning_rate": 0.00015692405476029853, "loss": 0.0026, "num_input_tokens_seen": 203036016, "step": 93990 }, { "epoch": 15.333605220228385, "grad_norm": 0.05316992104053497, "learning_rate": 0.00015687227788386822, "loss": 0.005, "num_input_tokens_seen": 203047248, "step": 93995 }, { "epoch": 15.33442088091354, "grad_norm": 0.0014572610380128026, "learning_rate": 0.00015682050796135644, "loss": 0.0065, "num_input_tokens_seen": 203057872, "step": 94000 }, { "epoch": 15.335236541598695, "grad_norm": 0.0003803163126576692, "learning_rate": 0.0001567687449938125, "loss": 0.0064, "num_input_tokens_seen": 203069136, "step": 94005 }, { "epoch": 15.33605220228385, "grad_norm": 0.0005980475689284503, "learning_rate": 0.0001567169889822853, "loss": 0.009, "num_input_tokens_seen": 203078576, "step": 94010 }, { "epoch": 15.336867862969005, "grad_norm": 0.0017918755766004324, "learning_rate": 0.00015666523992782384, "loss": 0.006, "num_input_tokens_seen": 203087888, "step": 94015 }, { "epoch": 15.33768352365416, "grad_norm": 0.017859825864434242, "learning_rate": 0.00015661349783147678, "loss": 0.0018, "num_input_tokens_seen": 203097904, "step": 94020 }, { "epoch": 15.338499184339314, "grad_norm": 0.0014235659036785364, "learning_rate": 0.00015656176269429283, "loss": 0.0008, "num_input_tokens_seen": 203109072, "step": 94025 }, { "epoch": 15.33931484502447, "grad_norm": 0.0009418278350494802, "learning_rate": 0.00015651003451732048, "loss": 0.0034, "num_input_tokens_seen": 203120208, "step": 94030 }, { "epoch": 15.340130505709626, "grad_norm": 0.008652381598949432, "learning_rate": 0.00015645831330160804, "loss": 0.0018, "num_input_tokens_seen": 203130928, "step": 94035 }, { "epoch": 15.34094616639478, "grad_norm": 0.00047669251216575503, "learning_rate": 0.00015640659904820364, "loss": 0.0016, "num_input_tokens_seen": 203141712, "step": 94040 }, { "epoch": 15.341761827079935, "grad_norm": 0.0017961309058591723, "learning_rate": 0.00015635489175815537, "loss": 0.0025, "num_input_tokens_seen": 203152688, "step": 94045 }, { "epoch": 15.34257748776509, "grad_norm": 0.006201804615557194, "learning_rate": 0.0001563031914325112, "loss": 0.0014, "num_input_tokens_seen": 203163408, "step": 94050 }, { "epoch": 15.343393148450245, "grad_norm": 0.019376035779714584, "learning_rate": 0.00015625149807231892, "loss": 0.0015, "num_input_tokens_seen": 203174320, "step": 94055 }, { "epoch": 15.3442088091354, "grad_norm": 0.0009080119198188186, "learning_rate": 0.00015619981167862602, "loss": 0.001, "num_input_tokens_seen": 203184816, "step": 94060 }, { "epoch": 15.345024469820554, "grad_norm": 0.015443303622305393, "learning_rate": 0.00015614813225248015, "loss": 0.0068, "num_input_tokens_seen": 203195056, "step": 94065 }, { "epoch": 15.34584013050571, "grad_norm": 0.00035426352405920625, "learning_rate": 0.00015609645979492855, "loss": 0.0052, "num_input_tokens_seen": 203206000, "step": 94070 }, { "epoch": 15.346655791190864, "grad_norm": 0.11414900422096252, "learning_rate": 0.00015604479430701845, "loss": 0.0065, "num_input_tokens_seen": 203217168, "step": 94075 }, { "epoch": 15.34747145187602, "grad_norm": 0.004541793372482061, "learning_rate": 0.00015599313578979696, "loss": 0.0018, "num_input_tokens_seen": 203227248, "step": 94080 }, { "epoch": 15.348287112561174, "grad_norm": 0.003625446930527687, "learning_rate": 0.00015594148424431076, "loss": 0.003, "num_input_tokens_seen": 203238672, "step": 94085 }, { "epoch": 15.34910277324633, "grad_norm": 0.011053141206502914, "learning_rate": 0.00015588983967160724, "loss": 0.0023, "num_input_tokens_seen": 203249552, "step": 94090 }, { "epoch": 15.349918433931485, "grad_norm": 0.016718612983822823, "learning_rate": 0.0001558382020727323, "loss": 0.002, "num_input_tokens_seen": 203259056, "step": 94095 }, { "epoch": 15.350734094616639, "grad_norm": 0.011769182980060577, "learning_rate": 0.00015578657144873316, "loss": 0.0025, "num_input_tokens_seen": 203269456, "step": 94100 }, { "epoch": 15.351549755301795, "grad_norm": 0.03749024122953415, "learning_rate": 0.00015573494780065543, "loss": 0.0023, "num_input_tokens_seen": 203278256, "step": 94105 }, { "epoch": 15.352365415986949, "grad_norm": 0.005377328023314476, "learning_rate": 0.00015568333112954592, "loss": 0.0017, "num_input_tokens_seen": 203288880, "step": 94110 }, { "epoch": 15.353181076672104, "grad_norm": 0.0017177603440359235, "learning_rate": 0.00015563172143645044, "loss": 0.008, "num_input_tokens_seen": 203298640, "step": 94115 }, { "epoch": 15.35399673735726, "grad_norm": 0.0025564224924892187, "learning_rate": 0.00015558011872241506, "loss": 0.0048, "num_input_tokens_seen": 203309168, "step": 94120 }, { "epoch": 15.354812398042414, "grad_norm": 0.588083028793335, "learning_rate": 0.00015552852298848546, "loss": 0.0598, "num_input_tokens_seen": 203320048, "step": 94125 }, { "epoch": 15.35562805872757, "grad_norm": 0.0020723820198327303, "learning_rate": 0.00015547693423570736, "loss": 0.0016, "num_input_tokens_seen": 203331408, "step": 94130 }, { "epoch": 15.356443719412724, "grad_norm": 0.0025758659467101097, "learning_rate": 0.00015542535246512623, "loss": 0.001, "num_input_tokens_seen": 203342928, "step": 94135 }, { "epoch": 15.35725938009788, "grad_norm": 0.019694067537784576, "learning_rate": 0.00015537377767778742, "loss": 0.0019, "num_input_tokens_seen": 203353552, "step": 94140 }, { "epoch": 15.358075040783035, "grad_norm": 0.008567390032112598, "learning_rate": 0.00015532220987473627, "loss": 0.1234, "num_input_tokens_seen": 203363280, "step": 94145 }, { "epoch": 15.358890701468189, "grad_norm": 0.002558504231274128, "learning_rate": 0.00015527064905701776, "loss": 0.0033, "num_input_tokens_seen": 203374288, "step": 94150 }, { "epoch": 15.359706362153345, "grad_norm": 0.0029807421378791332, "learning_rate": 0.00015521909522567685, "loss": 0.0323, "num_input_tokens_seen": 203385616, "step": 94155 }, { "epoch": 15.360522022838499, "grad_norm": 0.0011462070979177952, "learning_rate": 0.0001551675483817584, "loss": 0.0462, "num_input_tokens_seen": 203397104, "step": 94160 }, { "epoch": 15.361337683523654, "grad_norm": 0.00590652646496892, "learning_rate": 0.00015511600852630698, "loss": 0.002, "num_input_tokens_seen": 203408656, "step": 94165 }, { "epoch": 15.362153344208808, "grad_norm": 0.011698364280164242, "learning_rate": 0.0001550644756603672, "loss": 0.0012, "num_input_tokens_seen": 203419120, "step": 94170 }, { "epoch": 15.362969004893964, "grad_norm": 0.0006024792673997581, "learning_rate": 0.00015501294978498344, "loss": 0.0015, "num_input_tokens_seen": 203430960, "step": 94175 }, { "epoch": 15.36378466557912, "grad_norm": 0.003224707907065749, "learning_rate": 0.0001549614309011998, "loss": 0.0043, "num_input_tokens_seen": 203441616, "step": 94180 }, { "epoch": 15.364600326264274, "grad_norm": 0.01984231546521187, "learning_rate": 0.00015490991901006052, "loss": 0.0039, "num_input_tokens_seen": 203453008, "step": 94185 }, { "epoch": 15.36541598694943, "grad_norm": 0.0009202081128023565, "learning_rate": 0.00015485841411260937, "loss": 0.0083, "num_input_tokens_seen": 203463184, "step": 94190 }, { "epoch": 15.366231647634583, "grad_norm": 0.0021524764597415924, "learning_rate": 0.00015480691620989062, "loss": 0.0013, "num_input_tokens_seen": 203473936, "step": 94195 }, { "epoch": 15.367047308319739, "grad_norm": 0.016492361202836037, "learning_rate": 0.00015475542530294728, "loss": 0.002, "num_input_tokens_seen": 203485136, "step": 94200 }, { "epoch": 15.367862969004895, "grad_norm": 0.02251831255853176, "learning_rate": 0.00015470394139282357, "loss": 0.0083, "num_input_tokens_seen": 203495760, "step": 94205 }, { "epoch": 15.368678629690049, "grad_norm": 0.004201612900942564, "learning_rate": 0.0001546524644805622, "loss": 0.0012, "num_input_tokens_seen": 203507472, "step": 94210 }, { "epoch": 15.369494290375204, "grad_norm": 0.001497769495472312, "learning_rate": 0.00015460099456720706, "loss": 0.0052, "num_input_tokens_seen": 203518352, "step": 94215 }, { "epoch": 15.370309951060358, "grad_norm": 0.0014730320544913411, "learning_rate": 0.0001545495316538006, "loss": 0.0016, "num_input_tokens_seen": 203526960, "step": 94220 }, { "epoch": 15.371125611745514, "grad_norm": 0.04781882092356682, "learning_rate": 0.0001544980757413864, "loss": 0.0025, "num_input_tokens_seen": 203538448, "step": 94225 }, { "epoch": 15.37194127243067, "grad_norm": 0.0022414957638829947, "learning_rate": 0.00015444662683100676, "loss": 0.0025, "num_input_tokens_seen": 203548560, "step": 94230 }, { "epoch": 15.372756933115824, "grad_norm": 0.0006568465032614768, "learning_rate": 0.00015439518492370486, "loss": 0.0023, "num_input_tokens_seen": 203558640, "step": 94235 }, { "epoch": 15.37357259380098, "grad_norm": 0.0073090302757918835, "learning_rate": 0.00015434375002052264, "loss": 0.0033, "num_input_tokens_seen": 203569424, "step": 94240 }, { "epoch": 15.374388254486133, "grad_norm": 0.002609936287626624, "learning_rate": 0.00015429232212250317, "loss": 0.0027, "num_input_tokens_seen": 203579408, "step": 94245 }, { "epoch": 15.375203915171289, "grad_norm": 0.0006220395443961024, "learning_rate": 0.00015424090123068802, "loss": 0.0039, "num_input_tokens_seen": 203589840, "step": 94250 }, { "epoch": 15.376019575856443, "grad_norm": 0.016170065850019455, "learning_rate": 0.00015418948734611976, "loss": 0.0371, "num_input_tokens_seen": 203600208, "step": 94255 }, { "epoch": 15.376835236541599, "grad_norm": 0.0014452317263931036, "learning_rate": 0.0001541380804698403, "loss": 0.0013, "num_input_tokens_seen": 203611696, "step": 94260 }, { "epoch": 15.377650897226754, "grad_norm": 0.001372209400869906, "learning_rate": 0.00015408668060289132, "loss": 0.002, "num_input_tokens_seen": 203621648, "step": 94265 }, { "epoch": 15.378466557911908, "grad_norm": 0.0006071311072446406, "learning_rate": 0.00015403528774631463, "loss": 0.0017, "num_input_tokens_seen": 203632080, "step": 94270 }, { "epoch": 15.379282218597064, "grad_norm": 0.03576105460524559, "learning_rate": 0.00015398390190115175, "loss": 0.1296, "num_input_tokens_seen": 203642800, "step": 94275 }, { "epoch": 15.380097879282218, "grad_norm": 0.0016932595754042268, "learning_rate": 0.00015393252306844402, "loss": 0.0318, "num_input_tokens_seen": 203654160, "step": 94280 }, { "epoch": 15.380913539967374, "grad_norm": 0.06304704397916794, "learning_rate": 0.00015388115124923267, "loss": 0.1331, "num_input_tokens_seen": 203665040, "step": 94285 }, { "epoch": 15.38172920065253, "grad_norm": 0.016461336985230446, "learning_rate": 0.00015382978644455896, "loss": 0.0363, "num_input_tokens_seen": 203675024, "step": 94290 }, { "epoch": 15.382544861337683, "grad_norm": 0.011744924820959568, "learning_rate": 0.00015377842865546372, "loss": 0.0041, "num_input_tokens_seen": 203685616, "step": 94295 }, { "epoch": 15.383360522022839, "grad_norm": 0.10838112235069275, "learning_rate": 0.0001537270778829879, "loss": 0.0064, "num_input_tokens_seen": 203696944, "step": 94300 }, { "epoch": 15.384176182707993, "grad_norm": 0.6037494540214539, "learning_rate": 0.00015367573412817186, "loss": 0.0831, "num_input_tokens_seen": 203707344, "step": 94305 }, { "epoch": 15.384991843393149, "grad_norm": 0.0010668321046978235, "learning_rate": 0.0001536243973920568, "loss": 0.0014, "num_input_tokens_seen": 203717648, "step": 94310 }, { "epoch": 15.385807504078304, "grad_norm": 0.002049540402367711, "learning_rate": 0.00015357306767568242, "loss": 0.0023, "num_input_tokens_seen": 203728560, "step": 94315 }, { "epoch": 15.386623164763458, "grad_norm": 0.006294840015470982, "learning_rate": 0.00015352174498008963, "loss": 0.0027, "num_input_tokens_seen": 203740080, "step": 94320 }, { "epoch": 15.387438825448614, "grad_norm": 0.01672750897705555, "learning_rate": 0.00015347042930631788, "loss": 0.0012, "num_input_tokens_seen": 203751472, "step": 94325 }, { "epoch": 15.388254486133768, "grad_norm": 0.008858496323227882, "learning_rate": 0.0001534191206554078, "loss": 0.0023, "num_input_tokens_seen": 203763376, "step": 94330 }, { "epoch": 15.389070146818923, "grad_norm": 0.02900371141731739, "learning_rate": 0.00015336781902839858, "loss": 0.0019, "num_input_tokens_seen": 203774320, "step": 94335 }, { "epoch": 15.38988580750408, "grad_norm": 0.001398382824845612, "learning_rate": 0.00015331652442633053, "loss": 0.0014, "num_input_tokens_seen": 203785136, "step": 94340 }, { "epoch": 15.390701468189233, "grad_norm": 0.03610919788479805, "learning_rate": 0.00015326523685024263, "loss": 0.013, "num_input_tokens_seen": 203795280, "step": 94345 }, { "epoch": 15.391517128874389, "grad_norm": 0.006073774769902229, "learning_rate": 0.0001532139563011749, "loss": 0.0009, "num_input_tokens_seen": 203806000, "step": 94350 }, { "epoch": 15.392332789559543, "grad_norm": 0.0019028345122933388, "learning_rate": 0.00015316268278016594, "loss": 0.0019, "num_input_tokens_seen": 203816816, "step": 94355 }, { "epoch": 15.393148450244698, "grad_norm": 0.0005348801496438682, "learning_rate": 0.00015311141628825554, "loss": 0.0038, "num_input_tokens_seen": 203827472, "step": 94360 }, { "epoch": 15.393964110929852, "grad_norm": 0.0017356444150209427, "learning_rate": 0.000153060156826482, "loss": 0.0012, "num_input_tokens_seen": 203837680, "step": 94365 }, { "epoch": 15.394779771615008, "grad_norm": 0.03037400357425213, "learning_rate": 0.0001530089043958849, "loss": 0.0235, "num_input_tokens_seen": 203847760, "step": 94370 }, { "epoch": 15.395595432300164, "grad_norm": 0.008682015351951122, "learning_rate": 0.00015295765899750214, "loss": 0.002, "num_input_tokens_seen": 203857744, "step": 94375 }, { "epoch": 15.396411092985318, "grad_norm": 0.029603829607367516, "learning_rate": 0.00015290642063237302, "loss": 0.0033, "num_input_tokens_seen": 203868720, "step": 94380 }, { "epoch": 15.397226753670473, "grad_norm": 0.0024776794016361237, "learning_rate": 0.0001528551893015353, "loss": 0.0895, "num_input_tokens_seen": 203879248, "step": 94385 }, { "epoch": 15.398042414355627, "grad_norm": 0.002934439340606332, "learning_rate": 0.00015280396500602783, "loss": 0.0011, "num_input_tokens_seen": 203889296, "step": 94390 }, { "epoch": 15.398858075040783, "grad_norm": 0.028770821169018745, "learning_rate": 0.00015275274774688817, "loss": 0.0012, "num_input_tokens_seen": 203900848, "step": 94395 }, { "epoch": 15.399673735725939, "grad_norm": 0.11703412979841232, "learning_rate": 0.00015270153752515474, "loss": 0.0099, "num_input_tokens_seen": 203911408, "step": 94400 }, { "epoch": 15.400489396411093, "grad_norm": 0.019773095846176147, "learning_rate": 0.00015265033434186525, "loss": 0.0271, "num_input_tokens_seen": 203923184, "step": 94405 }, { "epoch": 15.401305057096248, "grad_norm": 0.005440262146294117, "learning_rate": 0.00015259913819805736, "loss": 0.0019, "num_input_tokens_seen": 203933552, "step": 94410 }, { "epoch": 15.402120717781402, "grad_norm": 0.011708968318998814, "learning_rate": 0.0001525479490947687, "loss": 0.006, "num_input_tokens_seen": 203944752, "step": 94415 }, { "epoch": 15.402936378466558, "grad_norm": 0.0032694439869374037, "learning_rate": 0.00015249676703303654, "loss": 0.001, "num_input_tokens_seen": 203955824, "step": 94420 }, { "epoch": 15.403752039151712, "grad_norm": 0.0076155886054039, "learning_rate": 0.0001524455920138983, "loss": 0.0165, "num_input_tokens_seen": 203965456, "step": 94425 }, { "epoch": 15.404567699836868, "grad_norm": 0.0033928006887435913, "learning_rate": 0.00015239442403839105, "loss": 0.0024, "num_input_tokens_seen": 203975536, "step": 94430 }, { "epoch": 15.405383360522023, "grad_norm": 0.4387916922569275, "learning_rate": 0.0001523432631075517, "loss": 0.1567, "num_input_tokens_seen": 203986256, "step": 94435 }, { "epoch": 15.406199021207177, "grad_norm": 0.0012723475228995085, "learning_rate": 0.00015229210922241721, "loss": 0.1107, "num_input_tokens_seen": 203997040, "step": 94440 }, { "epoch": 15.407014681892333, "grad_norm": 0.004824480973184109, "learning_rate": 0.0001522409623840242, "loss": 0.0104, "num_input_tokens_seen": 204007920, "step": 94445 }, { "epoch": 15.407830342577487, "grad_norm": 0.0012717196950688958, "learning_rate": 0.00015218982259340908, "loss": 0.0067, "num_input_tokens_seen": 204018448, "step": 94450 }, { "epoch": 15.408646003262643, "grad_norm": 0.07703101634979248, "learning_rate": 0.0001521386898516088, "loss": 0.0332, "num_input_tokens_seen": 204028752, "step": 94455 }, { "epoch": 15.409461663947798, "grad_norm": 0.07954272627830505, "learning_rate": 0.0001520875641596589, "loss": 0.0054, "num_input_tokens_seen": 204039344, "step": 94460 }, { "epoch": 15.410277324632952, "grad_norm": 0.008622833527624607, "learning_rate": 0.0001520364455185962, "loss": 0.029, "num_input_tokens_seen": 204051376, "step": 94465 }, { "epoch": 15.411092985318108, "grad_norm": 0.2871306538581848, "learning_rate": 0.00015198533392945602, "loss": 0.019, "num_input_tokens_seen": 204062672, "step": 94470 }, { "epoch": 15.411908646003262, "grad_norm": 0.531629741191864, "learning_rate": 0.00015193422939327488, "loss": 0.2146, "num_input_tokens_seen": 204073360, "step": 94475 }, { "epoch": 15.412724306688418, "grad_norm": 0.42784038186073303, "learning_rate": 0.00015188313191108783, "loss": 0.0204, "num_input_tokens_seen": 204084592, "step": 94480 }, { "epoch": 15.413539967373573, "grad_norm": 0.07477138191461563, "learning_rate": 0.00015183204148393103, "loss": 0.0222, "num_input_tokens_seen": 204095696, "step": 94485 }, { "epoch": 15.414355628058727, "grad_norm": 0.0180678591132164, "learning_rate": 0.00015178095811283927, "loss": 0.0026, "num_input_tokens_seen": 204106864, "step": 94490 }, { "epoch": 15.415171288743883, "grad_norm": 0.04884007200598717, "learning_rate": 0.00015172988179884846, "loss": 0.0038, "num_input_tokens_seen": 204118896, "step": 94495 }, { "epoch": 15.415986949429037, "grad_norm": 0.00031529387342743576, "learning_rate": 0.0001516788125429931, "loss": 0.0027, "num_input_tokens_seen": 204128016, "step": 94500 }, { "epoch": 15.416802610114193, "grad_norm": 0.2522253394126892, "learning_rate": 0.0001516277503463086, "loss": 0.0153, "num_input_tokens_seen": 204138672, "step": 94505 }, { "epoch": 15.417618270799348, "grad_norm": 0.03922179341316223, "learning_rate": 0.00015157669520982975, "loss": 0.0071, "num_input_tokens_seen": 204149840, "step": 94510 }, { "epoch": 15.418433931484502, "grad_norm": 0.007353988941758871, "learning_rate": 0.0001515256471345911, "loss": 0.0041, "num_input_tokens_seen": 204159664, "step": 94515 }, { "epoch": 15.419249592169658, "grad_norm": 0.005019092466682196, "learning_rate": 0.00015147460612162733, "loss": 0.0016, "num_input_tokens_seen": 204171056, "step": 94520 }, { "epoch": 15.420065252854812, "grad_norm": 0.03900919482111931, "learning_rate": 0.00015142357217197278, "loss": 0.0064, "num_input_tokens_seen": 204181296, "step": 94525 }, { "epoch": 15.420880913539968, "grad_norm": 0.05264601483941078, "learning_rate": 0.00015137254528666178, "loss": 0.0039, "num_input_tokens_seen": 204192304, "step": 94530 }, { "epoch": 15.421696574225122, "grad_norm": 0.08832412958145142, "learning_rate": 0.0001513215254667284, "loss": 0.0091, "num_input_tokens_seen": 204203536, "step": 94535 }, { "epoch": 15.422512234910277, "grad_norm": 0.0050577265210449696, "learning_rate": 0.00015127051271320664, "loss": 0.0023, "num_input_tokens_seen": 204214928, "step": 94540 }, { "epoch": 15.423327895595433, "grad_norm": 0.0012761307880282402, "learning_rate": 0.00015121950702713029, "loss": 0.0014, "num_input_tokens_seen": 204226000, "step": 94545 }, { "epoch": 15.424143556280587, "grad_norm": 0.0036849654279649258, "learning_rate": 0.00015116850840953311, "loss": 0.0026, "num_input_tokens_seen": 204237200, "step": 94550 }, { "epoch": 15.424959216965743, "grad_norm": 0.0011683054035529494, "learning_rate": 0.00015111751686144864, "loss": 0.0022, "num_input_tokens_seen": 204248144, "step": 94555 }, { "epoch": 15.425774877650896, "grad_norm": 0.10220471024513245, "learning_rate": 0.00015106653238391028, "loss": 0.0118, "num_input_tokens_seen": 204258768, "step": 94560 }, { "epoch": 15.426590538336052, "grad_norm": 0.0024899379350245, "learning_rate": 0.00015101555497795127, "loss": 0.0048, "num_input_tokens_seen": 204268496, "step": 94565 }, { "epoch": 15.427406199021208, "grad_norm": 0.0045095449313521385, "learning_rate": 0.00015096458464460482, "loss": 0.0019, "num_input_tokens_seen": 204278864, "step": 94570 }, { "epoch": 15.428221859706362, "grad_norm": 0.01300779264420271, "learning_rate": 0.0001509136213849038, "loss": 0.0017, "num_input_tokens_seen": 204289648, "step": 94575 }, { "epoch": 15.429037520391518, "grad_norm": 0.0015890076756477356, "learning_rate": 0.00015086266519988108, "loss": 0.0153, "num_input_tokens_seen": 204300528, "step": 94580 }, { "epoch": 15.429853181076671, "grad_norm": 0.002844614442437887, "learning_rate": 0.00015081171609056937, "loss": 0.0044, "num_input_tokens_seen": 204310896, "step": 94585 }, { "epoch": 15.430668841761827, "grad_norm": 0.0012606673408299685, "learning_rate": 0.00015076077405800126, "loss": 0.0015, "num_input_tokens_seen": 204322096, "step": 94590 }, { "epoch": 15.431484502446983, "grad_norm": 0.0005698453169316053, "learning_rate": 0.0001507098391032089, "loss": 0.0008, "num_input_tokens_seen": 204333616, "step": 94595 }, { "epoch": 15.432300163132137, "grad_norm": 0.024903813377022743, "learning_rate": 0.00015065891122722507, "loss": 0.0044, "num_input_tokens_seen": 204343824, "step": 94600 }, { "epoch": 15.433115823817293, "grad_norm": 0.005087335593998432, "learning_rate": 0.00015060799043108126, "loss": 0.0015, "num_input_tokens_seen": 204354128, "step": 94605 }, { "epoch": 15.433931484502446, "grad_norm": 0.0013004738138988614, "learning_rate": 0.00015055707671581008, "loss": 0.0059, "num_input_tokens_seen": 204365008, "step": 94610 }, { "epoch": 15.434747145187602, "grad_norm": 0.006157164927572012, "learning_rate": 0.00015050617008244272, "loss": 0.0014, "num_input_tokens_seen": 204376368, "step": 94615 }, { "epoch": 15.435562805872756, "grad_norm": 0.0021772703621536493, "learning_rate": 0.00015045527053201137, "loss": 0.0025, "num_input_tokens_seen": 204386288, "step": 94620 }, { "epoch": 15.436378466557912, "grad_norm": 0.020397908985614777, "learning_rate": 0.00015040437806554735, "loss": 0.1338, "num_input_tokens_seen": 204396784, "step": 94625 }, { "epoch": 15.437194127243067, "grad_norm": 0.004979412537068129, "learning_rate": 0.00015035349268408216, "loss": 0.0911, "num_input_tokens_seen": 204407184, "step": 94630 }, { "epoch": 15.438009787928221, "grad_norm": 0.009470396675169468, "learning_rate": 0.00015030261438864694, "loss": 0.0013, "num_input_tokens_seen": 204416400, "step": 94635 }, { "epoch": 15.438825448613377, "grad_norm": 0.0030095677357167006, "learning_rate": 0.0001502517431802729, "loss": 0.0014, "num_input_tokens_seen": 204426256, "step": 94640 }, { "epoch": 15.439641109298531, "grad_norm": 0.041119664907455444, "learning_rate": 0.00015020087905999097, "loss": 0.0014, "num_input_tokens_seen": 204437424, "step": 94645 }, { "epoch": 15.440456769983687, "grad_norm": 0.008877326734364033, "learning_rate": 0.00015015002202883193, "loss": 0.0063, "num_input_tokens_seen": 204446800, "step": 94650 }, { "epoch": 15.441272430668842, "grad_norm": 0.07724998146295547, "learning_rate": 0.00015009917208782657, "loss": 0.017, "num_input_tokens_seen": 204458576, "step": 94655 }, { "epoch": 15.442088091353996, "grad_norm": 0.005192750133574009, "learning_rate": 0.00015004832923800533, "loss": 0.0254, "num_input_tokens_seen": 204468752, "step": 94660 }, { "epoch": 15.442903752039152, "grad_norm": 0.010963196866214275, "learning_rate": 0.00014999749348039866, "loss": 0.0034, "num_input_tokens_seen": 204479024, "step": 94665 }, { "epoch": 15.443719412724306, "grad_norm": 0.07558204233646393, "learning_rate": 0.0001499466648160368, "loss": 0.0059, "num_input_tokens_seen": 204489744, "step": 94670 }, { "epoch": 15.444535073409462, "grad_norm": 0.005794727709144354, "learning_rate": 0.00014989584324594986, "loss": 0.0898, "num_input_tokens_seen": 204500592, "step": 94675 }, { "epoch": 15.445350734094617, "grad_norm": 0.005403991788625717, "learning_rate": 0.00014984502877116773, "loss": 0.0019, "num_input_tokens_seen": 204510960, "step": 94680 }, { "epoch": 15.446166394779771, "grad_norm": 0.018055513501167297, "learning_rate": 0.00014979422139272037, "loss": 0.0028, "num_input_tokens_seen": 204521776, "step": 94685 }, { "epoch": 15.446982055464927, "grad_norm": 0.01070409920066595, "learning_rate": 0.00014974342111163735, "loss": 0.0025, "num_input_tokens_seen": 204530896, "step": 94690 }, { "epoch": 15.447797716150081, "grad_norm": 0.09509050846099854, "learning_rate": 0.00014969262792894822, "loss": 0.0031, "num_input_tokens_seen": 204541648, "step": 94695 }, { "epoch": 15.448613376835237, "grad_norm": 0.05279732868075371, "learning_rate": 0.0001496418418456824, "loss": 0.0027, "num_input_tokens_seen": 204552784, "step": 94700 }, { "epoch": 15.449429037520392, "grad_norm": 0.0030731274746358395, "learning_rate": 0.0001495910628628691, "loss": 0.0572, "num_input_tokens_seen": 204564016, "step": 94705 }, { "epoch": 15.450244698205546, "grad_norm": 0.0014273212291300297, "learning_rate": 0.00014954029098153748, "loss": 0.0031, "num_input_tokens_seen": 204574032, "step": 94710 }, { "epoch": 15.451060358890702, "grad_norm": 0.001934555359184742, "learning_rate": 0.00014948952620271643, "loss": 0.0017, "num_input_tokens_seen": 204585552, "step": 94715 }, { "epoch": 15.451876019575856, "grad_norm": 0.0016809795051813126, "learning_rate": 0.00014943876852743475, "loss": 0.0012, "num_input_tokens_seen": 204596848, "step": 94720 }, { "epoch": 15.452691680261012, "grad_norm": 0.0013799264561384916, "learning_rate": 0.00014938801795672102, "loss": 0.0047, "num_input_tokens_seen": 204607792, "step": 94725 }, { "epoch": 15.453507340946166, "grad_norm": 0.02855961211025715, "learning_rate": 0.00014933727449160423, "loss": 0.0382, "num_input_tokens_seen": 204619184, "step": 94730 }, { "epoch": 15.454323001631321, "grad_norm": 0.013205167837440968, "learning_rate": 0.00014928653813311204, "loss": 0.003, "num_input_tokens_seen": 204629584, "step": 94735 }, { "epoch": 15.455138662316477, "grad_norm": 0.0069459774531424046, "learning_rate": 0.00014923580888227329, "loss": 0.0044, "num_input_tokens_seen": 204641424, "step": 94740 }, { "epoch": 15.455954323001631, "grad_norm": 0.004139984026551247, "learning_rate": 0.00014918508674011582, "loss": 0.0041, "num_input_tokens_seen": 204652208, "step": 94745 }, { "epoch": 15.456769983686787, "grad_norm": 0.004607068374752998, "learning_rate": 0.0001491343717076676, "loss": 0.0024, "num_input_tokens_seen": 204662608, "step": 94750 }, { "epoch": 15.45758564437194, "grad_norm": 1.534938931465149, "learning_rate": 0.00014908366378595645, "loss": 0.0989, "num_input_tokens_seen": 204674448, "step": 94755 }, { "epoch": 15.458401305057096, "grad_norm": 0.002478801878169179, "learning_rate": 0.00014903296297601, "loss": 0.0011, "num_input_tokens_seen": 204686096, "step": 94760 }, { "epoch": 15.459216965742252, "grad_norm": 0.010276932269334793, "learning_rate": 0.00014898226927885584, "loss": 0.0018, "num_input_tokens_seen": 204696048, "step": 94765 }, { "epoch": 15.460032626427406, "grad_norm": 0.0010761553421616554, "learning_rate": 0.00014893158269552127, "loss": 0.0016, "num_input_tokens_seen": 204706704, "step": 94770 }, { "epoch": 15.460848287112562, "grad_norm": 0.02356075681746006, "learning_rate": 0.00014888090322703353, "loss": 0.0051, "num_input_tokens_seen": 204716144, "step": 94775 }, { "epoch": 15.461663947797716, "grad_norm": 0.023741189390420914, "learning_rate": 0.00014883023087441965, "loss": 0.0028, "num_input_tokens_seen": 204727984, "step": 94780 }, { "epoch": 15.462479608482871, "grad_norm": 0.5104894042015076, "learning_rate": 0.0001487795656387067, "loss": 0.0932, "num_input_tokens_seen": 204738960, "step": 94785 }, { "epoch": 15.463295269168025, "grad_norm": 0.006746853701770306, "learning_rate": 0.00014872890752092144, "loss": 0.0011, "num_input_tokens_seen": 204749456, "step": 94790 }, { "epoch": 15.464110929853181, "grad_norm": 0.023834139108657837, "learning_rate": 0.00014867825652209045, "loss": 0.0064, "num_input_tokens_seen": 204760304, "step": 94795 }, { "epoch": 15.464926590538337, "grad_norm": 0.002329958835616708, "learning_rate": 0.00014862761264324025, "loss": 0.0008, "num_input_tokens_seen": 204771984, "step": 94800 }, { "epoch": 15.46574225122349, "grad_norm": 0.007659323513507843, "learning_rate": 0.00014857697588539727, "loss": 0.0029, "num_input_tokens_seen": 204782640, "step": 94805 }, { "epoch": 15.466557911908646, "grad_norm": 0.0002704902726691216, "learning_rate": 0.00014852634624958766, "loss": 0.0101, "num_input_tokens_seen": 204791632, "step": 94810 }, { "epoch": 15.4673735725938, "grad_norm": 0.015242592431604862, "learning_rate": 0.00014847572373683749, "loss": 0.0133, "num_input_tokens_seen": 204804080, "step": 94815 }, { "epoch": 15.468189233278956, "grad_norm": 0.006385531276464462, "learning_rate": 0.00014842510834817274, "loss": 0.0027, "num_input_tokens_seen": 204814672, "step": 94820 }, { "epoch": 15.469004893964112, "grad_norm": 0.005965860094875097, "learning_rate": 0.00014837450008461922, "loss": 0.0024, "num_input_tokens_seen": 204825552, "step": 94825 }, { "epoch": 15.469820554649266, "grad_norm": 0.002931939670816064, "learning_rate": 0.00014832389894720233, "loss": 0.001, "num_input_tokens_seen": 204835280, "step": 94830 }, { "epoch": 15.470636215334421, "grad_norm": 0.0005909419851377606, "learning_rate": 0.00014827330493694807, "loss": 0.0078, "num_input_tokens_seen": 204844752, "step": 94835 }, { "epoch": 15.471451876019575, "grad_norm": 0.0040916381403803825, "learning_rate": 0.0001482227180548812, "loss": 0.004, "num_input_tokens_seen": 204855472, "step": 94840 }, { "epoch": 15.47226753670473, "grad_norm": 0.006552472244948149, "learning_rate": 0.00014817213830202748, "loss": 0.008, "num_input_tokens_seen": 204866832, "step": 94845 }, { "epoch": 15.473083197389887, "grad_norm": 0.00648926105350256, "learning_rate": 0.00014812156567941143, "loss": 0.0004, "num_input_tokens_seen": 204877712, "step": 94850 }, { "epoch": 15.47389885807504, "grad_norm": 0.2792307734489441, "learning_rate": 0.00014807100018805853, "loss": 0.0058, "num_input_tokens_seen": 204887696, "step": 94855 }, { "epoch": 15.474714518760196, "grad_norm": 0.017968228086829185, "learning_rate": 0.00014802044182899294, "loss": 0.0026, "num_input_tokens_seen": 204899632, "step": 94860 }, { "epoch": 15.47553017944535, "grad_norm": 0.00422089034691453, "learning_rate": 0.00014796989060323997, "loss": 0.0009, "num_input_tokens_seen": 204910000, "step": 94865 }, { "epoch": 15.476345840130506, "grad_norm": 0.0017101641278713942, "learning_rate": 0.00014791934651182338, "loss": 0.0653, "num_input_tokens_seen": 204921168, "step": 94870 }, { "epoch": 15.477161500815662, "grad_norm": 0.0047962963581085205, "learning_rate": 0.0001478688095557682, "loss": 0.0013, "num_input_tokens_seen": 204930704, "step": 94875 }, { "epoch": 15.477977161500815, "grad_norm": 0.02295534871518612, "learning_rate": 0.00014781827973609803, "loss": 0.027, "num_input_tokens_seen": 204941872, "step": 94880 }, { "epoch": 15.478792822185971, "grad_norm": 0.008626680821180344, "learning_rate": 0.00014776775705383733, "loss": 0.0009, "num_input_tokens_seen": 204952208, "step": 94885 }, { "epoch": 15.479608482871125, "grad_norm": 0.0017688804073259234, "learning_rate": 0.00014771724151000986, "loss": 0.0034, "num_input_tokens_seen": 204963728, "step": 94890 }, { "epoch": 15.48042414355628, "grad_norm": 0.019344795495271683, "learning_rate": 0.00014766673310563945, "loss": 0.0008, "num_input_tokens_seen": 204975440, "step": 94895 }, { "epoch": 15.481239804241435, "grad_norm": 0.013398679904639721, "learning_rate": 0.0001476162318417496, "loss": 0.0015, "num_input_tokens_seen": 204986768, "step": 94900 }, { "epoch": 15.48205546492659, "grad_norm": 0.05068299174308777, "learning_rate": 0.00014756573771936382, "loss": 0.0055, "num_input_tokens_seen": 204998064, "step": 94905 }, { "epoch": 15.482871125611746, "grad_norm": 0.002898820675909519, "learning_rate": 0.0001475152507395055, "loss": 0.0008, "num_input_tokens_seen": 205008304, "step": 94910 }, { "epoch": 15.4836867862969, "grad_norm": 0.016531746834516525, "learning_rate": 0.00014746477090319781, "loss": 0.0072, "num_input_tokens_seen": 205018192, "step": 94915 }, { "epoch": 15.484502446982056, "grad_norm": 0.06572934985160828, "learning_rate": 0.00014741429821146375, "loss": 0.0037, "num_input_tokens_seen": 205028816, "step": 94920 }, { "epoch": 15.48531810766721, "grad_norm": 0.001525534433312714, "learning_rate": 0.00014736383266532622, "loss": 0.0006, "num_input_tokens_seen": 205039920, "step": 94925 }, { "epoch": 15.486133768352365, "grad_norm": 0.0032828834373503923, "learning_rate": 0.00014731337426580792, "loss": 0.0048, "num_input_tokens_seen": 205050032, "step": 94930 }, { "epoch": 15.486949429037521, "grad_norm": 0.01835629530251026, "learning_rate": 0.0001472629230139314, "loss": 0.0046, "num_input_tokens_seen": 205061968, "step": 94935 }, { "epoch": 15.487765089722675, "grad_norm": 0.007460998836904764, "learning_rate": 0.00014721247891071954, "loss": 0.0015, "num_input_tokens_seen": 205072976, "step": 94940 }, { "epoch": 15.48858075040783, "grad_norm": 0.0013818942243233323, "learning_rate": 0.00014716204195719396, "loss": 0.0032, "num_input_tokens_seen": 205082832, "step": 94945 }, { "epoch": 15.489396411092985, "grad_norm": 0.008082177489995956, "learning_rate": 0.00014711161215437757, "loss": 0.0025, "num_input_tokens_seen": 205094736, "step": 94950 }, { "epoch": 15.49021207177814, "grad_norm": 0.0091333519667387, "learning_rate": 0.00014706118950329173, "loss": 0.0014, "num_input_tokens_seen": 205105616, "step": 94955 }, { "epoch": 15.491027732463296, "grad_norm": 0.001196563825942576, "learning_rate": 0.00014701077400495894, "loss": 0.0662, "num_input_tokens_seen": 205115312, "step": 94960 }, { "epoch": 15.49184339314845, "grad_norm": 0.0008588659111410379, "learning_rate": 0.00014696036566040028, "loss": 0.0265, "num_input_tokens_seen": 205126800, "step": 94965 }, { "epoch": 15.492659053833606, "grad_norm": 0.00046497659059241414, "learning_rate": 0.00014690996447063798, "loss": 0.0014, "num_input_tokens_seen": 205137808, "step": 94970 }, { "epoch": 15.49347471451876, "grad_norm": 0.001684483140707016, "learning_rate": 0.00014685957043669283, "loss": 0.0017, "num_input_tokens_seen": 205148560, "step": 94975 }, { "epoch": 15.494290375203915, "grad_norm": 0.08418180793523788, "learning_rate": 0.00014680918355958683, "loss": 0.0681, "num_input_tokens_seen": 205160080, "step": 94980 }, { "epoch": 15.49510603588907, "grad_norm": 0.0057790386490523815, "learning_rate": 0.00014675880384034046, "loss": 0.0023, "num_input_tokens_seen": 205170544, "step": 94985 }, { "epoch": 15.495921696574225, "grad_norm": 0.001607961137779057, "learning_rate": 0.00014670843127997542, "loss": 0.0033, "num_input_tokens_seen": 205181712, "step": 94990 }, { "epoch": 15.49673735725938, "grad_norm": 0.0023566484451293945, "learning_rate": 0.0001466580658795118, "loss": 0.0023, "num_input_tokens_seen": 205192560, "step": 94995 }, { "epoch": 15.497553017944535, "grad_norm": 0.14754217863082886, "learning_rate": 0.00014660770763997105, "loss": 0.106, "num_input_tokens_seen": 205202032, "step": 95000 }, { "epoch": 15.49836867862969, "grad_norm": 0.009285212494432926, "learning_rate": 0.00014655735656237312, "loss": 0.0017, "num_input_tokens_seen": 205213104, "step": 95005 }, { "epoch": 15.499184339314844, "grad_norm": 0.0009993219282478094, "learning_rate": 0.00014650701264773907, "loss": 0.0007, "num_input_tokens_seen": 205224752, "step": 95010 }, { "epoch": 15.5, "grad_norm": 0.04408176988363266, "learning_rate": 0.0001464566758970885, "loss": 0.0042, "num_input_tokens_seen": 205236304, "step": 95015 }, { "epoch": 15.500815660685156, "grad_norm": 0.0037940412294119596, "learning_rate": 0.00014640634631144206, "loss": 0.0014, "num_input_tokens_seen": 205246224, "step": 95020 }, { "epoch": 15.50163132137031, "grad_norm": 0.013590458780527115, "learning_rate": 0.00014635602389181956, "loss": 0.003, "num_input_tokens_seen": 205257424, "step": 95025 }, { "epoch": 15.502446982055465, "grad_norm": 0.008497872389853, "learning_rate": 0.00014630570863924088, "loss": 0.008, "num_input_tokens_seen": 205268560, "step": 95030 }, { "epoch": 15.50326264274062, "grad_norm": 0.005137204192578793, "learning_rate": 0.0001462554005547257, "loss": 0.0047, "num_input_tokens_seen": 205279280, "step": 95035 }, { "epoch": 15.504078303425775, "grad_norm": 0.012710629031062126, "learning_rate": 0.00014620509963929362, "loss": 0.0027, "num_input_tokens_seen": 205289616, "step": 95040 }, { "epoch": 15.50489396411093, "grad_norm": 0.03326995298266411, "learning_rate": 0.00014615480589396396, "loss": 0.0075, "num_input_tokens_seen": 205300976, "step": 95045 }, { "epoch": 15.505709624796085, "grad_norm": 0.0013846260262653232, "learning_rate": 0.0001461045193197561, "loss": 0.0344, "num_input_tokens_seen": 205311568, "step": 95050 }, { "epoch": 15.50652528548124, "grad_norm": 0.02186959609389305, "learning_rate": 0.00014605423991768908, "loss": 0.0199, "num_input_tokens_seen": 205322032, "step": 95055 }, { "epoch": 15.507340946166394, "grad_norm": 0.00837793666869402, "learning_rate": 0.00014600396768878188, "loss": 0.0045, "num_input_tokens_seen": 205333104, "step": 95060 }, { "epoch": 15.50815660685155, "grad_norm": 0.18578238785266876, "learning_rate": 0.0001459537026340534, "loss": 0.0141, "num_input_tokens_seen": 205344496, "step": 95065 }, { "epoch": 15.508972267536706, "grad_norm": 0.00431056646630168, "learning_rate": 0.0001459034447545222, "loss": 0.0303, "num_input_tokens_seen": 205355248, "step": 95070 }, { "epoch": 15.50978792822186, "grad_norm": 0.011880909092724323, "learning_rate": 0.00014585319405120695, "loss": 0.0025, "num_input_tokens_seen": 205367600, "step": 95075 }, { "epoch": 15.510603588907015, "grad_norm": 0.0075214398093521595, "learning_rate": 0.0001458029505251258, "loss": 0.0066, "num_input_tokens_seen": 205376976, "step": 95080 }, { "epoch": 15.51141924959217, "grad_norm": 0.01479738112539053, "learning_rate": 0.0001457527141772975, "loss": 0.0722, "num_input_tokens_seen": 205388272, "step": 95085 }, { "epoch": 15.512234910277325, "grad_norm": 0.0015288260765373707, "learning_rate": 0.00014570248500873963, "loss": 0.0023, "num_input_tokens_seen": 205398864, "step": 95090 }, { "epoch": 15.513050570962479, "grad_norm": 0.039064157754182816, "learning_rate": 0.00014565226302047058, "loss": 0.0097, "num_input_tokens_seen": 205409872, "step": 95095 }, { "epoch": 15.513866231647635, "grad_norm": 0.44162848591804504, "learning_rate": 0.00014560204821350764, "loss": 0.0117, "num_input_tokens_seen": 205420208, "step": 95100 }, { "epoch": 15.51468189233279, "grad_norm": 0.0027341239620000124, "learning_rate": 0.00014555184058886905, "loss": 0.0178, "num_input_tokens_seen": 205431536, "step": 95105 }, { "epoch": 15.515497553017944, "grad_norm": 0.008773371577262878, "learning_rate": 0.00014550164014757183, "loss": 0.0011, "num_input_tokens_seen": 205443504, "step": 95110 }, { "epoch": 15.5163132137031, "grad_norm": 0.6953399181365967, "learning_rate": 0.00014545144689063382, "loss": 0.2316, "num_input_tokens_seen": 205453456, "step": 95115 }, { "epoch": 15.517128874388254, "grad_norm": 0.7138208746910095, "learning_rate": 0.0001454012608190718, "loss": 0.072, "num_input_tokens_seen": 205465136, "step": 95120 }, { "epoch": 15.51794453507341, "grad_norm": 0.0015598111785948277, "learning_rate": 0.0001453510819339033, "loss": 0.0026, "num_input_tokens_seen": 205475184, "step": 95125 }, { "epoch": 15.518760195758565, "grad_norm": 0.00230622966773808, "learning_rate": 0.0001453009102361447, "loss": 0.0065, "num_input_tokens_seen": 205484688, "step": 95130 }, { "epoch": 15.51957585644372, "grad_norm": 0.02480519562959671, "learning_rate": 0.0001452507457268135, "loss": 0.0032, "num_input_tokens_seen": 205494960, "step": 95135 }, { "epoch": 15.520391517128875, "grad_norm": 0.0014786440879106522, "learning_rate": 0.00014520058840692562, "loss": 0.0009, "num_input_tokens_seen": 205506064, "step": 95140 }, { "epoch": 15.521207177814029, "grad_norm": 0.023763207718729973, "learning_rate": 0.00014515043827749812, "loss": 0.0023, "num_input_tokens_seen": 205516944, "step": 95145 }, { "epoch": 15.522022838499185, "grad_norm": 0.05934037268161774, "learning_rate": 0.0001451002953395471, "loss": 0.0033, "num_input_tokens_seen": 205527056, "step": 95150 }, { "epoch": 15.522838499184338, "grad_norm": 0.015051600523293018, "learning_rate": 0.00014505015959408884, "loss": 0.0016, "num_input_tokens_seen": 205536944, "step": 95155 }, { "epoch": 15.523654159869494, "grad_norm": 0.0194566547870636, "learning_rate": 0.00014500003104213932, "loss": 0.0033, "num_input_tokens_seen": 205546768, "step": 95160 }, { "epoch": 15.52446982055465, "grad_norm": 0.004796968307346106, "learning_rate": 0.0001449499096847146, "loss": 0.0064, "num_input_tokens_seen": 205557200, "step": 95165 }, { "epoch": 15.525285481239804, "grad_norm": 0.004861933179199696, "learning_rate": 0.00014489979552283035, "loss": 0.093, "num_input_tokens_seen": 205568144, "step": 95170 }, { "epoch": 15.52610114192496, "grad_norm": 0.023706277832388878, "learning_rate": 0.0001448496885575022, "loss": 0.0026, "num_input_tokens_seen": 205579504, "step": 95175 }, { "epoch": 15.526916802610113, "grad_norm": 0.010353012010455132, "learning_rate": 0.00014479958878974564, "loss": 0.0029, "num_input_tokens_seen": 205590256, "step": 95180 }, { "epoch": 15.52773246329527, "grad_norm": 0.018889334052801132, "learning_rate": 0.00014474949622057603, "loss": 0.002, "num_input_tokens_seen": 205601520, "step": 95185 }, { "epoch": 15.528548123980425, "grad_norm": 0.008149920962750912, "learning_rate": 0.00014469941085100857, "loss": 0.0015, "num_input_tokens_seen": 205612304, "step": 95190 }, { "epoch": 15.529363784665579, "grad_norm": 0.030473439022898674, "learning_rate": 0.00014464933268205826, "loss": 0.0017, "num_input_tokens_seen": 205622768, "step": 95195 }, { "epoch": 15.530179445350734, "grad_norm": 0.01042587123811245, "learning_rate": 0.00014459926171474002, "loss": 0.0077, "num_input_tokens_seen": 205633072, "step": 95200 }, { "epoch": 15.530995106035888, "grad_norm": 0.06183413416147232, "learning_rate": 0.0001445491979500686, "loss": 0.0107, "num_input_tokens_seen": 205643120, "step": 95205 }, { "epoch": 15.531810766721044, "grad_norm": 0.0014860248193144798, "learning_rate": 0.0001444991413890586, "loss": 0.0024, "num_input_tokens_seen": 205653296, "step": 95210 }, { "epoch": 15.5326264274062, "grad_norm": 0.0037677655927836895, "learning_rate": 0.00014444909203272438, "loss": 0.0344, "num_input_tokens_seen": 205662672, "step": 95215 }, { "epoch": 15.533442088091354, "grad_norm": 0.034878209233284, "learning_rate": 0.0001443990498820806, "loss": 0.0067, "num_input_tokens_seen": 205672752, "step": 95220 }, { "epoch": 15.53425774877651, "grad_norm": 0.366243839263916, "learning_rate": 0.0001443490149381409, "loss": 0.022, "num_input_tokens_seen": 205683984, "step": 95225 }, { "epoch": 15.535073409461663, "grad_norm": 0.003331542480736971, "learning_rate": 0.0001442989872019199, "loss": 0.0056, "num_input_tokens_seen": 205694992, "step": 95230 }, { "epoch": 15.535889070146819, "grad_norm": 0.0007717033731751144, "learning_rate": 0.00014424896667443083, "loss": 0.0027, "num_input_tokens_seen": 205706512, "step": 95235 }, { "epoch": 15.536704730831975, "grad_norm": 0.004548640456050634, "learning_rate": 0.00014419895335668809, "loss": 0.0032, "num_input_tokens_seen": 205717968, "step": 95240 }, { "epoch": 15.537520391517129, "grad_norm": 0.006112218368798494, "learning_rate": 0.00014414894724970462, "loss": 0.0018, "num_input_tokens_seen": 205728912, "step": 95245 }, { "epoch": 15.538336052202284, "grad_norm": 0.02950763888657093, "learning_rate": 0.00014409894835449444, "loss": 0.0033, "num_input_tokens_seen": 205739888, "step": 95250 }, { "epoch": 15.539151712887438, "grad_norm": 0.1370188146829605, "learning_rate": 0.00014404895667207028, "loss": 0.0057, "num_input_tokens_seen": 205751376, "step": 95255 }, { "epoch": 15.539967373572594, "grad_norm": 0.0022804271429777145, "learning_rate": 0.00014399897220344576, "loss": 0.0006, "num_input_tokens_seen": 205761072, "step": 95260 }, { "epoch": 15.540783034257748, "grad_norm": 0.001658109133131802, "learning_rate": 0.00014394899494963364, "loss": 0.0009, "num_input_tokens_seen": 205771824, "step": 95265 }, { "epoch": 15.541598694942904, "grad_norm": 0.0006953808479011059, "learning_rate": 0.00014389902491164681, "loss": 0.1072, "num_input_tokens_seen": 205781616, "step": 95270 }, { "epoch": 15.54241435562806, "grad_norm": 0.060906656086444855, "learning_rate": 0.00014384906209049804, "loss": 0.002, "num_input_tokens_seen": 205792528, "step": 95275 }, { "epoch": 15.543230016313213, "grad_norm": 0.008214866742491722, "learning_rate": 0.0001437991064871998, "loss": 0.1226, "num_input_tokens_seen": 205803568, "step": 95280 }, { "epoch": 15.544045676998369, "grad_norm": 0.0363452285528183, "learning_rate": 0.0001437491581027645, "loss": 0.0863, "num_input_tokens_seen": 205813424, "step": 95285 }, { "epoch": 15.544861337683523, "grad_norm": 0.0006147515960037708, "learning_rate": 0.00014369921693820447, "loss": 0.0011, "num_input_tokens_seen": 205824688, "step": 95290 }, { "epoch": 15.545676998368679, "grad_norm": 0.004925783723592758, "learning_rate": 0.00014364928299453184, "loss": 0.0018, "num_input_tokens_seen": 205835760, "step": 95295 }, { "epoch": 15.546492659053834, "grad_norm": 0.0010029467521235347, "learning_rate": 0.00014359935627275856, "loss": 0.0034, "num_input_tokens_seen": 205846672, "step": 95300 }, { "epoch": 15.547308319738988, "grad_norm": 0.0026191682554781437, "learning_rate": 0.00014354943677389643, "loss": 0.0013, "num_input_tokens_seen": 205858384, "step": 95305 }, { "epoch": 15.548123980424144, "grad_norm": 0.007862421683967113, "learning_rate": 0.00014349952449895715, "loss": 0.0016, "num_input_tokens_seen": 205868720, "step": 95310 }, { "epoch": 15.548939641109298, "grad_norm": 0.0005457144579850137, "learning_rate": 0.00014344961944895223, "loss": 0.0021, "num_input_tokens_seen": 205877776, "step": 95315 }, { "epoch": 15.549755301794454, "grad_norm": 0.011868278495967388, "learning_rate": 0.00014339972162489317, "loss": 0.015, "num_input_tokens_seen": 205888912, "step": 95320 }, { "epoch": 15.550570962479608, "grad_norm": 0.0013630108442157507, "learning_rate": 0.0001433498310277911, "loss": 0.0148, "num_input_tokens_seen": 205900240, "step": 95325 }, { "epoch": 15.551386623164763, "grad_norm": 0.004692132119089365, "learning_rate": 0.0001432999476586571, "loss": 0.0011, "num_input_tokens_seen": 205911792, "step": 95330 }, { "epoch": 15.552202283849919, "grad_norm": 0.002104677725583315, "learning_rate": 0.00014325007151850218, "loss": 0.0064, "num_input_tokens_seen": 205922704, "step": 95335 }, { "epoch": 15.553017944535073, "grad_norm": 0.030288465321063995, "learning_rate": 0.00014320020260833716, "loss": 0.0021, "num_input_tokens_seen": 205934064, "step": 95340 }, { "epoch": 15.553833605220229, "grad_norm": 0.0025799486320465803, "learning_rate": 0.00014315034092917268, "loss": 0.001, "num_input_tokens_seen": 205943760, "step": 95345 }, { "epoch": 15.554649265905383, "grad_norm": 0.0033469286281615496, "learning_rate": 0.00014310048648201917, "loss": 0.0049, "num_input_tokens_seen": 205952528, "step": 95350 }, { "epoch": 15.555464926590538, "grad_norm": 0.021118100732564926, "learning_rate": 0.0001430506392678871, "loss": 0.004, "num_input_tokens_seen": 205963056, "step": 95355 }, { "epoch": 15.556280587275694, "grad_norm": 0.024281244724988937, "learning_rate": 0.00014300079928778646, "loss": 0.0024, "num_input_tokens_seen": 205973648, "step": 95360 }, { "epoch": 15.557096247960848, "grad_norm": 0.09317111223936081, "learning_rate": 0.00014295096654272772, "loss": 0.0031, "num_input_tokens_seen": 205984112, "step": 95365 }, { "epoch": 15.557911908646004, "grad_norm": 0.0007803754997439682, "learning_rate": 0.00014290114103372058, "loss": 0.0567, "num_input_tokens_seen": 205994704, "step": 95370 }, { "epoch": 15.558727569331158, "grad_norm": 0.009101621806621552, "learning_rate": 0.00014285132276177482, "loss": 0.0037, "num_input_tokens_seen": 206004784, "step": 95375 }, { "epoch": 15.559543230016313, "grad_norm": 0.0009343404090031981, "learning_rate": 0.00014280151172790006, "loss": 0.0052, "num_input_tokens_seen": 206015472, "step": 95380 }, { "epoch": 15.560358890701469, "grad_norm": 0.0013652790803462267, "learning_rate": 0.00014275170793310582, "loss": 0.0072, "num_input_tokens_seen": 206028336, "step": 95385 }, { "epoch": 15.561174551386623, "grad_norm": 0.015270788222551346, "learning_rate": 0.00014270191137840145, "loss": 0.1079, "num_input_tokens_seen": 206037424, "step": 95390 }, { "epoch": 15.561990212071779, "grad_norm": 0.0003416137769818306, "learning_rate": 0.00014265212206479604, "loss": 0.014, "num_input_tokens_seen": 206047600, "step": 95395 }, { "epoch": 15.562805872756933, "grad_norm": 0.020423779264092445, "learning_rate": 0.00014260233999329873, "loss": 0.0035, "num_input_tokens_seen": 206057488, "step": 95400 }, { "epoch": 15.563621533442088, "grad_norm": 0.001411057892255485, "learning_rate": 0.00014255256516491845, "loss": 0.0022, "num_input_tokens_seen": 206069008, "step": 95405 }, { "epoch": 15.564437194127244, "grad_norm": 0.8168393969535828, "learning_rate": 0.00014250279758066387, "loss": 0.0089, "num_input_tokens_seen": 206079984, "step": 95410 }, { "epoch": 15.565252854812398, "grad_norm": 0.04632632061839104, "learning_rate": 0.00014245303724154358, "loss": 0.0034, "num_input_tokens_seen": 206091248, "step": 95415 }, { "epoch": 15.566068515497554, "grad_norm": 0.011034045368432999, "learning_rate": 0.00014240328414856607, "loss": 0.0018, "num_input_tokens_seen": 206101328, "step": 95420 }, { "epoch": 15.566884176182707, "grad_norm": 0.006134867202490568, "learning_rate": 0.00014235353830273966, "loss": 0.0027, "num_input_tokens_seen": 206111472, "step": 95425 }, { "epoch": 15.567699836867863, "grad_norm": 0.002034999430179596, "learning_rate": 0.00014230379970507252, "loss": 0.0046, "num_input_tokens_seen": 206122736, "step": 95430 }, { "epoch": 15.568515497553017, "grad_norm": 0.0007061712676659226, "learning_rate": 0.00014225406835657262, "loss": 0.0011, "num_input_tokens_seen": 206134576, "step": 95435 }, { "epoch": 15.569331158238173, "grad_norm": 0.04730561748147011, "learning_rate": 0.00014220434425824785, "loss": 0.0132, "num_input_tokens_seen": 206145040, "step": 95440 }, { "epoch": 15.570146818923329, "grad_norm": 0.006063089240342379, "learning_rate": 0.00014215462741110597, "loss": 0.0023, "num_input_tokens_seen": 206156208, "step": 95445 }, { "epoch": 15.570962479608482, "grad_norm": 0.5385159850120544, "learning_rate": 0.00014210491781615453, "loss": 0.0871, "num_input_tokens_seen": 206167408, "step": 95450 }, { "epoch": 15.571778140293638, "grad_norm": 0.002197818597778678, "learning_rate": 0.00014205521547440092, "loss": 0.0049, "num_input_tokens_seen": 206177808, "step": 95455 }, { "epoch": 15.572593800978792, "grad_norm": 0.009187380783259869, "learning_rate": 0.00014200552038685249, "loss": 0.0039, "num_input_tokens_seen": 206188016, "step": 95460 }, { "epoch": 15.573409461663948, "grad_norm": 0.00341814081184566, "learning_rate": 0.00014195583255451633, "loss": 0.0043, "num_input_tokens_seen": 206199472, "step": 95465 }, { "epoch": 15.574225122349104, "grad_norm": 0.0038523096591234207, "learning_rate": 0.00014190615197839929, "loss": 0.0035, "num_input_tokens_seen": 206209520, "step": 95470 }, { "epoch": 15.575040783034257, "grad_norm": 0.0005846923450008035, "learning_rate": 0.00014185647865950861, "loss": 0.0008, "num_input_tokens_seen": 206220368, "step": 95475 }, { "epoch": 15.575856443719413, "grad_norm": 0.22896532714366913, "learning_rate": 0.00014180681259885048, "loss": 0.1469, "num_input_tokens_seen": 206231568, "step": 95480 }, { "epoch": 15.576672104404567, "grad_norm": 0.0027704713866114616, "learning_rate": 0.000141757153797432, "loss": 0.001, "num_input_tokens_seen": 206243376, "step": 95485 }, { "epoch": 15.577487765089723, "grad_norm": 0.05355888605117798, "learning_rate": 0.00014170750225625888, "loss": 0.0442, "num_input_tokens_seen": 206253360, "step": 95490 }, { "epoch": 15.578303425774878, "grad_norm": 0.004209849517792463, "learning_rate": 0.00014165785797633812, "loss": 0.0015, "num_input_tokens_seen": 206263856, "step": 95495 }, { "epoch": 15.579119086460032, "grad_norm": 0.004551318474113941, "learning_rate": 0.00014160822095867515, "loss": 0.0031, "num_input_tokens_seen": 206275376, "step": 95500 }, { "epoch": 15.579934747145188, "grad_norm": 0.009526636451482773, "learning_rate": 0.00014155859120427633, "loss": 0.0051, "num_input_tokens_seen": 206286608, "step": 95505 }, { "epoch": 15.580750407830342, "grad_norm": 0.05764400586485863, "learning_rate": 0.00014150896871414743, "loss": 0.0085, "num_input_tokens_seen": 206297616, "step": 95510 }, { "epoch": 15.581566068515498, "grad_norm": 0.05977749451994896, "learning_rate": 0.00014145935348929407, "loss": 0.0058, "num_input_tokens_seen": 206309232, "step": 95515 }, { "epoch": 15.582381729200652, "grad_norm": 0.00463469885289669, "learning_rate": 0.0001414097455307217, "loss": 0.0054, "num_input_tokens_seen": 206320528, "step": 95520 }, { "epoch": 15.583197389885807, "grad_norm": 0.0017053117044270039, "learning_rate": 0.00014136014483943576, "loss": 0.0737, "num_input_tokens_seen": 206330192, "step": 95525 }, { "epoch": 15.584013050570963, "grad_norm": 0.02842816151678562, "learning_rate": 0.0001413105514164415, "loss": 0.0025, "num_input_tokens_seen": 206341200, "step": 95530 }, { "epoch": 15.584828711256117, "grad_norm": 0.006738144904375076, "learning_rate": 0.0001412609652627439, "loss": 0.0016, "num_input_tokens_seen": 206351472, "step": 95535 }, { "epoch": 15.585644371941273, "grad_norm": 0.13522782921791077, "learning_rate": 0.00014121138637934795, "loss": 0.0111, "num_input_tokens_seen": 206362512, "step": 95540 }, { "epoch": 15.586460032626427, "grad_norm": 0.01704743131995201, "learning_rate": 0.00014116181476725838, "loss": 0.0305, "num_input_tokens_seen": 206374160, "step": 95545 }, { "epoch": 15.587275693311582, "grad_norm": 0.0007164751878008246, "learning_rate": 0.00014111225042747987, "loss": 0.003, "num_input_tokens_seen": 206383952, "step": 95550 }, { "epoch": 15.588091353996738, "grad_norm": 0.0008742042118683457, "learning_rate": 0.00014106269336101692, "loss": 0.001, "num_input_tokens_seen": 206393552, "step": 95555 }, { "epoch": 15.588907014681892, "grad_norm": 0.0022135020699352026, "learning_rate": 0.0001410131435688738, "loss": 0.0014, "num_input_tokens_seen": 206404112, "step": 95560 }, { "epoch": 15.589722675367048, "grad_norm": 0.006435474380850792, "learning_rate": 0.00014096360105205475, "loss": 0.0063, "num_input_tokens_seen": 206414704, "step": 95565 }, { "epoch": 15.590538336052202, "grad_norm": 0.0010266860481351614, "learning_rate": 0.00014091406581156373, "loss": 0.0019, "num_input_tokens_seen": 206425264, "step": 95570 }, { "epoch": 15.591353996737357, "grad_norm": 0.00455155223608017, "learning_rate": 0.00014086453784840463, "loss": 0.0018, "num_input_tokens_seen": 206434768, "step": 95575 }, { "epoch": 15.592169657422513, "grad_norm": 0.0011100763222202659, "learning_rate": 0.00014081501716358154, "loss": 0.0017, "num_input_tokens_seen": 206446928, "step": 95580 }, { "epoch": 15.592985318107667, "grad_norm": 0.002654121723026037, "learning_rate": 0.0001407655037580975, "loss": 0.0012, "num_input_tokens_seen": 206456880, "step": 95585 }, { "epoch": 15.593800978792823, "grad_norm": 0.0031467049848288298, "learning_rate": 0.0001407159976329565, "loss": 0.001, "num_input_tokens_seen": 206467216, "step": 95590 }, { "epoch": 15.594616639477977, "grad_norm": 0.0007504248642362654, "learning_rate": 0.00014066649878916133, "loss": 0.0014, "num_input_tokens_seen": 206478128, "step": 95595 }, { "epoch": 15.595432300163132, "grad_norm": 0.0024569076485931873, "learning_rate": 0.00014061700722771569, "loss": 0.0006, "num_input_tokens_seen": 206490128, "step": 95600 }, { "epoch": 15.596247960848288, "grad_norm": 0.00822295993566513, "learning_rate": 0.000140567522949622, "loss": 0.0014, "num_input_tokens_seen": 206501040, "step": 95605 }, { "epoch": 15.597063621533442, "grad_norm": 0.052377086132764816, "learning_rate": 0.00014051804595588375, "loss": 0.002, "num_input_tokens_seen": 206511696, "step": 95610 }, { "epoch": 15.597879282218598, "grad_norm": 0.014150702394545078, "learning_rate": 0.00014046857624750304, "loss": 0.0034, "num_input_tokens_seen": 206521936, "step": 95615 }, { "epoch": 15.598694942903752, "grad_norm": 0.003785144304856658, "learning_rate": 0.00014041911382548305, "loss": 0.0012, "num_input_tokens_seen": 206533200, "step": 95620 }, { "epoch": 15.599510603588907, "grad_norm": 0.019707906991243362, "learning_rate": 0.00014036965869082551, "loss": 0.022, "num_input_tokens_seen": 206544304, "step": 95625 }, { "epoch": 15.600326264274061, "grad_norm": 0.01421266794204712, "learning_rate": 0.00014032021084453344, "loss": 0.0058, "num_input_tokens_seen": 206555152, "step": 95630 }, { "epoch": 15.601141924959217, "grad_norm": 0.01474962942302227, "learning_rate": 0.0001402707702876082, "loss": 0.0073, "num_input_tokens_seen": 206567056, "step": 95635 }, { "epoch": 15.601957585644373, "grad_norm": 0.0010720965219661593, "learning_rate": 0.0001402213370210525, "loss": 0.0015, "num_input_tokens_seen": 206577520, "step": 95640 }, { "epoch": 15.602773246329527, "grad_norm": 0.007528163492679596, "learning_rate": 0.00014017191104586751, "loss": 0.0059, "num_input_tokens_seen": 206588720, "step": 95645 }, { "epoch": 15.603588907014682, "grad_norm": 0.045820482075214386, "learning_rate": 0.00014012249236305542, "loss": 0.0033, "num_input_tokens_seen": 206599760, "step": 95650 }, { "epoch": 15.604404567699836, "grad_norm": 0.009412800893187523, "learning_rate": 0.00014007308097361749, "loss": 0.0043, "num_input_tokens_seen": 206610192, "step": 95655 }, { "epoch": 15.605220228384992, "grad_norm": 0.005069994367659092, "learning_rate": 0.00014002367687855516, "loss": 0.0017, "num_input_tokens_seen": 206620240, "step": 95660 }, { "epoch": 15.606035889070148, "grad_norm": 0.009124625474214554, "learning_rate": 0.00013997428007886975, "loss": 0.0048, "num_input_tokens_seen": 206631056, "step": 95665 }, { "epoch": 15.606851549755302, "grad_norm": 0.0022985092364251614, "learning_rate": 0.00013992489057556223, "loss": 0.0015, "num_input_tokens_seen": 206642032, "step": 95670 }, { "epoch": 15.607667210440457, "grad_norm": 0.002274824073538184, "learning_rate": 0.00013987550836963358, "loss": 0.0028, "num_input_tokens_seen": 206652528, "step": 95675 }, { "epoch": 15.608482871125611, "grad_norm": 0.1229882463812828, "learning_rate": 0.0001398261334620846, "loss": 0.0025, "num_input_tokens_seen": 206662864, "step": 95680 }, { "epoch": 15.609298531810767, "grad_norm": 0.0009678273927420378, "learning_rate": 0.00013977676585391597, "loss": 0.0022, "num_input_tokens_seen": 206673840, "step": 95685 }, { "epoch": 15.61011419249592, "grad_norm": 0.008544718846678734, "learning_rate": 0.00013972740554612817, "loss": 0.0042, "num_input_tokens_seen": 206684240, "step": 95690 }, { "epoch": 15.610929853181077, "grad_norm": 0.010948436334729195, "learning_rate": 0.0001396780525397215, "loss": 0.0606, "num_input_tokens_seen": 206695440, "step": 95695 }, { "epoch": 15.611745513866232, "grad_norm": 0.06881558895111084, "learning_rate": 0.00013962870683569605, "loss": 0.0048, "num_input_tokens_seen": 206705840, "step": 95700 }, { "epoch": 15.612561174551386, "grad_norm": 0.006645071320235729, "learning_rate": 0.00013957936843505238, "loss": 0.0306, "num_input_tokens_seen": 206716176, "step": 95705 }, { "epoch": 15.613376835236542, "grad_norm": 0.2631056606769562, "learning_rate": 0.00013953003733878965, "loss": 0.0179, "num_input_tokens_seen": 206727376, "step": 95710 }, { "epoch": 15.614192495921696, "grad_norm": 0.007076509762555361, "learning_rate": 0.0001394807135479083, "loss": 0.0041, "num_input_tokens_seen": 206737648, "step": 95715 }, { "epoch": 15.615008156606851, "grad_norm": 0.733444094657898, "learning_rate": 0.0001394313970634074, "loss": 0.1427, "num_input_tokens_seen": 206748592, "step": 95720 }, { "epoch": 15.615823817292007, "grad_norm": 0.0013660003896802664, "learning_rate": 0.0001393820878862869, "loss": 0.0773, "num_input_tokens_seen": 206759120, "step": 95725 }, { "epoch": 15.616639477977161, "grad_norm": 0.01996493898332119, "learning_rate": 0.00013933278601754563, "loss": 0.0134, "num_input_tokens_seen": 206770896, "step": 95730 }, { "epoch": 15.617455138662317, "grad_norm": 0.0015449258498847485, "learning_rate": 0.00013928349145818326, "loss": 0.0061, "num_input_tokens_seen": 206782000, "step": 95735 }, { "epoch": 15.61827079934747, "grad_norm": 0.014810550957918167, "learning_rate": 0.00013923420420919823, "loss": 0.0039, "num_input_tokens_seen": 206793072, "step": 95740 }, { "epoch": 15.619086460032626, "grad_norm": 0.0011037884978577495, "learning_rate": 0.00013918492427159002, "loss": 0.0278, "num_input_tokens_seen": 206805168, "step": 95745 }, { "epoch": 15.619902120717782, "grad_norm": 0.004637254402041435, "learning_rate": 0.00013913565164635672, "loss": 0.0012, "num_input_tokens_seen": 206815440, "step": 95750 }, { "epoch": 15.620717781402936, "grad_norm": 0.0011846721172332764, "learning_rate": 0.00013908638633449756, "loss": 0.0005, "num_input_tokens_seen": 206825008, "step": 95755 }, { "epoch": 15.621533442088092, "grad_norm": 0.0013002973282709718, "learning_rate": 0.00013903712833701032, "loss": 0.0044, "num_input_tokens_seen": 206835536, "step": 95760 }, { "epoch": 15.622349102773246, "grad_norm": 0.008994800969958305, "learning_rate": 0.0001389878776548939, "loss": 0.0023, "num_input_tokens_seen": 206847344, "step": 95765 }, { "epoch": 15.623164763458401, "grad_norm": 0.0006031625671312213, "learning_rate": 0.00013893863428914583, "loss": 0.0006, "num_input_tokens_seen": 206858288, "step": 95770 }, { "epoch": 15.623980424143557, "grad_norm": 0.10449585318565369, "learning_rate": 0.00013888939824076464, "loss": 0.0023, "num_input_tokens_seen": 206870512, "step": 95775 }, { "epoch": 15.624796084828711, "grad_norm": 0.010346177034080029, "learning_rate": 0.00013884016951074758, "loss": 0.0102, "num_input_tokens_seen": 206882384, "step": 95780 }, { "epoch": 15.625611745513867, "grad_norm": 0.0041154371574521065, "learning_rate": 0.00013879094810009284, "loss": 0.0018, "num_input_tokens_seen": 206892784, "step": 95785 }, { "epoch": 15.62642740619902, "grad_norm": 0.0015759262023493648, "learning_rate": 0.00013874173400979772, "loss": 0.0214, "num_input_tokens_seen": 206904528, "step": 95790 }, { "epoch": 15.627243066884176, "grad_norm": 0.01136757992208004, "learning_rate": 0.00013869252724085974, "loss": 0.0037, "num_input_tokens_seen": 206916400, "step": 95795 }, { "epoch": 15.62805872756933, "grad_norm": 0.1922260969877243, "learning_rate": 0.00013864332779427597, "loss": 0.007, "num_input_tokens_seen": 206927440, "step": 95800 }, { "epoch": 15.628874388254486, "grad_norm": 0.000996628892607987, "learning_rate": 0.00013859413567104357, "loss": 0.018, "num_input_tokens_seen": 206938608, "step": 95805 }, { "epoch": 15.629690048939642, "grad_norm": 0.00351770780980587, "learning_rate": 0.00013854495087215951, "loss": 0.0013, "num_input_tokens_seen": 206949136, "step": 95810 }, { "epoch": 15.630505709624796, "grad_norm": 0.0383528470993042, "learning_rate": 0.00013849577339862057, "loss": 0.0032, "num_input_tokens_seen": 206958544, "step": 95815 }, { "epoch": 15.631321370309951, "grad_norm": 0.06590034067630768, "learning_rate": 0.00013844660325142334, "loss": 0.0059, "num_input_tokens_seen": 206969456, "step": 95820 }, { "epoch": 15.632137030995105, "grad_norm": 0.0005166734335944057, "learning_rate": 0.00013839744043156438, "loss": 0.0008, "num_input_tokens_seen": 206980176, "step": 95825 }, { "epoch": 15.632952691680261, "grad_norm": 0.004043356981128454, "learning_rate": 0.00013834828494004004, "loss": 0.0086, "num_input_tokens_seen": 206991184, "step": 95830 }, { "epoch": 15.633768352365417, "grad_norm": 0.006381909362971783, "learning_rate": 0.0001382991367778465, "loss": 0.0057, "num_input_tokens_seen": 207001360, "step": 95835 }, { "epoch": 15.63458401305057, "grad_norm": 0.0019231840269640088, "learning_rate": 0.00013824999594597975, "loss": 0.0011, "num_input_tokens_seen": 207010736, "step": 95840 }, { "epoch": 15.635399673735726, "grad_norm": 0.00047763565089553595, "learning_rate": 0.00013820086244543562, "loss": 0.1042, "num_input_tokens_seen": 207021840, "step": 95845 }, { "epoch": 15.63621533442088, "grad_norm": 0.0834665521979332, "learning_rate": 0.00013815173627721027, "loss": 0.0058, "num_input_tokens_seen": 207032848, "step": 95850 }, { "epoch": 15.637030995106036, "grad_norm": 0.09207990765571594, "learning_rate": 0.00013810261744229873, "loss": 0.0052, "num_input_tokens_seen": 207044336, "step": 95855 }, { "epoch": 15.63784665579119, "grad_norm": 0.008700598031282425, "learning_rate": 0.00013805350594169708, "loss": 0.0021, "num_input_tokens_seen": 207055376, "step": 95860 }, { "epoch": 15.638662316476346, "grad_norm": 0.03562851995229721, "learning_rate": 0.0001380044017764, "loss": 0.0079, "num_input_tokens_seen": 207065904, "step": 95865 }, { "epoch": 15.639477977161501, "grad_norm": 0.00032584878499619663, "learning_rate": 0.0001379553049474032, "loss": 0.03, "num_input_tokens_seen": 207076752, "step": 95870 }, { "epoch": 15.640293637846655, "grad_norm": 0.025242719799280167, "learning_rate": 0.00013790621545570114, "loss": 0.004, "num_input_tokens_seen": 207088016, "step": 95875 }, { "epoch": 15.641109298531811, "grad_norm": 0.0008892813930287957, "learning_rate": 0.00013785713330228928, "loss": 0.0025, "num_input_tokens_seen": 207098160, "step": 95880 }, { "epoch": 15.641924959216965, "grad_norm": 0.030518537387251854, "learning_rate": 0.00013780805848816175, "loss": 0.0026, "num_input_tokens_seen": 207108176, "step": 95885 }, { "epoch": 15.64274061990212, "grad_norm": 0.016238387674093246, "learning_rate": 0.0001377589910143135, "loss": 0.008, "num_input_tokens_seen": 207119440, "step": 95890 }, { "epoch": 15.643556280587276, "grad_norm": 0.0018854053923860192, "learning_rate": 0.00013770993088173884, "loss": 0.0014, "num_input_tokens_seen": 207129168, "step": 95895 }, { "epoch": 15.64437194127243, "grad_norm": 0.00233276654034853, "learning_rate": 0.000137660878091432, "loss": 0.0017, "num_input_tokens_seen": 207139600, "step": 95900 }, { "epoch": 15.645187601957586, "grad_norm": 0.09753572940826416, "learning_rate": 0.0001376118326443872, "loss": 0.002, "num_input_tokens_seen": 207150896, "step": 95905 }, { "epoch": 15.64600326264274, "grad_norm": 0.1661272794008255, "learning_rate": 0.00013756279454159827, "loss": 0.0099, "num_input_tokens_seen": 207160592, "step": 95910 }, { "epoch": 15.646818923327896, "grad_norm": 0.000456740875961259, "learning_rate": 0.0001375137637840591, "loss": 0.0123, "num_input_tokens_seen": 207171856, "step": 95915 }, { "epoch": 15.647634584013051, "grad_norm": 0.006311553064733744, "learning_rate": 0.00013746474037276335, "loss": 0.0014, "num_input_tokens_seen": 207183728, "step": 95920 }, { "epoch": 15.648450244698205, "grad_norm": 0.0018103966722264886, "learning_rate": 0.0001374157243087046, "loss": 0.0022, "num_input_tokens_seen": 207192976, "step": 95925 }, { "epoch": 15.649265905383361, "grad_norm": 0.0052458480931818485, "learning_rate": 0.00013736671559287612, "loss": 0.0221, "num_input_tokens_seen": 207204560, "step": 95930 }, { "epoch": 15.650081566068515, "grad_norm": 0.35821104049682617, "learning_rate": 0.0001373177142262712, "loss": 0.0182, "num_input_tokens_seen": 207215824, "step": 95935 }, { "epoch": 15.65089722675367, "grad_norm": 0.014001819305121899, "learning_rate": 0.0001372687202098829, "loss": 0.0025, "num_input_tokens_seen": 207226960, "step": 95940 }, { "epoch": 15.651712887438826, "grad_norm": 0.0030215075239539146, "learning_rate": 0.00013721973354470412, "loss": 0.0032, "num_input_tokens_seen": 207237552, "step": 95945 }, { "epoch": 15.65252854812398, "grad_norm": 0.002811993472278118, "learning_rate": 0.00013717075423172765, "loss": 0.001, "num_input_tokens_seen": 207249424, "step": 95950 }, { "epoch": 15.653344208809136, "grad_norm": 0.04759371280670166, "learning_rate": 0.00013712178227194617, "loss": 0.0054, "num_input_tokens_seen": 207259600, "step": 95955 }, { "epoch": 15.65415986949429, "grad_norm": 0.0038091284222900867, "learning_rate": 0.00013707281766635204, "loss": 0.0682, "num_input_tokens_seen": 207270448, "step": 95960 }, { "epoch": 15.654975530179446, "grad_norm": 0.04629696160554886, "learning_rate": 0.00013702386041593772, "loss": 0.0065, "num_input_tokens_seen": 207281424, "step": 95965 }, { "epoch": 15.655791190864601, "grad_norm": 0.006149706896394491, "learning_rate": 0.00013697491052169536, "loss": 0.0024, "num_input_tokens_seen": 207292976, "step": 95970 }, { "epoch": 15.656606851549755, "grad_norm": 0.01826256327331066, "learning_rate": 0.00013692596798461692, "loss": 0.0016, "num_input_tokens_seen": 207304240, "step": 95975 }, { "epoch": 15.65742251223491, "grad_norm": 0.002373218536376953, "learning_rate": 0.00013687703280569437, "loss": 0.1386, "num_input_tokens_seen": 207314896, "step": 95980 }, { "epoch": 15.658238172920065, "grad_norm": 0.001200975151732564, "learning_rate": 0.0001368281049859194, "loss": 0.0011, "num_input_tokens_seen": 207326512, "step": 95985 }, { "epoch": 15.65905383360522, "grad_norm": 0.046008266508579254, "learning_rate": 0.0001367791845262834, "loss": 0.0039, "num_input_tokens_seen": 207336944, "step": 95990 }, { "epoch": 15.659869494290374, "grad_norm": 0.0028325358871370554, "learning_rate": 0.0001367302714277784, "loss": 0.0027, "num_input_tokens_seen": 207348560, "step": 95995 }, { "epoch": 15.66068515497553, "grad_norm": 0.0385119691491127, "learning_rate": 0.00013668136569139488, "loss": 0.0065, "num_input_tokens_seen": 207359440, "step": 96000 }, { "epoch": 15.661500815660686, "grad_norm": 0.0017604627646505833, "learning_rate": 0.00013663246731812463, "loss": 0.0062, "num_input_tokens_seen": 207370768, "step": 96005 }, { "epoch": 15.66231647634584, "grad_norm": 0.0032870511058717966, "learning_rate": 0.00013658357630895834, "loss": 0.0018, "num_input_tokens_seen": 207382768, "step": 96010 }, { "epoch": 15.663132137030995, "grad_norm": 0.0009842633735388517, "learning_rate": 0.00013653469266488688, "loss": 0.0025, "num_input_tokens_seen": 207392976, "step": 96015 }, { "epoch": 15.66394779771615, "grad_norm": 0.023544909432530403, "learning_rate": 0.000136485816386901, "loss": 0.0975, "num_input_tokens_seen": 207404592, "step": 96020 }, { "epoch": 15.664763458401305, "grad_norm": 0.0010194077622145414, "learning_rate": 0.00013643694747599123, "loss": 0.0015, "num_input_tokens_seen": 207415408, "step": 96025 }, { "epoch": 15.66557911908646, "grad_norm": 0.0012003988958895206, "learning_rate": 0.0001363880859331479, "loss": 0.0907, "num_input_tokens_seen": 207425232, "step": 96030 }, { "epoch": 15.666394779771615, "grad_norm": 0.001043745898641646, "learning_rate": 0.00013633923175936124, "loss": 0.0779, "num_input_tokens_seen": 207437584, "step": 96035 }, { "epoch": 15.66721044045677, "grad_norm": 0.025039061903953552, "learning_rate": 0.00013629038495562145, "loss": 0.1374, "num_input_tokens_seen": 207448816, "step": 96040 }, { "epoch": 15.668026101141924, "grad_norm": 0.0040556760504841805, "learning_rate": 0.00013624154552291834, "loss": 0.0025, "num_input_tokens_seen": 207458960, "step": 96045 }, { "epoch": 15.66884176182708, "grad_norm": 0.00263931299559772, "learning_rate": 0.00013619271346224183, "loss": 0.1143, "num_input_tokens_seen": 207468656, "step": 96050 }, { "epoch": 15.669657422512234, "grad_norm": 0.015455449931323528, "learning_rate": 0.0001361438887745815, "loss": 0.0032, "num_input_tokens_seen": 207479408, "step": 96055 }, { "epoch": 15.67047308319739, "grad_norm": 0.03252527117729187, "learning_rate": 0.0001360950714609268, "loss": 0.004, "num_input_tokens_seen": 207489968, "step": 96060 }, { "epoch": 15.671288743882545, "grad_norm": 0.003963053692132235, "learning_rate": 0.00013604626152226719, "loss": 0.0027, "num_input_tokens_seen": 207502000, "step": 96065 }, { "epoch": 15.6721044045677, "grad_norm": 0.008263356983661652, "learning_rate": 0.00013599745895959175, "loss": 0.0605, "num_input_tokens_seen": 207512144, "step": 96070 }, { "epoch": 15.672920065252855, "grad_norm": 0.45873066782951355, "learning_rate": 0.00013594866377388958, "loss": 0.0082, "num_input_tokens_seen": 207523248, "step": 96075 }, { "epoch": 15.673735725938009, "grad_norm": 0.02381652407348156, "learning_rate": 0.0001358998759661496, "loss": 0.0024, "num_input_tokens_seen": 207532880, "step": 96080 }, { "epoch": 15.674551386623165, "grad_norm": 0.020507873967289925, "learning_rate": 0.00013585109553736053, "loss": 0.0036, "num_input_tokens_seen": 207543696, "step": 96085 }, { "epoch": 15.67536704730832, "grad_norm": 0.03291149064898491, "learning_rate": 0.00013580232248851094, "loss": 0.0121, "num_input_tokens_seen": 207554736, "step": 96090 }, { "epoch": 15.676182707993474, "grad_norm": 0.011057699099183083, "learning_rate": 0.00013575355682058932, "loss": 0.0088, "num_input_tokens_seen": 207565808, "step": 96095 }, { "epoch": 15.67699836867863, "grad_norm": 0.01052065659314394, "learning_rate": 0.0001357047985345839, "loss": 0.0029, "num_input_tokens_seen": 207575120, "step": 96100 }, { "epoch": 15.677814029363784, "grad_norm": 0.0016519392374902964, "learning_rate": 0.00013565604763148294, "loss": 0.0057, "num_input_tokens_seen": 207586480, "step": 96105 }, { "epoch": 15.67862969004894, "grad_norm": 0.0008954678778536618, "learning_rate": 0.00013560730411227417, "loss": 0.0528, "num_input_tokens_seen": 207596656, "step": 96110 }, { "epoch": 15.679445350734095, "grad_norm": 0.010876849293708801, "learning_rate": 0.000135558567977946, "loss": 0.052, "num_input_tokens_seen": 207607728, "step": 96115 }, { "epoch": 15.68026101141925, "grad_norm": 0.007376998197287321, "learning_rate": 0.00013550983922948546, "loss": 0.0026, "num_input_tokens_seen": 207617552, "step": 96120 }, { "epoch": 15.681076672104405, "grad_norm": 0.5630950927734375, "learning_rate": 0.00013546111786788073, "loss": 0.0629, "num_input_tokens_seen": 207628080, "step": 96125 }, { "epoch": 15.681892332789559, "grad_norm": 0.005674498621374369, "learning_rate": 0.00013541240389411857, "loss": 0.0199, "num_input_tokens_seen": 207639696, "step": 96130 }, { "epoch": 15.682707993474715, "grad_norm": 0.002528025070205331, "learning_rate": 0.00013536369730918668, "loss": 0.0063, "num_input_tokens_seen": 207649936, "step": 96135 }, { "epoch": 15.68352365415987, "grad_norm": 0.024930205196142197, "learning_rate": 0.00013531499811407212, "loss": 0.0043, "num_input_tokens_seen": 207660400, "step": 96140 }, { "epoch": 15.684339314845024, "grad_norm": 0.0038991905748844147, "learning_rate": 0.00013526630630976172, "loss": 0.0043, "num_input_tokens_seen": 207670608, "step": 96145 }, { "epoch": 15.68515497553018, "grad_norm": 0.00235603260807693, "learning_rate": 0.00013521762189724228, "loss": 0.0058, "num_input_tokens_seen": 207681584, "step": 96150 }, { "epoch": 15.685970636215334, "grad_norm": 0.763839840888977, "learning_rate": 0.00013516894487750053, "loss": 0.0579, "num_input_tokens_seen": 207692368, "step": 96155 }, { "epoch": 15.68678629690049, "grad_norm": 0.003856255440041423, "learning_rate": 0.00013512027525152293, "loss": 0.002, "num_input_tokens_seen": 207703408, "step": 96160 }, { "epoch": 15.687601957585644, "grad_norm": 0.00043420374277047813, "learning_rate": 0.00013507161302029586, "loss": 0.0019, "num_input_tokens_seen": 207714480, "step": 96165 }, { "epoch": 15.6884176182708, "grad_norm": 0.02960328944027424, "learning_rate": 0.00013502295818480548, "loss": 0.0039, "num_input_tokens_seen": 207724656, "step": 96170 }, { "epoch": 15.689233278955955, "grad_norm": 0.0026452350430190563, "learning_rate": 0.00013497431074603784, "loss": 0.0284, "num_input_tokens_seen": 207735984, "step": 96175 }, { "epoch": 15.690048939641109, "grad_norm": 0.0021662067156285048, "learning_rate": 0.00013492567070497885, "loss": 0.0051, "num_input_tokens_seen": 207746864, "step": 96180 }, { "epoch": 15.690864600326265, "grad_norm": 0.013586881570518017, "learning_rate": 0.0001348770380626143, "loss": 0.0079, "num_input_tokens_seen": 207757520, "step": 96185 }, { "epoch": 15.691680261011419, "grad_norm": 0.39000317454338074, "learning_rate": 0.00013482841281992975, "loss": 0.1045, "num_input_tokens_seen": 207769552, "step": 96190 }, { "epoch": 15.692495921696574, "grad_norm": 0.02108220010995865, "learning_rate": 0.00013477979497791064, "loss": 0.0034, "num_input_tokens_seen": 207781168, "step": 96195 }, { "epoch": 15.69331158238173, "grad_norm": 0.001122990041039884, "learning_rate": 0.00013473118453754236, "loss": 0.0019, "num_input_tokens_seen": 207792368, "step": 96200 }, { "epoch": 15.694127243066884, "grad_norm": 0.004716485273092985, "learning_rate": 0.00013468258149981, "loss": 0.0148, "num_input_tokens_seen": 207802896, "step": 96205 }, { "epoch": 15.69494290375204, "grad_norm": 0.0714481994509697, "learning_rate": 0.00013463398586569854, "loss": 0.0023, "num_input_tokens_seen": 207813552, "step": 96210 }, { "epoch": 15.695758564437194, "grad_norm": 0.002846804680302739, "learning_rate": 0.00013458539763619272, "loss": 0.0113, "num_input_tokens_seen": 207822768, "step": 96215 }, { "epoch": 15.69657422512235, "grad_norm": 0.05376075953245163, "learning_rate": 0.00013453681681227763, "loss": 0.0125, "num_input_tokens_seen": 207833008, "step": 96220 }, { "epoch": 15.697389885807503, "grad_norm": 0.004778926260769367, "learning_rate": 0.0001344882433949373, "loss": 0.0266, "num_input_tokens_seen": 207843920, "step": 96225 }, { "epoch": 15.698205546492659, "grad_norm": 0.005003618076443672, "learning_rate": 0.00013443967738515673, "loss": 0.0039, "num_input_tokens_seen": 207853840, "step": 96230 }, { "epoch": 15.699021207177815, "grad_norm": 0.0037451880052685738, "learning_rate": 0.00013439111878391953, "loss": 0.0016, "num_input_tokens_seen": 207865104, "step": 96235 }, { "epoch": 15.699836867862969, "grad_norm": 0.5726847648620605, "learning_rate": 0.00013434256759221037, "loss": 0.066, "num_input_tokens_seen": 207874224, "step": 96240 }, { "epoch": 15.700652528548124, "grad_norm": 0.03150755539536476, "learning_rate": 0.00013429402381101268, "loss": 0.0015, "num_input_tokens_seen": 207884752, "step": 96245 }, { "epoch": 15.701468189233278, "grad_norm": 0.0010075062746182084, "learning_rate": 0.00013424548744131088, "loss": 0.001, "num_input_tokens_seen": 207894448, "step": 96250 }, { "epoch": 15.702283849918434, "grad_norm": 0.782049298286438, "learning_rate": 0.00013419695848408792, "loss": 0.0916, "num_input_tokens_seen": 207906608, "step": 96255 }, { "epoch": 15.70309951060359, "grad_norm": 0.026163609698414803, "learning_rate": 0.00013414843694032792, "loss": 0.0035, "num_input_tokens_seen": 207918608, "step": 96260 }, { "epoch": 15.703915171288743, "grad_norm": 0.004696679767221212, "learning_rate": 0.00013409992281101368, "loss": 0.0031, "num_input_tokens_seen": 207930064, "step": 96265 }, { "epoch": 15.7047308319739, "grad_norm": 0.0025974763557314873, "learning_rate": 0.000134051416097129, "loss": 0.0085, "num_input_tokens_seen": 207942160, "step": 96270 }, { "epoch": 15.705546492659053, "grad_norm": 0.039734333753585815, "learning_rate": 0.00013400291679965633, "loss": 0.0052, "num_input_tokens_seen": 207951312, "step": 96275 }, { "epoch": 15.706362153344209, "grad_norm": 0.057665541768074036, "learning_rate": 0.000133954424919579, "loss": 0.083, "num_input_tokens_seen": 207962768, "step": 96280 }, { "epoch": 15.707177814029365, "grad_norm": 0.23219189047813416, "learning_rate": 0.00013390594045787957, "loss": 0.0067, "num_input_tokens_seen": 207973936, "step": 96285 }, { "epoch": 15.707993474714518, "grad_norm": 0.022502528503537178, "learning_rate": 0.00013385746341554067, "loss": 0.0067, "num_input_tokens_seen": 207984688, "step": 96290 }, { "epoch": 15.708809135399674, "grad_norm": 0.1352599561214447, "learning_rate": 0.0001338089937935448, "loss": 0.0849, "num_input_tokens_seen": 207996208, "step": 96295 }, { "epoch": 15.709624796084828, "grad_norm": 0.19445081055164337, "learning_rate": 0.0001337605315928742, "loss": 0.0096, "num_input_tokens_seen": 208007568, "step": 96300 }, { "epoch": 15.710440456769984, "grad_norm": 0.4363537132740021, "learning_rate": 0.00013371207681451102, "loss": 0.0164, "num_input_tokens_seen": 208016912, "step": 96305 }, { "epoch": 15.71125611745514, "grad_norm": 0.001682588830590248, "learning_rate": 0.00013366362945943733, "loss": 0.0035, "num_input_tokens_seen": 208026960, "step": 96310 }, { "epoch": 15.712071778140293, "grad_norm": 0.001164038316346705, "learning_rate": 0.00013361518952863488, "loss": 0.0029, "num_input_tokens_seen": 208037104, "step": 96315 }, { "epoch": 15.71288743882545, "grad_norm": 0.0353374183177948, "learning_rate": 0.00013356675702308541, "loss": 0.088, "num_input_tokens_seen": 208047632, "step": 96320 }, { "epoch": 15.713703099510603, "grad_norm": 0.08301739394664764, "learning_rate": 0.00013351833194377044, "loss": 0.0057, "num_input_tokens_seen": 208057744, "step": 96325 }, { "epoch": 15.714518760195759, "grad_norm": 0.004812445491552353, "learning_rate": 0.00013346991429167128, "loss": 0.0105, "num_input_tokens_seen": 208067632, "step": 96330 }, { "epoch": 15.715334420880914, "grad_norm": 0.0030532728414982557, "learning_rate": 0.00013342150406776953, "loss": 0.0054, "num_input_tokens_seen": 208078192, "step": 96335 }, { "epoch": 15.716150081566068, "grad_norm": 0.00913258083164692, "learning_rate": 0.00013337310127304575, "loss": 0.0039, "num_input_tokens_seen": 208088912, "step": 96340 }, { "epoch": 15.716965742251224, "grad_norm": 0.0003174035809934139, "learning_rate": 0.0001333247059084815, "loss": 0.0012, "num_input_tokens_seen": 208100464, "step": 96345 }, { "epoch": 15.717781402936378, "grad_norm": 0.07068848609924316, "learning_rate": 0.00013327631797505697, "loss": 0.0048, "num_input_tokens_seen": 208110704, "step": 96350 }, { "epoch": 15.718597063621534, "grad_norm": 0.0047727604396641254, "learning_rate": 0.00013322793747375333, "loss": 0.0019, "num_input_tokens_seen": 208122608, "step": 96355 }, { "epoch": 15.719412724306688, "grad_norm": 0.0011388602433726192, "learning_rate": 0.00013317956440555051, "loss": 0.0012, "num_input_tokens_seen": 208133648, "step": 96360 }, { "epoch": 15.720228384991843, "grad_norm": 0.00047913682647049427, "learning_rate": 0.00013313119877142947, "loss": 0.003, "num_input_tokens_seen": 208144752, "step": 96365 }, { "epoch": 15.721044045676999, "grad_norm": 0.0011415015906095505, "learning_rate": 0.00013308284057236984, "loss": 0.0037, "num_input_tokens_seen": 208154384, "step": 96370 }, { "epoch": 15.721859706362153, "grad_norm": 0.2590673565864563, "learning_rate": 0.00013303448980935218, "loss": 0.0078, "num_input_tokens_seen": 208165296, "step": 96375 }, { "epoch": 15.722675367047309, "grad_norm": 0.025836044922471046, "learning_rate": 0.00013298614648335583, "loss": 0.0028, "num_input_tokens_seen": 208176112, "step": 96380 }, { "epoch": 15.723491027732463, "grad_norm": 0.021637318655848503, "learning_rate": 0.0001329378105953611, "loss": 0.0036, "num_input_tokens_seen": 208185872, "step": 96385 }, { "epoch": 15.724306688417618, "grad_norm": 0.019314365461468697, "learning_rate": 0.00013288948214634698, "loss": 0.004, "num_input_tokens_seen": 208196976, "step": 96390 }, { "epoch": 15.725122349102774, "grad_norm": 0.007318970747292042, "learning_rate": 0.00013284116113729356, "loss": 0.0037, "num_input_tokens_seen": 208208336, "step": 96395 }, { "epoch": 15.725938009787928, "grad_norm": 0.03209533914923668, "learning_rate": 0.00013279284756917943, "loss": 0.0022, "num_input_tokens_seen": 208219856, "step": 96400 }, { "epoch": 15.726753670473084, "grad_norm": 0.0057929218746721745, "learning_rate": 0.00013274454144298438, "loss": 0.003, "num_input_tokens_seen": 208229808, "step": 96405 }, { "epoch": 15.727569331158238, "grad_norm": 0.03650674223899841, "learning_rate": 0.00013269624275968683, "loss": 0.0682, "num_input_tokens_seen": 208239568, "step": 96410 }, { "epoch": 15.728384991843393, "grad_norm": 0.0009229037095792592, "learning_rate": 0.00013264795152026615, "loss": 0.0198, "num_input_tokens_seen": 208250448, "step": 96415 }, { "epoch": 15.729200652528547, "grad_norm": 0.1574191153049469, "learning_rate": 0.00013259966772570048, "loss": 0.0061, "num_input_tokens_seen": 208261840, "step": 96420 }, { "epoch": 15.730016313213703, "grad_norm": 0.026466218754649162, "learning_rate": 0.00013255139137696874, "loss": 0.0015, "num_input_tokens_seen": 208272752, "step": 96425 }, { "epoch": 15.730831973898859, "grad_norm": 0.0026206725742667913, "learning_rate": 0.0001325031224750492, "loss": 0.0023, "num_input_tokens_seen": 208282800, "step": 96430 }, { "epoch": 15.731647634584013, "grad_norm": 0.4331301748752594, "learning_rate": 0.0001324548610209201, "loss": 0.0106, "num_input_tokens_seen": 208293072, "step": 96435 }, { "epoch": 15.732463295269168, "grad_norm": 0.09264283627271652, "learning_rate": 0.00013240660701555951, "loss": 0.0031, "num_input_tokens_seen": 208304560, "step": 96440 }, { "epoch": 15.733278955954322, "grad_norm": 0.0013159823138266802, "learning_rate": 0.00013235836045994532, "loss": 0.0085, "num_input_tokens_seen": 208316272, "step": 96445 }, { "epoch": 15.734094616639478, "grad_norm": 0.012794621288776398, "learning_rate": 0.00013231012135505538, "loss": 0.004, "num_input_tokens_seen": 208327184, "step": 96450 }, { "epoch": 15.734910277324634, "grad_norm": 0.00966342631727457, "learning_rate": 0.00013226188970186725, "loss": 0.0041, "num_input_tokens_seen": 208338736, "step": 96455 }, { "epoch": 15.735725938009788, "grad_norm": 0.002653430448845029, "learning_rate": 0.0001322136655013585, "loss": 0.0048, "num_input_tokens_seen": 208348272, "step": 96460 }, { "epoch": 15.736541598694943, "grad_norm": 0.13813112676143646, "learning_rate": 0.00013216544875450633, "loss": 0.0456, "num_input_tokens_seen": 208358672, "step": 96465 }, { "epoch": 15.737357259380097, "grad_norm": 0.005092136561870575, "learning_rate": 0.00013211723946228798, "loss": 0.0037, "num_input_tokens_seen": 208369264, "step": 96470 }, { "epoch": 15.738172920065253, "grad_norm": 0.008314227685332298, "learning_rate": 0.00013206903762568028, "loss": 0.0011, "num_input_tokens_seen": 208380048, "step": 96475 }, { "epoch": 15.738988580750409, "grad_norm": 0.013196723535656929, "learning_rate": 0.00013202084324566066, "loss": 0.0043, "num_input_tokens_seen": 208391024, "step": 96480 }, { "epoch": 15.739804241435563, "grad_norm": 0.211594820022583, "learning_rate": 0.0001319726563232051, "loss": 0.0277, "num_input_tokens_seen": 208401776, "step": 96485 }, { "epoch": 15.740619902120718, "grad_norm": 0.005501213483512402, "learning_rate": 0.00013192447685929088, "loss": 0.0018, "num_input_tokens_seen": 208413776, "step": 96490 }, { "epoch": 15.741435562805872, "grad_norm": 0.0033411455806344748, "learning_rate": 0.00013187630485489378, "loss": 0.0014, "num_input_tokens_seen": 208424112, "step": 96495 }, { "epoch": 15.742251223491028, "grad_norm": 0.000817911874037236, "learning_rate": 0.0001318281403109906, "loss": 0.0006, "num_input_tokens_seen": 208435152, "step": 96500 }, { "epoch": 15.743066884176184, "grad_norm": 0.00817803479731083, "learning_rate": 0.00013177998322855695, "loss": 0.0022, "num_input_tokens_seen": 208445552, "step": 96505 }, { "epoch": 15.743882544861338, "grad_norm": 0.02054639346897602, "learning_rate": 0.00013173183360856938, "loss": 0.0016, "num_input_tokens_seen": 208455536, "step": 96510 }, { "epoch": 15.744698205546493, "grad_norm": 0.0038372152484953403, "learning_rate": 0.00013168369145200303, "loss": 0.0017, "num_input_tokens_seen": 208466224, "step": 96515 }, { "epoch": 15.745513866231647, "grad_norm": 0.040325090289115906, "learning_rate": 0.0001316355567598343, "loss": 0.0022, "num_input_tokens_seen": 208476752, "step": 96520 }, { "epoch": 15.746329526916803, "grad_norm": 0.0026046517305076122, "learning_rate": 0.00013158742953303792, "loss": 0.0162, "num_input_tokens_seen": 208487280, "step": 96525 }, { "epoch": 15.747145187601957, "grad_norm": 0.0007771203527227044, "learning_rate": 0.00013153930977258987, "loss": 0.0007, "num_input_tokens_seen": 208497776, "step": 96530 }, { "epoch": 15.747960848287113, "grad_norm": 0.00024037643743213266, "learning_rate": 0.0001314911974794651, "loss": 0.002, "num_input_tokens_seen": 208509168, "step": 96535 }, { "epoch": 15.748776508972268, "grad_norm": 0.2253538817167282, "learning_rate": 0.00013144309265463873, "loss": 0.0125, "num_input_tokens_seen": 208519856, "step": 96540 }, { "epoch": 15.749592169657422, "grad_norm": 0.0013409418752416968, "learning_rate": 0.00013139499529908562, "loss": 0.0541, "num_input_tokens_seen": 208530704, "step": 96545 }, { "epoch": 15.750407830342578, "grad_norm": 0.00018763738626148552, "learning_rate": 0.00013134690541378053, "loss": 0.0009, "num_input_tokens_seen": 208541968, "step": 96550 }, { "epoch": 15.751223491027732, "grad_norm": 0.0010253424989059567, "learning_rate": 0.00013129882299969803, "loss": 0.0012, "num_input_tokens_seen": 208553648, "step": 96555 }, { "epoch": 15.752039151712887, "grad_norm": 0.0036719876807183027, "learning_rate": 0.00013125074805781268, "loss": 0.0015, "num_input_tokens_seen": 208564560, "step": 96560 }, { "epoch": 15.752854812398043, "grad_norm": 0.001249134773388505, "learning_rate": 0.0001312026805890987, "loss": 0.0026, "num_input_tokens_seen": 208574832, "step": 96565 }, { "epoch": 15.753670473083197, "grad_norm": 0.005163257010281086, "learning_rate": 0.00013115462059453022, "loss": 0.0013, "num_input_tokens_seen": 208584720, "step": 96570 }, { "epoch": 15.754486133768353, "grad_norm": 0.0015362701378762722, "learning_rate": 0.00013110656807508125, "loss": 0.0016, "num_input_tokens_seen": 208595696, "step": 96575 }, { "epoch": 15.755301794453507, "grad_norm": 0.07348399609327316, "learning_rate": 0.0001310585230317257, "loss": 0.0024, "num_input_tokens_seen": 208606352, "step": 96580 }, { "epoch": 15.756117455138662, "grad_norm": 0.0003318371600471437, "learning_rate": 0.0001310104854654372, "loss": 0.0034, "num_input_tokens_seen": 208615664, "step": 96585 }, { "epoch": 15.756933115823816, "grad_norm": 0.01745942048728466, "learning_rate": 0.0001309624553771893, "loss": 0.0049, "num_input_tokens_seen": 208626960, "step": 96590 }, { "epoch": 15.757748776508972, "grad_norm": 0.010197905823588371, "learning_rate": 0.00013091443276795544, "loss": 0.0022, "num_input_tokens_seen": 208637232, "step": 96595 }, { "epoch": 15.758564437194128, "grad_norm": 0.0012122580083087087, "learning_rate": 0.00013086641763870876, "loss": 0.0067, "num_input_tokens_seen": 208648048, "step": 96600 }, { "epoch": 15.759380097879282, "grad_norm": 0.0027509070932865143, "learning_rate": 0.00013081840999042244, "loss": 0.0024, "num_input_tokens_seen": 208658960, "step": 96605 }, { "epoch": 15.760195758564437, "grad_norm": 0.002174974186345935, "learning_rate": 0.0001307704098240694, "loss": 0.015, "num_input_tokens_seen": 208670064, "step": 96610 }, { "epoch": 15.761011419249591, "grad_norm": 0.0011658243602141738, "learning_rate": 0.0001307224171406224, "loss": 0.0085, "num_input_tokens_seen": 208681904, "step": 96615 }, { "epoch": 15.761827079934747, "grad_norm": 0.0030715486500412226, "learning_rate": 0.0001306744319410539, "loss": 0.014, "num_input_tokens_seen": 208693168, "step": 96620 }, { "epoch": 15.762642740619903, "grad_norm": 0.11549370735883713, "learning_rate": 0.00013062645422633683, "loss": 0.0028, "num_input_tokens_seen": 208703344, "step": 96625 }, { "epoch": 15.763458401305057, "grad_norm": 0.0026244190521538258, "learning_rate": 0.000130578483997443, "loss": 0.0006, "num_input_tokens_seen": 208712176, "step": 96630 }, { "epoch": 15.764274061990212, "grad_norm": 0.0077323331497609615, "learning_rate": 0.00013053052125534497, "loss": 0.0013, "num_input_tokens_seen": 208723344, "step": 96635 }, { "epoch": 15.765089722675366, "grad_norm": 0.0006282702088356018, "learning_rate": 0.00013048256600101465, "loss": 0.002, "num_input_tokens_seen": 208732624, "step": 96640 }, { "epoch": 15.765905383360522, "grad_norm": 0.0021541621536016464, "learning_rate": 0.00013043461823542387, "loss": 0.0047, "num_input_tokens_seen": 208744432, "step": 96645 }, { "epoch": 15.766721044045678, "grad_norm": 0.0005122669972479343, "learning_rate": 0.0001303866779595444, "loss": 0.0022, "num_input_tokens_seen": 208754864, "step": 96650 }, { "epoch": 15.767536704730832, "grad_norm": 0.006448698695749044, "learning_rate": 0.0001303387451743478, "loss": 0.0038, "num_input_tokens_seen": 208766544, "step": 96655 }, { "epoch": 15.768352365415987, "grad_norm": 0.0061017731204628944, "learning_rate": 0.00013029081988080545, "loss": 0.1066, "num_input_tokens_seen": 208777392, "step": 96660 }, { "epoch": 15.769168026101141, "grad_norm": 0.06350026279687881, "learning_rate": 0.00013024290207988866, "loss": 0.0035, "num_input_tokens_seen": 208787408, "step": 96665 }, { "epoch": 15.769983686786297, "grad_norm": 0.012828153558075428, "learning_rate": 0.00013019499177256848, "loss": 0.1233, "num_input_tokens_seen": 208797040, "step": 96670 }, { "epoch": 15.770799347471453, "grad_norm": 0.005641296040266752, "learning_rate": 0.00013014708895981597, "loss": 0.0057, "num_input_tokens_seen": 208809648, "step": 96675 }, { "epoch": 15.771615008156607, "grad_norm": 0.0009565745131112635, "learning_rate": 0.00013009919364260193, "loss": 0.0085, "num_input_tokens_seen": 208820592, "step": 96680 }, { "epoch": 15.772430668841762, "grad_norm": 0.0075340899638831615, "learning_rate": 0.0001300513058218969, "loss": 0.0027, "num_input_tokens_seen": 208830672, "step": 96685 }, { "epoch": 15.773246329526916, "grad_norm": 0.005056384485214949, "learning_rate": 0.0001300034254986715, "loss": 0.0035, "num_input_tokens_seen": 208841552, "step": 96690 }, { "epoch": 15.774061990212072, "grad_norm": 0.01369437761604786, "learning_rate": 0.00012995555267389608, "loss": 0.0042, "num_input_tokens_seen": 208852208, "step": 96695 }, { "epoch": 15.774877650897226, "grad_norm": 3.37766170501709, "learning_rate": 0.0001299076873485408, "loss": 0.1299, "num_input_tokens_seen": 208862672, "step": 96700 }, { "epoch": 15.775693311582382, "grad_norm": 0.0019586030393838882, "learning_rate": 0.00012985982952357577, "loss": 0.0007, "num_input_tokens_seen": 208873776, "step": 96705 }, { "epoch": 15.776508972267537, "grad_norm": 0.00436317827552557, "learning_rate": 0.00012981197919997078, "loss": 0.0058, "num_input_tokens_seen": 208884368, "step": 96710 }, { "epoch": 15.777324632952691, "grad_norm": 0.0014498537639155984, "learning_rate": 0.00012976413637869573, "loss": 0.0038, "num_input_tokens_seen": 208894512, "step": 96715 }, { "epoch": 15.778140293637847, "grad_norm": 0.018102500587701797, "learning_rate": 0.00012971630106072007, "loss": 0.0027, "num_input_tokens_seen": 208905488, "step": 96720 }, { "epoch": 15.778955954323001, "grad_norm": 0.0548224151134491, "learning_rate": 0.00012966847324701337, "loss": 0.0031, "num_input_tokens_seen": 208915760, "step": 96725 }, { "epoch": 15.779771615008157, "grad_norm": 0.003279587486758828, "learning_rate": 0.0001296206529385448, "loss": 0.0067, "num_input_tokens_seen": 208925808, "step": 96730 }, { "epoch": 15.780587275693312, "grad_norm": 0.009405073709785938, "learning_rate": 0.00012957284013628357, "loss": 0.0014, "num_input_tokens_seen": 208935024, "step": 96735 }, { "epoch": 15.781402936378466, "grad_norm": 0.028064055368304253, "learning_rate": 0.00012952503484119866, "loss": 0.0022, "num_input_tokens_seen": 208943728, "step": 96740 }, { "epoch": 15.782218597063622, "grad_norm": 0.00217696325853467, "learning_rate": 0.0001294772370542589, "loss": 0.0102, "num_input_tokens_seen": 208954096, "step": 96745 }, { "epoch": 15.783034257748776, "grad_norm": 0.0025390072260051966, "learning_rate": 0.00012942944677643282, "loss": 0.001, "num_input_tokens_seen": 208965136, "step": 96750 }, { "epoch": 15.783849918433932, "grad_norm": 0.15127967298030853, "learning_rate": 0.0001293816640086894, "loss": 0.0625, "num_input_tokens_seen": 208976112, "step": 96755 }, { "epoch": 15.784665579119086, "grad_norm": 0.0010695239761844277, "learning_rate": 0.00012933388875199643, "loss": 0.0013, "num_input_tokens_seen": 208986928, "step": 96760 }, { "epoch": 15.785481239804241, "grad_norm": 0.00038432751898653805, "learning_rate": 0.00012928612100732257, "loss": 0.0007, "num_input_tokens_seen": 208998512, "step": 96765 }, { "epoch": 15.786296900489397, "grad_norm": 0.005832038354128599, "learning_rate": 0.00012923836077563576, "loss": 0.0014, "num_input_tokens_seen": 209008208, "step": 96770 }, { "epoch": 15.78711256117455, "grad_norm": 0.00981160532683134, "learning_rate": 0.0001291906080579039, "loss": 0.0009, "num_input_tokens_seen": 209018480, "step": 96775 }, { "epoch": 15.787928221859707, "grad_norm": 0.012114566750824451, "learning_rate": 0.0001291428628550948, "loss": 0.0192, "num_input_tokens_seen": 209029072, "step": 96780 }, { "epoch": 15.78874388254486, "grad_norm": 0.0163866113871336, "learning_rate": 0.000129095125168176, "loss": 0.0256, "num_input_tokens_seen": 209040304, "step": 96785 }, { "epoch": 15.789559543230016, "grad_norm": 0.0029381830245256424, "learning_rate": 0.00012904739499811508, "loss": 0.0048, "num_input_tokens_seen": 209051152, "step": 96790 }, { "epoch": 15.790375203915172, "grad_norm": 0.006611849181354046, "learning_rate": 0.00012899967234587922, "loss": 0.0292, "num_input_tokens_seen": 209062000, "step": 96795 }, { "epoch": 15.791190864600326, "grad_norm": 0.00876909215003252, "learning_rate": 0.00012895195721243568, "loss": 0.0016, "num_input_tokens_seen": 209072304, "step": 96800 }, { "epoch": 15.792006525285482, "grad_norm": 0.0037997523322701454, "learning_rate": 0.00012890424959875147, "loss": 0.0009, "num_input_tokens_seen": 209083888, "step": 96805 }, { "epoch": 15.792822185970635, "grad_norm": 0.005078902002424002, "learning_rate": 0.0001288565495057934, "loss": 0.001, "num_input_tokens_seen": 209095888, "step": 96810 }, { "epoch": 15.793637846655791, "grad_norm": 0.0032598378602415323, "learning_rate": 0.00012880885693452814, "loss": 0.0062, "num_input_tokens_seen": 209106000, "step": 96815 }, { "epoch": 15.794453507340947, "grad_norm": 0.0034630298614501953, "learning_rate": 0.0001287611718859223, "loss": 0.0016, "num_input_tokens_seen": 209115952, "step": 96820 }, { "epoch": 15.7952691680261, "grad_norm": 0.0033087453339248896, "learning_rate": 0.00012871349436094226, "loss": 0.001, "num_input_tokens_seen": 209127248, "step": 96825 }, { "epoch": 15.796084828711257, "grad_norm": 0.001051778206601739, "learning_rate": 0.0001286658243605543, "loss": 0.0026, "num_input_tokens_seen": 209138288, "step": 96830 }, { "epoch": 15.79690048939641, "grad_norm": 0.04102922976016998, "learning_rate": 0.00012861816188572444, "loss": 0.0039, "num_input_tokens_seen": 209149296, "step": 96835 }, { "epoch": 15.797716150081566, "grad_norm": 0.0023798923939466476, "learning_rate": 0.00012857050693741866, "loss": 0.005, "num_input_tokens_seen": 209160496, "step": 96840 }, { "epoch": 15.798531810766722, "grad_norm": 0.009345081634819508, "learning_rate": 0.00012852285951660275, "loss": 0.0018, "num_input_tokens_seen": 209171440, "step": 96845 }, { "epoch": 15.799347471451876, "grad_norm": 0.0020883092656731606, "learning_rate": 0.00012847521962424237, "loss": 0.0015, "num_input_tokens_seen": 209182704, "step": 96850 }, { "epoch": 15.800163132137031, "grad_norm": 0.007022218778729439, "learning_rate": 0.00012842758726130281, "loss": 0.0025, "num_input_tokens_seen": 209194256, "step": 96855 }, { "epoch": 15.800978792822185, "grad_norm": 0.0004571795871015638, "learning_rate": 0.0001283799624287499, "loss": 0.0095, "num_input_tokens_seen": 209205264, "step": 96860 }, { "epoch": 15.801794453507341, "grad_norm": 0.0013870035763829947, "learning_rate": 0.00012833234512754817, "loss": 0.001, "num_input_tokens_seen": 209215952, "step": 96865 }, { "epoch": 15.802610114192497, "grad_norm": 0.002285180613398552, "learning_rate": 0.0001282847353586632, "loss": 0.0024, "num_input_tokens_seen": 209226928, "step": 96870 }, { "epoch": 15.80342577487765, "grad_norm": 0.007267099339514971, "learning_rate": 0.0001282371331230594, "loss": 0.0032, "num_input_tokens_seen": 209238704, "step": 96875 }, { "epoch": 15.804241435562806, "grad_norm": 0.6497460007667542, "learning_rate": 0.00012818953842170193, "loss": 0.1171, "num_input_tokens_seen": 209249808, "step": 96880 }, { "epoch": 15.80505709624796, "grad_norm": 0.00022022541088517755, "learning_rate": 0.0001281419512555549, "loss": 0.0008, "num_input_tokens_seen": 209259952, "step": 96885 }, { "epoch": 15.805872756933116, "grad_norm": 0.0015587671659886837, "learning_rate": 0.00012809437162558324, "loss": 0.0659, "num_input_tokens_seen": 209270576, "step": 96890 }, { "epoch": 15.80668841761827, "grad_norm": 0.02945493347942829, "learning_rate": 0.00012804679953275068, "loss": 0.0022, "num_input_tokens_seen": 209281424, "step": 96895 }, { "epoch": 15.807504078303426, "grad_norm": 0.03355622664093971, "learning_rate": 0.00012799923497802185, "loss": 0.0051, "num_input_tokens_seen": 209291664, "step": 96900 }, { "epoch": 15.808319738988581, "grad_norm": 0.0016738567501306534, "learning_rate": 0.00012795167796236012, "loss": 0.0052, "num_input_tokens_seen": 209303088, "step": 96905 }, { "epoch": 15.809135399673735, "grad_norm": 0.0012268935097381473, "learning_rate": 0.00012790412848672977, "loss": 0.0013, "num_input_tokens_seen": 209313328, "step": 96910 }, { "epoch": 15.809951060358891, "grad_norm": 0.0003424619499128312, "learning_rate": 0.0001278565865520943, "loss": 0.0007, "num_input_tokens_seen": 209324080, "step": 96915 }, { "epoch": 15.810766721044045, "grad_norm": 0.5638118386268616, "learning_rate": 0.00012780905215941724, "loss": 0.0926, "num_input_tokens_seen": 209334992, "step": 96920 }, { "epoch": 15.8115823817292, "grad_norm": 0.000379926961613819, "learning_rate": 0.00012776152530966184, "loss": 0.003, "num_input_tokens_seen": 209345680, "step": 96925 }, { "epoch": 15.812398042414356, "grad_norm": 0.12855461239814758, "learning_rate": 0.0001277140060037914, "loss": 0.0069, "num_input_tokens_seen": 209357072, "step": 96930 }, { "epoch": 15.81321370309951, "grad_norm": 0.001596722868271172, "learning_rate": 0.00012766649424276888, "loss": 0.0004, "num_input_tokens_seen": 209368688, "step": 96935 }, { "epoch": 15.814029363784666, "grad_norm": 0.002013827906921506, "learning_rate": 0.00012761899002755716, "loss": 0.0013, "num_input_tokens_seen": 209380176, "step": 96940 }, { "epoch": 15.81484502446982, "grad_norm": 0.0006711476598866284, "learning_rate": 0.00012757149335911906, "loss": 0.004, "num_input_tokens_seen": 209390000, "step": 96945 }, { "epoch": 15.815660685154976, "grad_norm": 0.00945072341710329, "learning_rate": 0.00012752400423841708, "loss": 0.0013, "num_input_tokens_seen": 209401008, "step": 96950 }, { "epoch": 15.81647634584013, "grad_norm": 0.10607539117336273, "learning_rate": 0.0001274765226664137, "loss": 0.0026, "num_input_tokens_seen": 209411216, "step": 96955 }, { "epoch": 15.817292006525285, "grad_norm": 0.003964398056268692, "learning_rate": 0.00012742904864407095, "loss": 0.0188, "num_input_tokens_seen": 209422384, "step": 96960 }, { "epoch": 15.818107667210441, "grad_norm": 0.10050233453512192, "learning_rate": 0.0001273815821723515, "loss": 0.0032, "num_input_tokens_seen": 209433488, "step": 96965 }, { "epoch": 15.818923327895595, "grad_norm": 0.10801159590482712, "learning_rate": 0.00012733412325221673, "loss": 0.0035, "num_input_tokens_seen": 209444496, "step": 96970 }, { "epoch": 15.81973898858075, "grad_norm": 0.13759921491146088, "learning_rate": 0.00012728667188462893, "loss": 0.0072, "num_input_tokens_seen": 209454960, "step": 96975 }, { "epoch": 15.820554649265905, "grad_norm": 0.002384861698374152, "learning_rate": 0.00012723922807054934, "loss": 0.0039, "num_input_tokens_seen": 209465456, "step": 96980 }, { "epoch": 15.82137030995106, "grad_norm": 0.0007334126275964081, "learning_rate": 0.00012719179181093992, "loss": 0.0009, "num_input_tokens_seen": 209476080, "step": 96985 }, { "epoch": 15.822185970636216, "grad_norm": 0.007079676259309053, "learning_rate": 0.00012714436310676147, "loss": 0.0013, "num_input_tokens_seen": 209487504, "step": 96990 }, { "epoch": 15.82300163132137, "grad_norm": 0.010506193153560162, "learning_rate": 0.00012709694195897587, "loss": 0.0017, "num_input_tokens_seen": 209498704, "step": 96995 }, { "epoch": 15.823817292006526, "grad_norm": 0.007928671315312386, "learning_rate": 0.00012704952836854345, "loss": 0.0025, "num_input_tokens_seen": 209509808, "step": 97000 }, { "epoch": 15.82463295269168, "grad_norm": 0.005038387607783079, "learning_rate": 0.00012700212233642577, "loss": 0.0015, "num_input_tokens_seen": 209518896, "step": 97005 }, { "epoch": 15.825448613376835, "grad_norm": 0.19782593846321106, "learning_rate": 0.00012695472386358293, "loss": 0.0107, "num_input_tokens_seen": 209529392, "step": 97010 }, { "epoch": 15.826264274061991, "grad_norm": 0.013628569431602955, "learning_rate": 0.00012690733295097617, "loss": 0.0024, "num_input_tokens_seen": 209541072, "step": 97015 }, { "epoch": 15.827079934747145, "grad_norm": 0.0022729025222361088, "learning_rate": 0.00012685994959956532, "loss": 0.0037, "num_input_tokens_seen": 209551568, "step": 97020 }, { "epoch": 15.8278955954323, "grad_norm": 0.002973973285406828, "learning_rate": 0.00012681257381031124, "loss": 0.0018, "num_input_tokens_seen": 209562480, "step": 97025 }, { "epoch": 15.828711256117455, "grad_norm": 0.0014633465325459838, "learning_rate": 0.00012676520558417347, "loss": 0.0013, "num_input_tokens_seen": 209572848, "step": 97030 }, { "epoch": 15.82952691680261, "grad_norm": 0.0008202652097679675, "learning_rate": 0.00012671784492211262, "loss": 0.0026, "num_input_tokens_seen": 209584048, "step": 97035 }, { "epoch": 15.830342577487766, "grad_norm": 0.000434060872066766, "learning_rate": 0.00012667049182508788, "loss": 0.0005, "num_input_tokens_seen": 209595088, "step": 97040 }, { "epoch": 15.83115823817292, "grad_norm": 0.0006406829343177378, "learning_rate": 0.00012662314629405936, "loss": 0.0024, "num_input_tokens_seen": 209605200, "step": 97045 }, { "epoch": 15.831973898858076, "grad_norm": 0.003248979104682803, "learning_rate": 0.00012657580832998644, "loss": 0.0011, "num_input_tokens_seen": 209615056, "step": 97050 }, { "epoch": 15.83278955954323, "grad_norm": 0.0008684792555868626, "learning_rate": 0.0001265284779338285, "loss": 0.0022, "num_input_tokens_seen": 209625552, "step": 97055 }, { "epoch": 15.833605220228385, "grad_norm": 0.029019569978117943, "learning_rate": 0.00012648115510654473, "loss": 0.006, "num_input_tokens_seen": 209636336, "step": 97060 }, { "epoch": 15.83442088091354, "grad_norm": 0.0013905549421906471, "learning_rate": 0.00012643383984909423, "loss": 0.0009, "num_input_tokens_seen": 209647056, "step": 97065 }, { "epoch": 15.835236541598695, "grad_norm": 0.0020442737732082605, "learning_rate": 0.0001263865321624358, "loss": 0.0124, "num_input_tokens_seen": 209658512, "step": 97070 }, { "epoch": 15.83605220228385, "grad_norm": 0.0013671980705112219, "learning_rate": 0.0001263392320475283, "loss": 0.0774, "num_input_tokens_seen": 209668400, "step": 97075 }, { "epoch": 15.836867862969005, "grad_norm": 0.000905030348803848, "learning_rate": 0.0001262919395053303, "loss": 0.0014, "num_input_tokens_seen": 209679920, "step": 97080 }, { "epoch": 15.83768352365416, "grad_norm": 0.004797177854925394, "learning_rate": 0.0001262446545368002, "loss": 0.005, "num_input_tokens_seen": 209690512, "step": 97085 }, { "epoch": 15.838499184339314, "grad_norm": 0.0026325734797865152, "learning_rate": 0.0001261973771428963, "loss": 0.0012, "num_input_tokens_seen": 209701872, "step": 97090 }, { "epoch": 15.83931484502447, "grad_norm": 0.0004018640611320734, "learning_rate": 0.00012615010732457677, "loss": 0.0025, "num_input_tokens_seen": 209713680, "step": 97095 }, { "epoch": 15.840130505709626, "grad_norm": 0.01304841972887516, "learning_rate": 0.00012610284508279956, "loss": 0.0012, "num_input_tokens_seen": 209725552, "step": 97100 }, { "epoch": 15.84094616639478, "grad_norm": 0.03244048357009888, "learning_rate": 0.00012605559041852245, "loss": 0.0059, "num_input_tokens_seen": 209735984, "step": 97105 }, { "epoch": 15.841761827079935, "grad_norm": 0.02472323551774025, "learning_rate": 0.0001260083433327034, "loss": 0.0012, "num_input_tokens_seen": 209747312, "step": 97110 }, { "epoch": 15.84257748776509, "grad_norm": 0.002015564125031233, "learning_rate": 0.00012596110382629943, "loss": 0.0029, "num_input_tokens_seen": 209757680, "step": 97115 }, { "epoch": 15.843393148450245, "grad_norm": 0.0009261518134735525, "learning_rate": 0.0001259138719002685, "loss": 0.001, "num_input_tokens_seen": 209769040, "step": 97120 }, { "epoch": 15.844208809135399, "grad_norm": 0.0004853067803196609, "learning_rate": 0.0001258666475555672, "loss": 0.0016, "num_input_tokens_seen": 209780624, "step": 97125 }, { "epoch": 15.845024469820554, "grad_norm": 0.0011482738191261888, "learning_rate": 0.00012581943079315323, "loss": 0.0005, "num_input_tokens_seen": 209791440, "step": 97130 }, { "epoch": 15.84584013050571, "grad_norm": 0.012917263433337212, "learning_rate": 0.00012577222161398288, "loss": 0.0253, "num_input_tokens_seen": 209802800, "step": 97135 }, { "epoch": 15.846655791190864, "grad_norm": 0.012864846736192703, "learning_rate": 0.00012572502001901347, "loss": 0.0018, "num_input_tokens_seen": 209813072, "step": 97140 }, { "epoch": 15.84747145187602, "grad_norm": 0.0005281308549456298, "learning_rate": 0.00012567782600920107, "loss": 0.0015, "num_input_tokens_seen": 209825232, "step": 97145 }, { "epoch": 15.848287112561174, "grad_norm": 0.0009208358242176473, "learning_rate": 0.0001256306395855027, "loss": 0.0013, "num_input_tokens_seen": 209835120, "step": 97150 }, { "epoch": 15.84910277324633, "grad_norm": 0.02001389116048813, "learning_rate": 0.000125583460748874, "loss": 0.0016, "num_input_tokens_seen": 209845200, "step": 97155 }, { "epoch": 15.849918433931485, "grad_norm": 0.010557861998677254, "learning_rate": 0.00012553628950027175, "loss": 0.0028, "num_input_tokens_seen": 209856336, "step": 97160 }, { "epoch": 15.850734094616639, "grad_norm": 0.11188769340515137, "learning_rate": 0.00012548912584065135, "loss": 0.0019, "num_input_tokens_seen": 209868240, "step": 97165 }, { "epoch": 15.851549755301795, "grad_norm": 0.00966912042349577, "learning_rate": 0.00012544196977096905, "loss": 0.0004, "num_input_tokens_seen": 209879120, "step": 97170 }, { "epoch": 15.852365415986949, "grad_norm": 0.12375235557556152, "learning_rate": 0.00012539482129218045, "loss": 0.0029, "num_input_tokens_seen": 209889712, "step": 97175 }, { "epoch": 15.853181076672104, "grad_norm": 0.000904184824321419, "learning_rate": 0.00012534768040524098, "loss": 0.0005, "num_input_tokens_seen": 209900816, "step": 97180 }, { "epoch": 15.85399673735726, "grad_norm": 0.10164017230272293, "learning_rate": 0.000125300547111106, "loss": 0.004, "num_input_tokens_seen": 209910320, "step": 97185 }, { "epoch": 15.854812398042414, "grad_norm": 0.011888348497450352, "learning_rate": 0.00012525342141073083, "loss": 0.0014, "num_input_tokens_seen": 209920944, "step": 97190 }, { "epoch": 15.85562805872757, "grad_norm": 0.008458067663013935, "learning_rate": 0.00012520630330507042, "loss": 0.0013, "num_input_tokens_seen": 209932272, "step": 97195 }, { "epoch": 15.856443719412724, "grad_norm": 0.000870992022100836, "learning_rate": 0.0001251591927950798, "loss": 0.0007, "num_input_tokens_seen": 209942736, "step": 97200 }, { "epoch": 15.85725938009788, "grad_norm": 0.000473090389277786, "learning_rate": 0.00012511208988171362, "loss": 0.0005, "num_input_tokens_seen": 209953072, "step": 97205 }, { "epoch": 15.858075040783035, "grad_norm": 0.0010090031428262591, "learning_rate": 0.0001250649945659265, "loss": 0.0011, "num_input_tokens_seen": 209964112, "step": 97210 }, { "epoch": 15.858890701468189, "grad_norm": 0.015405405312776566, "learning_rate": 0.00012501790684867292, "loss": 0.0016, "num_input_tokens_seen": 209975440, "step": 97215 }, { "epoch": 15.859706362153345, "grad_norm": 0.010843812488019466, "learning_rate": 0.0001249708267309072, "loss": 0.001, "num_input_tokens_seen": 209987088, "step": 97220 }, { "epoch": 15.860522022838499, "grad_norm": 0.007993648760020733, "learning_rate": 0.00012492375421358336, "loss": 0.0026, "num_input_tokens_seen": 209998832, "step": 97225 }, { "epoch": 15.861337683523654, "grad_norm": 0.008369416929781437, "learning_rate": 0.00012487668929765555, "loss": 0.0046, "num_input_tokens_seen": 210009328, "step": 97230 }, { "epoch": 15.86215334420881, "grad_norm": 0.0016901845810934901, "learning_rate": 0.00012482963198407742, "loss": 0.0023, "num_input_tokens_seen": 210020080, "step": 97235 }, { "epoch": 15.862969004893964, "grad_norm": 0.004488547798246145, "learning_rate": 0.00012478258227380262, "loss": 0.0029, "num_input_tokens_seen": 210030448, "step": 97240 }, { "epoch": 15.86378466557912, "grad_norm": 0.003256887663155794, "learning_rate": 0.0001247355401677851, "loss": 0.0256, "num_input_tokens_seen": 210043024, "step": 97245 }, { "epoch": 15.864600326264274, "grad_norm": 0.00019677575619425625, "learning_rate": 0.00012468850566697758, "loss": 0.0015, "num_input_tokens_seen": 210055024, "step": 97250 }, { "epoch": 15.86541598694943, "grad_norm": 0.0008272241684608161, "learning_rate": 0.00012464147877233394, "loss": 0.0012, "num_input_tokens_seen": 210066096, "step": 97255 }, { "epoch": 15.866231647634583, "grad_norm": 0.007103449199348688, "learning_rate": 0.00012459445948480663, "loss": 0.0014, "num_input_tokens_seen": 210076688, "step": 97260 }, { "epoch": 15.867047308319739, "grad_norm": 0.021840078756213188, "learning_rate": 0.0001245474478053491, "loss": 0.0025, "num_input_tokens_seen": 210086256, "step": 97265 }, { "epoch": 15.867862969004895, "grad_norm": 0.003334933193400502, "learning_rate": 0.00012450044373491355, "loss": 0.0006, "num_input_tokens_seen": 210097424, "step": 97270 }, { "epoch": 15.868678629690049, "grad_norm": 0.0055555677972733974, "learning_rate": 0.00012445344727445303, "loss": 0.0016, "num_input_tokens_seen": 210107024, "step": 97275 }, { "epoch": 15.869494290375204, "grad_norm": 0.0009360619587823749, "learning_rate": 0.00012440645842491977, "loss": 0.0005, "num_input_tokens_seen": 210116880, "step": 97280 }, { "epoch": 15.870309951060358, "grad_norm": 0.002635387470945716, "learning_rate": 0.0001243594771872661, "loss": 0.1729, "num_input_tokens_seen": 210127184, "step": 97285 }, { "epoch": 15.871125611745514, "grad_norm": 0.06094209477305412, "learning_rate": 0.00012431250356244422, "loss": 0.0038, "num_input_tokens_seen": 210138224, "step": 97290 }, { "epoch": 15.87194127243067, "grad_norm": 0.0005155637627467513, "learning_rate": 0.000124265537551406, "loss": 0.0034, "num_input_tokens_seen": 210148656, "step": 97295 }, { "epoch": 15.872756933115824, "grad_norm": 0.4359850585460663, "learning_rate": 0.00012421857915510332, "loss": 0.0271, "num_input_tokens_seen": 210160496, "step": 97300 }, { "epoch": 15.87357259380098, "grad_norm": 0.5114566683769226, "learning_rate": 0.00012417162837448787, "loss": 0.0092, "num_input_tokens_seen": 210172400, "step": 97305 }, { "epoch": 15.874388254486133, "grad_norm": 0.0038012703880667686, "learning_rate": 0.0001241246852105111, "loss": 0.0284, "num_input_tokens_seen": 210183984, "step": 97310 }, { "epoch": 15.875203915171289, "grad_norm": 0.013922316022217274, "learning_rate": 0.00012407774966412445, "loss": 0.0075, "num_input_tokens_seen": 210196112, "step": 97315 }, { "epoch": 15.876019575856443, "grad_norm": 0.02212394028902054, "learning_rate": 0.0001240308217362791, "loss": 0.0011, "num_input_tokens_seen": 210207408, "step": 97320 }, { "epoch": 15.876835236541599, "grad_norm": 0.017958035692572594, "learning_rate": 0.0001239839014279261, "loss": 0.0014, "num_input_tokens_seen": 210218832, "step": 97325 }, { "epoch": 15.877650897226754, "grad_norm": 0.004952315706759691, "learning_rate": 0.0001239369887400163, "loss": 0.005, "num_input_tokens_seen": 210230832, "step": 97330 }, { "epoch": 15.878466557911908, "grad_norm": 0.10258053243160248, "learning_rate": 0.0001238900836735005, "loss": 0.0038, "num_input_tokens_seen": 210241168, "step": 97335 }, { "epoch": 15.879282218597064, "grad_norm": 0.010011304169893265, "learning_rate": 0.00012384318622932932, "loss": 0.0034, "num_input_tokens_seen": 210252176, "step": 97340 }, { "epoch": 15.880097879282218, "grad_norm": 0.000613482145126909, "learning_rate": 0.00012379629640845314, "loss": 0.0016, "num_input_tokens_seen": 210262704, "step": 97345 }, { "epoch": 15.880913539967374, "grad_norm": 0.0014483414124697447, "learning_rate": 0.0001237494142118223, "loss": 0.0008, "num_input_tokens_seen": 210273104, "step": 97350 }, { "epoch": 15.88172920065253, "grad_norm": 0.005220226943492889, "learning_rate": 0.00012370253964038685, "loss": 0.0024, "num_input_tokens_seen": 210283728, "step": 97355 }, { "epoch": 15.882544861337683, "grad_norm": 0.0020932487677782774, "learning_rate": 0.0001236556726950968, "loss": 0.0015, "num_input_tokens_seen": 210295440, "step": 97360 }, { "epoch": 15.883360522022839, "grad_norm": 0.03078819066286087, "learning_rate": 0.000123608813376902, "loss": 0.0018, "num_input_tokens_seen": 210307440, "step": 97365 }, { "epoch": 15.884176182707993, "grad_norm": 0.00039980438305065036, "learning_rate": 0.00012356196168675205, "loss": 0.0031, "num_input_tokens_seen": 210317392, "step": 97370 }, { "epoch": 15.884991843393149, "grad_norm": 0.0004841327026952058, "learning_rate": 0.00012351511762559653, "loss": 0.0012, "num_input_tokens_seen": 210327728, "step": 97375 }, { "epoch": 15.885807504078304, "grad_norm": 0.752201497554779, "learning_rate": 0.0001234682811943847, "loss": 0.0498, "num_input_tokens_seen": 210338672, "step": 97380 }, { "epoch": 15.886623164763458, "grad_norm": 0.005839809309691191, "learning_rate": 0.00012342145239406573, "loss": 0.0702, "num_input_tokens_seen": 210350352, "step": 97385 }, { "epoch": 15.887438825448614, "grad_norm": 0.0002493146457709372, "learning_rate": 0.00012337463122558885, "loss": 0.0014, "num_input_tokens_seen": 210361872, "step": 97390 }, { "epoch": 15.888254486133768, "grad_norm": 0.0055696722120046616, "learning_rate": 0.00012332781768990286, "loss": 0.0042, "num_input_tokens_seen": 210373520, "step": 97395 }, { "epoch": 15.889070146818923, "grad_norm": 0.001248400192707777, "learning_rate": 0.00012328101178795648, "loss": 0.0013, "num_input_tokens_seen": 210384912, "step": 97400 }, { "epoch": 15.88988580750408, "grad_norm": 0.058470193296670914, "learning_rate": 0.0001232342135206983, "loss": 0.024, "num_input_tokens_seen": 210394896, "step": 97405 }, { "epoch": 15.890701468189233, "grad_norm": 0.7765676379203796, "learning_rate": 0.0001231874228890768, "loss": 0.0108, "num_input_tokens_seen": 210405136, "step": 97410 }, { "epoch": 15.891517128874389, "grad_norm": 0.0009560906910337508, "learning_rate": 0.00012314063989404012, "loss": 0.0073, "num_input_tokens_seen": 210416144, "step": 97415 }, { "epoch": 15.892332789559543, "grad_norm": 0.0044351788237690926, "learning_rate": 0.00012309386453653647, "loss": 0.0407, "num_input_tokens_seen": 210426960, "step": 97420 }, { "epoch": 15.893148450244698, "grad_norm": 0.00936975609511137, "learning_rate": 0.00012304709681751385, "loss": 0.0128, "num_input_tokens_seen": 210437680, "step": 97425 }, { "epoch": 15.893964110929852, "grad_norm": 0.00683948677033186, "learning_rate": 0.00012300033673792, "loss": 0.0012, "num_input_tokens_seen": 210449200, "step": 97430 }, { "epoch": 15.894779771615008, "grad_norm": 0.00031043830676935613, "learning_rate": 0.00012295358429870252, "loss": 0.0055, "num_input_tokens_seen": 210459824, "step": 97435 }, { "epoch": 15.895595432300164, "grad_norm": 0.002923042280599475, "learning_rate": 0.000122906839500809, "loss": 0.0045, "num_input_tokens_seen": 210469808, "step": 97440 }, { "epoch": 15.896411092985318, "grad_norm": 0.0020240000449121, "learning_rate": 0.0001228601023451868, "loss": 0.0011, "num_input_tokens_seen": 210479184, "step": 97445 }, { "epoch": 15.897226753670473, "grad_norm": 0.0005645108758471906, "learning_rate": 0.00012281337283278298, "loss": 0.0317, "num_input_tokens_seen": 210489264, "step": 97450 }, { "epoch": 15.898042414355627, "grad_norm": 0.00976780615746975, "learning_rate": 0.0001227666509645447, "loss": 0.0029, "num_input_tokens_seen": 210499280, "step": 97455 }, { "epoch": 15.898858075040783, "grad_norm": 0.004547907970845699, "learning_rate": 0.00012271993674141878, "loss": 0.0153, "num_input_tokens_seen": 210509648, "step": 97460 }, { "epoch": 15.899673735725939, "grad_norm": 0.0006972053670324385, "learning_rate": 0.000122673230164352, "loss": 0.0013, "num_input_tokens_seen": 210520752, "step": 97465 }, { "epoch": 15.900489396411093, "grad_norm": 1.194573998451233, "learning_rate": 0.00012262653123429085, "loss": 0.0174, "num_input_tokens_seen": 210532336, "step": 97470 }, { "epoch": 15.901305057096248, "grad_norm": 0.005130276549607515, "learning_rate": 0.0001225798399521818, "loss": 0.0009, "num_input_tokens_seen": 210543024, "step": 97475 }, { "epoch": 15.902120717781402, "grad_norm": 0.010159132070839405, "learning_rate": 0.00012253315631897106, "loss": 0.0192, "num_input_tokens_seen": 210553488, "step": 97480 }, { "epoch": 15.902936378466558, "grad_norm": 0.01264498382806778, "learning_rate": 0.00012248648033560473, "loss": 0.0027, "num_input_tokens_seen": 210565296, "step": 97485 }, { "epoch": 15.903752039151712, "grad_norm": 0.062267009168863297, "learning_rate": 0.00012243981200302885, "loss": 0.0034, "num_input_tokens_seen": 210575632, "step": 97490 }, { "epoch": 15.904567699836868, "grad_norm": 0.012246742844581604, "learning_rate": 0.00012239315132218898, "loss": 0.0029, "num_input_tokens_seen": 210587184, "step": 97495 }, { "epoch": 15.905383360522023, "grad_norm": 0.0017103358404710889, "learning_rate": 0.00012234649829403116, "loss": 0.0685, "num_input_tokens_seen": 210596912, "step": 97500 }, { "epoch": 15.906199021207177, "grad_norm": 0.0008929313044063747, "learning_rate": 0.0001222998529195004, "loss": 0.0024, "num_input_tokens_seen": 210607376, "step": 97505 }, { "epoch": 15.907014681892333, "grad_norm": 0.015163707546889782, "learning_rate": 0.00012225321519954258, "loss": 0.0017, "num_input_tokens_seen": 210618192, "step": 97510 }, { "epoch": 15.907830342577487, "grad_norm": 0.0005488864844664931, "learning_rate": 0.00012220658513510224, "loss": 0.0007, "num_input_tokens_seen": 210629904, "step": 97515 }, { "epoch": 15.908646003262643, "grad_norm": 0.0006303298287093639, "learning_rate": 0.00012215996272712498, "loss": 0.0023, "num_input_tokens_seen": 210640592, "step": 97520 }, { "epoch": 15.909461663947798, "grad_norm": 0.005725410301238298, "learning_rate": 0.00012211334797655515, "loss": 0.0013, "num_input_tokens_seen": 210651312, "step": 97525 }, { "epoch": 15.910277324632952, "grad_norm": 0.0017852602759376168, "learning_rate": 0.00012206674088433784, "loss": 0.006, "num_input_tokens_seen": 210660880, "step": 97530 }, { "epoch": 15.911092985318108, "grad_norm": 0.046053625643253326, "learning_rate": 0.00012202014145141749, "loss": 0.0023, "num_input_tokens_seen": 210671472, "step": 97535 }, { "epoch": 15.911908646003262, "grad_norm": 0.0011109462939202785, "learning_rate": 0.00012197354967873847, "loss": 0.0006, "num_input_tokens_seen": 210682768, "step": 97540 }, { "epoch": 15.912724306688418, "grad_norm": 0.00119930156506598, "learning_rate": 0.00012192696556724497, "loss": 0.0007, "num_input_tokens_seen": 210693296, "step": 97545 }, { "epoch": 15.913539967373573, "grad_norm": 0.013788484036922455, "learning_rate": 0.00012188038911788119, "loss": 0.0024, "num_input_tokens_seen": 210704080, "step": 97550 }, { "epoch": 15.914355628058727, "grad_norm": 0.034569744020700455, "learning_rate": 0.00012183382033159101, "loss": 0.0018, "num_input_tokens_seen": 210714704, "step": 97555 }, { "epoch": 15.915171288743883, "grad_norm": 0.003412411315366626, "learning_rate": 0.00012178725920931816, "loss": 0.1418, "num_input_tokens_seen": 210725264, "step": 97560 }, { "epoch": 15.915986949429037, "grad_norm": 0.00040752938366495073, "learning_rate": 0.0001217407057520063, "loss": 0.0008, "num_input_tokens_seen": 210736272, "step": 97565 }, { "epoch": 15.916802610114193, "grad_norm": 0.003446828341111541, "learning_rate": 0.0001216941599605989, "loss": 0.0047, "num_input_tokens_seen": 210746320, "step": 97570 }, { "epoch": 15.917618270799348, "grad_norm": 0.02085341326892376, "learning_rate": 0.00012164762183603928, "loss": 0.0009, "num_input_tokens_seen": 210757712, "step": 97575 }, { "epoch": 15.918433931484502, "grad_norm": 0.0007540509686805308, "learning_rate": 0.00012160109137927061, "loss": 0.0015, "num_input_tokens_seen": 210768560, "step": 97580 }, { "epoch": 15.919249592169658, "grad_norm": 0.00683918921276927, "learning_rate": 0.00012155456859123582, "loss": 0.0037, "num_input_tokens_seen": 210779440, "step": 97585 }, { "epoch": 15.920065252854812, "grad_norm": 0.009121835231781006, "learning_rate": 0.00012150805347287774, "loss": 0.0957, "num_input_tokens_seen": 210789616, "step": 97590 }, { "epoch": 15.920880913539968, "grad_norm": 0.007207232527434826, "learning_rate": 0.00012146154602513915, "loss": 0.0037, "num_input_tokens_seen": 210800560, "step": 97595 }, { "epoch": 15.921696574225122, "grad_norm": 0.002390147652477026, "learning_rate": 0.00012141504624896244, "loss": 0.0044, "num_input_tokens_seen": 210809488, "step": 97600 }, { "epoch": 15.922512234910277, "grad_norm": 0.041978128254413605, "learning_rate": 0.0001213685541452903, "loss": 0.0018, "num_input_tokens_seen": 210819696, "step": 97605 }, { "epoch": 15.923327895595433, "grad_norm": 0.010881558991968632, "learning_rate": 0.00012132206971506449, "loss": 0.0018, "num_input_tokens_seen": 210831440, "step": 97610 }, { "epoch": 15.924143556280587, "grad_norm": 0.008309472352266312, "learning_rate": 0.00012127559295922764, "loss": 0.001, "num_input_tokens_seen": 210841680, "step": 97615 }, { "epoch": 15.924959216965743, "grad_norm": 0.00020515847427304834, "learning_rate": 0.00012122912387872098, "loss": 0.0013, "num_input_tokens_seen": 210852528, "step": 97620 }, { "epoch": 15.925774877650896, "grad_norm": 0.003146476112306118, "learning_rate": 0.000121182662474487, "loss": 0.0022, "num_input_tokens_seen": 210863440, "step": 97625 }, { "epoch": 15.926590538336052, "grad_norm": 0.0006061471067368984, "learning_rate": 0.00012113620874746656, "loss": 0.0008, "num_input_tokens_seen": 210874640, "step": 97630 }, { "epoch": 15.927406199021208, "grad_norm": 0.00795169360935688, "learning_rate": 0.00012108976269860183, "loss": 0.0568, "num_input_tokens_seen": 210886064, "step": 97635 }, { "epoch": 15.928221859706362, "grad_norm": 0.01079578511416912, "learning_rate": 0.00012104332432883342, "loss": 0.0019, "num_input_tokens_seen": 210894896, "step": 97640 }, { "epoch": 15.929037520391518, "grad_norm": 0.005672822240740061, "learning_rate": 0.0001209968936391031, "loss": 0.0252, "num_input_tokens_seen": 210905296, "step": 97645 }, { "epoch": 15.929853181076671, "grad_norm": 0.0013776031555607915, "learning_rate": 0.00012095047063035119, "loss": 0.0016, "num_input_tokens_seen": 210916368, "step": 97650 }, { "epoch": 15.930668841761827, "grad_norm": 0.003583358135074377, "learning_rate": 0.00012090405530351916, "loss": 0.0664, "num_input_tokens_seen": 210926032, "step": 97655 }, { "epoch": 15.931484502446983, "grad_norm": 0.20289726555347443, "learning_rate": 0.0001208576476595471, "loss": 0.0104, "num_input_tokens_seen": 210937584, "step": 97660 }, { "epoch": 15.932300163132137, "grad_norm": 0.0075626983307302, "learning_rate": 0.00012081124769937607, "loss": 0.0038, "num_input_tokens_seen": 210950224, "step": 97665 }, { "epoch": 15.933115823817293, "grad_norm": 0.07312007993459702, "learning_rate": 0.00012076485542394583, "loss": 0.0031, "num_input_tokens_seen": 210961104, "step": 97670 }, { "epoch": 15.933931484502446, "grad_norm": 0.04900915548205376, "learning_rate": 0.00012071847083419708, "loss": 0.0321, "num_input_tokens_seen": 210972880, "step": 97675 }, { "epoch": 15.934747145187602, "grad_norm": 0.006645069923251867, "learning_rate": 0.00012067209393106959, "loss": 0.0189, "num_input_tokens_seen": 210984272, "step": 97680 }, { "epoch": 15.935562805872756, "grad_norm": 0.0015154111897572875, "learning_rate": 0.00012062572471550337, "loss": 0.0016, "num_input_tokens_seen": 210994320, "step": 97685 }, { "epoch": 15.936378466557912, "grad_norm": 0.0016459511825814843, "learning_rate": 0.00012057936318843816, "loss": 0.0034, "num_input_tokens_seen": 211005392, "step": 97690 }, { "epoch": 15.937194127243067, "grad_norm": 0.0007665985031053424, "learning_rate": 0.00012053300935081341, "loss": 0.0039, "num_input_tokens_seen": 211016336, "step": 97695 }, { "epoch": 15.938009787928221, "grad_norm": 0.002678055316209793, "learning_rate": 0.00012048666320356865, "loss": 0.0012, "num_input_tokens_seen": 211027312, "step": 97700 }, { "epoch": 15.938825448613377, "grad_norm": 0.06500992923974991, "learning_rate": 0.0001204403247476431, "loss": 0.0033, "num_input_tokens_seen": 211037456, "step": 97705 }, { "epoch": 15.939641109298531, "grad_norm": 0.0016458886675536633, "learning_rate": 0.00012039399398397588, "loss": 0.0013, "num_input_tokens_seen": 211048624, "step": 97710 }, { "epoch": 15.940456769983687, "grad_norm": 0.11121902614831924, "learning_rate": 0.00012034767091350591, "loss": 0.0046, "num_input_tokens_seen": 211059248, "step": 97715 }, { "epoch": 15.941272430668842, "grad_norm": 0.0037498734891414642, "learning_rate": 0.00012030135553717204, "loss": 0.0221, "num_input_tokens_seen": 211069808, "step": 97720 }, { "epoch": 15.942088091353996, "grad_norm": 0.02941116690635681, "learning_rate": 0.00012025504785591273, "loss": 0.0391, "num_input_tokens_seen": 211081264, "step": 97725 }, { "epoch": 15.942903752039152, "grad_norm": 0.036870796233415604, "learning_rate": 0.00012020874787066688, "loss": 0.0061, "num_input_tokens_seen": 211092688, "step": 97730 }, { "epoch": 15.943719412724306, "grad_norm": 0.0026832197327166796, "learning_rate": 0.00012016245558237232, "loss": 0.0012, "num_input_tokens_seen": 211102960, "step": 97735 }, { "epoch": 15.944535073409462, "grad_norm": 0.012120860628783703, "learning_rate": 0.0001201161709919677, "loss": 0.0012, "num_input_tokens_seen": 211114416, "step": 97740 }, { "epoch": 15.945350734094617, "grad_norm": 0.0010019437177106738, "learning_rate": 0.00012006989410039055, "loss": 0.0397, "num_input_tokens_seen": 211125936, "step": 97745 }, { "epoch": 15.946166394779771, "grad_norm": 0.00462432811036706, "learning_rate": 0.00012002362490857921, "loss": 0.0298, "num_input_tokens_seen": 211138608, "step": 97750 }, { "epoch": 15.946982055464927, "grad_norm": 0.001551853958517313, "learning_rate": 0.00011997736341747085, "loss": 0.0032, "num_input_tokens_seen": 211151184, "step": 97755 }, { "epoch": 15.947797716150081, "grad_norm": 0.0018696035258471966, "learning_rate": 0.00011993110962800363, "loss": 0.0035, "num_input_tokens_seen": 211161584, "step": 97760 }, { "epoch": 15.948613376835237, "grad_norm": 0.0018065494950860739, "learning_rate": 0.00011988486354111433, "loss": 0.0109, "num_input_tokens_seen": 211172496, "step": 97765 }, { "epoch": 15.949429037520392, "grad_norm": 0.005451989360153675, "learning_rate": 0.0001198386251577408, "loss": 0.0009, "num_input_tokens_seen": 211183536, "step": 97770 }, { "epoch": 15.950244698205546, "grad_norm": 0.7213369011878967, "learning_rate": 0.00011979239447881945, "loss": 0.0131, "num_input_tokens_seen": 211194480, "step": 97775 }, { "epoch": 15.951060358890702, "grad_norm": 0.0018106530187651515, "learning_rate": 0.00011974617150528788, "loss": 0.0007, "num_input_tokens_seen": 211204944, "step": 97780 }, { "epoch": 15.951876019575856, "grad_norm": 0.002773257438093424, "learning_rate": 0.00011969995623808221, "loss": 0.0163, "num_input_tokens_seen": 211215472, "step": 97785 }, { "epoch": 15.952691680261012, "grad_norm": 0.016532791778445244, "learning_rate": 0.00011965374867813972, "loss": 0.0028, "num_input_tokens_seen": 211227216, "step": 97790 }, { "epoch": 15.953507340946166, "grad_norm": 0.0011013613548129797, "learning_rate": 0.00011960754882639619, "loss": 0.0986, "num_input_tokens_seen": 211238768, "step": 97795 }, { "epoch": 15.954323001631321, "grad_norm": 0.0032066667918115854, "learning_rate": 0.00011956135668378853, "loss": 0.0024, "num_input_tokens_seen": 211249232, "step": 97800 }, { "epoch": 15.955138662316477, "grad_norm": 0.0005308827967382967, "learning_rate": 0.00011951517225125231, "loss": 0.0011, "num_input_tokens_seen": 211259792, "step": 97805 }, { "epoch": 15.955954323001631, "grad_norm": 0.0034774949308484793, "learning_rate": 0.00011946899552972395, "loss": 0.0017, "num_input_tokens_seen": 211270448, "step": 97810 }, { "epoch": 15.956769983686787, "grad_norm": 0.0022737339604645967, "learning_rate": 0.00011942282652013914, "loss": 0.0017, "num_input_tokens_seen": 211280624, "step": 97815 }, { "epoch": 15.95758564437194, "grad_norm": 0.0004652600619010627, "learning_rate": 0.00011937666522343354, "loss": 0.0012, "num_input_tokens_seen": 211291760, "step": 97820 }, { "epoch": 15.958401305057096, "grad_norm": 0.001015088171698153, "learning_rate": 0.0001193305116405427, "loss": 0.0014, "num_input_tokens_seen": 211301392, "step": 97825 }, { "epoch": 15.959216965742252, "grad_norm": 0.002087209140881896, "learning_rate": 0.00011928436577240193, "loss": 0.0121, "num_input_tokens_seen": 211312560, "step": 97830 }, { "epoch": 15.960032626427406, "grad_norm": 0.0011707304511219263, "learning_rate": 0.00011923822761994646, "loss": 0.0021, "num_input_tokens_seen": 211324784, "step": 97835 }, { "epoch": 15.960848287112562, "grad_norm": 0.4978778660297394, "learning_rate": 0.00011919209718411134, "loss": 0.0095, "num_input_tokens_seen": 211335216, "step": 97840 }, { "epoch": 15.961663947797716, "grad_norm": 0.0035809699911624193, "learning_rate": 0.00011914597446583147, "loss": 0.0021, "num_input_tokens_seen": 211346864, "step": 97845 }, { "epoch": 15.962479608482871, "grad_norm": 0.0041794972494244576, "learning_rate": 0.00011909985946604157, "loss": 0.0073, "num_input_tokens_seen": 211358192, "step": 97850 }, { "epoch": 15.963295269168025, "grad_norm": 0.0012313545448705554, "learning_rate": 0.00011905375218567621, "loss": 0.0483, "num_input_tokens_seen": 211369968, "step": 97855 }, { "epoch": 15.964110929853181, "grad_norm": 0.014864086173474789, "learning_rate": 0.00011900765262566988, "loss": 0.0015, "num_input_tokens_seen": 211380304, "step": 97860 }, { "epoch": 15.964926590538337, "grad_norm": 0.0016739999409765005, "learning_rate": 0.00011896156078695675, "loss": 0.0012, "num_input_tokens_seen": 211391888, "step": 97865 }, { "epoch": 15.96574225122349, "grad_norm": 0.004763337317854166, "learning_rate": 0.00011891547667047082, "loss": 0.0037, "num_input_tokens_seen": 211402800, "step": 97870 }, { "epoch": 15.966557911908646, "grad_norm": 0.015505307354032993, "learning_rate": 0.00011886940027714649, "loss": 0.0027, "num_input_tokens_seen": 211413776, "step": 97875 }, { "epoch": 15.9673735725938, "grad_norm": 0.9275649785995483, "learning_rate": 0.00011882333160791697, "loss": 0.1289, "num_input_tokens_seen": 211424816, "step": 97880 }, { "epoch": 15.968189233278956, "grad_norm": 0.006364729721099138, "learning_rate": 0.00011877727066371646, "loss": 0.0037, "num_input_tokens_seen": 211435568, "step": 97885 }, { "epoch": 15.969004893964112, "grad_norm": 0.014689773321151733, "learning_rate": 0.00011873121744547794, "loss": 0.0689, "num_input_tokens_seen": 211446704, "step": 97890 }, { "epoch": 15.969820554649266, "grad_norm": 0.006804941687732935, "learning_rate": 0.00011868517195413525, "loss": 0.0021, "num_input_tokens_seen": 211456304, "step": 97895 }, { "epoch": 15.970636215334421, "grad_norm": 0.0007602769765071571, "learning_rate": 0.00011863913419062095, "loss": 0.0139, "num_input_tokens_seen": 211467824, "step": 97900 }, { "epoch": 15.971451876019575, "grad_norm": 0.0020225904881954193, "learning_rate": 0.00011859310415586871, "loss": 0.0684, "num_input_tokens_seen": 211478672, "step": 97905 }, { "epoch": 15.97226753670473, "grad_norm": 0.0005114731029607356, "learning_rate": 0.00011854708185081076, "loss": 0.001, "num_input_tokens_seen": 211489520, "step": 97910 }, { "epoch": 15.973083197389887, "grad_norm": 0.0024293966125696898, "learning_rate": 0.00011850106727638026, "loss": 0.0015, "num_input_tokens_seen": 211501008, "step": 97915 }, { "epoch": 15.97389885807504, "grad_norm": 0.1450122594833374, "learning_rate": 0.00011845506043350956, "loss": 0.008, "num_input_tokens_seen": 211510928, "step": 97920 }, { "epoch": 15.974714518760196, "grad_norm": 0.014834724366664886, "learning_rate": 0.00011840906132313117, "loss": 0.1368, "num_input_tokens_seen": 211521264, "step": 97925 }, { "epoch": 15.97553017944535, "grad_norm": 0.6983307003974915, "learning_rate": 0.00011836306994617718, "loss": 0.0056, "num_input_tokens_seen": 211532272, "step": 97930 }, { "epoch": 15.976345840130506, "grad_norm": 0.009043251164257526, "learning_rate": 0.00011831708630357968, "loss": 0.0009, "num_input_tokens_seen": 211542992, "step": 97935 }, { "epoch": 15.977161500815662, "grad_norm": 0.008625411428511143, "learning_rate": 0.0001182711103962707, "loss": 0.0021, "num_input_tokens_seen": 211551312, "step": 97940 }, { "epoch": 15.977977161500815, "grad_norm": 0.004919607657939196, "learning_rate": 0.00011822514222518188, "loss": 0.0007, "num_input_tokens_seen": 211562672, "step": 97945 }, { "epoch": 15.978792822185971, "grad_norm": 0.0008576642139814794, "learning_rate": 0.00011817918179124487, "loss": 0.0045, "num_input_tokens_seen": 211574384, "step": 97950 }, { "epoch": 15.979608482871125, "grad_norm": 0.0007568973815068603, "learning_rate": 0.00011813322909539115, "loss": 0.0119, "num_input_tokens_seen": 211585168, "step": 97955 }, { "epoch": 15.98042414355628, "grad_norm": 0.0010796175338327885, "learning_rate": 0.0001180872841385519, "loss": 0.0035, "num_input_tokens_seen": 211594384, "step": 97960 }, { "epoch": 15.981239804241435, "grad_norm": 0.09835665673017502, "learning_rate": 0.00011804134692165841, "loss": 0.0141, "num_input_tokens_seen": 211604496, "step": 97965 }, { "epoch": 15.98205546492659, "grad_norm": 0.03971698135137558, "learning_rate": 0.00011799541744564151, "loss": 0.0019, "num_input_tokens_seen": 211615056, "step": 97970 }, { "epoch": 15.982871125611746, "grad_norm": 0.020225724205374718, "learning_rate": 0.00011794949571143215, "loss": 0.001, "num_input_tokens_seen": 211626832, "step": 97975 }, { "epoch": 15.9836867862969, "grad_norm": 0.0011494300561025739, "learning_rate": 0.00011790358171996086, "loss": 0.0073, "num_input_tokens_seen": 211636752, "step": 97980 }, { "epoch": 15.984502446982056, "grad_norm": 0.00521137984469533, "learning_rate": 0.00011785767547215825, "loss": 0.006, "num_input_tokens_seen": 211648272, "step": 97985 }, { "epoch": 15.98531810766721, "grad_norm": 0.013017826713621616, "learning_rate": 0.00011781177696895462, "loss": 0.0018, "num_input_tokens_seen": 211659344, "step": 97990 }, { "epoch": 15.986133768352365, "grad_norm": 0.024100029841065407, "learning_rate": 0.00011776588621128015, "loss": 0.1615, "num_input_tokens_seen": 211670544, "step": 97995 }, { "epoch": 15.986949429037521, "grad_norm": 0.0009241014486178756, "learning_rate": 0.00011772000320006493, "loss": 0.0008, "num_input_tokens_seen": 211681776, "step": 98000 }, { "epoch": 15.987765089722675, "grad_norm": 0.2770422101020813, "learning_rate": 0.00011767412793623878, "loss": 0.0132, "num_input_tokens_seen": 211691952, "step": 98005 }, { "epoch": 15.98858075040783, "grad_norm": 0.004331182222813368, "learning_rate": 0.00011762826042073144, "loss": 0.001, "num_input_tokens_seen": 211702288, "step": 98010 }, { "epoch": 15.989396411092985, "grad_norm": 0.02343442477285862, "learning_rate": 0.00011758240065447234, "loss": 0.0029, "num_input_tokens_seen": 211712496, "step": 98015 }, { "epoch": 15.99021207177814, "grad_norm": 0.013188188895583153, "learning_rate": 0.00011753654863839114, "loss": 0.0017, "num_input_tokens_seen": 211724688, "step": 98020 }, { "epoch": 15.991027732463294, "grad_norm": 0.001225476386025548, "learning_rate": 0.00011749070437341702, "loss": 0.0013, "num_input_tokens_seen": 211736656, "step": 98025 }, { "epoch": 15.99184339314845, "grad_norm": 0.0011202679015696049, "learning_rate": 0.00011744486786047898, "loss": 0.0015, "num_input_tokens_seen": 211746864, "step": 98030 }, { "epoch": 15.992659053833606, "grad_norm": 0.002501759212464094, "learning_rate": 0.00011739903910050603, "loss": 0.0014, "num_input_tokens_seen": 211758896, "step": 98035 }, { "epoch": 15.99347471451876, "grad_norm": 0.4698786735534668, "learning_rate": 0.00011735321809442689, "loss": 0.0252, "num_input_tokens_seen": 211769968, "step": 98040 }, { "epoch": 15.994290375203915, "grad_norm": 0.466574490070343, "learning_rate": 0.00011730740484317021, "loss": 0.0282, "num_input_tokens_seen": 211782512, "step": 98045 }, { "epoch": 15.99510603588907, "grad_norm": 0.08782917261123657, "learning_rate": 0.00011726159934766445, "loss": 0.0015, "num_input_tokens_seen": 211792528, "step": 98050 }, { "epoch": 15.995921696574225, "grad_norm": 0.0014665591297671199, "learning_rate": 0.00011721580160883794, "loss": 0.0961, "num_input_tokens_seen": 211803696, "step": 98055 }, { "epoch": 15.99673735725938, "grad_norm": 0.005211786832660437, "learning_rate": 0.00011717001162761881, "loss": 0.0024, "num_input_tokens_seen": 211814128, "step": 98060 }, { "epoch": 15.997553017944535, "grad_norm": 0.0027232381980866194, "learning_rate": 0.000117124229404935, "loss": 0.0036, "num_input_tokens_seen": 211825872, "step": 98065 }, { "epoch": 15.99836867862969, "grad_norm": 0.001201624283567071, "learning_rate": 0.00011707845494171443, "loss": 0.0034, "num_input_tokens_seen": 211837392, "step": 98070 }, { "epoch": 15.999184339314844, "grad_norm": 0.0005471862968988717, "learning_rate": 0.00011703268823888475, "loss": 0.0009, "num_input_tokens_seen": 211847184, "step": 98075 }, { "epoch": 16.0, "grad_norm": 0.0004963973187841475, "learning_rate": 0.00011698692929737348, "loss": 0.0007, "num_input_tokens_seen": 211855376, "step": 98080 }, { "epoch": 16.0, "eval_loss": 0.262260377407074, "eval_runtime": 104.0194, "eval_samples_per_second": 26.197, "eval_steps_per_second": 6.556, "num_input_tokens_seen": 211855376, "step": 98080 }, { "epoch": 16.000815660685156, "grad_norm": 0.015522826462984085, "learning_rate": 0.00011694117811810795, "loss": 0.0904, "num_input_tokens_seen": 211865616, "step": 98085 }, { "epoch": 16.00163132137031, "grad_norm": 0.005051231477409601, "learning_rate": 0.00011689543470201536, "loss": 0.001, "num_input_tokens_seen": 211877392, "step": 98090 }, { "epoch": 16.002446982055464, "grad_norm": 0.028706401586532593, "learning_rate": 0.00011684969905002286, "loss": 0.002, "num_input_tokens_seen": 211887856, "step": 98095 }, { "epoch": 16.00326264274062, "grad_norm": 0.015213343314826488, "learning_rate": 0.00011680397116305719, "loss": 0.0018, "num_input_tokens_seen": 211898256, "step": 98100 }, { "epoch": 16.004078303425775, "grad_norm": 1.6256518363952637, "learning_rate": 0.00011675825104204523, "loss": 0.044, "num_input_tokens_seen": 211909136, "step": 98105 }, { "epoch": 16.00489396411093, "grad_norm": 0.0015888881171122193, "learning_rate": 0.00011671253868791343, "loss": 0.0027, "num_input_tokens_seen": 211920208, "step": 98110 }, { "epoch": 16.005709624796086, "grad_norm": 0.0021806336008012295, "learning_rate": 0.00011666683410158829, "loss": 0.0019, "num_input_tokens_seen": 211930512, "step": 98115 }, { "epoch": 16.00652528548124, "grad_norm": 0.24877884984016418, "learning_rate": 0.0001166211372839961, "loss": 0.0057, "num_input_tokens_seen": 211941968, "step": 98120 }, { "epoch": 16.007340946166394, "grad_norm": 0.05665234103798866, "learning_rate": 0.00011657544823606286, "loss": 0.012, "num_input_tokens_seen": 211953168, "step": 98125 }, { "epoch": 16.00815660685155, "grad_norm": 0.007977227680385113, "learning_rate": 0.00011652976695871459, "loss": 0.0072, "num_input_tokens_seen": 211964432, "step": 98130 }, { "epoch": 16.008972267536706, "grad_norm": 0.0041960496455430984, "learning_rate": 0.00011648409345287691, "loss": 0.0011, "num_input_tokens_seen": 211975504, "step": 98135 }, { "epoch": 16.00978792822186, "grad_norm": 0.0007657354581169784, "learning_rate": 0.00011643842771947588, "loss": 0.0075, "num_input_tokens_seen": 211987184, "step": 98140 }, { "epoch": 16.010603588907014, "grad_norm": 0.04990185424685478, "learning_rate": 0.00011639276975943641, "loss": 0.0046, "num_input_tokens_seen": 211998224, "step": 98145 }, { "epoch": 16.01141924959217, "grad_norm": 0.08976588398218155, "learning_rate": 0.00011634711957368438, "loss": 0.0022, "num_input_tokens_seen": 212008944, "step": 98150 }, { "epoch": 16.012234910277325, "grad_norm": 0.012806740589439869, "learning_rate": 0.00011630147716314443, "loss": 0.0014, "num_input_tokens_seen": 212020112, "step": 98155 }, { "epoch": 16.01305057096248, "grad_norm": 0.0011214031837880611, "learning_rate": 0.00011625584252874189, "loss": 0.0007, "num_input_tokens_seen": 212030928, "step": 98160 }, { "epoch": 16.013866231647636, "grad_norm": 0.7840534448623657, "learning_rate": 0.00011621021567140156, "loss": 0.0302, "num_input_tokens_seen": 212043152, "step": 98165 }, { "epoch": 16.01468189233279, "grad_norm": 0.00202510179951787, "learning_rate": 0.00011616459659204803, "loss": 0.0026, "num_input_tokens_seen": 212053936, "step": 98170 }, { "epoch": 16.015497553017944, "grad_norm": 0.00578334229066968, "learning_rate": 0.00011611898529160591, "loss": 0.0014, "num_input_tokens_seen": 212064016, "step": 98175 }, { "epoch": 16.0163132137031, "grad_norm": 0.003464588662609458, "learning_rate": 0.00011607338177099952, "loss": 0.0013, "num_input_tokens_seen": 212075216, "step": 98180 }, { "epoch": 16.017128874388256, "grad_norm": 0.007143693510442972, "learning_rate": 0.00011602778603115311, "loss": 0.01, "num_input_tokens_seen": 212086512, "step": 98185 }, { "epoch": 16.017944535073408, "grad_norm": 0.00191160524263978, "learning_rate": 0.00011598219807299076, "loss": 0.0045, "num_input_tokens_seen": 212097232, "step": 98190 }, { "epoch": 16.018760195758563, "grad_norm": 0.010454155504703522, "learning_rate": 0.00011593661789743626, "loss": 0.0412, "num_input_tokens_seen": 212107696, "step": 98195 }, { "epoch": 16.01957585644372, "grad_norm": 0.005367065314203501, "learning_rate": 0.00011589104550541346, "loss": 0.0029, "num_input_tokens_seen": 212116944, "step": 98200 }, { "epoch": 16.020391517128875, "grad_norm": 0.003508794354274869, "learning_rate": 0.00011584548089784585, "loss": 0.0008, "num_input_tokens_seen": 212126608, "step": 98205 }, { "epoch": 16.02120717781403, "grad_norm": 0.003974012564867735, "learning_rate": 0.00011579992407565698, "loss": 0.001, "num_input_tokens_seen": 212137040, "step": 98210 }, { "epoch": 16.022022838499183, "grad_norm": 0.004293251316994429, "learning_rate": 0.00011575437503976998, "loss": 0.0016, "num_input_tokens_seen": 212147792, "step": 98215 }, { "epoch": 16.02283849918434, "grad_norm": 0.03774509206414223, "learning_rate": 0.00011570883379110803, "loss": 0.0074, "num_input_tokens_seen": 212158032, "step": 98220 }, { "epoch": 16.023654159869494, "grad_norm": 0.04184706136584282, "learning_rate": 0.00011566330033059407, "loss": 0.0055, "num_input_tokens_seen": 212167536, "step": 98225 }, { "epoch": 16.02446982055465, "grad_norm": 0.005745955277234316, "learning_rate": 0.00011561777465915091, "loss": 0.0009, "num_input_tokens_seen": 212177712, "step": 98230 }, { "epoch": 16.025285481239806, "grad_norm": 0.0024114667903631926, "learning_rate": 0.00011557225677770116, "loss": 0.0006, "num_input_tokens_seen": 212188624, "step": 98235 }, { "epoch": 16.026101141924958, "grad_norm": 0.0005438401130959392, "learning_rate": 0.00011552674668716723, "loss": 0.0017, "num_input_tokens_seen": 212199504, "step": 98240 }, { "epoch": 16.026916802610113, "grad_norm": 0.007553048897534609, "learning_rate": 0.00011548124438847174, "loss": 0.0711, "num_input_tokens_seen": 212209552, "step": 98245 }, { "epoch": 16.02773246329527, "grad_norm": 0.010238065384328365, "learning_rate": 0.0001154357498825363, "loss": 0.1005, "num_input_tokens_seen": 212219376, "step": 98250 }, { "epoch": 16.028548123980425, "grad_norm": 0.30445703864097595, "learning_rate": 0.00011539026317028361, "loss": 0.017, "num_input_tokens_seen": 212229968, "step": 98255 }, { "epoch": 16.02936378466558, "grad_norm": 0.0014553911751136184, "learning_rate": 0.00011534478425263484, "loss": 0.0026, "num_input_tokens_seen": 212239792, "step": 98260 }, { "epoch": 16.030179445350733, "grad_norm": 0.009973454289138317, "learning_rate": 0.00011529931313051222, "loss": 0.0037, "num_input_tokens_seen": 212251632, "step": 98265 }, { "epoch": 16.03099510603589, "grad_norm": 0.04578957334160805, "learning_rate": 0.00011525384980483683, "loss": 0.1222, "num_input_tokens_seen": 212262864, "step": 98270 }, { "epoch": 16.031810766721044, "grad_norm": 0.002932838397100568, "learning_rate": 0.00011520839427653052, "loss": 0.0057, "num_input_tokens_seen": 212273328, "step": 98275 }, { "epoch": 16.0326264274062, "grad_norm": 0.001764679211191833, "learning_rate": 0.00011516294654651393, "loss": 0.0034, "num_input_tokens_seen": 212284880, "step": 98280 }, { "epoch": 16.033442088091356, "grad_norm": 0.003940457943826914, "learning_rate": 0.00011511750661570875, "loss": 0.008, "num_input_tokens_seen": 212294800, "step": 98285 }, { "epoch": 16.034257748776508, "grad_norm": 0.00984375923871994, "learning_rate": 0.00011507207448503526, "loss": 0.0023, "num_input_tokens_seen": 212305424, "step": 98290 }, { "epoch": 16.035073409461663, "grad_norm": 0.003804681124165654, "learning_rate": 0.00011502665015541481, "loss": 0.0024, "num_input_tokens_seen": 212316336, "step": 98295 }, { "epoch": 16.03588907014682, "grad_norm": 0.048975877463817596, "learning_rate": 0.0001149812336277673, "loss": 0.0059, "num_input_tokens_seen": 212327216, "step": 98300 }, { "epoch": 16.036704730831975, "grad_norm": 0.0003695717023219913, "learning_rate": 0.00011493582490301374, "loss": 0.0137, "num_input_tokens_seen": 212338576, "step": 98305 }, { "epoch": 16.03752039151713, "grad_norm": 0.03170023486018181, "learning_rate": 0.00011489042398207416, "loss": 0.0088, "num_input_tokens_seen": 212348496, "step": 98310 }, { "epoch": 16.038336052202283, "grad_norm": 0.005363030359148979, "learning_rate": 0.00011484503086586867, "loss": 0.0016, "num_input_tokens_seen": 212360240, "step": 98315 }, { "epoch": 16.03915171288744, "grad_norm": 0.00753002380952239, "learning_rate": 0.00011479964555531725, "loss": 0.0023, "num_input_tokens_seen": 212370256, "step": 98320 }, { "epoch": 16.039967373572594, "grad_norm": 0.017763635143637657, "learning_rate": 0.00011475426805133965, "loss": 0.0016, "num_input_tokens_seen": 212382288, "step": 98325 }, { "epoch": 16.04078303425775, "grad_norm": 0.0341855026781559, "learning_rate": 0.00011470889835485554, "loss": 0.0038, "num_input_tokens_seen": 212393008, "step": 98330 }, { "epoch": 16.041598694942905, "grad_norm": 0.0011914388742297888, "learning_rate": 0.0001146635364667844, "loss": 0.0016, "num_input_tokens_seen": 212404272, "step": 98335 }, { "epoch": 16.042414355628058, "grad_norm": 0.005368915386497974, "learning_rate": 0.0001146181823880455, "loss": 0.0069, "num_input_tokens_seen": 212415888, "step": 98340 }, { "epoch": 16.043230016313213, "grad_norm": 0.0011746870586648583, "learning_rate": 0.00011457283611955804, "loss": 0.1048, "num_input_tokens_seen": 212426320, "step": 98345 }, { "epoch": 16.04404567699837, "grad_norm": 0.0012608602410182357, "learning_rate": 0.00011452749766224102, "loss": 0.0018, "num_input_tokens_seen": 212436880, "step": 98350 }, { "epoch": 16.044861337683525, "grad_norm": 0.20264336466789246, "learning_rate": 0.00011448216701701309, "loss": 0.0085, "num_input_tokens_seen": 212446832, "step": 98355 }, { "epoch": 16.045676998368677, "grad_norm": 0.005520314909517765, "learning_rate": 0.00011443684418479344, "loss": 0.007, "num_input_tokens_seen": 212457424, "step": 98360 }, { "epoch": 16.046492659053833, "grad_norm": 0.0019661204423755407, "learning_rate": 0.00011439152916649992, "loss": 0.0011, "num_input_tokens_seen": 212468848, "step": 98365 }, { "epoch": 16.04730831973899, "grad_norm": 0.004119037184864283, "learning_rate": 0.00011434622196305156, "loss": 0.0018, "num_input_tokens_seen": 212480048, "step": 98370 }, { "epoch": 16.048123980424144, "grad_norm": 0.01271512359380722, "learning_rate": 0.00011430092257536596, "loss": 0.0038, "num_input_tokens_seen": 212490928, "step": 98375 }, { "epoch": 16.0489396411093, "grad_norm": 0.06934128701686859, "learning_rate": 0.00011425563100436175, "loss": 0.0044, "num_input_tokens_seen": 212500880, "step": 98380 }, { "epoch": 16.049755301794452, "grad_norm": 0.06471730768680573, "learning_rate": 0.00011421034725095625, "loss": 0.0029, "num_input_tokens_seen": 212511824, "step": 98385 }, { "epoch": 16.050570962479608, "grad_norm": 0.0072989496402442455, "learning_rate": 0.00011416507131606773, "loss": 0.0069, "num_input_tokens_seen": 212522032, "step": 98390 }, { "epoch": 16.051386623164763, "grad_norm": 0.057368844747543335, "learning_rate": 0.00011411980320061322, "loss": 0.0044, "num_input_tokens_seen": 212532016, "step": 98395 }, { "epoch": 16.05220228384992, "grad_norm": 0.0406746082007885, "learning_rate": 0.00011407454290551073, "loss": 0.041, "num_input_tokens_seen": 212542672, "step": 98400 }, { "epoch": 16.053017944535075, "grad_norm": 0.0061463662423193455, "learning_rate": 0.00011402929043167692, "loss": 0.0019, "num_input_tokens_seen": 212553104, "step": 98405 }, { "epoch": 16.053833605220227, "grad_norm": 0.0020612115040421486, "learning_rate": 0.00011398404578002946, "loss": 0.0008, "num_input_tokens_seen": 212564752, "step": 98410 }, { "epoch": 16.054649265905383, "grad_norm": 0.005618616472929716, "learning_rate": 0.00011393880895148473, "loss": 0.0019, "num_input_tokens_seen": 212575760, "step": 98415 }, { "epoch": 16.05546492659054, "grad_norm": 0.0006880142027512193, "learning_rate": 0.00011389357994696003, "loss": 0.0012, "num_input_tokens_seen": 212587376, "step": 98420 }, { "epoch": 16.056280587275694, "grad_norm": 0.009598582983016968, "learning_rate": 0.00011384835876737154, "loss": 0.0031, "num_input_tokens_seen": 212598896, "step": 98425 }, { "epoch": 16.05709624796085, "grad_norm": 0.008436683565378189, "learning_rate": 0.00011380314541363612, "loss": 0.0015, "num_input_tokens_seen": 212610448, "step": 98430 }, { "epoch": 16.057911908646002, "grad_norm": 0.054017916321754456, "learning_rate": 0.00011375793988666966, "loss": 0.0122, "num_input_tokens_seen": 212622032, "step": 98435 }, { "epoch": 16.058727569331158, "grad_norm": 0.004722116515040398, "learning_rate": 0.0001137127421873888, "loss": 0.0016, "num_input_tokens_seen": 212633904, "step": 98440 }, { "epoch": 16.059543230016313, "grad_norm": 0.05647740885615349, "learning_rate": 0.000113667552316709, "loss": 0.0043, "num_input_tokens_seen": 212645744, "step": 98445 }, { "epoch": 16.06035889070147, "grad_norm": 0.008270112797617912, "learning_rate": 0.00011362237027554645, "loss": 0.0028, "num_input_tokens_seen": 212655600, "step": 98450 }, { "epoch": 16.061174551386625, "grad_norm": 0.005991316866129637, "learning_rate": 0.00011357719606481675, "loss": 0.005, "num_input_tokens_seen": 212667408, "step": 98455 }, { "epoch": 16.061990212071777, "grad_norm": 0.005687070544809103, "learning_rate": 0.00011353202968543535, "loss": 0.0023, "num_input_tokens_seen": 212676304, "step": 98460 }, { "epoch": 16.062805872756933, "grad_norm": 0.0009080614545382559, "learning_rate": 0.00011348687113831768, "loss": 0.0088, "num_input_tokens_seen": 212686864, "step": 98465 }, { "epoch": 16.063621533442088, "grad_norm": 0.007313088979572058, "learning_rate": 0.00011344172042437889, "loss": 0.0016, "num_input_tokens_seen": 212697328, "step": 98470 }, { "epoch": 16.064437194127244, "grad_norm": 0.7464372515678406, "learning_rate": 0.00011339657754453398, "loss": 0.0857, "num_input_tokens_seen": 212708432, "step": 98475 }, { "epoch": 16.0652528548124, "grad_norm": 0.003813547547906637, "learning_rate": 0.00011335144249969793, "loss": 0.0138, "num_input_tokens_seen": 212719216, "step": 98480 }, { "epoch": 16.06606851549755, "grad_norm": 0.0055252122692763805, "learning_rate": 0.00011330631529078533, "loss": 0.0006, "num_input_tokens_seen": 212730320, "step": 98485 }, { "epoch": 16.066884176182707, "grad_norm": 0.038586895912885666, "learning_rate": 0.00011326119591871087, "loss": 0.0049, "num_input_tokens_seen": 212741520, "step": 98490 }, { "epoch": 16.067699836867863, "grad_norm": 0.01220710203051567, "learning_rate": 0.00011321608438438885, "loss": 0.0022, "num_input_tokens_seen": 212752112, "step": 98495 }, { "epoch": 16.06851549755302, "grad_norm": 0.005802713334560394, "learning_rate": 0.00011317098068873339, "loss": 0.0017, "num_input_tokens_seen": 212763280, "step": 98500 }, { "epoch": 16.069331158238175, "grad_norm": 0.019641386345028877, "learning_rate": 0.000113125884832659, "loss": 0.015, "num_input_tokens_seen": 212775120, "step": 98505 }, { "epoch": 16.070146818923327, "grad_norm": 0.0012488446664065123, "learning_rate": 0.00011308079681707911, "loss": 0.0019, "num_input_tokens_seen": 212785904, "step": 98510 }, { "epoch": 16.070962479608482, "grad_norm": 0.3433908224105835, "learning_rate": 0.00011303571664290801, "loss": 0.017, "num_input_tokens_seen": 212797296, "step": 98515 }, { "epoch": 16.071778140293638, "grad_norm": 0.018292676657438278, "learning_rate": 0.0001129906443110587, "loss": 0.0328, "num_input_tokens_seen": 212807312, "step": 98520 }, { "epoch": 16.072593800978794, "grad_norm": 0.00970328226685524, "learning_rate": 0.0001129455798224452, "loss": 0.0027, "num_input_tokens_seen": 212817712, "step": 98525 }, { "epoch": 16.07340946166395, "grad_norm": 0.07746391743421555, "learning_rate": 0.00011290052317798027, "loss": 0.0747, "num_input_tokens_seen": 212829104, "step": 98530 }, { "epoch": 16.0742251223491, "grad_norm": 0.020664365962147713, "learning_rate": 0.00011285547437857763, "loss": 0.0029, "num_input_tokens_seen": 212839856, "step": 98535 }, { "epoch": 16.075040783034257, "grad_norm": 0.000956006464548409, "learning_rate": 0.00011281043342514957, "loss": 0.0046, "num_input_tokens_seen": 212851280, "step": 98540 }, { "epoch": 16.075856443719413, "grad_norm": 0.005796929355710745, "learning_rate": 0.0001127654003186096, "loss": 0.0014, "num_input_tokens_seen": 212861808, "step": 98545 }, { "epoch": 16.07667210440457, "grad_norm": 0.03431297466158867, "learning_rate": 0.00011272037505986976, "loss": 0.0066, "num_input_tokens_seen": 212872496, "step": 98550 }, { "epoch": 16.07748776508972, "grad_norm": 0.011982480064034462, "learning_rate": 0.00011267535764984293, "loss": 0.0029, "num_input_tokens_seen": 212882160, "step": 98555 }, { "epoch": 16.078303425774877, "grad_norm": 0.008244593627750874, "learning_rate": 0.00011263034808944134, "loss": 0.0027, "num_input_tokens_seen": 212892720, "step": 98560 }, { "epoch": 16.079119086460032, "grad_norm": 0.013054000213742256, "learning_rate": 0.00011258534637957718, "loss": 0.0031, "num_input_tokens_seen": 212904208, "step": 98565 }, { "epoch": 16.079934747145188, "grad_norm": 0.0004884781083092093, "learning_rate": 0.0001125403525211624, "loss": 0.0011, "num_input_tokens_seen": 212913232, "step": 98570 }, { "epoch": 16.080750407830344, "grad_norm": 0.0003876937844324857, "learning_rate": 0.00011249536651510894, "loss": 0.0263, "num_input_tokens_seen": 212923280, "step": 98575 }, { "epoch": 16.081566068515496, "grad_norm": 0.052026715129613876, "learning_rate": 0.00011245038836232846, "loss": 0.005, "num_input_tokens_seen": 212933392, "step": 98580 }, { "epoch": 16.08238172920065, "grad_norm": 0.011535944417119026, "learning_rate": 0.0001124054180637325, "loss": 0.0011, "num_input_tokens_seen": 212944080, "step": 98585 }, { "epoch": 16.083197389885807, "grad_norm": 0.006972441915422678, "learning_rate": 0.00011236045562023245, "loss": 0.005, "num_input_tokens_seen": 212954128, "step": 98590 }, { "epoch": 16.084013050570963, "grad_norm": 0.016211438924074173, "learning_rate": 0.00011231550103273952, "loss": 0.0022, "num_input_tokens_seen": 212964784, "step": 98595 }, { "epoch": 16.08482871125612, "grad_norm": 0.004645606502890587, "learning_rate": 0.00011227055430216476, "loss": 0.0059, "num_input_tokens_seen": 212976048, "step": 98600 }, { "epoch": 16.08564437194127, "grad_norm": 0.005268410313874483, "learning_rate": 0.00011222561542941906, "loss": 0.002, "num_input_tokens_seen": 212985712, "step": 98605 }, { "epoch": 16.086460032626427, "grad_norm": 0.009896847419440746, "learning_rate": 0.00011218068441541323, "loss": 0.0014, "num_input_tokens_seen": 212996784, "step": 98610 }, { "epoch": 16.087275693311582, "grad_norm": 0.003218175610527396, "learning_rate": 0.0001121357612610578, "loss": 0.0008, "num_input_tokens_seen": 213007440, "step": 98615 }, { "epoch": 16.088091353996738, "grad_norm": 0.011862734332680702, "learning_rate": 0.0001120908459672632, "loss": 0.0013, "num_input_tokens_seen": 213018704, "step": 98620 }, { "epoch": 16.088907014681894, "grad_norm": 0.10315841436386108, "learning_rate": 0.00011204593853493978, "loss": 0.0052, "num_input_tokens_seen": 213029584, "step": 98625 }, { "epoch": 16.089722675367046, "grad_norm": 0.0024018334224820137, "learning_rate": 0.00011200103896499748, "loss": 0.0012, "num_input_tokens_seen": 213040816, "step": 98630 }, { "epoch": 16.0905383360522, "grad_norm": 0.0007672040374018252, "learning_rate": 0.00011195614725834636, "loss": 0.002, "num_input_tokens_seen": 213050928, "step": 98635 }, { "epoch": 16.091353996737357, "grad_norm": 0.03508285805583, "learning_rate": 0.0001119112634158962, "loss": 0.0061, "num_input_tokens_seen": 213061968, "step": 98640 }, { "epoch": 16.092169657422513, "grad_norm": 0.002200368558987975, "learning_rate": 0.00011186638743855643, "loss": 0.0007, "num_input_tokens_seen": 213073456, "step": 98645 }, { "epoch": 16.09298531810767, "grad_norm": 0.012955213896930218, "learning_rate": 0.00011182151932723706, "loss": 0.0059, "num_input_tokens_seen": 213084112, "step": 98650 }, { "epoch": 16.09380097879282, "grad_norm": 0.003678369102999568, "learning_rate": 0.00011177665908284667, "loss": 0.0025, "num_input_tokens_seen": 213095664, "step": 98655 }, { "epoch": 16.094616639477977, "grad_norm": 0.00983490701764822, "learning_rate": 0.00011173180670629496, "loss": 0.0021, "num_input_tokens_seen": 213106640, "step": 98660 }, { "epoch": 16.095432300163132, "grad_norm": 0.0047003780491650105, "learning_rate": 0.00011168696219849078, "loss": 0.0037, "num_input_tokens_seen": 213117840, "step": 98665 }, { "epoch": 16.096247960848288, "grad_norm": 0.0013329943176358938, "learning_rate": 0.00011164212556034287, "loss": 0.0368, "num_input_tokens_seen": 213129424, "step": 98670 }, { "epoch": 16.097063621533444, "grad_norm": 0.004717966075986624, "learning_rate": 0.00011159729679275999, "loss": 0.0038, "num_input_tokens_seen": 213140944, "step": 98675 }, { "epoch": 16.097879282218596, "grad_norm": 0.15023410320281982, "learning_rate": 0.00011155247589665057, "loss": 0.0082, "num_input_tokens_seen": 213151248, "step": 98680 }, { "epoch": 16.09869494290375, "grad_norm": 0.020730411633849144, "learning_rate": 0.00011150766287292302, "loss": 0.0035, "num_input_tokens_seen": 213162320, "step": 98685 }, { "epoch": 16.099510603588907, "grad_norm": 0.004016099963337183, "learning_rate": 0.00011146285772248555, "loss": 0.0024, "num_input_tokens_seen": 213172432, "step": 98690 }, { "epoch": 16.100326264274063, "grad_norm": 0.006559828761965036, "learning_rate": 0.00011141806044624614, "loss": 0.0014, "num_input_tokens_seen": 213183184, "step": 98695 }, { "epoch": 16.10114192495922, "grad_norm": 0.0008914469508454204, "learning_rate": 0.00011137327104511268, "loss": 0.0074, "num_input_tokens_seen": 213193072, "step": 98700 }, { "epoch": 16.10195758564437, "grad_norm": 0.006500248797237873, "learning_rate": 0.00011132848951999286, "loss": 0.0014, "num_input_tokens_seen": 213203824, "step": 98705 }, { "epoch": 16.102773246329527, "grad_norm": 0.004205408971756697, "learning_rate": 0.00011128371587179431, "loss": 0.0017, "num_input_tokens_seen": 213214800, "step": 98710 }, { "epoch": 16.103588907014682, "grad_norm": 0.0021865079179406166, "learning_rate": 0.00011123895010142437, "loss": 0.0033, "num_input_tokens_seen": 213225488, "step": 98715 }, { "epoch": 16.104404567699838, "grad_norm": 0.0016050647245720029, "learning_rate": 0.00011119419220979033, "loss": 0.0768, "num_input_tokens_seen": 213235760, "step": 98720 }, { "epoch": 16.10522022838499, "grad_norm": 0.01331863272935152, "learning_rate": 0.00011114944219779916, "loss": 0.0025, "num_input_tokens_seen": 213246704, "step": 98725 }, { "epoch": 16.106035889070146, "grad_norm": 0.0021768363658338785, "learning_rate": 0.00011110470006635781, "loss": 0.0024, "num_input_tokens_seen": 213258672, "step": 98730 }, { "epoch": 16.1068515497553, "grad_norm": 0.19919277727603912, "learning_rate": 0.00011105996581637312, "loss": 0.0032, "num_input_tokens_seen": 213270608, "step": 98735 }, { "epoch": 16.107667210440457, "grad_norm": 0.007927102036774158, "learning_rate": 0.00011101523944875163, "loss": 0.0018, "num_input_tokens_seen": 213281360, "step": 98740 }, { "epoch": 16.108482871125613, "grad_norm": 0.018627608194947243, "learning_rate": 0.00011097052096439974, "loss": 0.0013, "num_input_tokens_seen": 213292048, "step": 98745 }, { "epoch": 16.109298531810765, "grad_norm": 0.002585037611424923, "learning_rate": 0.00011092581036422378, "loss": 0.0068, "num_input_tokens_seen": 213303280, "step": 98750 }, { "epoch": 16.11011419249592, "grad_norm": 0.005573483649641275, "learning_rate": 0.00011088110764912984, "loss": 0.0034, "num_input_tokens_seen": 213313744, "step": 98755 }, { "epoch": 16.110929853181077, "grad_norm": 0.7131521701812744, "learning_rate": 0.00011083641282002387, "loss": 0.0908, "num_input_tokens_seen": 213324592, "step": 98760 }, { "epoch": 16.111745513866232, "grad_norm": 0.005434775725007057, "learning_rate": 0.00011079172587781172, "loss": 0.0012, "num_input_tokens_seen": 213336400, "step": 98765 }, { "epoch": 16.112561174551388, "grad_norm": 0.007682743947952986, "learning_rate": 0.00011074704682339897, "loss": 0.0043, "num_input_tokens_seen": 213346000, "step": 98770 }, { "epoch": 16.11337683523654, "grad_norm": 0.008814748376607895, "learning_rate": 0.00011070237565769097, "loss": 0.0025, "num_input_tokens_seen": 213358096, "step": 98775 }, { "epoch": 16.114192495921696, "grad_norm": 0.004235987085849047, "learning_rate": 0.0001106577123815935, "loss": 0.0019, "num_input_tokens_seen": 213367408, "step": 98780 }, { "epoch": 16.11500815660685, "grad_norm": 2.6287970542907715, "learning_rate": 0.0001106130569960111, "loss": 0.0368, "num_input_tokens_seen": 213377840, "step": 98785 }, { "epoch": 16.115823817292007, "grad_norm": 0.04276340827345848, "learning_rate": 0.00011056840950184921, "loss": 0.0035, "num_input_tokens_seen": 213388144, "step": 98790 }, { "epoch": 16.116639477977163, "grad_norm": 0.5271095633506775, "learning_rate": 0.00011052376990001256, "loss": 0.0459, "num_input_tokens_seen": 213399088, "step": 98795 }, { "epoch": 16.117455138662315, "grad_norm": 0.007122796028852463, "learning_rate": 0.00011047913819140576, "loss": 0.0013, "num_input_tokens_seen": 213409840, "step": 98800 }, { "epoch": 16.11827079934747, "grad_norm": 0.13764910399913788, "learning_rate": 0.00011043451437693342, "loss": 0.013, "num_input_tokens_seen": 213420528, "step": 98805 }, { "epoch": 16.119086460032626, "grad_norm": 0.0022578334901481867, "learning_rate": 0.00011038989845749981, "loss": 0.0181, "num_input_tokens_seen": 213430800, "step": 98810 }, { "epoch": 16.119902120717782, "grad_norm": 0.004519890993833542, "learning_rate": 0.00011034529043400915, "loss": 0.0032, "num_input_tokens_seen": 213441936, "step": 98815 }, { "epoch": 16.120717781402938, "grad_norm": 0.0013229832984507084, "learning_rate": 0.00011030069030736551, "loss": 0.0023, "num_input_tokens_seen": 213452688, "step": 98820 }, { "epoch": 16.12153344208809, "grad_norm": 0.004833935294300318, "learning_rate": 0.0001102560980784727, "loss": 0.0035, "num_input_tokens_seen": 213463312, "step": 98825 }, { "epoch": 16.122349102773246, "grad_norm": 0.06656540185213089, "learning_rate": 0.00011021151374823457, "loss": 0.0899, "num_input_tokens_seen": 213474192, "step": 98830 }, { "epoch": 16.1231647634584, "grad_norm": 0.028403708711266518, "learning_rate": 0.00011016693731755456, "loss": 0.0032, "num_input_tokens_seen": 213485232, "step": 98835 }, { "epoch": 16.123980424143557, "grad_norm": 0.01657683216035366, "learning_rate": 0.00011012236878733606, "loss": 0.0035, "num_input_tokens_seen": 213496048, "step": 98840 }, { "epoch": 16.124796084828713, "grad_norm": 0.009340647608041763, "learning_rate": 0.00011007780815848239, "loss": 0.0103, "num_input_tokens_seen": 213507472, "step": 98845 }, { "epoch": 16.125611745513865, "grad_norm": 0.0011907644802704453, "learning_rate": 0.00011003325543189663, "loss": 0.0055, "num_input_tokens_seen": 213517520, "step": 98850 }, { "epoch": 16.12642740619902, "grad_norm": 0.0006321436958387494, "learning_rate": 0.0001099887106084816, "loss": 0.0009, "num_input_tokens_seen": 213528336, "step": 98855 }, { "epoch": 16.127243066884176, "grad_norm": 0.0071949586272239685, "learning_rate": 0.00010994417368914011, "loss": 0.0026, "num_input_tokens_seen": 213539856, "step": 98860 }, { "epoch": 16.128058727569332, "grad_norm": 0.0012756186770275235, "learning_rate": 0.00010989964467477481, "loss": 0.0006, "num_input_tokens_seen": 213550448, "step": 98865 }, { "epoch": 16.128874388254488, "grad_norm": 0.002935679629445076, "learning_rate": 0.00010985512356628807, "loss": 0.0026, "num_input_tokens_seen": 213562192, "step": 98870 }, { "epoch": 16.12969004893964, "grad_norm": 0.006117780692875385, "learning_rate": 0.00010981061036458218, "loss": 0.002, "num_input_tokens_seen": 213573360, "step": 98875 }, { "epoch": 16.130505709624796, "grad_norm": 0.029337430372834206, "learning_rate": 0.00010976610507055906, "loss": 0.0021, "num_input_tokens_seen": 213584112, "step": 98880 }, { "epoch": 16.13132137030995, "grad_norm": 0.14752079546451569, "learning_rate": 0.00010972160768512123, "loss": 0.0049, "num_input_tokens_seen": 213595216, "step": 98885 }, { "epoch": 16.132137030995107, "grad_norm": 0.004511414561420679, "learning_rate": 0.00010967711820916982, "loss": 0.1444, "num_input_tokens_seen": 213606160, "step": 98890 }, { "epoch": 16.13295269168026, "grad_norm": 0.0020912738982588053, "learning_rate": 0.00010963263664360706, "loss": 0.108, "num_input_tokens_seen": 213615984, "step": 98895 }, { "epoch": 16.133768352365415, "grad_norm": 0.03671063110232353, "learning_rate": 0.00010958816298933383, "loss": 0.0032, "num_input_tokens_seen": 213627440, "step": 98900 }, { "epoch": 16.13458401305057, "grad_norm": 0.0017622812883928418, "learning_rate": 0.00010954369724725205, "loss": 0.0024, "num_input_tokens_seen": 213638384, "step": 98905 }, { "epoch": 16.135399673735726, "grad_norm": 0.11492667347192764, "learning_rate": 0.00010949923941826229, "loss": 0.0057, "num_input_tokens_seen": 213649040, "step": 98910 }, { "epoch": 16.136215334420882, "grad_norm": 0.012901815585792065, "learning_rate": 0.0001094547895032661, "loss": 0.001, "num_input_tokens_seen": 213658256, "step": 98915 }, { "epoch": 16.137030995106034, "grad_norm": 0.014208734035491943, "learning_rate": 0.00010941034750316375, "loss": 0.003, "num_input_tokens_seen": 213668560, "step": 98920 }, { "epoch": 16.13784665579119, "grad_norm": 0.0005539971170946956, "learning_rate": 0.00010936591341885648, "loss": 0.0036, "num_input_tokens_seen": 213678896, "step": 98925 }, { "epoch": 16.138662316476346, "grad_norm": 0.2483779489994049, "learning_rate": 0.0001093214872512443, "loss": 0.0058, "num_input_tokens_seen": 213689712, "step": 98930 }, { "epoch": 16.1394779771615, "grad_norm": 0.023973651230335236, "learning_rate": 0.00010927706900122791, "loss": 0.0031, "num_input_tokens_seen": 213699920, "step": 98935 }, { "epoch": 16.140293637846657, "grad_norm": 0.018499545753002167, "learning_rate": 0.00010923265866970739, "loss": 0.0085, "num_input_tokens_seen": 213710864, "step": 98940 }, { "epoch": 16.14110929853181, "grad_norm": 0.0017729535466060042, "learning_rate": 0.00010918825625758273, "loss": 0.0044, "num_input_tokens_seen": 213721200, "step": 98945 }, { "epoch": 16.141924959216965, "grad_norm": 0.001103546703234315, "learning_rate": 0.00010914386176575386, "loss": 0.0074, "num_input_tokens_seen": 213732752, "step": 98950 }, { "epoch": 16.14274061990212, "grad_norm": 0.0006307591684162617, "learning_rate": 0.00010909947519512048, "loss": 0.0062, "num_input_tokens_seen": 213743056, "step": 98955 }, { "epoch": 16.143556280587276, "grad_norm": 0.0002562662702985108, "learning_rate": 0.00010905509654658208, "loss": 0.0016, "num_input_tokens_seen": 213753584, "step": 98960 }, { "epoch": 16.144371941272432, "grad_norm": 0.09816617518663406, "learning_rate": 0.00010901072582103816, "loss": 0.0754, "num_input_tokens_seen": 213762992, "step": 98965 }, { "epoch": 16.145187601957584, "grad_norm": 0.022168634459376335, "learning_rate": 0.00010896636301938784, "loss": 0.0032, "num_input_tokens_seen": 213773072, "step": 98970 }, { "epoch": 16.14600326264274, "grad_norm": 0.03289058804512024, "learning_rate": 0.00010892200814253023, "loss": 0.0017, "num_input_tokens_seen": 213784816, "step": 98975 }, { "epoch": 16.146818923327896, "grad_norm": 0.0013062867801636457, "learning_rate": 0.00010887766119136427, "loss": 0.0015, "num_input_tokens_seen": 213795440, "step": 98980 }, { "epoch": 16.14763458401305, "grad_norm": 0.3104763627052307, "learning_rate": 0.00010883332216678853, "loss": 0.0055, "num_input_tokens_seen": 213807088, "step": 98985 }, { "epoch": 16.148450244698207, "grad_norm": 0.005590509623289108, "learning_rate": 0.00010878899106970203, "loss": 0.0013, "num_input_tokens_seen": 213818832, "step": 98990 }, { "epoch": 16.14926590538336, "grad_norm": 0.5460083484649658, "learning_rate": 0.00010874466790100268, "loss": 0.0888, "num_input_tokens_seen": 213829264, "step": 98995 }, { "epoch": 16.150081566068515, "grad_norm": 0.027004707604646683, "learning_rate": 0.00010870035266158918, "loss": 0.003, "num_input_tokens_seen": 213839792, "step": 99000 }, { "epoch": 16.15089722675367, "grad_norm": 0.0023864214308559895, "learning_rate": 0.00010865604535235918, "loss": 0.0017, "num_input_tokens_seen": 213850736, "step": 99005 }, { "epoch": 16.151712887438826, "grad_norm": 0.020365918055176735, "learning_rate": 0.0001086117459742112, "loss": 0.0013, "num_input_tokens_seen": 213862704, "step": 99010 }, { "epoch": 16.152528548123982, "grad_norm": 0.029816294088959694, "learning_rate": 0.00010856745452804234, "loss": 0.047, "num_input_tokens_seen": 213873296, "step": 99015 }, { "epoch": 16.153344208809134, "grad_norm": 0.0036337687633931637, "learning_rate": 0.0001085231710147509, "loss": 0.0026, "num_input_tokens_seen": 213884560, "step": 99020 }, { "epoch": 16.15415986949429, "grad_norm": 0.005663975607603788, "learning_rate": 0.00010847889543523376, "loss": 0.0053, "num_input_tokens_seen": 213894928, "step": 99025 }, { "epoch": 16.154975530179446, "grad_norm": 0.001540932571515441, "learning_rate": 0.00010843462779038876, "loss": 0.0009, "num_input_tokens_seen": 213905904, "step": 99030 }, { "epoch": 16.1557911908646, "grad_norm": 0.008301756344735622, "learning_rate": 0.00010839036808111246, "loss": 0.0046, "num_input_tokens_seen": 213915952, "step": 99035 }, { "epoch": 16.156606851549757, "grad_norm": 0.009943228214979172, "learning_rate": 0.00010834611630830244, "loss": 0.0047, "num_input_tokens_seen": 213927248, "step": 99040 }, { "epoch": 16.15742251223491, "grad_norm": 0.01648455671966076, "learning_rate": 0.00010830187247285489, "loss": 0.0082, "num_input_tokens_seen": 213938736, "step": 99045 }, { "epoch": 16.158238172920065, "grad_norm": 0.0015992774860933423, "learning_rate": 0.00010825763657566717, "loss": 0.0024, "num_input_tokens_seen": 213949840, "step": 99050 }, { "epoch": 16.15905383360522, "grad_norm": 0.0430905781686306, "learning_rate": 0.00010821340861763506, "loss": 0.0041, "num_input_tokens_seen": 213960112, "step": 99055 }, { "epoch": 16.159869494290376, "grad_norm": 0.010663102380931377, "learning_rate": 0.00010816918859965552, "loss": 0.0016, "num_input_tokens_seen": 213970416, "step": 99060 }, { "epoch": 16.160685154975532, "grad_norm": 0.06955873966217041, "learning_rate": 0.00010812497652262421, "loss": 0.0065, "num_input_tokens_seen": 213981264, "step": 99065 }, { "epoch": 16.161500815660684, "grad_norm": 0.01041333470493555, "learning_rate": 0.00010808077238743763, "loss": 0.0023, "num_input_tokens_seen": 213991888, "step": 99070 }, { "epoch": 16.16231647634584, "grad_norm": 0.03263779357075691, "learning_rate": 0.00010803657619499107, "loss": 0.0015, "num_input_tokens_seen": 214003600, "step": 99075 }, { "epoch": 16.163132137030995, "grad_norm": 0.017389433458447456, "learning_rate": 0.00010799238794618077, "loss": 0.0039, "num_input_tokens_seen": 214013808, "step": 99080 }, { "epoch": 16.16394779771615, "grad_norm": 0.00429139519110322, "learning_rate": 0.00010794820764190194, "loss": 0.0013, "num_input_tokens_seen": 214024624, "step": 99085 }, { "epoch": 16.164763458401303, "grad_norm": 0.034365396946668625, "learning_rate": 0.00010790403528305004, "loss": 0.0044, "num_input_tokens_seen": 214035376, "step": 99090 }, { "epoch": 16.16557911908646, "grad_norm": 0.023165516555309296, "learning_rate": 0.0001078598708705203, "loss": 0.0413, "num_input_tokens_seen": 214045104, "step": 99095 }, { "epoch": 16.166394779771615, "grad_norm": 0.0035238233394920826, "learning_rate": 0.00010781571440520777, "loss": 0.001, "num_input_tokens_seen": 214055536, "step": 99100 }, { "epoch": 16.16721044045677, "grad_norm": 0.0005931582418270409, "learning_rate": 0.00010777156588800724, "loss": 0.0009, "num_input_tokens_seen": 214065680, "step": 99105 }, { "epoch": 16.168026101141926, "grad_norm": 0.07158027589321136, "learning_rate": 0.00010772742531981356, "loss": 0.0012, "num_input_tokens_seen": 214074800, "step": 99110 }, { "epoch": 16.16884176182708, "grad_norm": 0.0070646717213094234, "learning_rate": 0.00010768329270152122, "loss": 0.001, "num_input_tokens_seen": 214085648, "step": 99115 }, { "epoch": 16.169657422512234, "grad_norm": 0.007711863610893488, "learning_rate": 0.00010763916803402463, "loss": 0.0053, "num_input_tokens_seen": 214095952, "step": 99120 }, { "epoch": 16.17047308319739, "grad_norm": 0.0007553675677627325, "learning_rate": 0.00010759505131821806, "loss": 0.004, "num_input_tokens_seen": 214107472, "step": 99125 }, { "epoch": 16.171288743882545, "grad_norm": 0.017188599333167076, "learning_rate": 0.00010755094255499542, "loss": 0.0038, "num_input_tokens_seen": 214120144, "step": 99130 }, { "epoch": 16.1721044045677, "grad_norm": 0.01340003963559866, "learning_rate": 0.00010750684174525111, "loss": 0.0032, "num_input_tokens_seen": 214131120, "step": 99135 }, { "epoch": 16.172920065252853, "grad_norm": 0.009431690908968449, "learning_rate": 0.00010746274888987822, "loss": 0.0011, "num_input_tokens_seen": 214141616, "step": 99140 }, { "epoch": 16.17373572593801, "grad_norm": 0.062283746898174286, "learning_rate": 0.00010741866398977101, "loss": 0.0047, "num_input_tokens_seen": 214152784, "step": 99145 }, { "epoch": 16.174551386623165, "grad_norm": 0.009358714334666729, "learning_rate": 0.00010737458704582232, "loss": 0.0009, "num_input_tokens_seen": 214163952, "step": 99150 }, { "epoch": 16.17536704730832, "grad_norm": 0.013387867249548435, "learning_rate": 0.00010733051805892602, "loss": 0.0024, "num_input_tokens_seen": 214173680, "step": 99155 }, { "epoch": 16.176182707993476, "grad_norm": 0.00021368158922996372, "learning_rate": 0.00010728645702997458, "loss": 0.0011, "num_input_tokens_seen": 214184912, "step": 99160 }, { "epoch": 16.17699836867863, "grad_norm": 0.012210970744490623, "learning_rate": 0.00010724240395986156, "loss": 0.0225, "num_input_tokens_seen": 214195152, "step": 99165 }, { "epoch": 16.177814029363784, "grad_norm": 0.0026052501052618027, "learning_rate": 0.00010719835884947921, "loss": 0.0007, "num_input_tokens_seen": 214205104, "step": 99170 }, { "epoch": 16.17862969004894, "grad_norm": 0.0005025434657000005, "learning_rate": 0.00010715432169972067, "loss": 0.0549, "num_input_tokens_seen": 214215184, "step": 99175 }, { "epoch": 16.179445350734095, "grad_norm": 0.000986822065897286, "learning_rate": 0.00010711029251147791, "loss": 0.0007, "num_input_tokens_seen": 214226896, "step": 99180 }, { "epoch": 16.18026101141925, "grad_norm": 0.018340054899454117, "learning_rate": 0.00010706627128564378, "loss": 0.0014, "num_input_tokens_seen": 214238672, "step": 99185 }, { "epoch": 16.181076672104403, "grad_norm": 0.009344175457954407, "learning_rate": 0.00010702225802310983, "loss": 0.0025, "num_input_tokens_seen": 214248432, "step": 99190 }, { "epoch": 16.18189233278956, "grad_norm": 0.00024389364989474416, "learning_rate": 0.00010697825272476847, "loss": 0.0015, "num_input_tokens_seen": 214259376, "step": 99195 }, { "epoch": 16.182707993474715, "grad_norm": 0.0008997777476906776, "learning_rate": 0.00010693425539151141, "loss": 0.0005, "num_input_tokens_seen": 214270256, "step": 99200 }, { "epoch": 16.18352365415987, "grad_norm": 0.003580469638109207, "learning_rate": 0.00010689026602423036, "loss": 0.0013, "num_input_tokens_seen": 214280624, "step": 99205 }, { "epoch": 16.184339314845026, "grad_norm": 0.013014066033065319, "learning_rate": 0.00010684628462381673, "loss": 0.0023, "num_input_tokens_seen": 214293232, "step": 99210 }, { "epoch": 16.18515497553018, "grad_norm": 0.0009328118176199496, "learning_rate": 0.00010680231119116185, "loss": 0.0022, "num_input_tokens_seen": 214302192, "step": 99215 }, { "epoch": 16.185970636215334, "grad_norm": 0.00644258176907897, "learning_rate": 0.00010675834572715698, "loss": 0.0013, "num_input_tokens_seen": 214313264, "step": 99220 }, { "epoch": 16.18678629690049, "grad_norm": 0.029108474031090736, "learning_rate": 0.00010671438823269314, "loss": 0.0137, "num_input_tokens_seen": 214323504, "step": 99225 }, { "epoch": 16.187601957585645, "grad_norm": 0.045794326812028885, "learning_rate": 0.00010667043870866105, "loss": 0.0027, "num_input_tokens_seen": 214332688, "step": 99230 }, { "epoch": 16.1884176182708, "grad_norm": 0.0035296755377203226, "learning_rate": 0.00010662649715595157, "loss": 0.0025, "num_input_tokens_seen": 214343728, "step": 99235 }, { "epoch": 16.189233278955953, "grad_norm": 0.015425696969032288, "learning_rate": 0.00010658256357545509, "loss": 0.036, "num_input_tokens_seen": 214354160, "step": 99240 }, { "epoch": 16.19004893964111, "grad_norm": 0.0035507178399711847, "learning_rate": 0.00010653863796806213, "loss": 0.0011, "num_input_tokens_seen": 214365232, "step": 99245 }, { "epoch": 16.190864600326265, "grad_norm": 0.0019442274933680892, "learning_rate": 0.00010649472033466273, "loss": 0.0018, "num_input_tokens_seen": 214377616, "step": 99250 }, { "epoch": 16.19168026101142, "grad_norm": 0.0013061447534710169, "learning_rate": 0.00010645081067614703, "loss": 0.0015, "num_input_tokens_seen": 214387856, "step": 99255 }, { "epoch": 16.192495921696572, "grad_norm": 0.0017848755232989788, "learning_rate": 0.00010640690899340494, "loss": 0.0713, "num_input_tokens_seen": 214398352, "step": 99260 }, { "epoch": 16.193311582381728, "grad_norm": 0.0009831542847678065, "learning_rate": 0.00010636301528732612, "loss": 0.0012, "num_input_tokens_seen": 214409520, "step": 99265 }, { "epoch": 16.194127243066884, "grad_norm": 0.00133727234788239, "learning_rate": 0.00010631912955880018, "loss": 0.0013, "num_input_tokens_seen": 214420400, "step": 99270 }, { "epoch": 16.19494290375204, "grad_norm": 0.0031839951407164335, "learning_rate": 0.00010627525180871633, "loss": 0.0046, "num_input_tokens_seen": 214431664, "step": 99275 }, { "epoch": 16.195758564437195, "grad_norm": 0.00220080791041255, "learning_rate": 0.00010623138203796429, "loss": 0.0027, "num_input_tokens_seen": 214441808, "step": 99280 }, { "epoch": 16.196574225122347, "grad_norm": 0.032900307327508926, "learning_rate": 0.00010618752024743255, "loss": 0.0039, "num_input_tokens_seen": 214453680, "step": 99285 }, { "epoch": 16.197389885807503, "grad_norm": 0.0010238890536129475, "learning_rate": 0.00010614366643801055, "loss": 0.0055, "num_input_tokens_seen": 214464176, "step": 99290 }, { "epoch": 16.19820554649266, "grad_norm": 0.009197982028126717, "learning_rate": 0.00010609982061058654, "loss": 0.0081, "num_input_tokens_seen": 214474000, "step": 99295 }, { "epoch": 16.199021207177815, "grad_norm": 0.5025539398193359, "learning_rate": 0.0001060559827660495, "loss": 0.0729, "num_input_tokens_seen": 214485328, "step": 99300 }, { "epoch": 16.19983686786297, "grad_norm": 0.0005666270735673606, "learning_rate": 0.0001060121529052877, "loss": 0.001, "num_input_tokens_seen": 214496080, "step": 99305 }, { "epoch": 16.200652528548122, "grad_norm": 0.005442335736006498, "learning_rate": 0.0001059683310291894, "loss": 0.0013, "num_input_tokens_seen": 214507216, "step": 99310 }, { "epoch": 16.201468189233278, "grad_norm": 0.0619342066347599, "learning_rate": 0.00010592451713864282, "loss": 0.0031, "num_input_tokens_seen": 214517648, "step": 99315 }, { "epoch": 16.202283849918434, "grad_norm": 0.0022380822338163853, "learning_rate": 0.00010588071123453574, "loss": 0.0033, "num_input_tokens_seen": 214528816, "step": 99320 }, { "epoch": 16.20309951060359, "grad_norm": 0.12963344156742096, "learning_rate": 0.00010583691331775608, "loss": 0.0072, "num_input_tokens_seen": 214539568, "step": 99325 }, { "epoch": 16.203915171288745, "grad_norm": 0.008783910423517227, "learning_rate": 0.0001057931233891914, "loss": 0.0029, "num_input_tokens_seen": 214551920, "step": 99330 }, { "epoch": 16.204730831973897, "grad_norm": 0.005578738637268543, "learning_rate": 0.00010574934144972908, "loss": 0.0104, "num_input_tokens_seen": 214562768, "step": 99335 }, { "epoch": 16.205546492659053, "grad_norm": 0.0020929095335304737, "learning_rate": 0.00010570556750025656, "loss": 0.0014, "num_input_tokens_seen": 214573520, "step": 99340 }, { "epoch": 16.20636215334421, "grad_norm": 0.012415740638971329, "learning_rate": 0.00010566180154166094, "loss": 0.0031, "num_input_tokens_seen": 214584336, "step": 99345 }, { "epoch": 16.207177814029365, "grad_norm": 0.002348710782825947, "learning_rate": 0.00010561804357482912, "loss": 0.002, "num_input_tokens_seen": 214594288, "step": 99350 }, { "epoch": 16.20799347471452, "grad_norm": 0.011959983967244625, "learning_rate": 0.00010557429360064796, "loss": 0.0026, "num_input_tokens_seen": 214605712, "step": 99355 }, { "epoch": 16.208809135399672, "grad_norm": 0.04903974384069443, "learning_rate": 0.00010553055162000414, "loss": 0.0033, "num_input_tokens_seen": 214616112, "step": 99360 }, { "epoch": 16.209624796084828, "grad_norm": 0.001221096026711166, "learning_rate": 0.0001054868176337841, "loss": 0.0012, "num_input_tokens_seen": 214626448, "step": 99365 }, { "epoch": 16.210440456769984, "grad_norm": 0.008052774704992771, "learning_rate": 0.00010544309164287418, "loss": 0.0021, "num_input_tokens_seen": 214636112, "step": 99370 }, { "epoch": 16.21125611745514, "grad_norm": 0.051362331956624985, "learning_rate": 0.00010539937364816049, "loss": 0.0023, "num_input_tokens_seen": 214646864, "step": 99375 }, { "epoch": 16.212071778140295, "grad_norm": 0.0010595549829304218, "learning_rate": 0.00010535566365052913, "loss": 0.0026, "num_input_tokens_seen": 214658192, "step": 99380 }, { "epoch": 16.212887438825447, "grad_norm": 0.003621351206675172, "learning_rate": 0.00010531196165086587, "loss": 0.0025, "num_input_tokens_seen": 214668976, "step": 99385 }, { "epoch": 16.213703099510603, "grad_norm": 0.01673658937215805, "learning_rate": 0.00010526826765005642, "loss": 0.0016, "num_input_tokens_seen": 214679728, "step": 99390 }, { "epoch": 16.21451876019576, "grad_norm": 0.0043097264133393764, "learning_rate": 0.00010522458164898624, "loss": 0.0014, "num_input_tokens_seen": 214691408, "step": 99395 }, { "epoch": 16.215334420880914, "grad_norm": 0.00409234594553709, "learning_rate": 0.00010518090364854077, "loss": 0.0015, "num_input_tokens_seen": 214702256, "step": 99400 }, { "epoch": 16.21615008156607, "grad_norm": 0.00013403875345829874, "learning_rate": 0.00010513723364960497, "loss": 0.0004, "num_input_tokens_seen": 214713456, "step": 99405 }, { "epoch": 16.216965742251222, "grad_norm": 0.00076089589856565, "learning_rate": 0.00010509357165306422, "loss": 0.0016, "num_input_tokens_seen": 214723664, "step": 99410 }, { "epoch": 16.217781402936378, "grad_norm": 0.0016013638814911246, "learning_rate": 0.00010504991765980321, "loss": 0.008, "num_input_tokens_seen": 214735792, "step": 99415 }, { "epoch": 16.218597063621534, "grad_norm": 0.009542165324091911, "learning_rate": 0.00010500627167070665, "loss": 0.0018, "num_input_tokens_seen": 214748272, "step": 99420 }, { "epoch": 16.21941272430669, "grad_norm": 0.002033479744568467, "learning_rate": 0.00010496263368665904, "loss": 0.0011, "num_input_tokens_seen": 214760112, "step": 99425 }, { "epoch": 16.22022838499184, "grad_norm": 0.00081063894322142, "learning_rate": 0.00010491900370854484, "loss": 0.0045, "num_input_tokens_seen": 214769712, "step": 99430 }, { "epoch": 16.221044045676997, "grad_norm": 0.46691176295280457, "learning_rate": 0.0001048753817372482, "loss": 0.103, "num_input_tokens_seen": 214781168, "step": 99435 }, { "epoch": 16.221859706362153, "grad_norm": 0.017485421150922775, "learning_rate": 0.00010483176777365322, "loss": 0.0022, "num_input_tokens_seen": 214793040, "step": 99440 }, { "epoch": 16.22267536704731, "grad_norm": 0.008777834475040436, "learning_rate": 0.00010478816181864376, "loss": 0.0054, "num_input_tokens_seen": 214804304, "step": 99445 }, { "epoch": 16.223491027732464, "grad_norm": 0.003942748997360468, "learning_rate": 0.0001047445638731036, "loss": 0.0016, "num_input_tokens_seen": 214814512, "step": 99450 }, { "epoch": 16.224306688417617, "grad_norm": 0.027318790555000305, "learning_rate": 0.00010470097393791622, "loss": 0.0019, "num_input_tokens_seen": 214824912, "step": 99455 }, { "epoch": 16.225122349102772, "grad_norm": 0.030543800443410873, "learning_rate": 0.00010465739201396512, "loss": 0.0013, "num_input_tokens_seen": 214835344, "step": 99460 }, { "epoch": 16.225938009787928, "grad_norm": 0.000653235474601388, "learning_rate": 0.00010461381810213344, "loss": 0.0008, "num_input_tokens_seen": 214846704, "step": 99465 }, { "epoch": 16.226753670473084, "grad_norm": 0.008265483193099499, "learning_rate": 0.00010457025220330435, "loss": 0.0011, "num_input_tokens_seen": 214857936, "step": 99470 }, { "epoch": 16.22756933115824, "grad_norm": 0.0002337160549359396, "learning_rate": 0.00010452669431836076, "loss": 0.0115, "num_input_tokens_seen": 214868208, "step": 99475 }, { "epoch": 16.22838499184339, "grad_norm": 0.0013019460020586848, "learning_rate": 0.00010448314444818541, "loss": 0.0032, "num_input_tokens_seen": 214878128, "step": 99480 }, { "epoch": 16.229200652528547, "grad_norm": 0.002835572464391589, "learning_rate": 0.00010443960259366081, "loss": 0.0006, "num_input_tokens_seen": 214887600, "step": 99485 }, { "epoch": 16.230016313213703, "grad_norm": 0.0008782123913988471, "learning_rate": 0.00010439606875566954, "loss": 0.0017, "num_input_tokens_seen": 214898992, "step": 99490 }, { "epoch": 16.23083197389886, "grad_norm": 0.011612669564783573, "learning_rate": 0.00010435254293509378, "loss": 0.0016, "num_input_tokens_seen": 214911024, "step": 99495 }, { "epoch": 16.231647634584014, "grad_norm": 0.012568632140755653, "learning_rate": 0.00010430902513281565, "loss": 0.0015, "num_input_tokens_seen": 214923376, "step": 99500 }, { "epoch": 16.232463295269167, "grad_norm": 0.0033671578858047724, "learning_rate": 0.00010426551534971706, "loss": 0.0012, "num_input_tokens_seen": 214934672, "step": 99505 }, { "epoch": 16.233278955954322, "grad_norm": 0.008715744130313396, "learning_rate": 0.00010422201358667987, "loss": 0.0006, "num_input_tokens_seen": 214946032, "step": 99510 }, { "epoch": 16.234094616639478, "grad_norm": 0.0662357434630394, "learning_rate": 0.00010417851984458565, "loss": 0.001, "num_input_tokens_seen": 214958096, "step": 99515 }, { "epoch": 16.234910277324634, "grad_norm": 0.008330133743584156, "learning_rate": 0.00010413503412431568, "loss": 0.001, "num_input_tokens_seen": 214970000, "step": 99520 }, { "epoch": 16.23572593800979, "grad_norm": 0.001419969368726015, "learning_rate": 0.00010409155642675178, "loss": 0.0055, "num_input_tokens_seen": 214980144, "step": 99525 }, { "epoch": 16.23654159869494, "grad_norm": 0.002042067004367709, "learning_rate": 0.00010404808675277444, "loss": 0.0012, "num_input_tokens_seen": 214990800, "step": 99530 }, { "epoch": 16.237357259380097, "grad_norm": 0.008127299137413502, "learning_rate": 0.00010400462510326513, "loss": 0.0012, "num_input_tokens_seen": 215000816, "step": 99535 }, { "epoch": 16.238172920065253, "grad_norm": 0.0008488766034133732, "learning_rate": 0.00010396117147910422, "loss": 0.0023, "num_input_tokens_seen": 215011632, "step": 99540 }, { "epoch": 16.23898858075041, "grad_norm": 0.08584143966436386, "learning_rate": 0.00010391772588117288, "loss": 0.0028, "num_input_tokens_seen": 215023504, "step": 99545 }, { "epoch": 16.239804241435564, "grad_norm": 0.004345927853137255, "learning_rate": 0.000103874288310351, "loss": 0.0026, "num_input_tokens_seen": 215033552, "step": 99550 }, { "epoch": 16.240619902120716, "grad_norm": 0.40933969616889954, "learning_rate": 0.0001038308587675193, "loss": 0.0147, "num_input_tokens_seen": 215044496, "step": 99555 }, { "epoch": 16.241435562805872, "grad_norm": 0.0008070006733760238, "learning_rate": 0.00010378743725355788, "loss": 0.0006, "num_input_tokens_seen": 215053776, "step": 99560 }, { "epoch": 16.242251223491028, "grad_norm": 0.00782832596451044, "learning_rate": 0.00010374402376934661, "loss": 0.196, "num_input_tokens_seen": 215064688, "step": 99565 }, { "epoch": 16.243066884176184, "grad_norm": 0.0021462365984916687, "learning_rate": 0.00010370061831576544, "loss": 0.0457, "num_input_tokens_seen": 215074576, "step": 99570 }, { "epoch": 16.24388254486134, "grad_norm": 0.004718319047242403, "learning_rate": 0.00010365722089369395, "loss": 0.0008, "num_input_tokens_seen": 215085744, "step": 99575 }, { "epoch": 16.24469820554649, "grad_norm": 0.02573045901954174, "learning_rate": 0.00010361383150401165, "loss": 0.0048, "num_input_tokens_seen": 215096432, "step": 99580 }, { "epoch": 16.245513866231647, "grad_norm": 0.020224103704094887, "learning_rate": 0.00010357045014759797, "loss": 0.001, "num_input_tokens_seen": 215107056, "step": 99585 }, { "epoch": 16.246329526916803, "grad_norm": 0.011003488674759865, "learning_rate": 0.00010352707682533197, "loss": 0.0019, "num_input_tokens_seen": 215118736, "step": 99590 }, { "epoch": 16.24714518760196, "grad_norm": 0.0029920844826847315, "learning_rate": 0.00010348371153809277, "loss": 0.0007, "num_input_tokens_seen": 215129680, "step": 99595 }, { "epoch": 16.247960848287114, "grad_norm": 0.0012058455031365156, "learning_rate": 0.00010344035428675914, "loss": 0.0011, "num_input_tokens_seen": 215140560, "step": 99600 }, { "epoch": 16.248776508972266, "grad_norm": 0.0012687857961282134, "learning_rate": 0.00010339700507220978, "loss": 0.0112, "num_input_tokens_seen": 215151984, "step": 99605 }, { "epoch": 16.249592169657422, "grad_norm": 0.0020511329639703035, "learning_rate": 0.0001033536638953233, "loss": 0.0008, "num_input_tokens_seen": 215163632, "step": 99610 }, { "epoch": 16.250407830342578, "grad_norm": 0.02142617478966713, "learning_rate": 0.00010331033075697793, "loss": 0.0037, "num_input_tokens_seen": 215174096, "step": 99615 }, { "epoch": 16.251223491027734, "grad_norm": 0.007285781670361757, "learning_rate": 0.00010326700565805197, "loss": 0.0025, "num_input_tokens_seen": 215185008, "step": 99620 }, { "epoch": 16.252039151712886, "grad_norm": 0.0007665912853553891, "learning_rate": 0.00010322368859942333, "loss": 0.0049, "num_input_tokens_seen": 215196464, "step": 99625 }, { "epoch": 16.25285481239804, "grad_norm": 0.003922312520444393, "learning_rate": 0.00010318037958197024, "loss": 0.0016, "num_input_tokens_seen": 215207536, "step": 99630 }, { "epoch": 16.253670473083197, "grad_norm": 0.0018488741479814053, "learning_rate": 0.0001031370786065699, "loss": 0.0008, "num_input_tokens_seen": 215216432, "step": 99635 }, { "epoch": 16.254486133768353, "grad_norm": 0.0002045558503596112, "learning_rate": 0.00010309378567410039, "loss": 0.0011, "num_input_tokens_seen": 215226384, "step": 99640 }, { "epoch": 16.25530179445351, "grad_norm": 0.09213671088218689, "learning_rate": 0.00010305050078543848, "loss": 0.0045, "num_input_tokens_seen": 215236752, "step": 99645 }, { "epoch": 16.25611745513866, "grad_norm": 0.0008904563146643341, "learning_rate": 0.00010300722394146212, "loss": 0.0016, "num_input_tokens_seen": 215246608, "step": 99650 }, { "epoch": 16.256933115823816, "grad_norm": 0.007827579975128174, "learning_rate": 0.00010296395514304763, "loss": 0.0011, "num_input_tokens_seen": 215257072, "step": 99655 }, { "epoch": 16.257748776508972, "grad_norm": 0.0644720047712326, "learning_rate": 0.00010292069439107254, "loss": 0.003, "num_input_tokens_seen": 215267120, "step": 99660 }, { "epoch": 16.258564437194128, "grad_norm": 0.0004442843492142856, "learning_rate": 0.00010287744168641311, "loss": 0.0025, "num_input_tokens_seen": 215277424, "step": 99665 }, { "epoch": 16.259380097879284, "grad_norm": 0.0010822077747434378, "learning_rate": 0.00010283419702994634, "loss": 0.0021, "num_input_tokens_seen": 215288112, "step": 99670 }, { "epoch": 16.260195758564436, "grad_norm": 0.10656613856554031, "learning_rate": 0.0001027909604225481, "loss": 0.0023, "num_input_tokens_seen": 215297776, "step": 99675 }, { "epoch": 16.26101141924959, "grad_norm": 0.013543715700507164, "learning_rate": 0.00010274773186509528, "loss": 0.0018, "num_input_tokens_seen": 215307280, "step": 99680 }, { "epoch": 16.261827079934747, "grad_norm": 0.00781995989382267, "learning_rate": 0.00010270451135846332, "loss": 0.0007, "num_input_tokens_seen": 215316880, "step": 99685 }, { "epoch": 16.262642740619903, "grad_norm": 0.0015151110710576177, "learning_rate": 0.00010266129890352872, "loss": 0.0034, "num_input_tokens_seen": 215328528, "step": 99690 }, { "epoch": 16.26345840130506, "grad_norm": 0.0037482441402971745, "learning_rate": 0.00010261809450116666, "loss": 0.0048, "num_input_tokens_seen": 215340720, "step": 99695 }, { "epoch": 16.26427406199021, "grad_norm": 0.5841511487960815, "learning_rate": 0.00010257489815225318, "loss": 0.1694, "num_input_tokens_seen": 215352624, "step": 99700 }, { "epoch": 16.265089722675366, "grad_norm": 0.006499356124550104, "learning_rate": 0.00010253170985766357, "loss": 0.0012, "num_input_tokens_seen": 215362480, "step": 99705 }, { "epoch": 16.265905383360522, "grad_norm": 0.0037484378553926945, "learning_rate": 0.00010248852961827309, "loss": 0.0023, "num_input_tokens_seen": 215372752, "step": 99710 }, { "epoch": 16.266721044045678, "grad_norm": 0.011288094334304333, "learning_rate": 0.00010244535743495681, "loss": 0.0025, "num_input_tokens_seen": 215383600, "step": 99715 }, { "epoch": 16.267536704730833, "grad_norm": 0.008340914733707905, "learning_rate": 0.00010240219330858969, "loss": 0.0654, "num_input_tokens_seen": 215393296, "step": 99720 }, { "epoch": 16.268352365415986, "grad_norm": 0.055706895887851715, "learning_rate": 0.00010235903724004652, "loss": 0.0107, "num_input_tokens_seen": 215402384, "step": 99725 }, { "epoch": 16.26916802610114, "grad_norm": 0.02399086207151413, "learning_rate": 0.00010231588923020196, "loss": 0.0013, "num_input_tokens_seen": 215413392, "step": 99730 }, { "epoch": 16.269983686786297, "grad_norm": 0.014908327721059322, "learning_rate": 0.00010227274927993035, "loss": 0.0008, "num_input_tokens_seen": 215424656, "step": 99735 }, { "epoch": 16.270799347471453, "grad_norm": 0.008819537237286568, "learning_rate": 0.000102229617390106, "loss": 0.0035, "num_input_tokens_seen": 215436272, "step": 99740 }, { "epoch": 16.27161500815661, "grad_norm": 0.0023069872986525297, "learning_rate": 0.00010218649356160314, "loss": 0.0006, "num_input_tokens_seen": 215447120, "step": 99745 }, { "epoch": 16.27243066884176, "grad_norm": 0.007820419035851955, "learning_rate": 0.00010214337779529548, "loss": 0.0046, "num_input_tokens_seen": 215457232, "step": 99750 }, { "epoch": 16.273246329526916, "grad_norm": 0.0034470772370696068, "learning_rate": 0.00010210027009205719, "loss": 0.0013, "num_input_tokens_seen": 215465840, "step": 99755 }, { "epoch": 16.274061990212072, "grad_norm": 0.3569754660129547, "learning_rate": 0.00010205717045276153, "loss": 0.0156, "num_input_tokens_seen": 215476272, "step": 99760 }, { "epoch": 16.274877650897228, "grad_norm": 0.002355807228013873, "learning_rate": 0.00010201407887828234, "loss": 0.0064, "num_input_tokens_seen": 215486128, "step": 99765 }, { "epoch": 16.275693311582383, "grad_norm": 0.0006279262597672641, "learning_rate": 0.0001019709953694925, "loss": 0.0008, "num_input_tokens_seen": 215496976, "step": 99770 }, { "epoch": 16.276508972267536, "grad_norm": 0.000803434697445482, "learning_rate": 0.00010192791992726558, "loss": 0.0029, "num_input_tokens_seen": 215506544, "step": 99775 }, { "epoch": 16.27732463295269, "grad_norm": 0.0010221730917692184, "learning_rate": 0.00010188485255247415, "loss": 0.0012, "num_input_tokens_seen": 215518384, "step": 99780 }, { "epoch": 16.278140293637847, "grad_norm": 0.0014653302496299148, "learning_rate": 0.00010184179324599147, "loss": 0.0035, "num_input_tokens_seen": 215527664, "step": 99785 }, { "epoch": 16.278955954323003, "grad_norm": 0.0004814395506400615, "learning_rate": 0.00010179874200868966, "loss": 0.0053, "num_input_tokens_seen": 215538640, "step": 99790 }, { "epoch": 16.27977161500816, "grad_norm": 0.0009255227050743997, "learning_rate": 0.00010175569884144182, "loss": 0.0007, "num_input_tokens_seen": 215549552, "step": 99795 }, { "epoch": 16.28058727569331, "grad_norm": 0.0268571674823761, "learning_rate": 0.00010171266374511962, "loss": 0.0723, "num_input_tokens_seen": 215561168, "step": 99800 }, { "epoch": 16.281402936378466, "grad_norm": 0.006670699920505285, "learning_rate": 0.00010166963672059588, "loss": 0.0011, "num_input_tokens_seen": 215573488, "step": 99805 }, { "epoch": 16.282218597063622, "grad_norm": 0.0005371726001612842, "learning_rate": 0.00010162661776874193, "loss": 0.0012, "num_input_tokens_seen": 215583536, "step": 99810 }, { "epoch": 16.283034257748778, "grad_norm": 0.001128238276578486, "learning_rate": 0.00010158360689043028, "loss": 0.0213, "num_input_tokens_seen": 215594384, "step": 99815 }, { "epoch": 16.28384991843393, "grad_norm": 0.0018089942168444395, "learning_rate": 0.00010154060408653198, "loss": 0.0025, "num_input_tokens_seen": 215604336, "step": 99820 }, { "epoch": 16.284665579119086, "grad_norm": 0.0002369862631894648, "learning_rate": 0.00010149760935791907, "loss": 0.0004, "num_input_tokens_seen": 215615984, "step": 99825 }, { "epoch": 16.28548123980424, "grad_norm": 0.02218855917453766, "learning_rate": 0.00010145462270546241, "loss": 0.0025, "num_input_tokens_seen": 215626576, "step": 99830 }, { "epoch": 16.286296900489397, "grad_norm": 0.0026553068310022354, "learning_rate": 0.00010141164413003351, "loss": 0.0009, "num_input_tokens_seen": 215636720, "step": 99835 }, { "epoch": 16.287112561174553, "grad_norm": 0.013349421322345734, "learning_rate": 0.00010136867363250329, "loss": 0.002, "num_input_tokens_seen": 215647152, "step": 99840 }, { "epoch": 16.287928221859705, "grad_norm": 0.0031846666242927313, "learning_rate": 0.00010132571121374257, "loss": 0.0022, "num_input_tokens_seen": 215657392, "step": 99845 }, { "epoch": 16.28874388254486, "grad_norm": 0.002197221852838993, "learning_rate": 0.00010128275687462212, "loss": 0.1308, "num_input_tokens_seen": 215669776, "step": 99850 }, { "epoch": 16.289559543230016, "grad_norm": 0.0004804141935892403, "learning_rate": 0.0001012398106160124, "loss": 0.003, "num_input_tokens_seen": 215679440, "step": 99855 }, { "epoch": 16.290375203915172, "grad_norm": 0.004510819911956787, "learning_rate": 0.00010119687243878379, "loss": 0.034, "num_input_tokens_seen": 215690000, "step": 99860 }, { "epoch": 16.291190864600328, "grad_norm": 0.022695958614349365, "learning_rate": 0.00010115394234380642, "loss": 0.0013, "num_input_tokens_seen": 215701808, "step": 99865 }, { "epoch": 16.29200652528548, "grad_norm": 0.007637821137905121, "learning_rate": 0.00010111102033195041, "loss": 0.0124, "num_input_tokens_seen": 215713200, "step": 99870 }, { "epoch": 16.292822185970635, "grad_norm": 0.007616210263222456, "learning_rate": 0.00010106810640408564, "loss": 0.0028, "num_input_tokens_seen": 215724208, "step": 99875 }, { "epoch": 16.29363784665579, "grad_norm": 0.0030214902944862843, "learning_rate": 0.00010102520056108172, "loss": 0.0015, "num_input_tokens_seen": 215735792, "step": 99880 }, { "epoch": 16.294453507340947, "grad_norm": 0.026455482468008995, "learning_rate": 0.00010098230280380826, "loss": 0.003, "num_input_tokens_seen": 215747024, "step": 99885 }, { "epoch": 16.295269168026103, "grad_norm": 0.02323424257338047, "learning_rate": 0.00010093941313313465, "loss": 0.002, "num_input_tokens_seen": 215756432, "step": 99890 }, { "epoch": 16.296084828711255, "grad_norm": 0.020892612636089325, "learning_rate": 0.00010089653154992994, "loss": 0.0013, "num_input_tokens_seen": 215766224, "step": 99895 }, { "epoch": 16.29690048939641, "grad_norm": 0.0010717104887589812, "learning_rate": 0.00010085365805506358, "loss": 0.0027, "num_input_tokens_seen": 215776944, "step": 99900 }, { "epoch": 16.297716150081566, "grad_norm": 0.008391788229346275, "learning_rate": 0.00010081079264940391, "loss": 0.0614, "num_input_tokens_seen": 215788432, "step": 99905 }, { "epoch": 16.298531810766722, "grad_norm": 0.03581464663147926, "learning_rate": 0.00010076793533382022, "loss": 0.0028, "num_input_tokens_seen": 215799600, "step": 99910 }, { "epoch": 16.299347471451878, "grad_norm": 0.025882374495267868, "learning_rate": 0.00010072508610918046, "loss": 0.002, "num_input_tokens_seen": 215811184, "step": 99915 }, { "epoch": 16.30016313213703, "grad_norm": 0.31241723895072937, "learning_rate": 0.00010068224497635369, "loss": 0.0057, "num_input_tokens_seen": 215822320, "step": 99920 }, { "epoch": 16.300978792822185, "grad_norm": 0.043358415365219116, "learning_rate": 0.00010063941193620751, "loss": 0.0023, "num_input_tokens_seen": 215833808, "step": 99925 }, { "epoch": 16.30179445350734, "grad_norm": 0.003072767984122038, "learning_rate": 0.0001005965869896105, "loss": 0.0016, "num_input_tokens_seen": 215845392, "step": 99930 }, { "epoch": 16.302610114192497, "grad_norm": 0.031429387629032135, "learning_rate": 0.00010055377013743012, "loss": 0.0018, "num_input_tokens_seen": 215855312, "step": 99935 }, { "epoch": 16.303425774877653, "grad_norm": 0.0011349570704624057, "learning_rate": 0.0001005109613805344, "loss": 0.0035, "num_input_tokens_seen": 215866224, "step": 99940 }, { "epoch": 16.304241435562805, "grad_norm": 0.0012783968122676015, "learning_rate": 0.00010046816071979087, "loss": 0.002, "num_input_tokens_seen": 215877424, "step": 99945 }, { "epoch": 16.30505709624796, "grad_norm": 0.00905763078480959, "learning_rate": 0.0001004253681560669, "loss": 0.0014, "num_input_tokens_seen": 215889488, "step": 99950 }, { "epoch": 16.305872756933116, "grad_norm": 0.04101025313138962, "learning_rate": 0.00010038258369022974, "loss": 0.0225, "num_input_tokens_seen": 215899280, "step": 99955 }, { "epoch": 16.306688417618272, "grad_norm": 0.00823147501796484, "learning_rate": 0.00010033980732314646, "loss": 0.0009, "num_input_tokens_seen": 215909648, "step": 99960 }, { "epoch": 16.307504078303428, "grad_norm": 0.0012991069816052914, "learning_rate": 0.00010029703905568399, "loss": 0.0017, "num_input_tokens_seen": 215920720, "step": 99965 }, { "epoch": 16.30831973898858, "grad_norm": 0.008023583330214024, "learning_rate": 0.00010025427888870909, "loss": 0.001, "num_input_tokens_seen": 215931120, "step": 99970 }, { "epoch": 16.309135399673735, "grad_norm": 0.014352011494338512, "learning_rate": 0.00010021152682308837, "loss": 0.0121, "num_input_tokens_seen": 215941680, "step": 99975 }, { "epoch": 16.30995106035889, "grad_norm": 0.0030116417910903692, "learning_rate": 0.00010016878285968816, "loss": 0.001, "num_input_tokens_seen": 215951152, "step": 99980 }, { "epoch": 16.310766721044047, "grad_norm": 0.02265646867454052, "learning_rate": 0.00010012604699937483, "loss": 0.0031, "num_input_tokens_seen": 215962544, "step": 99985 }, { "epoch": 16.3115823817292, "grad_norm": 0.0016602237010374665, "learning_rate": 0.00010008331924301445, "loss": 0.0066, "num_input_tokens_seen": 215972848, "step": 99990 }, { "epoch": 16.312398042414355, "grad_norm": 0.022008635103702545, "learning_rate": 0.00010004059959147293, "loss": 0.001, "num_input_tokens_seen": 215983824, "step": 99995 }, { "epoch": 16.31321370309951, "grad_norm": 0.004182157106697559, "learning_rate": 9.999788804561605e-05, "loss": 0.001, "num_input_tokens_seen": 215994512, "step": 100000 }, { "epoch": 16.314029363784666, "grad_norm": 0.0018245892133563757, "learning_rate": 9.995518460630937e-05, "loss": 0.0033, "num_input_tokens_seen": 216004976, "step": 100005 }, { "epoch": 16.31484502446982, "grad_norm": 0.008289888501167297, "learning_rate": 9.991248927441837e-05, "loss": 0.0016, "num_input_tokens_seen": 216016176, "step": 100010 }, { "epoch": 16.315660685154974, "grad_norm": 0.001752890762872994, "learning_rate": 9.986980205080837e-05, "loss": 0.0025, "num_input_tokens_seen": 216026480, "step": 100015 }, { "epoch": 16.31647634584013, "grad_norm": 0.00022865060600452125, "learning_rate": 9.982712293634438e-05, "loss": 0.0101, "num_input_tokens_seen": 216036336, "step": 100020 }, { "epoch": 16.317292006525285, "grad_norm": 0.00168028159532696, "learning_rate": 9.97844519318914e-05, "loss": 0.0014, "num_input_tokens_seen": 216046416, "step": 100025 }, { "epoch": 16.31810766721044, "grad_norm": 0.0004960019723512232, "learning_rate": 9.974178903831427e-05, "loss": 0.0019, "num_input_tokens_seen": 216057776, "step": 100030 }, { "epoch": 16.318923327895597, "grad_norm": 0.003168502589687705, "learning_rate": 9.969913425647747e-05, "loss": 0.0031, "num_input_tokens_seen": 216068560, "step": 100035 }, { "epoch": 16.31973898858075, "grad_norm": 0.007589966524392366, "learning_rate": 9.965648758724544e-05, "loss": 0.0034, "num_input_tokens_seen": 216079056, "step": 100040 }, { "epoch": 16.320554649265905, "grad_norm": 0.00240236334502697, "learning_rate": 9.961384903148269e-05, "loss": 0.0015, "num_input_tokens_seen": 216091728, "step": 100045 }, { "epoch": 16.32137030995106, "grad_norm": 0.0010267647448927164, "learning_rate": 9.957121859005324e-05, "loss": 0.0009, "num_input_tokens_seen": 216102448, "step": 100050 }, { "epoch": 16.322185970636216, "grad_norm": 0.0273845586925745, "learning_rate": 9.952859626382099e-05, "loss": 0.0021, "num_input_tokens_seen": 216114768, "step": 100055 }, { "epoch": 16.32300163132137, "grad_norm": 0.00038628673064522445, "learning_rate": 9.948598205364979e-05, "loss": 0.0091, "num_input_tokens_seen": 216125200, "step": 100060 }, { "epoch": 16.323817292006524, "grad_norm": 0.0002461184049025178, "learning_rate": 9.944337596040326e-05, "loss": 0.0013, "num_input_tokens_seen": 216134992, "step": 100065 }, { "epoch": 16.32463295269168, "grad_norm": 0.0075553716160357, "learning_rate": 9.940077798494485e-05, "loss": 0.0012, "num_input_tokens_seen": 216146544, "step": 100070 }, { "epoch": 16.325448613376835, "grad_norm": 0.026246318593621254, "learning_rate": 9.935818812813784e-05, "loss": 0.0019, "num_input_tokens_seen": 216156816, "step": 100075 }, { "epoch": 16.32626427406199, "grad_norm": 0.005259836558252573, "learning_rate": 9.931560639084541e-05, "loss": 0.0011, "num_input_tokens_seen": 216167600, "step": 100080 }, { "epoch": 16.327079934747147, "grad_norm": 0.0017907143337652087, "learning_rate": 9.927303277393051e-05, "loss": 0.0053, "num_input_tokens_seen": 216178096, "step": 100085 }, { "epoch": 16.3278955954323, "grad_norm": 0.021919501945376396, "learning_rate": 9.923046727825602e-05, "loss": 0.0019, "num_input_tokens_seen": 216187952, "step": 100090 }, { "epoch": 16.328711256117455, "grad_norm": 0.06038171425461769, "learning_rate": 9.918790990468446e-05, "loss": 0.0047, "num_input_tokens_seen": 216197712, "step": 100095 }, { "epoch": 16.32952691680261, "grad_norm": 0.01833273656666279, "learning_rate": 9.914536065407842e-05, "loss": 0.0442, "num_input_tokens_seen": 216209392, "step": 100100 }, { "epoch": 16.330342577487766, "grad_norm": 0.0007771208183839917, "learning_rate": 9.910281952730011e-05, "loss": 0.0004, "num_input_tokens_seen": 216221136, "step": 100105 }, { "epoch": 16.33115823817292, "grad_norm": 0.02714325487613678, "learning_rate": 9.906028652521176e-05, "loss": 0.0012, "num_input_tokens_seen": 216231152, "step": 100110 }, { "epoch": 16.331973898858074, "grad_norm": 0.00896800123155117, "learning_rate": 9.901776164867538e-05, "loss": 0.0014, "num_input_tokens_seen": 216241488, "step": 100115 }, { "epoch": 16.33278955954323, "grad_norm": 0.0016855057328939438, "learning_rate": 9.89752448985527e-05, "loss": 0.0026, "num_input_tokens_seen": 216252400, "step": 100120 }, { "epoch": 16.333605220228385, "grad_norm": 0.0003753203200176358, "learning_rate": 9.893273627570542e-05, "loss": 0.0022, "num_input_tokens_seen": 216262320, "step": 100125 }, { "epoch": 16.33442088091354, "grad_norm": 0.010976849123835564, "learning_rate": 9.889023578099504e-05, "loss": 0.001, "num_input_tokens_seen": 216272912, "step": 100130 }, { "epoch": 16.335236541598697, "grad_norm": 0.012816797941923141, "learning_rate": 9.884774341528285e-05, "loss": 0.001, "num_input_tokens_seen": 216282928, "step": 100135 }, { "epoch": 16.33605220228385, "grad_norm": 0.5955064296722412, "learning_rate": 9.880525917943006e-05, "loss": 0.0952, "num_input_tokens_seen": 216293648, "step": 100140 }, { "epoch": 16.336867862969005, "grad_norm": 0.03133242577314377, "learning_rate": 9.876278307429764e-05, "loss": 0.0027, "num_input_tokens_seen": 216304240, "step": 100145 }, { "epoch": 16.33768352365416, "grad_norm": 0.0009458880522288382, "learning_rate": 9.872031510074625e-05, "loss": 0.0006, "num_input_tokens_seen": 216313904, "step": 100150 }, { "epoch": 16.338499184339316, "grad_norm": 0.03488912805914879, "learning_rate": 9.867785525963707e-05, "loss": 0.0011, "num_input_tokens_seen": 216324176, "step": 100155 }, { "epoch": 16.339314845024468, "grad_norm": 0.010793568566441536, "learning_rate": 9.863540355182998e-05, "loss": 0.0022, "num_input_tokens_seen": 216334640, "step": 100160 }, { "epoch": 16.340130505709624, "grad_norm": 0.032283537089824677, "learning_rate": 9.859295997818585e-05, "loss": 0.0082, "num_input_tokens_seen": 216344912, "step": 100165 }, { "epoch": 16.34094616639478, "grad_norm": 0.0003030995430890471, "learning_rate": 9.855052453956437e-05, "loss": 0.0009, "num_input_tokens_seen": 216355760, "step": 100170 }, { "epoch": 16.341761827079935, "grad_norm": 0.0007913812878541648, "learning_rate": 9.850809723682603e-05, "loss": 0.0007, "num_input_tokens_seen": 216367760, "step": 100175 }, { "epoch": 16.34257748776509, "grad_norm": 0.0012517020804807544, "learning_rate": 9.846567807083018e-05, "loss": 0.0011, "num_input_tokens_seen": 216379600, "step": 100180 }, { "epoch": 16.343393148450243, "grad_norm": 0.0006271583843044937, "learning_rate": 9.842326704243682e-05, "loss": 0.0011, "num_input_tokens_seen": 216391856, "step": 100185 }, { "epoch": 16.3442088091354, "grad_norm": 0.0003550504916347563, "learning_rate": 9.838086415250547e-05, "loss": 0.0101, "num_input_tokens_seen": 216402512, "step": 100190 }, { "epoch": 16.345024469820554, "grad_norm": 0.00036078577977605164, "learning_rate": 9.833846940189533e-05, "loss": 0.0006, "num_input_tokens_seen": 216414960, "step": 100195 }, { "epoch": 16.34584013050571, "grad_norm": 0.005881108809262514, "learning_rate": 9.829608279146568e-05, "loss": 0.0012, "num_input_tokens_seen": 216425904, "step": 100200 }, { "epoch": 16.346655791190866, "grad_norm": 0.0021825244184583426, "learning_rate": 9.825370432207554e-05, "loss": 0.0013, "num_input_tokens_seen": 216436656, "step": 100205 }, { "epoch": 16.347471451876018, "grad_norm": 0.06234239786863327, "learning_rate": 9.821133399458371e-05, "loss": 0.0021, "num_input_tokens_seen": 216446800, "step": 100210 }, { "epoch": 16.348287112561174, "grad_norm": 0.008905709721148014, "learning_rate": 9.81689718098489e-05, "loss": 0.013, "num_input_tokens_seen": 216457264, "step": 100215 }, { "epoch": 16.34910277324633, "grad_norm": 0.558327853679657, "learning_rate": 9.81266177687296e-05, "loss": 0.1895, "num_input_tokens_seen": 216467664, "step": 100220 }, { "epoch": 16.349918433931485, "grad_norm": 0.0004579754895530641, "learning_rate": 9.808427187208424e-05, "loss": 0.003, "num_input_tokens_seen": 216477936, "step": 100225 }, { "epoch": 16.35073409461664, "grad_norm": 0.0029174680821597576, "learning_rate": 9.8041934120771e-05, "loss": 0.0014, "num_input_tokens_seen": 216489488, "step": 100230 }, { "epoch": 16.351549755301793, "grad_norm": 0.00225959368981421, "learning_rate": 9.799960451564787e-05, "loss": 0.0026, "num_input_tokens_seen": 216498992, "step": 100235 }, { "epoch": 16.35236541598695, "grad_norm": 0.0002525453455746174, "learning_rate": 9.795728305757267e-05, "loss": 0.002, "num_input_tokens_seen": 216509936, "step": 100240 }, { "epoch": 16.353181076672104, "grad_norm": 0.004050148651003838, "learning_rate": 9.791496974740321e-05, "loss": 0.0037, "num_input_tokens_seen": 216521328, "step": 100245 }, { "epoch": 16.35399673735726, "grad_norm": 0.000871855765581131, "learning_rate": 9.787266458599697e-05, "loss": 0.0021, "num_input_tokens_seen": 216532592, "step": 100250 }, { "epoch": 16.354812398042416, "grad_norm": 0.0029846071265637875, "learning_rate": 9.783036757421132e-05, "loss": 0.0008, "num_input_tokens_seen": 216544432, "step": 100255 }, { "epoch": 16.355628058727568, "grad_norm": 0.004262133967131376, "learning_rate": 9.778807871290346e-05, "loss": 0.0008, "num_input_tokens_seen": 216556240, "step": 100260 }, { "epoch": 16.356443719412724, "grad_norm": 0.001604230608791113, "learning_rate": 9.774579800293026e-05, "loss": 0.0014, "num_input_tokens_seen": 216568464, "step": 100265 }, { "epoch": 16.35725938009788, "grad_norm": 0.016766056418418884, "learning_rate": 9.770352544514904e-05, "loss": 0.0019, "num_input_tokens_seen": 216580272, "step": 100270 }, { "epoch": 16.358075040783035, "grad_norm": 0.0005941848503425717, "learning_rate": 9.766126104041601e-05, "loss": 0.001, "num_input_tokens_seen": 216590800, "step": 100275 }, { "epoch": 16.35889070146819, "grad_norm": 0.0018064542673528194, "learning_rate": 9.761900478958813e-05, "loss": 0.0012, "num_input_tokens_seen": 216601648, "step": 100280 }, { "epoch": 16.359706362153343, "grad_norm": 0.0018726956332102418, "learning_rate": 9.757675669352133e-05, "loss": 0.0008, "num_input_tokens_seen": 216611600, "step": 100285 }, { "epoch": 16.3605220228385, "grad_norm": 0.005994674749672413, "learning_rate": 9.753451675307234e-05, "loss": 0.0005, "num_input_tokens_seen": 216622224, "step": 100290 }, { "epoch": 16.361337683523654, "grad_norm": 0.0016363600734621286, "learning_rate": 9.749228496909668e-05, "loss": 0.0003, "num_input_tokens_seen": 216633648, "step": 100295 }, { "epoch": 16.36215334420881, "grad_norm": 0.0008571971557103097, "learning_rate": 9.745006134245072e-05, "loss": 0.0023, "num_input_tokens_seen": 216644656, "step": 100300 }, { "epoch": 16.362969004893966, "grad_norm": 0.2415996789932251, "learning_rate": 9.740784587398965e-05, "loss": 0.0177, "num_input_tokens_seen": 216656176, "step": 100305 }, { "epoch": 16.363784665579118, "grad_norm": 0.017015738412737846, "learning_rate": 9.736563856456959e-05, "loss": 0.0018, "num_input_tokens_seen": 216666064, "step": 100310 }, { "epoch": 16.364600326264274, "grad_norm": 0.0016154218465089798, "learning_rate": 9.73234394150454e-05, "loss": 0.003, "num_input_tokens_seen": 216677136, "step": 100315 }, { "epoch": 16.36541598694943, "grad_norm": 0.0032554895151406527, "learning_rate": 9.728124842627278e-05, "loss": 0.0006, "num_input_tokens_seen": 216689456, "step": 100320 }, { "epoch": 16.366231647634585, "grad_norm": 0.09540196508169174, "learning_rate": 9.723906559910634e-05, "loss": 0.002, "num_input_tokens_seen": 216701392, "step": 100325 }, { "epoch": 16.36704730831974, "grad_norm": 0.0013432763516902924, "learning_rate": 9.719689093440126e-05, "loss": 0.0004, "num_input_tokens_seen": 216709264, "step": 100330 }, { "epoch": 16.367862969004893, "grad_norm": 0.003039369825273752, "learning_rate": 9.715472443301215e-05, "loss": 0.0007, "num_input_tokens_seen": 216720912, "step": 100335 }, { "epoch": 16.36867862969005, "grad_norm": 0.014389106072485447, "learning_rate": 9.711256609579367e-05, "loss": 0.0013, "num_input_tokens_seen": 216732848, "step": 100340 }, { "epoch": 16.369494290375204, "grad_norm": 0.07118832319974899, "learning_rate": 9.707041592360005e-05, "loss": 0.0018, "num_input_tokens_seen": 216743440, "step": 100345 }, { "epoch": 16.37030995106036, "grad_norm": 0.0002033188648056239, "learning_rate": 9.702827391728564e-05, "loss": 0.0004, "num_input_tokens_seen": 216754320, "step": 100350 }, { "epoch": 16.371125611745512, "grad_norm": 0.00933680310845375, "learning_rate": 9.69861400777045e-05, "loss": 0.0015, "num_input_tokens_seen": 216764720, "step": 100355 }, { "epoch": 16.371941272430668, "grad_norm": 0.0018059660214930773, "learning_rate": 9.694401440571043e-05, "loss": 0.0023, "num_input_tokens_seen": 216774832, "step": 100360 }, { "epoch": 16.372756933115824, "grad_norm": 0.0006134548457339406, "learning_rate": 9.690189690215728e-05, "loss": 0.0005, "num_input_tokens_seen": 216784880, "step": 100365 }, { "epoch": 16.37357259380098, "grad_norm": 0.0003548564272932708, "learning_rate": 9.685978756789854e-05, "loss": 0.006, "num_input_tokens_seen": 216795504, "step": 100370 }, { "epoch": 16.374388254486135, "grad_norm": 0.011922224424779415, "learning_rate": 9.681768640378757e-05, "loss": 0.0151, "num_input_tokens_seen": 216805520, "step": 100375 }, { "epoch": 16.375203915171287, "grad_norm": 0.00019268895266577601, "learning_rate": 9.677559341067759e-05, "loss": 0.0018, "num_input_tokens_seen": 216816880, "step": 100380 }, { "epoch": 16.376019575856443, "grad_norm": 0.0481029748916626, "learning_rate": 9.673350858942198e-05, "loss": 0.0063, "num_input_tokens_seen": 216828080, "step": 100385 }, { "epoch": 16.3768352365416, "grad_norm": 0.000992569257505238, "learning_rate": 9.669143194087315e-05, "loss": 0.0049, "num_input_tokens_seen": 216838640, "step": 100390 }, { "epoch": 16.377650897226754, "grad_norm": 0.0029242881573736668, "learning_rate": 9.664936346588432e-05, "loss": 0.0011, "num_input_tokens_seen": 216850320, "step": 100395 }, { "epoch": 16.37846655791191, "grad_norm": 0.0005835880292579532, "learning_rate": 9.660730316530757e-05, "loss": 0.0007, "num_input_tokens_seen": 216862288, "step": 100400 }, { "epoch": 16.379282218597062, "grad_norm": 0.0011815468315035105, "learning_rate": 9.65652510399958e-05, "loss": 0.0027, "num_input_tokens_seen": 216871664, "step": 100405 }, { "epoch": 16.380097879282218, "grad_norm": 0.0024670150596648455, "learning_rate": 9.652320709080082e-05, "loss": 0.0015, "num_input_tokens_seen": 216882608, "step": 100410 }, { "epoch": 16.380913539967374, "grad_norm": 0.002712622517719865, "learning_rate": 9.648117131857509e-05, "loss": 0.0006, "num_input_tokens_seen": 216894352, "step": 100415 }, { "epoch": 16.38172920065253, "grad_norm": 0.06670382618904114, "learning_rate": 9.643914372417011e-05, "loss": 0.0026, "num_input_tokens_seen": 216906480, "step": 100420 }, { "epoch": 16.382544861337685, "grad_norm": 0.5531919002532959, "learning_rate": 9.639712430843806e-05, "loss": 0.0123, "num_input_tokens_seen": 216917840, "step": 100425 }, { "epoch": 16.383360522022837, "grad_norm": 0.17486710846424103, "learning_rate": 9.635511307223005e-05, "loss": 0.0099, "num_input_tokens_seen": 216929104, "step": 100430 }, { "epoch": 16.384176182707993, "grad_norm": 0.06316438317298889, "learning_rate": 9.631311001639798e-05, "loss": 0.0009, "num_input_tokens_seen": 216939152, "step": 100435 }, { "epoch": 16.38499184339315, "grad_norm": 0.0012215528404340148, "learning_rate": 9.62711151417926e-05, "loss": 0.0007, "num_input_tokens_seen": 216949744, "step": 100440 }, { "epoch": 16.385807504078304, "grad_norm": 0.004731375258415937, "learning_rate": 9.622912844926551e-05, "loss": 0.0019, "num_input_tokens_seen": 216960912, "step": 100445 }, { "epoch": 16.38662316476346, "grad_norm": 0.0028271775227040052, "learning_rate": 9.618714993966704e-05, "loss": 0.0006, "num_input_tokens_seen": 216972816, "step": 100450 }, { "epoch": 16.387438825448612, "grad_norm": 0.010311473160982132, "learning_rate": 9.614517961384856e-05, "loss": 0.0034, "num_input_tokens_seen": 216985040, "step": 100455 }, { "epoch": 16.388254486133768, "grad_norm": 0.025720862671732903, "learning_rate": 9.610321747266005e-05, "loss": 0.0011, "num_input_tokens_seen": 216994576, "step": 100460 }, { "epoch": 16.389070146818923, "grad_norm": 0.008032547309994698, "learning_rate": 9.60612635169525e-05, "loss": 0.0021, "num_input_tokens_seen": 217004816, "step": 100465 }, { "epoch": 16.38988580750408, "grad_norm": 0.0005619633011519909, "learning_rate": 9.601931774757561e-05, "loss": 0.0011, "num_input_tokens_seen": 217014064, "step": 100470 }, { "epoch": 16.390701468189235, "grad_norm": 0.0003188513219356537, "learning_rate": 9.597738016537988e-05, "loss": 0.0025, "num_input_tokens_seen": 217024528, "step": 100475 }, { "epoch": 16.391517128874387, "grad_norm": 0.0011438900837674737, "learning_rate": 9.593545077121507e-05, "loss": 0.0005, "num_input_tokens_seen": 217035472, "step": 100480 }, { "epoch": 16.392332789559543, "grad_norm": 0.003715142607688904, "learning_rate": 9.589352956593095e-05, "loss": 0.0035, "num_input_tokens_seen": 217046096, "step": 100485 }, { "epoch": 16.3931484502447, "grad_norm": 0.041871681809425354, "learning_rate": 9.585161655037705e-05, "loss": 0.0072, "num_input_tokens_seen": 217056368, "step": 100490 }, { "epoch": 16.393964110929854, "grad_norm": 0.0028728186152875423, "learning_rate": 9.580971172540287e-05, "loss": 0.1086, "num_input_tokens_seen": 217065808, "step": 100495 }, { "epoch": 16.39477977161501, "grad_norm": 0.010508287698030472, "learning_rate": 9.576781509185766e-05, "loss": 0.0006, "num_input_tokens_seen": 217077648, "step": 100500 }, { "epoch": 16.395595432300162, "grad_norm": 0.004550436977297068, "learning_rate": 9.572592665059043e-05, "loss": 0.0045, "num_input_tokens_seen": 217087952, "step": 100505 }, { "epoch": 16.396411092985318, "grad_norm": 0.0050187078304588795, "learning_rate": 9.568404640245022e-05, "loss": 0.0017, "num_input_tokens_seen": 217096688, "step": 100510 }, { "epoch": 16.397226753670473, "grad_norm": 0.2578853666782379, "learning_rate": 9.564217434828565e-05, "loss": 0.0078, "num_input_tokens_seen": 217106864, "step": 100515 }, { "epoch": 16.39804241435563, "grad_norm": 0.0025490387342870235, "learning_rate": 9.56003104889454e-05, "loss": 0.0107, "num_input_tokens_seen": 217118064, "step": 100520 }, { "epoch": 16.39885807504078, "grad_norm": 0.029599115252494812, "learning_rate": 9.55584548252778e-05, "loss": 0.0018, "num_input_tokens_seen": 217127376, "step": 100525 }, { "epoch": 16.399673735725937, "grad_norm": 0.006668840069323778, "learning_rate": 9.55166073581314e-05, "loss": 0.0011, "num_input_tokens_seen": 217136816, "step": 100530 }, { "epoch": 16.400489396411093, "grad_norm": 0.1902245283126831, "learning_rate": 9.547476808835381e-05, "loss": 0.0056, "num_input_tokens_seen": 217147952, "step": 100535 }, { "epoch": 16.40130505709625, "grad_norm": 0.44840678572654724, "learning_rate": 9.54329370167935e-05, "loss": 0.024, "num_input_tokens_seen": 217158800, "step": 100540 }, { "epoch": 16.402120717781404, "grad_norm": 0.0007062299409881234, "learning_rate": 9.539111414429769e-05, "loss": 0.0004, "num_input_tokens_seen": 217169040, "step": 100545 }, { "epoch": 16.402936378466556, "grad_norm": 0.00029036731575615704, "learning_rate": 9.53492994717145e-05, "loss": 0.0014, "num_input_tokens_seen": 217180496, "step": 100550 }, { "epoch": 16.403752039151712, "grad_norm": 0.019752731546759605, "learning_rate": 9.530749299989078e-05, "loss": 0.0011, "num_input_tokens_seen": 217191536, "step": 100555 }, { "epoch": 16.404567699836868, "grad_norm": 0.0012440073769539595, "learning_rate": 9.526569472967444e-05, "loss": 0.0038, "num_input_tokens_seen": 217203568, "step": 100560 }, { "epoch": 16.405383360522023, "grad_norm": 0.031068088486790657, "learning_rate": 9.522390466191194e-05, "loss": 0.0022, "num_input_tokens_seen": 217213328, "step": 100565 }, { "epoch": 16.40619902120718, "grad_norm": 0.0025797896087169647, "learning_rate": 9.518212279745075e-05, "loss": 0.0012, "num_input_tokens_seen": 217223504, "step": 100570 }, { "epoch": 16.40701468189233, "grad_norm": 0.004863258916884661, "learning_rate": 9.514034913713714e-05, "loss": 0.0036, "num_input_tokens_seen": 217234672, "step": 100575 }, { "epoch": 16.407830342577487, "grad_norm": 0.005317374598234892, "learning_rate": 9.509858368181812e-05, "loss": 0.0019, "num_input_tokens_seen": 217244944, "step": 100580 }, { "epoch": 16.408646003262643, "grad_norm": 0.0019695402588695288, "learning_rate": 9.505682643233993e-05, "loss": 0.0011, "num_input_tokens_seen": 217254864, "step": 100585 }, { "epoch": 16.4094616639478, "grad_norm": 0.017480649054050446, "learning_rate": 9.501507738954884e-05, "loss": 0.0906, "num_input_tokens_seen": 217264464, "step": 100590 }, { "epoch": 16.410277324632954, "grad_norm": 0.0020491848699748516, "learning_rate": 9.497333655429097e-05, "loss": 0.0006, "num_input_tokens_seen": 217275152, "step": 100595 }, { "epoch": 16.411092985318106, "grad_norm": 0.44643938541412354, "learning_rate": 9.493160392741229e-05, "loss": 0.0839, "num_input_tokens_seen": 217286096, "step": 100600 }, { "epoch": 16.411908646003262, "grad_norm": 0.0006037737475708127, "learning_rate": 9.488987950975847e-05, "loss": 0.0016, "num_input_tokens_seen": 217296400, "step": 100605 }, { "epoch": 16.412724306688418, "grad_norm": 0.0008719302131794393, "learning_rate": 9.484816330217522e-05, "loss": 0.002, "num_input_tokens_seen": 217307856, "step": 100610 }, { "epoch": 16.413539967373573, "grad_norm": 0.0005770606803707778, "learning_rate": 9.480645530550785e-05, "loss": 0.0008, "num_input_tokens_seen": 217318320, "step": 100615 }, { "epoch": 16.41435562805873, "grad_norm": 0.0018063917523249984, "learning_rate": 9.47647555206017e-05, "loss": 0.0006, "num_input_tokens_seen": 217327888, "step": 100620 }, { "epoch": 16.41517128874388, "grad_norm": 0.001203131745569408, "learning_rate": 9.472306394830188e-05, "loss": 0.0012, "num_input_tokens_seen": 217339664, "step": 100625 }, { "epoch": 16.415986949429037, "grad_norm": 0.007052603177726269, "learning_rate": 9.46813805894533e-05, "loss": 0.002, "num_input_tokens_seen": 217351056, "step": 100630 }, { "epoch": 16.416802610114193, "grad_norm": 0.003801350248977542, "learning_rate": 9.46397054449007e-05, "loss": 0.0769, "num_input_tokens_seen": 217361552, "step": 100635 }, { "epoch": 16.41761827079935, "grad_norm": 0.00352098629809916, "learning_rate": 9.459803851548876e-05, "loss": 0.0005, "num_input_tokens_seen": 217371440, "step": 100640 }, { "epoch": 16.418433931484504, "grad_norm": 0.0003427791816648096, "learning_rate": 9.455637980206177e-05, "loss": 0.0005, "num_input_tokens_seen": 217381424, "step": 100645 }, { "epoch": 16.419249592169656, "grad_norm": 0.02932632900774479, "learning_rate": 9.451472930546417e-05, "loss": 0.0009, "num_input_tokens_seen": 217392368, "step": 100650 }, { "epoch": 16.420065252854812, "grad_norm": 0.013364373706281185, "learning_rate": 9.447308702653995e-05, "loss": 0.0019, "num_input_tokens_seen": 217402736, "step": 100655 }, { "epoch": 16.420880913539968, "grad_norm": 0.007504095323383808, "learning_rate": 9.443145296613303e-05, "loss": 0.0027, "num_input_tokens_seen": 217413712, "step": 100660 }, { "epoch": 16.421696574225123, "grad_norm": 0.06577505171298981, "learning_rate": 9.438982712508726e-05, "loss": 0.0044, "num_input_tokens_seen": 217425232, "step": 100665 }, { "epoch": 16.42251223491028, "grad_norm": 0.0009927983628585935, "learning_rate": 9.434820950424605e-05, "loss": 0.0005, "num_input_tokens_seen": 217435984, "step": 100670 }, { "epoch": 16.42332789559543, "grad_norm": 0.002471206011250615, "learning_rate": 9.430660010445325e-05, "loss": 0.0428, "num_input_tokens_seen": 217447568, "step": 100675 }, { "epoch": 16.424143556280587, "grad_norm": 0.007938587106764317, "learning_rate": 9.426499892655155e-05, "loss": 0.0021, "num_input_tokens_seen": 217458800, "step": 100680 }, { "epoch": 16.424959216965743, "grad_norm": 0.0019349503563717008, "learning_rate": 9.422340597138457e-05, "loss": 0.0022, "num_input_tokens_seen": 217468208, "step": 100685 }, { "epoch": 16.4257748776509, "grad_norm": 0.004137630108743906, "learning_rate": 9.418182123979496e-05, "loss": 0.0018, "num_input_tokens_seen": 217479120, "step": 100690 }, { "epoch": 16.42659053833605, "grad_norm": 0.0010737567208707333, "learning_rate": 9.414024473262561e-05, "loss": 0.1038, "num_input_tokens_seen": 217488976, "step": 100695 }, { "epoch": 16.427406199021206, "grad_norm": 0.0011117482790723443, "learning_rate": 9.409867645071901e-05, "loss": 0.0024, "num_input_tokens_seen": 217499728, "step": 100700 }, { "epoch": 16.428221859706362, "grad_norm": 0.06709881126880646, "learning_rate": 9.405711639491771e-05, "loss": 0.1206, "num_input_tokens_seen": 217510832, "step": 100705 }, { "epoch": 16.429037520391518, "grad_norm": 0.6046932935714722, "learning_rate": 9.401556456606392e-05, "loss": 0.0548, "num_input_tokens_seen": 217522128, "step": 100710 }, { "epoch": 16.429853181076673, "grad_norm": 0.0038243194576352835, "learning_rate": 9.397402096499973e-05, "loss": 0.0022, "num_input_tokens_seen": 217533104, "step": 100715 }, { "epoch": 16.430668841761825, "grad_norm": 0.0012090579839423299, "learning_rate": 9.393248559256706e-05, "loss": 0.0023, "num_input_tokens_seen": 217542896, "step": 100720 }, { "epoch": 16.43148450244698, "grad_norm": 0.021008076146245003, "learning_rate": 9.389095844960771e-05, "loss": 0.0032, "num_input_tokens_seen": 217554320, "step": 100725 }, { "epoch": 16.432300163132137, "grad_norm": 0.0003240357618778944, "learning_rate": 9.384943953696329e-05, "loss": 0.0008, "num_input_tokens_seen": 217565328, "step": 100730 }, { "epoch": 16.433115823817293, "grad_norm": 0.6940795183181763, "learning_rate": 9.380792885547523e-05, "loss": 0.0253, "num_input_tokens_seen": 217576720, "step": 100735 }, { "epoch": 16.43393148450245, "grad_norm": 0.035171881318092346, "learning_rate": 9.376642640598476e-05, "loss": 0.0024, "num_input_tokens_seen": 217588336, "step": 100740 }, { "epoch": 16.4347471451876, "grad_norm": 0.0009132300619967282, "learning_rate": 9.372493218933303e-05, "loss": 0.0047, "num_input_tokens_seen": 217597840, "step": 100745 }, { "epoch": 16.435562805872756, "grad_norm": 0.06404221057891846, "learning_rate": 9.368344620636094e-05, "loss": 0.0043, "num_input_tokens_seen": 217609552, "step": 100750 }, { "epoch": 16.436378466557912, "grad_norm": 0.005812915042042732, "learning_rate": 9.364196845790924e-05, "loss": 0.0019, "num_input_tokens_seen": 217620752, "step": 100755 }, { "epoch": 16.437194127243067, "grad_norm": 0.009950408712029457, "learning_rate": 9.360049894481854e-05, "loss": 0.0012, "num_input_tokens_seen": 217631984, "step": 100760 }, { "epoch": 16.438009787928223, "grad_norm": 0.1717950999736786, "learning_rate": 9.355903766792929e-05, "loss": 0.004, "num_input_tokens_seen": 217643056, "step": 100765 }, { "epoch": 16.438825448613375, "grad_norm": 0.0056233457289636135, "learning_rate": 9.351758462808174e-05, "loss": 0.0009, "num_input_tokens_seen": 217653968, "step": 100770 }, { "epoch": 16.43964110929853, "grad_norm": 0.0006802030256949365, "learning_rate": 9.347613982611603e-05, "loss": 0.001, "num_input_tokens_seen": 217664432, "step": 100775 }, { "epoch": 16.440456769983687, "grad_norm": 0.1902633160352707, "learning_rate": 9.343470326287206e-05, "loss": 0.051, "num_input_tokens_seen": 217674768, "step": 100780 }, { "epoch": 16.441272430668842, "grad_norm": 0.0022835908457636833, "learning_rate": 9.339327493918958e-05, "loss": 0.0009, "num_input_tokens_seen": 217686256, "step": 100785 }, { "epoch": 16.442088091353998, "grad_norm": 0.0061843437142670155, "learning_rate": 9.335185485590807e-05, "loss": 0.0011, "num_input_tokens_seen": 217696784, "step": 100790 }, { "epoch": 16.44290375203915, "grad_norm": 0.0036174890119582415, "learning_rate": 9.331044301386732e-05, "loss": 0.0014, "num_input_tokens_seen": 217707056, "step": 100795 }, { "epoch": 16.443719412724306, "grad_norm": 0.0004997859941795468, "learning_rate": 9.326903941390613e-05, "loss": 0.0022, "num_input_tokens_seen": 217718864, "step": 100800 }, { "epoch": 16.44453507340946, "grad_norm": 0.0004371833929326385, "learning_rate": 9.322764405686412e-05, "loss": 0.0006, "num_input_tokens_seen": 217730160, "step": 100805 }, { "epoch": 16.445350734094617, "grad_norm": 0.0010925378883257508, "learning_rate": 9.318625694357962e-05, "loss": 0.0006, "num_input_tokens_seen": 217741936, "step": 100810 }, { "epoch": 16.446166394779773, "grad_norm": 0.0005652908002957702, "learning_rate": 9.314487807489186e-05, "loss": 0.0005, "num_input_tokens_seen": 217751664, "step": 100815 }, { "epoch": 16.446982055464925, "grad_norm": 0.015928996726870537, "learning_rate": 9.310350745163931e-05, "loss": 0.0466, "num_input_tokens_seen": 217762416, "step": 100820 }, { "epoch": 16.44779771615008, "grad_norm": 0.002068012021481991, "learning_rate": 9.306214507466032e-05, "loss": 0.0022, "num_input_tokens_seen": 217773552, "step": 100825 }, { "epoch": 16.448613376835237, "grad_norm": 0.006545333191752434, "learning_rate": 9.302079094479321e-05, "loss": 0.0033, "num_input_tokens_seen": 217784048, "step": 100830 }, { "epoch": 16.449429037520392, "grad_norm": 0.007893134839832783, "learning_rate": 9.297944506287609e-05, "loss": 0.0713, "num_input_tokens_seen": 217796400, "step": 100835 }, { "epoch": 16.450244698205548, "grad_norm": 0.0014113986399024725, "learning_rate": 9.293810742974679e-05, "loss": 0.001, "num_input_tokens_seen": 217807600, "step": 100840 }, { "epoch": 16.4510603588907, "grad_norm": 0.0015867466572672129, "learning_rate": 9.28967780462432e-05, "loss": 0.0039, "num_input_tokens_seen": 217818224, "step": 100845 }, { "epoch": 16.451876019575856, "grad_norm": 0.004073529504239559, "learning_rate": 9.28554569132028e-05, "loss": 0.0017, "num_input_tokens_seen": 217830032, "step": 100850 }, { "epoch": 16.45269168026101, "grad_norm": 0.0011026524007320404, "learning_rate": 9.28141440314631e-05, "loss": 0.0011, "num_input_tokens_seen": 217842448, "step": 100855 }, { "epoch": 16.453507340946167, "grad_norm": 0.013522444292902946, "learning_rate": 9.277283940186132e-05, "loss": 0.0016, "num_input_tokens_seen": 217853008, "step": 100860 }, { "epoch": 16.454323001631323, "grad_norm": 0.013203301467001438, "learning_rate": 9.273154302523456e-05, "loss": 0.0008, "num_input_tokens_seen": 217864080, "step": 100865 }, { "epoch": 16.455138662316475, "grad_norm": 0.03669867664575577, "learning_rate": 9.269025490241972e-05, "loss": 0.0028, "num_input_tokens_seen": 217875536, "step": 100870 }, { "epoch": 16.45595432300163, "grad_norm": 0.007512333802878857, "learning_rate": 9.264897503425357e-05, "loss": 0.0025, "num_input_tokens_seen": 217885904, "step": 100875 }, { "epoch": 16.456769983686787, "grad_norm": 0.0010369790252298117, "learning_rate": 9.260770342157272e-05, "loss": 0.0024, "num_input_tokens_seen": 217895504, "step": 100880 }, { "epoch": 16.457585644371942, "grad_norm": 0.0006969812093302608, "learning_rate": 9.256644006521358e-05, "loss": 0.1333, "num_input_tokens_seen": 217906480, "step": 100885 }, { "epoch": 16.458401305057095, "grad_norm": 0.009753060527145863, "learning_rate": 9.252518496601237e-05, "loss": 0.0016, "num_input_tokens_seen": 217917776, "step": 100890 }, { "epoch": 16.45921696574225, "grad_norm": 0.004010849166661501, "learning_rate": 9.248393812480522e-05, "loss": 0.0009, "num_input_tokens_seen": 217928560, "step": 100895 }, { "epoch": 16.460032626427406, "grad_norm": 0.028223834931850433, "learning_rate": 9.244269954242806e-05, "loss": 0.0016, "num_input_tokens_seen": 217938288, "step": 100900 }, { "epoch": 16.46084828711256, "grad_norm": 0.029094593599438667, "learning_rate": 9.240146921971642e-05, "loss": 0.0026, "num_input_tokens_seen": 217948464, "step": 100905 }, { "epoch": 16.461663947797717, "grad_norm": 0.000985774677246809, "learning_rate": 9.23602471575064e-05, "loss": 0.0488, "num_input_tokens_seen": 217958128, "step": 100910 }, { "epoch": 16.46247960848287, "grad_norm": 0.15060503780841827, "learning_rate": 9.231903335663283e-05, "loss": 0.0053, "num_input_tokens_seen": 217968432, "step": 100915 }, { "epoch": 16.463295269168025, "grad_norm": 0.0014211301458999515, "learning_rate": 9.227782781793148e-05, "loss": 0.0007, "num_input_tokens_seen": 217979760, "step": 100920 }, { "epoch": 16.46411092985318, "grad_norm": 0.002074574586004019, "learning_rate": 9.223663054223692e-05, "loss": 0.0039, "num_input_tokens_seen": 217990128, "step": 100925 }, { "epoch": 16.464926590538337, "grad_norm": 0.21957442164421082, "learning_rate": 9.219544153038462e-05, "loss": 0.0276, "num_input_tokens_seen": 218001232, "step": 100930 }, { "epoch": 16.465742251223492, "grad_norm": 0.0020057554356753826, "learning_rate": 9.21542607832087e-05, "loss": 0.0021, "num_input_tokens_seen": 218011824, "step": 100935 }, { "epoch": 16.466557911908644, "grad_norm": 0.001532508060336113, "learning_rate": 9.211308830154441e-05, "loss": 0.0053, "num_input_tokens_seen": 218020624, "step": 100940 }, { "epoch": 16.4673735725938, "grad_norm": 0.004052693955600262, "learning_rate": 9.20719240862255e-05, "loss": 0.0007, "num_input_tokens_seen": 218031280, "step": 100945 }, { "epoch": 16.468189233278956, "grad_norm": 0.0012395764933899045, "learning_rate": 9.203076813808687e-05, "loss": 0.0012, "num_input_tokens_seen": 218041520, "step": 100950 }, { "epoch": 16.46900489396411, "grad_norm": 0.003163372864946723, "learning_rate": 9.198962045796195e-05, "loss": 0.0022, "num_input_tokens_seen": 218051856, "step": 100955 }, { "epoch": 16.469820554649267, "grad_norm": 0.00656129838898778, "learning_rate": 9.194848104668513e-05, "loss": 0.0031, "num_input_tokens_seen": 218062480, "step": 100960 }, { "epoch": 16.47063621533442, "grad_norm": 0.0020579954143613577, "learning_rate": 9.190734990508998e-05, "loss": 0.0057, "num_input_tokens_seen": 218073808, "step": 100965 }, { "epoch": 16.471451876019575, "grad_norm": 0.002411720808595419, "learning_rate": 9.18662270340101e-05, "loss": 0.1435, "num_input_tokens_seen": 218084336, "step": 100970 }, { "epoch": 16.47226753670473, "grad_norm": 0.41781216859817505, "learning_rate": 9.182511243427888e-05, "loss": 0.0312, "num_input_tokens_seen": 218095408, "step": 100975 }, { "epoch": 16.473083197389887, "grad_norm": 0.035349469631910324, "learning_rate": 9.178400610672954e-05, "loss": 0.0021, "num_input_tokens_seen": 218106416, "step": 100980 }, { "epoch": 16.473898858075042, "grad_norm": 0.0007361548487097025, "learning_rate": 9.174290805219521e-05, "loss": 0.0053, "num_input_tokens_seen": 218117840, "step": 100985 }, { "epoch": 16.474714518760194, "grad_norm": 0.07083853334188461, "learning_rate": 9.170181827150875e-05, "loss": 0.0028, "num_input_tokens_seen": 218127440, "step": 100990 }, { "epoch": 16.47553017944535, "grad_norm": 0.0019853876437991858, "learning_rate": 9.166073676550291e-05, "loss": 0.0465, "num_input_tokens_seen": 218139568, "step": 100995 }, { "epoch": 16.476345840130506, "grad_norm": 0.0003953164559789002, "learning_rate": 9.161966353501023e-05, "loss": 0.0199, "num_input_tokens_seen": 218149616, "step": 101000 }, { "epoch": 16.47716150081566, "grad_norm": 0.008749466389417648, "learning_rate": 9.157859858086315e-05, "loss": 0.0054, "num_input_tokens_seen": 218160560, "step": 101005 }, { "epoch": 16.477977161500817, "grad_norm": 0.0012293050531297922, "learning_rate": 9.153754190389379e-05, "loss": 0.0016, "num_input_tokens_seen": 218172304, "step": 101010 }, { "epoch": 16.47879282218597, "grad_norm": 0.007884092628955841, "learning_rate": 9.149649350493456e-05, "loss": 0.079, "num_input_tokens_seen": 218182288, "step": 101015 }, { "epoch": 16.479608482871125, "grad_norm": 0.0007453529397025704, "learning_rate": 9.145545338481682e-05, "loss": 0.001, "num_input_tokens_seen": 218193840, "step": 101020 }, { "epoch": 16.48042414355628, "grad_norm": 0.006361248902976513, "learning_rate": 9.141442154437286e-05, "loss": 0.0017, "num_input_tokens_seen": 218205456, "step": 101025 }, { "epoch": 16.481239804241437, "grad_norm": 0.028780123218894005, "learning_rate": 9.137339798443372e-05, "loss": 0.0035, "num_input_tokens_seen": 218214896, "step": 101030 }, { "epoch": 16.482055464926592, "grad_norm": 0.0026272626128047705, "learning_rate": 9.133238270583133e-05, "loss": 0.0034, "num_input_tokens_seen": 218224304, "step": 101035 }, { "epoch": 16.482871125611744, "grad_norm": 0.0005200458108447492, "learning_rate": 9.129137570939632e-05, "loss": 0.0015, "num_input_tokens_seen": 218235568, "step": 101040 }, { "epoch": 16.4836867862969, "grad_norm": 0.00038884973037056625, "learning_rate": 9.125037699596039e-05, "loss": 0.0125, "num_input_tokens_seen": 218247056, "step": 101045 }, { "epoch": 16.484502446982056, "grad_norm": 0.007529503665864468, "learning_rate": 9.12093865663538e-05, "loss": 0.0008, "num_input_tokens_seen": 218257840, "step": 101050 }, { "epoch": 16.48531810766721, "grad_norm": 0.03659482300281525, "learning_rate": 9.11684044214079e-05, "loss": 0.0039, "num_input_tokens_seen": 218269168, "step": 101055 }, { "epoch": 16.486133768352367, "grad_norm": 0.008131932467222214, "learning_rate": 9.112743056195261e-05, "loss": 0.0494, "num_input_tokens_seen": 218279440, "step": 101060 }, { "epoch": 16.48694942903752, "grad_norm": 0.0013752073282375932, "learning_rate": 9.10864649888189e-05, "loss": 0.0017, "num_input_tokens_seen": 218291472, "step": 101065 }, { "epoch": 16.487765089722675, "grad_norm": 0.010680504143238068, "learning_rate": 9.104550770283648e-05, "loss": 0.0026, "num_input_tokens_seen": 218302096, "step": 101070 }, { "epoch": 16.48858075040783, "grad_norm": 0.020263204351067543, "learning_rate": 9.100455870483587e-05, "loss": 0.0015, "num_input_tokens_seen": 218312720, "step": 101075 }, { "epoch": 16.489396411092986, "grad_norm": 0.000559748790692538, "learning_rate": 9.096361799564651e-05, "loss": 0.0066, "num_input_tokens_seen": 218323440, "step": 101080 }, { "epoch": 16.49021207177814, "grad_norm": 0.028419995680451393, "learning_rate": 9.092268557609856e-05, "loss": 0.0115, "num_input_tokens_seen": 218333616, "step": 101085 }, { "epoch": 16.491027732463294, "grad_norm": 0.000426318816607818, "learning_rate": 9.088176144702104e-05, "loss": 0.0007, "num_input_tokens_seen": 218345520, "step": 101090 }, { "epoch": 16.49184339314845, "grad_norm": 0.013602585531771183, "learning_rate": 9.084084560924394e-05, "loss": 0.0716, "num_input_tokens_seen": 218355984, "step": 101095 }, { "epoch": 16.492659053833606, "grad_norm": 0.002315348945558071, "learning_rate": 9.079993806359587e-05, "loss": 0.1014, "num_input_tokens_seen": 218366832, "step": 101100 }, { "epoch": 16.49347471451876, "grad_norm": 0.02149783819913864, "learning_rate": 9.075903881090636e-05, "loss": 0.0027, "num_input_tokens_seen": 218375568, "step": 101105 }, { "epoch": 16.494290375203914, "grad_norm": 0.023237407207489014, "learning_rate": 9.071814785200399e-05, "loss": 0.0104, "num_input_tokens_seen": 218386832, "step": 101110 }, { "epoch": 16.49510603588907, "grad_norm": 0.0034335225354880095, "learning_rate": 9.067726518771762e-05, "loss": 0.0032, "num_input_tokens_seen": 218398576, "step": 101115 }, { "epoch": 16.495921696574225, "grad_norm": 0.00745142437517643, "learning_rate": 9.063639081887576e-05, "loss": 0.0021, "num_input_tokens_seen": 218407600, "step": 101120 }, { "epoch": 16.49673735725938, "grad_norm": 0.029559500515460968, "learning_rate": 9.059552474630672e-05, "loss": 0.002, "num_input_tokens_seen": 218419376, "step": 101125 }, { "epoch": 16.497553017944536, "grad_norm": 0.0032735865097492933, "learning_rate": 9.055466697083875e-05, "loss": 0.0013, "num_input_tokens_seen": 218429744, "step": 101130 }, { "epoch": 16.49836867862969, "grad_norm": 0.7694928050041199, "learning_rate": 9.051381749329984e-05, "loss": 0.0928, "num_input_tokens_seen": 218439536, "step": 101135 }, { "epoch": 16.499184339314844, "grad_norm": 0.15859928727149963, "learning_rate": 9.04729763145179e-05, "loss": 0.0169, "num_input_tokens_seen": 218451184, "step": 101140 }, { "epoch": 16.5, "grad_norm": 0.001213204930536449, "learning_rate": 9.043214343532063e-05, "loss": 0.01, "num_input_tokens_seen": 218462448, "step": 101145 }, { "epoch": 16.500815660685156, "grad_norm": 0.03677457943558693, "learning_rate": 9.039131885653556e-05, "loss": 0.0021, "num_input_tokens_seen": 218474256, "step": 101150 }, { "epoch": 16.50163132137031, "grad_norm": 0.002333037555217743, "learning_rate": 9.035050257898991e-05, "loss": 0.0083, "num_input_tokens_seen": 218485520, "step": 101155 }, { "epoch": 16.502446982055464, "grad_norm": 0.004182538483291864, "learning_rate": 9.030969460351124e-05, "loss": 0.0028, "num_input_tokens_seen": 218496432, "step": 101160 }, { "epoch": 16.50326264274062, "grad_norm": 0.000874399789609015, "learning_rate": 9.026889493092605e-05, "loss": 0.0056, "num_input_tokens_seen": 218507344, "step": 101165 }, { "epoch": 16.504078303425775, "grad_norm": 0.007135962136089802, "learning_rate": 9.022810356206179e-05, "loss": 0.0041, "num_input_tokens_seen": 218518704, "step": 101170 }, { "epoch": 16.50489396411093, "grad_norm": 0.3483419716358185, "learning_rate": 9.018732049774459e-05, "loss": 0.0233, "num_input_tokens_seen": 218529744, "step": 101175 }, { "epoch": 16.505709624796086, "grad_norm": 0.009324166923761368, "learning_rate": 9.014654573880143e-05, "loss": 0.0054, "num_input_tokens_seen": 218540400, "step": 101180 }, { "epoch": 16.50652528548124, "grad_norm": 0.004226782359182835, "learning_rate": 9.010577928605823e-05, "loss": 0.0026, "num_input_tokens_seen": 218552272, "step": 101185 }, { "epoch": 16.507340946166394, "grad_norm": 0.06776424497365952, "learning_rate": 9.00650211403417e-05, "loss": 0.0442, "num_input_tokens_seen": 218561424, "step": 101190 }, { "epoch": 16.50815660685155, "grad_norm": 0.03477175533771515, "learning_rate": 9.002427130247726e-05, "loss": 0.004, "num_input_tokens_seen": 218572624, "step": 101195 }, { "epoch": 16.508972267536706, "grad_norm": 0.00041841110214591026, "learning_rate": 8.998352977329127e-05, "loss": 0.001, "num_input_tokens_seen": 218583376, "step": 101200 }, { "epoch": 16.50978792822186, "grad_norm": 0.004782752133905888, "learning_rate": 8.994279655360899e-05, "loss": 0.0014, "num_input_tokens_seen": 218594224, "step": 101205 }, { "epoch": 16.510603588907014, "grad_norm": 0.37345457077026367, "learning_rate": 8.99020716442564e-05, "loss": 0.0122, "num_input_tokens_seen": 218604880, "step": 101210 }, { "epoch": 16.51141924959217, "grad_norm": 0.002796668093651533, "learning_rate": 8.986135504605831e-05, "loss": 0.0006, "num_input_tokens_seen": 218615152, "step": 101215 }, { "epoch": 16.512234910277325, "grad_norm": 2.6558995246887207, "learning_rate": 8.982064675984025e-05, "loss": 0.0671, "num_input_tokens_seen": 218625904, "step": 101220 }, { "epoch": 16.51305057096248, "grad_norm": 0.0028051333501935005, "learning_rate": 8.977994678642714e-05, "loss": 0.0047, "num_input_tokens_seen": 218636976, "step": 101225 }, { "epoch": 16.513866231647633, "grad_norm": 0.0031697454396635294, "learning_rate": 8.973925512664383e-05, "loss": 0.0067, "num_input_tokens_seen": 218647408, "step": 101230 }, { "epoch": 16.51468189233279, "grad_norm": 0.8315576910972595, "learning_rate": 8.969857178131497e-05, "loss": 0.0166, "num_input_tokens_seen": 218658864, "step": 101235 }, { "epoch": 16.515497553017944, "grad_norm": 0.011037657037377357, "learning_rate": 8.965789675126501e-05, "loss": 0.0027, "num_input_tokens_seen": 218670032, "step": 101240 }, { "epoch": 16.5163132137031, "grad_norm": 0.0005106102908030152, "learning_rate": 8.961723003731837e-05, "loss": 0.0023, "num_input_tokens_seen": 218681424, "step": 101245 }, { "epoch": 16.517128874388256, "grad_norm": 0.034446511417627335, "learning_rate": 8.95765716402992e-05, "loss": 0.0586, "num_input_tokens_seen": 218692208, "step": 101250 }, { "epoch": 16.517944535073408, "grad_norm": 0.007385551929473877, "learning_rate": 8.953592156103141e-05, "loss": 0.0023, "num_input_tokens_seen": 218703024, "step": 101255 }, { "epoch": 16.518760195758563, "grad_norm": 0.017408102750778198, "learning_rate": 8.949527980033889e-05, "loss": 0.007, "num_input_tokens_seen": 218713840, "step": 101260 }, { "epoch": 16.51957585644372, "grad_norm": 0.0040249088779091835, "learning_rate": 8.945464635904532e-05, "loss": 0.0009, "num_input_tokens_seen": 218724624, "step": 101265 }, { "epoch": 16.520391517128875, "grad_norm": 0.000684377911966294, "learning_rate": 8.94140212379741e-05, "loss": 0.0006, "num_input_tokens_seen": 218736080, "step": 101270 }, { "epoch": 16.52120717781403, "grad_norm": 0.06121218577027321, "learning_rate": 8.937340443794867e-05, "loss": 0.0047, "num_input_tokens_seen": 218745872, "step": 101275 }, { "epoch": 16.522022838499183, "grad_norm": 0.004632898606359959, "learning_rate": 8.933279595979205e-05, "loss": 0.0019, "num_input_tokens_seen": 218757136, "step": 101280 }, { "epoch": 16.52283849918434, "grad_norm": 0.002208688296377659, "learning_rate": 8.929219580432735e-05, "loss": 0.0092, "num_input_tokens_seen": 218768080, "step": 101285 }, { "epoch": 16.523654159869494, "grad_norm": 0.0014574574306607246, "learning_rate": 8.925160397237725e-05, "loss": 0.0044, "num_input_tokens_seen": 218779472, "step": 101290 }, { "epoch": 16.52446982055465, "grad_norm": 0.022507159039378166, "learning_rate": 8.921102046476454e-05, "loss": 0.0022, "num_input_tokens_seen": 218790064, "step": 101295 }, { "epoch": 16.525285481239806, "grad_norm": 0.22740155458450317, "learning_rate": 8.917044528231145e-05, "loss": 0.0156, "num_input_tokens_seen": 218800720, "step": 101300 }, { "epoch": 16.526101141924958, "grad_norm": 0.0901143029332161, "learning_rate": 8.912987842584075e-05, "loss": 0.0502, "num_input_tokens_seen": 218810512, "step": 101305 }, { "epoch": 16.526916802610113, "grad_norm": 0.03165145590901375, "learning_rate": 8.908931989617403e-05, "loss": 0.0027, "num_input_tokens_seen": 218820432, "step": 101310 }, { "epoch": 16.52773246329527, "grad_norm": 2.9440736770629883, "learning_rate": 8.904876969413372e-05, "loss": 0.0321, "num_input_tokens_seen": 218831152, "step": 101315 }, { "epoch": 16.528548123980425, "grad_norm": 0.015373525209724903, "learning_rate": 8.900822782054124e-05, "loss": 0.0378, "num_input_tokens_seen": 218842640, "step": 101320 }, { "epoch": 16.52936378466558, "grad_norm": 0.10055476427078247, "learning_rate": 8.896769427621848e-05, "loss": 0.0081, "num_input_tokens_seen": 218852720, "step": 101325 }, { "epoch": 16.530179445350733, "grad_norm": 0.004800902679562569, "learning_rate": 8.892716906198683e-05, "loss": 0.0021, "num_input_tokens_seen": 218862288, "step": 101330 }, { "epoch": 16.53099510603589, "grad_norm": 0.00658207293599844, "learning_rate": 8.88866521786676e-05, "loss": 0.0013, "num_input_tokens_seen": 218873136, "step": 101335 }, { "epoch": 16.531810766721044, "grad_norm": 0.0034926505759358406, "learning_rate": 8.884614362708188e-05, "loss": 0.001, "num_input_tokens_seen": 218884496, "step": 101340 }, { "epoch": 16.5326264274062, "grad_norm": 0.0007944152457639575, "learning_rate": 8.88056434080507e-05, "loss": 0.0017, "num_input_tokens_seen": 218894800, "step": 101345 }, { "epoch": 16.533442088091356, "grad_norm": 0.01701800711452961, "learning_rate": 8.876515152239472e-05, "loss": 0.0054, "num_input_tokens_seen": 218904688, "step": 101350 }, { "epoch": 16.534257748776508, "grad_norm": 0.008906069211661816, "learning_rate": 8.872466797093464e-05, "loss": 0.0028, "num_input_tokens_seen": 218914896, "step": 101355 }, { "epoch": 16.535073409461663, "grad_norm": 0.045917339622974396, "learning_rate": 8.868419275449096e-05, "loss": 0.0212, "num_input_tokens_seen": 218927632, "step": 101360 }, { "epoch": 16.53588907014682, "grad_norm": 0.053435854613780975, "learning_rate": 8.864372587388387e-05, "loss": 0.0033, "num_input_tokens_seen": 218938928, "step": 101365 }, { "epoch": 16.536704730831975, "grad_norm": 0.002096776617690921, "learning_rate": 8.860326732993352e-05, "loss": 0.0022, "num_input_tokens_seen": 218949168, "step": 101370 }, { "epoch": 16.53752039151713, "grad_norm": 0.0024299444630742073, "learning_rate": 8.856281712345988e-05, "loss": 0.0161, "num_input_tokens_seen": 218959344, "step": 101375 }, { "epoch": 16.538336052202283, "grad_norm": 0.0005039930110797286, "learning_rate": 8.852237525528262e-05, "loss": 0.0066, "num_input_tokens_seen": 218970416, "step": 101380 }, { "epoch": 16.53915171288744, "grad_norm": 0.0017340783961117268, "learning_rate": 8.848194172622148e-05, "loss": 0.0011, "num_input_tokens_seen": 218982320, "step": 101385 }, { "epoch": 16.539967373572594, "grad_norm": 0.14250528812408447, "learning_rate": 8.844151653709581e-05, "loss": 0.0057, "num_input_tokens_seen": 218992848, "step": 101390 }, { "epoch": 16.54078303425775, "grad_norm": 0.0025329969357699156, "learning_rate": 8.840109968872495e-05, "loss": 0.0005, "num_input_tokens_seen": 219004464, "step": 101395 }, { "epoch": 16.541598694942905, "grad_norm": 0.0013825197238475084, "learning_rate": 8.836069118192791e-05, "loss": 0.0019, "num_input_tokens_seen": 219015952, "step": 101400 }, { "epoch": 16.542414355628058, "grad_norm": 0.0008943129214458168, "learning_rate": 8.83202910175237e-05, "loss": 0.0019, "num_input_tokens_seen": 219026320, "step": 101405 }, { "epoch": 16.543230016313213, "grad_norm": 0.00017344093066640198, "learning_rate": 8.827989919633106e-05, "loss": 0.0024, "num_input_tokens_seen": 219036624, "step": 101410 }, { "epoch": 16.54404567699837, "grad_norm": 0.0071393647231161594, "learning_rate": 8.82395157191685e-05, "loss": 0.0187, "num_input_tokens_seen": 219046896, "step": 101415 }, { "epoch": 16.544861337683525, "grad_norm": 0.0163432527333498, "learning_rate": 8.819914058685458e-05, "loss": 0.0008, "num_input_tokens_seen": 219057904, "step": 101420 }, { "epoch": 16.545676998368677, "grad_norm": 0.5346035957336426, "learning_rate": 8.815877380020743e-05, "loss": 0.0643, "num_input_tokens_seen": 219067760, "step": 101425 }, { "epoch": 16.546492659053833, "grad_norm": 0.00447038421407342, "learning_rate": 8.811841536004505e-05, "loss": 0.0015, "num_input_tokens_seen": 219078672, "step": 101430 }, { "epoch": 16.54730831973899, "grad_norm": 0.02596464194357395, "learning_rate": 8.807806526718565e-05, "loss": 0.0014, "num_input_tokens_seen": 219089424, "step": 101435 }, { "epoch": 16.548123980424144, "grad_norm": 0.0009697464993223548, "learning_rate": 8.803772352244683e-05, "loss": 0.0009, "num_input_tokens_seen": 219099920, "step": 101440 }, { "epoch": 16.5489396411093, "grad_norm": 0.000986489118076861, "learning_rate": 8.799739012664615e-05, "loss": 0.0024, "num_input_tokens_seen": 219110864, "step": 101445 }, { "epoch": 16.549755301794452, "grad_norm": 0.03657018765807152, "learning_rate": 8.795706508060102e-05, "loss": 0.0038, "num_input_tokens_seen": 219121616, "step": 101450 }, { "epoch": 16.550570962479608, "grad_norm": 0.12137595564126968, "learning_rate": 8.791674838512864e-05, "loss": 0.0052, "num_input_tokens_seen": 219133328, "step": 101455 }, { "epoch": 16.551386623164763, "grad_norm": 0.0015651886351406574, "learning_rate": 8.787644004104617e-05, "loss": 0.0008, "num_input_tokens_seen": 219144016, "step": 101460 }, { "epoch": 16.55220228384992, "grad_norm": 0.002112816786393523, "learning_rate": 8.78361400491704e-05, "loss": 0.0012, "num_input_tokens_seen": 219154992, "step": 101465 }, { "epoch": 16.553017944535075, "grad_norm": 0.01039827335625887, "learning_rate": 8.779584841031818e-05, "loss": 0.0007, "num_input_tokens_seen": 219165104, "step": 101470 }, { "epoch": 16.553833605220227, "grad_norm": 0.009416126646101475, "learning_rate": 8.775556512530597e-05, "loss": 0.0055, "num_input_tokens_seen": 219176496, "step": 101475 }, { "epoch": 16.554649265905383, "grad_norm": 0.005137943662703037, "learning_rate": 8.771529019495022e-05, "loss": 0.0029, "num_input_tokens_seen": 219185584, "step": 101480 }, { "epoch": 16.55546492659054, "grad_norm": 0.0015324490377679467, "learning_rate": 8.767502362006713e-05, "loss": 0.0008, "num_input_tokens_seen": 219196048, "step": 101485 }, { "epoch": 16.556280587275694, "grad_norm": 0.006908372975885868, "learning_rate": 8.763476540147275e-05, "loss": 0.0007, "num_input_tokens_seen": 219206704, "step": 101490 }, { "epoch": 16.55709624796085, "grad_norm": 0.0011675909627228975, "learning_rate": 8.759451553998299e-05, "loss": 0.0061, "num_input_tokens_seen": 219216880, "step": 101495 }, { "epoch": 16.557911908646002, "grad_norm": 0.3666263818740845, "learning_rate": 8.755427403641352e-05, "loss": 0.0224, "num_input_tokens_seen": 219227216, "step": 101500 }, { "epoch": 16.558727569331158, "grad_norm": 0.13365083932876587, "learning_rate": 8.751404089157993e-05, "loss": 0.0062, "num_input_tokens_seen": 219237136, "step": 101505 }, { "epoch": 16.559543230016313, "grad_norm": 0.010968620888888836, "learning_rate": 8.747381610629762e-05, "loss": 0.0013, "num_input_tokens_seen": 219247760, "step": 101510 }, { "epoch": 16.56035889070147, "grad_norm": 0.02978862263262272, "learning_rate": 8.74335996813817e-05, "loss": 0.0021, "num_input_tokens_seen": 219259408, "step": 101515 }, { "epoch": 16.561174551386625, "grad_norm": 0.00066575180971995, "learning_rate": 8.739339161764725e-05, "loss": 0.0026, "num_input_tokens_seen": 219271216, "step": 101520 }, { "epoch": 16.561990212071777, "grad_norm": 0.680747389793396, "learning_rate": 8.735319191590918e-05, "loss": 0.14, "num_input_tokens_seen": 219281904, "step": 101525 }, { "epoch": 16.562805872756933, "grad_norm": 0.08297364413738251, "learning_rate": 8.731300057698216e-05, "loss": 0.0041, "num_input_tokens_seen": 219292848, "step": 101530 }, { "epoch": 16.563621533442088, "grad_norm": 0.005971312522888184, "learning_rate": 8.727281760168055e-05, "loss": 0.0015, "num_input_tokens_seen": 219303888, "step": 101535 }, { "epoch": 16.564437194127244, "grad_norm": 0.0019951933063566685, "learning_rate": 8.723264299081912e-05, "loss": 0.0009, "num_input_tokens_seen": 219315120, "step": 101540 }, { "epoch": 16.5652528548124, "grad_norm": 0.015224998816847801, "learning_rate": 8.719247674521157e-05, "loss": 0.0035, "num_input_tokens_seen": 219325648, "step": 101545 }, { "epoch": 16.56606851549755, "grad_norm": 0.009353390894830227, "learning_rate": 8.715231886567248e-05, "loss": 0.0731, "num_input_tokens_seen": 219336496, "step": 101550 }, { "epoch": 16.566884176182707, "grad_norm": 0.0951535701751709, "learning_rate": 8.711216935301508e-05, "loss": 0.0042, "num_input_tokens_seen": 219347120, "step": 101555 }, { "epoch": 16.567699836867863, "grad_norm": 0.0017404680838808417, "learning_rate": 8.70720282080536e-05, "loss": 0.0032, "num_input_tokens_seen": 219357744, "step": 101560 }, { "epoch": 16.56851549755302, "grad_norm": 0.005474665202200413, "learning_rate": 8.703189543160106e-05, "loss": 0.001, "num_input_tokens_seen": 219368848, "step": 101565 }, { "epoch": 16.569331158238175, "grad_norm": 0.07186252623796463, "learning_rate": 8.699177102447126e-05, "loss": 0.0842, "num_input_tokens_seen": 219379760, "step": 101570 }, { "epoch": 16.570146818923327, "grad_norm": 0.04543714597821236, "learning_rate": 8.695165498747698e-05, "loss": 0.005, "num_input_tokens_seen": 219389488, "step": 101575 }, { "epoch": 16.570962479608482, "grad_norm": 0.01591755822300911, "learning_rate": 8.691154732143147e-05, "loss": 0.0016, "num_input_tokens_seen": 219400400, "step": 101580 }, { "epoch": 16.571778140293638, "grad_norm": 0.2322196513414383, "learning_rate": 8.687144802714753e-05, "loss": 0.0085, "num_input_tokens_seen": 219412208, "step": 101585 }, { "epoch": 16.572593800978794, "grad_norm": 0.0029731979593634605, "learning_rate": 8.683135710543777e-05, "loss": 0.0034, "num_input_tokens_seen": 219423888, "step": 101590 }, { "epoch": 16.57340946166395, "grad_norm": 0.0015354871284216642, "learning_rate": 8.679127455711466e-05, "loss": 0.0008, "num_input_tokens_seen": 219434128, "step": 101595 }, { "epoch": 16.5742251223491, "grad_norm": 0.07060811668634415, "learning_rate": 8.675120038299062e-05, "loss": 0.0883, "num_input_tokens_seen": 219445328, "step": 101600 }, { "epoch": 16.575040783034257, "grad_norm": 0.0051761167123913765, "learning_rate": 8.671113458387775e-05, "loss": 0.0018, "num_input_tokens_seen": 219456976, "step": 101605 }, { "epoch": 16.575856443719413, "grad_norm": 0.5322887897491455, "learning_rate": 8.667107716058798e-05, "loss": 0.0237, "num_input_tokens_seen": 219467856, "step": 101610 }, { "epoch": 16.57667210440457, "grad_norm": 0.0018487609922885895, "learning_rate": 8.66310281139332e-05, "loss": 0.001, "num_input_tokens_seen": 219476944, "step": 101615 }, { "epoch": 16.57748776508972, "grad_norm": 0.017845647409558296, "learning_rate": 8.659098744472505e-05, "loss": 0.0113, "num_input_tokens_seen": 219487728, "step": 101620 }, { "epoch": 16.578303425774877, "grad_norm": 0.057124871760606766, "learning_rate": 8.655095515377498e-05, "loss": 0.0044, "num_input_tokens_seen": 219499120, "step": 101625 }, { "epoch": 16.579119086460032, "grad_norm": 0.0012420967686921358, "learning_rate": 8.65109312418943e-05, "loss": 0.0027, "num_input_tokens_seen": 219510192, "step": 101630 }, { "epoch": 16.579934747145188, "grad_norm": 0.00048453285126015544, "learning_rate": 8.647091570989413e-05, "loss": 0.0225, "num_input_tokens_seen": 219520848, "step": 101635 }, { "epoch": 16.580750407830344, "grad_norm": 0.0012041418813169003, "learning_rate": 8.643090855858549e-05, "loss": 0.0031, "num_input_tokens_seen": 219530896, "step": 101640 }, { "epoch": 16.581566068515496, "grad_norm": 0.03431824967265129, "learning_rate": 8.639090978877912e-05, "loss": 0.0115, "num_input_tokens_seen": 219542576, "step": 101645 }, { "epoch": 16.58238172920065, "grad_norm": 0.0006362311542034149, "learning_rate": 8.635091940128548e-05, "loss": 0.0017, "num_input_tokens_seen": 219553776, "step": 101650 }, { "epoch": 16.583197389885807, "grad_norm": 0.0020954282954335213, "learning_rate": 8.631093739691553e-05, "loss": 0.0027, "num_input_tokens_seen": 219565200, "step": 101655 }, { "epoch": 16.584013050570963, "grad_norm": 0.16877447068691254, "learning_rate": 8.627096377647898e-05, "loss": 0.0057, "num_input_tokens_seen": 219576976, "step": 101660 }, { "epoch": 16.58482871125612, "grad_norm": 0.004283056128770113, "learning_rate": 8.623099854078643e-05, "loss": 0.0015, "num_input_tokens_seen": 219588336, "step": 101665 }, { "epoch": 16.58564437194127, "grad_norm": 0.03166608139872551, "learning_rate": 8.619104169064734e-05, "loss": 0.0021, "num_input_tokens_seen": 219600144, "step": 101670 }, { "epoch": 16.586460032626427, "grad_norm": 0.03157994523644447, "learning_rate": 8.615109322687203e-05, "loss": 0.0021, "num_input_tokens_seen": 219610384, "step": 101675 }, { "epoch": 16.587275693311582, "grad_norm": 0.0213120449334383, "learning_rate": 8.611115315026951e-05, "loss": 0.0063, "num_input_tokens_seen": 219620880, "step": 101680 }, { "epoch": 16.588091353996738, "grad_norm": 0.2438468486070633, "learning_rate": 8.607122146164986e-05, "loss": 0.009, "num_input_tokens_seen": 219631856, "step": 101685 }, { "epoch": 16.588907014681894, "grad_norm": 0.027045302093029022, "learning_rate": 8.60312981618217e-05, "loss": 0.0024, "num_input_tokens_seen": 219643376, "step": 101690 }, { "epoch": 16.589722675367046, "grad_norm": 0.013801125809550285, "learning_rate": 8.599138325159472e-05, "loss": 0.0033, "num_input_tokens_seen": 219655248, "step": 101695 }, { "epoch": 16.5905383360522, "grad_norm": 0.00023734763090033084, "learning_rate": 8.595147673177728e-05, "loss": 0.0838, "num_input_tokens_seen": 219665360, "step": 101700 }, { "epoch": 16.591353996737357, "grad_norm": 0.0007523433305323124, "learning_rate": 8.591157860317871e-05, "loss": 0.0011, "num_input_tokens_seen": 219676848, "step": 101705 }, { "epoch": 16.592169657422513, "grad_norm": 0.0046799443662166595, "learning_rate": 8.587168886660707e-05, "loss": 0.0008, "num_input_tokens_seen": 219688272, "step": 101710 }, { "epoch": 16.59298531810767, "grad_norm": 0.051697228103876114, "learning_rate": 8.583180752287123e-05, "loss": 0.0031, "num_input_tokens_seen": 219698160, "step": 101715 }, { "epoch": 16.59380097879282, "grad_norm": 0.0009116280707530677, "learning_rate": 8.579193457277895e-05, "loss": 0.0042, "num_input_tokens_seen": 219708400, "step": 101720 }, { "epoch": 16.594616639477977, "grad_norm": 0.001496818382292986, "learning_rate": 8.575207001713875e-05, "loss": 0.0041, "num_input_tokens_seen": 219718640, "step": 101725 }, { "epoch": 16.595432300163132, "grad_norm": 0.007261293474584818, "learning_rate": 8.571221385675832e-05, "loss": 0.0027, "num_input_tokens_seen": 219730608, "step": 101730 }, { "epoch": 16.596247960848288, "grad_norm": 0.07469271868467331, "learning_rate": 8.567236609244544e-05, "loss": 0.004, "num_input_tokens_seen": 219740656, "step": 101735 }, { "epoch": 16.597063621533444, "grad_norm": 0.005149087402969599, "learning_rate": 8.563252672500771e-05, "loss": 0.0118, "num_input_tokens_seen": 219750896, "step": 101740 }, { "epoch": 16.597879282218596, "grad_norm": 0.011800228618085384, "learning_rate": 8.559269575525247e-05, "loss": 0.0018, "num_input_tokens_seen": 219762256, "step": 101745 }, { "epoch": 16.59869494290375, "grad_norm": 0.01077636331319809, "learning_rate": 8.555287318398697e-05, "loss": 0.0302, "num_input_tokens_seen": 219773104, "step": 101750 }, { "epoch": 16.599510603588907, "grad_norm": 0.01085708662867546, "learning_rate": 8.551305901201822e-05, "loss": 0.0022, "num_input_tokens_seen": 219784592, "step": 101755 }, { "epoch": 16.600326264274063, "grad_norm": 0.01687583513557911, "learning_rate": 8.54732532401532e-05, "loss": 0.0041, "num_input_tokens_seen": 219796112, "step": 101760 }, { "epoch": 16.601141924959215, "grad_norm": 0.01624632440507412, "learning_rate": 8.543345586919854e-05, "loss": 0.0039, "num_input_tokens_seen": 219806288, "step": 101765 }, { "epoch": 16.60195758564437, "grad_norm": 0.013760429807007313, "learning_rate": 8.53936668999608e-05, "loss": 0.0012, "num_input_tokens_seen": 219816496, "step": 101770 }, { "epoch": 16.602773246329527, "grad_norm": 0.004715532064437866, "learning_rate": 8.535388633324625e-05, "loss": 0.0031, "num_input_tokens_seen": 219828336, "step": 101775 }, { "epoch": 16.603588907014682, "grad_norm": 0.0017586436588317156, "learning_rate": 8.531411416986152e-05, "loss": 0.0012, "num_input_tokens_seen": 219839568, "step": 101780 }, { "epoch": 16.604404567699838, "grad_norm": 0.0021014027297496796, "learning_rate": 8.5274350410612e-05, "loss": 0.0637, "num_input_tokens_seen": 219849680, "step": 101785 }, { "epoch": 16.605220228384994, "grad_norm": 0.007624736521393061, "learning_rate": 8.523459505630415e-05, "loss": 0.0013, "num_input_tokens_seen": 219861232, "step": 101790 }, { "epoch": 16.606035889070146, "grad_norm": 0.0007187062292359769, "learning_rate": 8.51948481077432e-05, "loss": 0.0009, "num_input_tokens_seen": 219872624, "step": 101795 }, { "epoch": 16.6068515497553, "grad_norm": 0.02473929524421692, "learning_rate": 8.515510956573507e-05, "loss": 0.0026, "num_input_tokens_seen": 219883952, "step": 101800 }, { "epoch": 16.607667210440457, "grad_norm": 0.004971285816282034, "learning_rate": 8.511537943108466e-05, "loss": 0.0316, "num_input_tokens_seen": 219895408, "step": 101805 }, { "epoch": 16.608482871125613, "grad_norm": 0.0360880121588707, "learning_rate": 8.507565770459769e-05, "loss": 0.0052, "num_input_tokens_seen": 219906224, "step": 101810 }, { "epoch": 16.609298531810765, "grad_norm": 0.001713123987428844, "learning_rate": 8.503594438707856e-05, "loss": 0.0013, "num_input_tokens_seen": 219919248, "step": 101815 }, { "epoch": 16.61011419249592, "grad_norm": 0.0006942551117390394, "learning_rate": 8.499623947933276e-05, "loss": 0.0011, "num_input_tokens_seen": 219930704, "step": 101820 }, { "epoch": 16.610929853181077, "grad_norm": 0.00033700844505801797, "learning_rate": 8.495654298216438e-05, "loss": 0.0009, "num_input_tokens_seen": 219941232, "step": 101825 }, { "epoch": 16.611745513866232, "grad_norm": 0.00037762854481115937, "learning_rate": 8.49168548963784e-05, "loss": 0.0014, "num_input_tokens_seen": 219951472, "step": 101830 }, { "epoch": 16.612561174551388, "grad_norm": 0.009005128405988216, "learning_rate": 8.487717522277872e-05, "loss": 0.0056, "num_input_tokens_seen": 219961936, "step": 101835 }, { "epoch": 16.61337683523654, "grad_norm": 0.0003597979375626892, "learning_rate": 8.483750396216988e-05, "loss": 0.0989, "num_input_tokens_seen": 219972976, "step": 101840 }, { "epoch": 16.614192495921696, "grad_norm": 0.009030995890498161, "learning_rate": 8.479784111535549e-05, "loss": 0.0047, "num_input_tokens_seen": 219983760, "step": 101845 }, { "epoch": 16.61500815660685, "grad_norm": 0.025649599730968475, "learning_rate": 8.475818668313984e-05, "loss": 0.0085, "num_input_tokens_seen": 219993808, "step": 101850 }, { "epoch": 16.615823817292007, "grad_norm": 0.0020423270761966705, "learning_rate": 8.471854066632607e-05, "loss": 0.0196, "num_input_tokens_seen": 220004560, "step": 101855 }, { "epoch": 16.616639477977163, "grad_norm": 0.000799459929112345, "learning_rate": 8.467890306571795e-05, "loss": 0.0015, "num_input_tokens_seen": 220016816, "step": 101860 }, { "epoch": 16.617455138662315, "grad_norm": 0.006219548638910055, "learning_rate": 8.463927388211878e-05, "loss": 0.0029, "num_input_tokens_seen": 220028304, "step": 101865 }, { "epoch": 16.61827079934747, "grad_norm": 0.010150066576898098, "learning_rate": 8.459965311633161e-05, "loss": 0.0124, "num_input_tokens_seen": 220038768, "step": 101870 }, { "epoch": 16.619086460032626, "grad_norm": 0.12095730751752853, "learning_rate": 8.456004076915952e-05, "loss": 0.0057, "num_input_tokens_seen": 220049136, "step": 101875 }, { "epoch": 16.619902120717782, "grad_norm": 0.05160725861787796, "learning_rate": 8.452043684140514e-05, "loss": 0.0016, "num_input_tokens_seen": 220060048, "step": 101880 }, { "epoch": 16.620717781402938, "grad_norm": 0.08182302862405777, "learning_rate": 8.448084133387124e-05, "loss": 0.0067, "num_input_tokens_seen": 220071088, "step": 101885 }, { "epoch": 16.62153344208809, "grad_norm": 0.005083407275378704, "learning_rate": 8.444125424736016e-05, "loss": 0.0007, "num_input_tokens_seen": 220081712, "step": 101890 }, { "epoch": 16.622349102773246, "grad_norm": 0.004309537820518017, "learning_rate": 8.440167558267431e-05, "loss": 0.0007, "num_input_tokens_seen": 220092016, "step": 101895 }, { "epoch": 16.6231647634584, "grad_norm": 0.18933819234371185, "learning_rate": 8.436210534061567e-05, "loss": 0.0073, "num_input_tokens_seen": 220104464, "step": 101900 }, { "epoch": 16.623980424143557, "grad_norm": 0.07430551201105118, "learning_rate": 8.432254352198626e-05, "loss": 0.0036, "num_input_tokens_seen": 220115184, "step": 101905 }, { "epoch": 16.624796084828713, "grad_norm": 0.046566374599933624, "learning_rate": 8.428299012758778e-05, "loss": 0.0054, "num_input_tokens_seen": 220125104, "step": 101910 }, { "epoch": 16.625611745513865, "grad_norm": 0.047151170670986176, "learning_rate": 8.424344515822197e-05, "loss": 0.0031, "num_input_tokens_seen": 220136496, "step": 101915 }, { "epoch": 16.62642740619902, "grad_norm": 0.0035677056293934584, "learning_rate": 8.420390861468996e-05, "loss": 0.0015, "num_input_tokens_seen": 220146928, "step": 101920 }, { "epoch": 16.627243066884176, "grad_norm": 0.0034210113808512688, "learning_rate": 8.416438049779351e-05, "loss": 0.0005, "num_input_tokens_seen": 220156752, "step": 101925 }, { "epoch": 16.628058727569332, "grad_norm": 0.0017582608852535486, "learning_rate": 8.412486080833315e-05, "loss": 0.0005, "num_input_tokens_seen": 220168048, "step": 101930 }, { "epoch": 16.628874388254488, "grad_norm": 0.0009029234643094242, "learning_rate": 8.408534954711034e-05, "loss": 0.1427, "num_input_tokens_seen": 220178448, "step": 101935 }, { "epoch": 16.62969004893964, "grad_norm": 0.8179956078529358, "learning_rate": 8.404584671492526e-05, "loss": 0.0238, "num_input_tokens_seen": 220189488, "step": 101940 }, { "epoch": 16.630505709624796, "grad_norm": 0.0009043613681569695, "learning_rate": 8.400635231257902e-05, "loss": 0.0212, "num_input_tokens_seen": 220201008, "step": 101945 }, { "epoch": 16.63132137030995, "grad_norm": 0.043483562767505646, "learning_rate": 8.396686634087159e-05, "loss": 0.0069, "num_input_tokens_seen": 220212464, "step": 101950 }, { "epoch": 16.632137030995107, "grad_norm": 0.02974863536655903, "learning_rate": 8.392738880060358e-05, "loss": 0.0283, "num_input_tokens_seen": 220221744, "step": 101955 }, { "epoch": 16.63295269168026, "grad_norm": 0.036095499992370605, "learning_rate": 8.388791969257458e-05, "loss": 0.0014, "num_input_tokens_seen": 220233232, "step": 101960 }, { "epoch": 16.633768352365415, "grad_norm": 0.0013275218661874533, "learning_rate": 8.384845901758498e-05, "loss": 0.0014, "num_input_tokens_seen": 220244240, "step": 101965 }, { "epoch": 16.63458401305057, "grad_norm": 0.0053516267798841, "learning_rate": 8.380900677643421e-05, "loss": 0.0011, "num_input_tokens_seen": 220255536, "step": 101970 }, { "epoch": 16.635399673735726, "grad_norm": 0.0003697921638377011, "learning_rate": 8.376956296992195e-05, "loss": 0.0031, "num_input_tokens_seen": 220267728, "step": 101975 }, { "epoch": 16.636215334420882, "grad_norm": 0.0016742395237088203, "learning_rate": 8.373012759884746e-05, "loss": 0.0466, "num_input_tokens_seen": 220279088, "step": 101980 }, { "epoch": 16.637030995106034, "grad_norm": 0.01789051480591297, "learning_rate": 8.369070066401003e-05, "loss": 0.0015, "num_input_tokens_seen": 220289296, "step": 101985 }, { "epoch": 16.63784665579119, "grad_norm": 0.008225271478295326, "learning_rate": 8.365128216620871e-05, "loss": 0.001, "num_input_tokens_seen": 220297744, "step": 101990 }, { "epoch": 16.638662316476346, "grad_norm": 0.07070305198431015, "learning_rate": 8.361187210624232e-05, "loss": 0.0183, "num_input_tokens_seen": 220309712, "step": 101995 }, { "epoch": 16.6394779771615, "grad_norm": 3.6314432621002197, "learning_rate": 8.357247048490957e-05, "loss": 0.0253, "num_input_tokens_seen": 220320848, "step": 102000 }, { "epoch": 16.640293637846657, "grad_norm": 0.003030292922630906, "learning_rate": 8.353307730300897e-05, "loss": 0.0011, "num_input_tokens_seen": 220331248, "step": 102005 }, { "epoch": 16.64110929853181, "grad_norm": 0.0020804372616112232, "learning_rate": 8.349369256133888e-05, "loss": 0.0684, "num_input_tokens_seen": 220341488, "step": 102010 }, { "epoch": 16.641924959216965, "grad_norm": 0.008241880685091019, "learning_rate": 8.345431626069744e-05, "loss": 0.0013, "num_input_tokens_seen": 220352656, "step": 102015 }, { "epoch": 16.64274061990212, "grad_norm": 0.012514442205429077, "learning_rate": 8.34149484018828e-05, "loss": 0.0595, "num_input_tokens_seen": 220364080, "step": 102020 }, { "epoch": 16.643556280587276, "grad_norm": 0.013286514207720757, "learning_rate": 8.337558898569264e-05, "loss": 0.001, "num_input_tokens_seen": 220375056, "step": 102025 }, { "epoch": 16.644371941272432, "grad_norm": 0.003999212756752968, "learning_rate": 8.333623801292472e-05, "loss": 0.002, "num_input_tokens_seen": 220386832, "step": 102030 }, { "epoch": 16.645187601957584, "grad_norm": 0.013726749457418919, "learning_rate": 8.329689548437652e-05, "loss": 0.0026, "num_input_tokens_seen": 220397776, "step": 102035 }, { "epoch": 16.64600326264274, "grad_norm": 0.029906732961535454, "learning_rate": 8.325756140084533e-05, "loss": 0.0021, "num_input_tokens_seen": 220408528, "step": 102040 }, { "epoch": 16.646818923327896, "grad_norm": 0.0009055176051333547, "learning_rate": 8.321823576312837e-05, "loss": 0.0022, "num_input_tokens_seen": 220419888, "step": 102045 }, { "epoch": 16.64763458401305, "grad_norm": 0.009986934252083302, "learning_rate": 8.317891857202253e-05, "loss": 0.0007, "num_input_tokens_seen": 220431056, "step": 102050 }, { "epoch": 16.648450244698207, "grad_norm": 0.10870281606912613, "learning_rate": 8.313960982832475e-05, "loss": 0.0024, "num_input_tokens_seen": 220441360, "step": 102055 }, { "epoch": 16.64926590538336, "grad_norm": 0.03312503546476364, "learning_rate": 8.310030953283154e-05, "loss": 0.0016, "num_input_tokens_seen": 220452528, "step": 102060 }, { "epoch": 16.650081566068515, "grad_norm": 0.0009839548729360104, "learning_rate": 8.30610176863394e-05, "loss": 0.0014, "num_input_tokens_seen": 220463600, "step": 102065 }, { "epoch": 16.65089722675367, "grad_norm": 0.008478997275233269, "learning_rate": 8.302173428964472e-05, "loss": 0.0021, "num_input_tokens_seen": 220474256, "step": 102070 }, { "epoch": 16.651712887438826, "grad_norm": 0.00024375740031246096, "learning_rate": 8.298245934354353e-05, "loss": 0.0211, "num_input_tokens_seen": 220484912, "step": 102075 }, { "epoch": 16.652528548123982, "grad_norm": 0.007510208059102297, "learning_rate": 8.29431928488319e-05, "loss": 0.0009, "num_input_tokens_seen": 220495280, "step": 102080 }, { "epoch": 16.653344208809134, "grad_norm": 0.01156105101108551, "learning_rate": 8.290393480630549e-05, "loss": 0.0029, "num_input_tokens_seen": 220505840, "step": 102085 }, { "epoch": 16.65415986949429, "grad_norm": 0.0036057571414858103, "learning_rate": 8.286468521676e-05, "loss": 0.0031, "num_input_tokens_seen": 220517584, "step": 102090 }, { "epoch": 16.654975530179446, "grad_norm": 0.023752061650156975, "learning_rate": 8.282544408099079e-05, "loss": 0.0027, "num_input_tokens_seen": 220528464, "step": 102095 }, { "epoch": 16.6557911908646, "grad_norm": 0.0068184020929038525, "learning_rate": 8.278621139979325e-05, "loss": 0.001, "num_input_tokens_seen": 220538864, "step": 102100 }, { "epoch": 16.656606851549757, "grad_norm": 0.011158975772559643, "learning_rate": 8.274698717396234e-05, "loss": 0.006, "num_input_tokens_seen": 220549264, "step": 102105 }, { "epoch": 16.65742251223491, "grad_norm": 0.3075798451900482, "learning_rate": 8.270777140429308e-05, "loss": 0.0094, "num_input_tokens_seen": 220560112, "step": 102110 }, { "epoch": 16.658238172920065, "grad_norm": 0.05352885648608208, "learning_rate": 8.266856409158025e-05, "loss": 0.0013, "num_input_tokens_seen": 220570960, "step": 102115 }, { "epoch": 16.65905383360522, "grad_norm": 0.027840284630656242, "learning_rate": 8.262936523661835e-05, "loss": 0.0027, "num_input_tokens_seen": 220582096, "step": 102120 }, { "epoch": 16.659869494290376, "grad_norm": 0.07525905966758728, "learning_rate": 8.259017484020181e-05, "loss": 0.0033, "num_input_tokens_seen": 220592016, "step": 102125 }, { "epoch": 16.660685154975532, "grad_norm": 0.14387553930282593, "learning_rate": 8.255099290312495e-05, "loss": 0.006, "num_input_tokens_seen": 220601936, "step": 102130 }, { "epoch": 16.661500815660684, "grad_norm": 0.056067511439323425, "learning_rate": 8.251181942618174e-05, "loss": 0.0031, "num_input_tokens_seen": 220612048, "step": 102135 }, { "epoch": 16.66231647634584, "grad_norm": 0.002062713261693716, "learning_rate": 8.247265441016621e-05, "loss": 0.0008, "num_input_tokens_seen": 220622608, "step": 102140 }, { "epoch": 16.663132137030995, "grad_norm": 0.001355032087303698, "learning_rate": 8.243349785587195e-05, "loss": 0.0038, "num_input_tokens_seen": 220633264, "step": 102145 }, { "epoch": 16.66394779771615, "grad_norm": 0.018915260210633278, "learning_rate": 8.23943497640926e-05, "loss": 0.0021, "num_input_tokens_seen": 220643696, "step": 102150 }, { "epoch": 16.664763458401303, "grad_norm": 0.0024553509429097176, "learning_rate": 8.235521013562148e-05, "loss": 0.0262, "num_input_tokens_seen": 220655248, "step": 102155 }, { "epoch": 16.66557911908646, "grad_norm": 0.004102553240954876, "learning_rate": 8.231607897125188e-05, "loss": 0.0022, "num_input_tokens_seen": 220664816, "step": 102160 }, { "epoch": 16.666394779771615, "grad_norm": 0.001667728298343718, "learning_rate": 8.227695627177678e-05, "loss": 0.0028, "num_input_tokens_seen": 220674448, "step": 102165 }, { "epoch": 16.66721044045677, "grad_norm": 0.003264149883762002, "learning_rate": 8.223784203798912e-05, "loss": 0.0034, "num_input_tokens_seen": 220685936, "step": 102170 }, { "epoch": 16.668026101141926, "grad_norm": 0.023338552564382553, "learning_rate": 8.219873627068141e-05, "loss": 0.005, "num_input_tokens_seen": 220696976, "step": 102175 }, { "epoch": 16.66884176182708, "grad_norm": 0.0016530726570636034, "learning_rate": 8.21596389706466e-05, "loss": 0.0054, "num_input_tokens_seen": 220707504, "step": 102180 }, { "epoch": 16.669657422512234, "grad_norm": 0.003783087246119976, "learning_rate": 8.212055013867654e-05, "loss": 0.0005, "num_input_tokens_seen": 220718928, "step": 102185 }, { "epoch": 16.67047308319739, "grad_norm": 0.0009449265198782086, "learning_rate": 8.208146977556386e-05, "loss": 0.0008, "num_input_tokens_seen": 220728784, "step": 102190 }, { "epoch": 16.671288743882545, "grad_norm": 0.0008899805252440274, "learning_rate": 8.204239788210011e-05, "loss": 0.002, "num_input_tokens_seen": 220740688, "step": 102195 }, { "epoch": 16.6721044045677, "grad_norm": 0.000524374539963901, "learning_rate": 8.200333445907766e-05, "loss": 0.0033, "num_input_tokens_seen": 220750000, "step": 102200 }, { "epoch": 16.672920065252853, "grad_norm": 0.0010391840478405356, "learning_rate": 8.196427950728763e-05, "loss": 0.0019, "num_input_tokens_seen": 220761136, "step": 102205 }, { "epoch": 16.67373572593801, "grad_norm": 0.006226594094187021, "learning_rate": 8.192523302752192e-05, "loss": 0.0011, "num_input_tokens_seen": 220771856, "step": 102210 }, { "epoch": 16.674551386623165, "grad_norm": 0.0289426501840353, "learning_rate": 8.188619502057176e-05, "loss": 0.0014, "num_input_tokens_seen": 220782512, "step": 102215 }, { "epoch": 16.67536704730832, "grad_norm": 0.03636833652853966, "learning_rate": 8.184716548722825e-05, "loss": 0.0035, "num_input_tokens_seen": 220793552, "step": 102220 }, { "epoch": 16.676182707993476, "grad_norm": 0.005582255311310291, "learning_rate": 8.180814442828238e-05, "loss": 0.0568, "num_input_tokens_seen": 220803792, "step": 102225 }, { "epoch": 16.67699836867863, "grad_norm": 0.017085885629057884, "learning_rate": 8.1769131844525e-05, "loss": 0.002, "num_input_tokens_seen": 220814896, "step": 102230 }, { "epoch": 16.677814029363784, "grad_norm": 0.22158437967300415, "learning_rate": 8.173012773674671e-05, "loss": 0.0081, "num_input_tokens_seen": 220825264, "step": 102235 }, { "epoch": 16.67862969004894, "grad_norm": 0.013059341348707676, "learning_rate": 8.169113210573803e-05, "loss": 0.003, "num_input_tokens_seen": 220835824, "step": 102240 }, { "epoch": 16.679445350734095, "grad_norm": 0.012183960527181625, "learning_rate": 8.165214495228918e-05, "loss": 0.0026, "num_input_tokens_seen": 220847536, "step": 102245 }, { "epoch": 16.68026101141925, "grad_norm": 0.001767508452758193, "learning_rate": 8.161316627719035e-05, "loss": 0.0591, "num_input_tokens_seen": 220858256, "step": 102250 }, { "epoch": 16.681076672104403, "grad_norm": 0.01171040441840887, "learning_rate": 8.157419608123145e-05, "loss": 0.0016, "num_input_tokens_seen": 220868784, "step": 102255 }, { "epoch": 16.68189233278956, "grad_norm": 0.008576060645282269, "learning_rate": 8.153523436520226e-05, "loss": 0.0025, "num_input_tokens_seen": 220880656, "step": 102260 }, { "epoch": 16.682707993474715, "grad_norm": 0.07342652976512909, "learning_rate": 8.149628112989243e-05, "loss": 0.0014, "num_input_tokens_seen": 220890448, "step": 102265 }, { "epoch": 16.68352365415987, "grad_norm": 0.008088194765150547, "learning_rate": 8.145733637609137e-05, "loss": 0.0012, "num_input_tokens_seen": 220901040, "step": 102270 }, { "epoch": 16.684339314845026, "grad_norm": 0.0017258359584957361, "learning_rate": 8.141840010458835e-05, "loss": 0.0012, "num_input_tokens_seen": 220911536, "step": 102275 }, { "epoch": 16.68515497553018, "grad_norm": 0.4991852343082428, "learning_rate": 8.137947231617237e-05, "loss": 0.0058, "num_input_tokens_seen": 220923056, "step": 102280 }, { "epoch": 16.685970636215334, "grad_norm": 0.021041272208094597, "learning_rate": 8.134055301163263e-05, "loss": 0.0041, "num_input_tokens_seen": 220933712, "step": 102285 }, { "epoch": 16.68678629690049, "grad_norm": 0.0014039005618542433, "learning_rate": 8.130164219175745e-05, "loss": 0.0015, "num_input_tokens_seen": 220945968, "step": 102290 }, { "epoch": 16.687601957585645, "grad_norm": 0.0002282148489030078, "learning_rate": 8.126273985733595e-05, "loss": 0.0004, "num_input_tokens_seen": 220956112, "step": 102295 }, { "epoch": 16.6884176182708, "grad_norm": 0.0002533920051064342, "learning_rate": 8.122384600915594e-05, "loss": 0.0031, "num_input_tokens_seen": 220966896, "step": 102300 }, { "epoch": 16.689233278955953, "grad_norm": 0.0018143865745514631, "learning_rate": 8.118496064800618e-05, "loss": 0.0009, "num_input_tokens_seen": 220977936, "step": 102305 }, { "epoch": 16.69004893964111, "grad_norm": 0.02916126139461994, "learning_rate": 8.11460837746743e-05, "loss": 0.002, "num_input_tokens_seen": 220989264, "step": 102310 }, { "epoch": 16.690864600326265, "grad_norm": 0.024176111444830894, "learning_rate": 8.110721538994859e-05, "loss": 0.0031, "num_input_tokens_seen": 220999600, "step": 102315 }, { "epoch": 16.69168026101142, "grad_norm": 0.004567863419651985, "learning_rate": 8.106835549461633e-05, "loss": 0.0037, "num_input_tokens_seen": 221010640, "step": 102320 }, { "epoch": 16.692495921696576, "grad_norm": 0.0015767280710861087, "learning_rate": 8.102950408946552e-05, "loss": 0.0011, "num_input_tokens_seen": 221021200, "step": 102325 }, { "epoch": 16.693311582381728, "grad_norm": 1.0787469148635864, "learning_rate": 8.099066117528308e-05, "loss": 0.0256, "num_input_tokens_seen": 221033968, "step": 102330 }, { "epoch": 16.694127243066884, "grad_norm": 0.18266649544239044, "learning_rate": 8.095182675285673e-05, "loss": 0.0064, "num_input_tokens_seen": 221043632, "step": 102335 }, { "epoch": 16.69494290375204, "grad_norm": 0.0012430261122062802, "learning_rate": 8.091300082297293e-05, "loss": 0.0018, "num_input_tokens_seen": 221054032, "step": 102340 }, { "epoch": 16.695758564437195, "grad_norm": 0.0010876876767724752, "learning_rate": 8.087418338641906e-05, "loss": 0.001, "num_input_tokens_seen": 221065072, "step": 102345 }, { "epoch": 16.696574225122347, "grad_norm": 0.002364259911701083, "learning_rate": 8.083537444398131e-05, "loss": 0.0016, "num_input_tokens_seen": 221074864, "step": 102350 }, { "epoch": 16.697389885807503, "grad_norm": 0.00427822582423687, "learning_rate": 8.079657399644664e-05, "loss": 0.0054, "num_input_tokens_seen": 221084784, "step": 102355 }, { "epoch": 16.69820554649266, "grad_norm": 0.014106487855315208, "learning_rate": 8.07577820446011e-05, "loss": 0.0008, "num_input_tokens_seen": 221095248, "step": 102360 }, { "epoch": 16.699021207177815, "grad_norm": 0.014190200716257095, "learning_rate": 8.071899858923098e-05, "loss": 0.0316, "num_input_tokens_seen": 221105904, "step": 102365 }, { "epoch": 16.69983686786297, "grad_norm": 0.0014781900681555271, "learning_rate": 8.068022363112227e-05, "loss": 0.0009, "num_input_tokens_seen": 221116368, "step": 102370 }, { "epoch": 16.700652528548122, "grad_norm": 0.005353093612939119, "learning_rate": 8.064145717106075e-05, "loss": 0.0816, "num_input_tokens_seen": 221127472, "step": 102375 }, { "epoch": 16.701468189233278, "grad_norm": 0.0054572150111198425, "learning_rate": 8.06026992098321e-05, "loss": 0.012, "num_input_tokens_seen": 221139280, "step": 102380 }, { "epoch": 16.702283849918434, "grad_norm": 0.00832337699830532, "learning_rate": 8.056394974822185e-05, "loss": 0.0045, "num_input_tokens_seen": 221150416, "step": 102385 }, { "epoch": 16.70309951060359, "grad_norm": 0.017912698909640312, "learning_rate": 8.052520878701519e-05, "loss": 0.002, "num_input_tokens_seen": 221161296, "step": 102390 }, { "epoch": 16.703915171288745, "grad_norm": 0.0015576289733871818, "learning_rate": 8.04864763269973e-05, "loss": 0.0005, "num_input_tokens_seen": 221170480, "step": 102395 }, { "epoch": 16.704730831973897, "grad_norm": 0.004138452000916004, "learning_rate": 8.044775236895319e-05, "loss": 0.0031, "num_input_tokens_seen": 221179408, "step": 102400 }, { "epoch": 16.705546492659053, "grad_norm": 0.005685943178832531, "learning_rate": 8.040903691366753e-05, "loss": 0.0016, "num_input_tokens_seen": 221190192, "step": 102405 }, { "epoch": 16.70636215334421, "grad_norm": 0.0030472618527710438, "learning_rate": 8.037032996192522e-05, "loss": 0.0003, "num_input_tokens_seen": 221200912, "step": 102410 }, { "epoch": 16.707177814029365, "grad_norm": 0.0015341610414907336, "learning_rate": 8.033163151451028e-05, "loss": 0.0008, "num_input_tokens_seen": 221211920, "step": 102415 }, { "epoch": 16.70799347471452, "grad_norm": 0.004777231719344854, "learning_rate": 8.029294157220746e-05, "loss": 0.0181, "num_input_tokens_seen": 221223280, "step": 102420 }, { "epoch": 16.708809135399672, "grad_norm": 0.002837100997567177, "learning_rate": 8.025426013580033e-05, "loss": 0.0018, "num_input_tokens_seen": 221234480, "step": 102425 }, { "epoch": 16.709624796084828, "grad_norm": 0.0010288365883752704, "learning_rate": 8.021558720607342e-05, "loss": 0.0042, "num_input_tokens_seen": 221243984, "step": 102430 }, { "epoch": 16.710440456769984, "grad_norm": 0.0010139404330402613, "learning_rate": 8.01769227838099e-05, "loss": 0.0007, "num_input_tokens_seen": 221255536, "step": 102435 }, { "epoch": 16.71125611745514, "grad_norm": 0.002217318629845977, "learning_rate": 8.013826686979381e-05, "loss": 0.0004, "num_input_tokens_seen": 221266832, "step": 102440 }, { "epoch": 16.712071778140295, "grad_norm": 0.0004526945995166898, "learning_rate": 8.00996194648082e-05, "loss": 0.0008, "num_input_tokens_seen": 221277616, "step": 102445 }, { "epoch": 16.712887438825447, "grad_norm": 0.01151892077177763, "learning_rate": 8.006098056963668e-05, "loss": 0.0008, "num_input_tokens_seen": 221289360, "step": 102450 }, { "epoch": 16.713703099510603, "grad_norm": 0.002219117246568203, "learning_rate": 8.002235018506194e-05, "loss": 0.0009, "num_input_tokens_seen": 221301104, "step": 102455 }, { "epoch": 16.71451876019576, "grad_norm": 0.002388479420915246, "learning_rate": 7.998372831186723e-05, "loss": 0.0022, "num_input_tokens_seen": 221311824, "step": 102460 }, { "epoch": 16.715334420880914, "grad_norm": 0.2429102659225464, "learning_rate": 7.99451149508349e-05, "loss": 0.0069, "num_input_tokens_seen": 221322192, "step": 102465 }, { "epoch": 16.71615008156607, "grad_norm": 0.028374364599585533, "learning_rate": 7.990651010274791e-05, "loss": 0.0017, "num_input_tokens_seen": 221333552, "step": 102470 }, { "epoch": 16.716965742251222, "grad_norm": 0.025974059477448463, "learning_rate": 7.98679137683882e-05, "loss": 0.0024, "num_input_tokens_seen": 221344368, "step": 102475 }, { "epoch": 16.717781402936378, "grad_norm": 0.01371039729565382, "learning_rate": 7.982932594853837e-05, "loss": 0.0029, "num_input_tokens_seen": 221355792, "step": 102480 }, { "epoch": 16.718597063621534, "grad_norm": 0.0056204842403531075, "learning_rate": 7.979074664398012e-05, "loss": 0.0037, "num_input_tokens_seen": 221365520, "step": 102485 }, { "epoch": 16.71941272430669, "grad_norm": 0.00258276448585093, "learning_rate": 7.975217585549566e-05, "loss": 0.0026, "num_input_tokens_seen": 221375856, "step": 102490 }, { "epoch": 16.72022838499184, "grad_norm": 0.003028827253729105, "learning_rate": 7.97136135838662e-05, "loss": 0.0009, "num_input_tokens_seen": 221386064, "step": 102495 }, { "epoch": 16.721044045676997, "grad_norm": 0.027169395238161087, "learning_rate": 7.967505982987372e-05, "loss": 0.0087, "num_input_tokens_seen": 221397392, "step": 102500 }, { "epoch": 16.721859706362153, "grad_norm": 0.00019676069496199489, "learning_rate": 7.963651459429932e-05, "loss": 0.0019, "num_input_tokens_seen": 221406288, "step": 102505 }, { "epoch": 16.72267536704731, "grad_norm": 0.0002667378284968436, "learning_rate": 7.959797787792428e-05, "loss": 0.1659, "num_input_tokens_seen": 221416720, "step": 102510 }, { "epoch": 16.723491027732464, "grad_norm": 0.0024046706967055798, "learning_rate": 7.955944968152951e-05, "loss": 0.0011, "num_input_tokens_seen": 221427536, "step": 102515 }, { "epoch": 16.724306688417617, "grad_norm": 0.011399206705391407, "learning_rate": 7.952093000589583e-05, "loss": 0.0009, "num_input_tokens_seen": 221437712, "step": 102520 }, { "epoch": 16.725122349102772, "grad_norm": 0.041121955960989, "learning_rate": 7.948241885180396e-05, "loss": 0.0017, "num_input_tokens_seen": 221448720, "step": 102525 }, { "epoch": 16.725938009787928, "grad_norm": 0.0030209736432880163, "learning_rate": 7.944391622003427e-05, "loss": 0.0007, "num_input_tokens_seen": 221460016, "step": 102530 }, { "epoch": 16.726753670473084, "grad_norm": 0.004665852524340153, "learning_rate": 7.94054221113672e-05, "loss": 0.0005, "num_input_tokens_seen": 221470128, "step": 102535 }, { "epoch": 16.72756933115824, "grad_norm": 0.0013338279677554965, "learning_rate": 7.936693652658278e-05, "loss": 0.0003, "num_input_tokens_seen": 221481488, "step": 102540 }, { "epoch": 16.72838499184339, "grad_norm": 0.04637147858738899, "learning_rate": 7.9328459466461e-05, "loss": 0.0025, "num_input_tokens_seen": 221492912, "step": 102545 }, { "epoch": 16.729200652528547, "grad_norm": 0.0005551987560465932, "learning_rate": 7.928999093178157e-05, "loss": 0.0008, "num_input_tokens_seen": 221503856, "step": 102550 }, { "epoch": 16.730016313213703, "grad_norm": 0.003815334988757968, "learning_rate": 7.925153092332438e-05, "loss": 0.0021, "num_input_tokens_seen": 221514736, "step": 102555 }, { "epoch": 16.73083197389886, "grad_norm": 0.0010202974081039429, "learning_rate": 7.921307944186845e-05, "loss": 0.0008, "num_input_tokens_seen": 221526256, "step": 102560 }, { "epoch": 16.731647634584014, "grad_norm": 0.0013838201994076371, "learning_rate": 7.91746364881935e-05, "loss": 0.0016, "num_input_tokens_seen": 221537168, "step": 102565 }, { "epoch": 16.732463295269167, "grad_norm": 0.0036898739635944366, "learning_rate": 7.913620206307814e-05, "loss": 0.0022, "num_input_tokens_seen": 221547632, "step": 102570 }, { "epoch": 16.733278955954322, "grad_norm": 0.0027934997342526913, "learning_rate": 7.909777616730185e-05, "loss": 0.0014, "num_input_tokens_seen": 221558832, "step": 102575 }, { "epoch": 16.734094616639478, "grad_norm": 0.0016968920826911926, "learning_rate": 7.905935880164278e-05, "loss": 0.0029, "num_input_tokens_seen": 221569136, "step": 102580 }, { "epoch": 16.734910277324634, "grad_norm": 0.10568311810493469, "learning_rate": 7.902094996688009e-05, "loss": 0.0028, "num_input_tokens_seen": 221579824, "step": 102585 }, { "epoch": 16.73572593800979, "grad_norm": 0.003486029338091612, "learning_rate": 7.89825496637916e-05, "loss": 0.0006, "num_input_tokens_seen": 221591632, "step": 102590 }, { "epoch": 16.73654159869494, "grad_norm": 0.038367435336112976, "learning_rate": 7.894415789315612e-05, "loss": 0.0034, "num_input_tokens_seen": 221600624, "step": 102595 }, { "epoch": 16.737357259380097, "grad_norm": 0.01631920039653778, "learning_rate": 7.890577465575121e-05, "loss": 0.0204, "num_input_tokens_seen": 221612016, "step": 102600 }, { "epoch": 16.738172920065253, "grad_norm": 0.019952211529016495, "learning_rate": 7.886739995235504e-05, "loss": 0.0027, "num_input_tokens_seen": 221623184, "step": 102605 }, { "epoch": 16.73898858075041, "grad_norm": 0.001536556170322001, "learning_rate": 7.882903378374528e-05, "loss": 0.0004, "num_input_tokens_seen": 221633520, "step": 102610 }, { "epoch": 16.739804241435564, "grad_norm": 0.019367242231965065, "learning_rate": 7.879067615069946e-05, "loss": 0.0014, "num_input_tokens_seen": 221646256, "step": 102615 }, { "epoch": 16.740619902120716, "grad_norm": 0.2488294392824173, "learning_rate": 7.875232705399488e-05, "loss": 0.0085, "num_input_tokens_seen": 221657232, "step": 102620 }, { "epoch": 16.741435562805872, "grad_norm": 0.012795425951480865, "learning_rate": 7.871398649440886e-05, "loss": 0.0023, "num_input_tokens_seen": 221669520, "step": 102625 }, { "epoch": 16.742251223491028, "grad_norm": 0.9706597328186035, "learning_rate": 7.867565447271829e-05, "loss": 0.0807, "num_input_tokens_seen": 221680144, "step": 102630 }, { "epoch": 16.743066884176184, "grad_norm": 0.062246449291706085, "learning_rate": 7.863733098970006e-05, "loss": 0.0026, "num_input_tokens_seen": 221690672, "step": 102635 }, { "epoch": 16.74388254486134, "grad_norm": 0.0013690270716324449, "learning_rate": 7.85990160461309e-05, "loss": 0.004, "num_input_tokens_seen": 221701296, "step": 102640 }, { "epoch": 16.74469820554649, "grad_norm": 0.0025680058170109987, "learning_rate": 7.856070964278722e-05, "loss": 0.0009, "num_input_tokens_seen": 221712816, "step": 102645 }, { "epoch": 16.745513866231647, "grad_norm": 0.00345767755061388, "learning_rate": 7.852241178044539e-05, "loss": 0.0077, "num_input_tokens_seen": 221724656, "step": 102650 }, { "epoch": 16.746329526916803, "grad_norm": 0.011416045017540455, "learning_rate": 7.848412245988157e-05, "loss": 0.0007, "num_input_tokens_seen": 221735568, "step": 102655 }, { "epoch": 16.74714518760196, "grad_norm": 0.00027606345247477293, "learning_rate": 7.84458416818718e-05, "loss": 0.0062, "num_input_tokens_seen": 221746448, "step": 102660 }, { "epoch": 16.747960848287114, "grad_norm": 0.030321862548589706, "learning_rate": 7.840756944719174e-05, "loss": 0.0532, "num_input_tokens_seen": 221757584, "step": 102665 }, { "epoch": 16.748776508972266, "grad_norm": 0.0008409228757955134, "learning_rate": 7.836930575661716e-05, "loss": 0.0013, "num_input_tokens_seen": 221769296, "step": 102670 }, { "epoch": 16.749592169657422, "grad_norm": 0.02600860223174095, "learning_rate": 7.83310506109235e-05, "loss": 0.0053, "num_input_tokens_seen": 221780400, "step": 102675 }, { "epoch": 16.750407830342578, "grad_norm": 0.0010515855392441154, "learning_rate": 7.829280401088601e-05, "loss": 0.0036, "num_input_tokens_seen": 221791312, "step": 102680 }, { "epoch": 16.751223491027734, "grad_norm": 0.07031827419996262, "learning_rate": 7.82545659572798e-05, "loss": 0.002, "num_input_tokens_seen": 221802192, "step": 102685 }, { "epoch": 16.752039151712886, "grad_norm": 0.004942765459418297, "learning_rate": 7.821633645087984e-05, "loss": 0.002, "num_input_tokens_seen": 221813520, "step": 102690 }, { "epoch": 16.75285481239804, "grad_norm": 1.208173155784607, "learning_rate": 7.817811549246079e-05, "loss": 0.0556, "num_input_tokens_seen": 221823376, "step": 102695 }, { "epoch": 16.753670473083197, "grad_norm": 0.0008134068921208382, "learning_rate": 7.813990308279755e-05, "loss": 0.0021, "num_input_tokens_seen": 221834352, "step": 102700 }, { "epoch": 16.754486133768353, "grad_norm": 0.00394340418279171, "learning_rate": 7.810169922266413e-05, "loss": 0.0025, "num_input_tokens_seen": 221845168, "step": 102705 }, { "epoch": 16.75530179445351, "grad_norm": 0.041125066578388214, "learning_rate": 7.806350391283507e-05, "loss": 0.0022, "num_input_tokens_seen": 221856112, "step": 102710 }, { "epoch": 16.75611745513866, "grad_norm": 0.3884848356246948, "learning_rate": 7.80253171540844e-05, "loss": 0.0127, "num_input_tokens_seen": 221867440, "step": 102715 }, { "epoch": 16.756933115823816, "grad_norm": 0.004419300705194473, "learning_rate": 7.798713894718602e-05, "loss": 0.0017, "num_input_tokens_seen": 221877072, "step": 102720 }, { "epoch": 16.757748776508972, "grad_norm": 0.00803311262279749, "learning_rate": 7.794896929291361e-05, "loss": 0.0293, "num_input_tokens_seen": 221886832, "step": 102725 }, { "epoch": 16.758564437194128, "grad_norm": 0.0018697066698223352, "learning_rate": 7.791080819204072e-05, "loss": 0.0008, "num_input_tokens_seen": 221897552, "step": 102730 }, { "epoch": 16.759380097879284, "grad_norm": 0.0029300868045538664, "learning_rate": 7.78726556453408e-05, "loss": 0.0007, "num_input_tokens_seen": 221907728, "step": 102735 }, { "epoch": 16.760195758564436, "grad_norm": 0.20307497680187225, "learning_rate": 7.783451165358696e-05, "loss": 0.012, "num_input_tokens_seen": 221918480, "step": 102740 }, { "epoch": 16.76101141924959, "grad_norm": 0.008771148510277271, "learning_rate": 7.779637621755236e-05, "loss": 0.002, "num_input_tokens_seen": 221930160, "step": 102745 }, { "epoch": 16.761827079934747, "grad_norm": 0.030420590192079544, "learning_rate": 7.775824933800979e-05, "loss": 0.0134, "num_input_tokens_seen": 221940464, "step": 102750 }, { "epoch": 16.762642740619903, "grad_norm": 0.00467941677197814, "learning_rate": 7.772013101573195e-05, "loss": 0.0015, "num_input_tokens_seen": 221951920, "step": 102755 }, { "epoch": 16.76345840130506, "grad_norm": 0.003154685953631997, "learning_rate": 7.768202125149132e-05, "loss": 0.0021, "num_input_tokens_seen": 221962736, "step": 102760 }, { "epoch": 16.76427406199021, "grad_norm": 0.0002088886103592813, "learning_rate": 7.76439200460603e-05, "loss": 0.0004, "num_input_tokens_seen": 221974832, "step": 102765 }, { "epoch": 16.765089722675366, "grad_norm": 0.000144814039231278, "learning_rate": 7.7605827400211e-05, "loss": 0.001, "num_input_tokens_seen": 221984528, "step": 102770 }, { "epoch": 16.765905383360522, "grad_norm": 0.29430752992630005, "learning_rate": 7.75677433147155e-05, "loss": 0.0105, "num_input_tokens_seen": 221994768, "step": 102775 }, { "epoch": 16.766721044045678, "grad_norm": 0.017759494483470917, "learning_rate": 7.752966779034553e-05, "loss": 0.003, "num_input_tokens_seen": 222004752, "step": 102780 }, { "epoch": 16.767536704730833, "grad_norm": 0.0003618684713728726, "learning_rate": 7.749160082787283e-05, "loss": 0.0011, "num_input_tokens_seen": 222015408, "step": 102785 }, { "epoch": 16.768352365415986, "grad_norm": 0.01978623867034912, "learning_rate": 7.745354242806884e-05, "loss": 0.0011, "num_input_tokens_seen": 222026288, "step": 102790 }, { "epoch": 16.76916802610114, "grad_norm": 0.0008822871604934335, "learning_rate": 7.741549259170483e-05, "loss": 0.0028, "num_input_tokens_seen": 222037008, "step": 102795 }, { "epoch": 16.769983686786297, "grad_norm": 0.0161877628415823, "learning_rate": 7.737745131955192e-05, "loss": 0.0022, "num_input_tokens_seen": 222047280, "step": 102800 }, { "epoch": 16.770799347471453, "grad_norm": 0.03637959808111191, "learning_rate": 7.733941861238114e-05, "loss": 0.0056, "num_input_tokens_seen": 222058160, "step": 102805 }, { "epoch": 16.77161500815661, "grad_norm": 0.003914504777640104, "learning_rate": 7.730139447096319e-05, "loss": 0.0007, "num_input_tokens_seen": 222069840, "step": 102810 }, { "epoch": 16.77243066884176, "grad_norm": 0.00933013390749693, "learning_rate": 7.726337889606861e-05, "loss": 0.0053, "num_input_tokens_seen": 222080720, "step": 102815 }, { "epoch": 16.773246329526916, "grad_norm": 0.006620690226554871, "learning_rate": 7.722537188846817e-05, "loss": 0.0008, "num_input_tokens_seen": 222091472, "step": 102820 }, { "epoch": 16.774061990212072, "grad_norm": 0.00023687862267252058, "learning_rate": 7.718737344893167e-05, "loss": 0.0014, "num_input_tokens_seen": 222103952, "step": 102825 }, { "epoch": 16.774877650897228, "grad_norm": 0.07730203866958618, "learning_rate": 7.714938357822965e-05, "loss": 0.0021, "num_input_tokens_seen": 222115280, "step": 102830 }, { "epoch": 16.775693311582383, "grad_norm": 0.0037358549889177084, "learning_rate": 7.711140227713154e-05, "loss": 0.0012, "num_input_tokens_seen": 222125008, "step": 102835 }, { "epoch": 16.776508972267536, "grad_norm": 0.004577254876494408, "learning_rate": 7.70734295464075e-05, "loss": 0.0288, "num_input_tokens_seen": 222136848, "step": 102840 }, { "epoch": 16.77732463295269, "grad_norm": 0.024739494547247887, "learning_rate": 7.703546538682688e-05, "loss": 0.0027, "num_input_tokens_seen": 222148656, "step": 102845 }, { "epoch": 16.778140293637847, "grad_norm": 0.0011499575339257717, "learning_rate": 7.699750979915915e-05, "loss": 0.0016, "num_input_tokens_seen": 222158896, "step": 102850 }, { "epoch": 16.778955954323003, "grad_norm": 0.007596036419272423, "learning_rate": 7.695956278417349e-05, "loss": 0.0078, "num_input_tokens_seen": 222169680, "step": 102855 }, { "epoch": 16.77977161500816, "grad_norm": 0.0020268235821276903, "learning_rate": 7.692162434263894e-05, "loss": 0.0018, "num_input_tokens_seen": 222181776, "step": 102860 }, { "epoch": 16.78058727569331, "grad_norm": 0.0006173772853799164, "learning_rate": 7.688369447532444e-05, "loss": 0.0024, "num_input_tokens_seen": 222192752, "step": 102865 }, { "epoch": 16.781402936378466, "grad_norm": 0.004046997986733913, "learning_rate": 7.684577318299857e-05, "loss": 0.0091, "num_input_tokens_seen": 222203408, "step": 102870 }, { "epoch": 16.782218597063622, "grad_norm": 0.010694226250052452, "learning_rate": 7.680786046642996e-05, "loss": 0.0009, "num_input_tokens_seen": 222213904, "step": 102875 }, { "epoch": 16.783034257748778, "grad_norm": 0.0009469189099036157, "learning_rate": 7.676995632638689e-05, "loss": 0.0013, "num_input_tokens_seen": 222223920, "step": 102880 }, { "epoch": 16.78384991843393, "grad_norm": 0.0009855440584942698, "learning_rate": 7.67320607636376e-05, "loss": 0.0004, "num_input_tokens_seen": 222234672, "step": 102885 }, { "epoch": 16.784665579119086, "grad_norm": 0.000437272887211293, "learning_rate": 7.669417377894999e-05, "loss": 0.0005, "num_input_tokens_seen": 222245264, "step": 102890 }, { "epoch": 16.78548123980424, "grad_norm": 0.00246535986661911, "learning_rate": 7.665629537309199e-05, "loss": 0.0007, "num_input_tokens_seen": 222255856, "step": 102895 }, { "epoch": 16.786296900489397, "grad_norm": 0.0025290593039244413, "learning_rate": 7.661842554683124e-05, "loss": 0.0005, "num_input_tokens_seen": 222266416, "step": 102900 }, { "epoch": 16.787112561174553, "grad_norm": 0.009743542410433292, "learning_rate": 7.658056430093512e-05, "loss": 0.001, "num_input_tokens_seen": 222276048, "step": 102905 }, { "epoch": 16.787928221859705, "grad_norm": 0.01540299877524376, "learning_rate": 7.654271163617105e-05, "loss": 0.0012, "num_input_tokens_seen": 222286512, "step": 102910 }, { "epoch": 16.78874388254486, "grad_norm": 0.06199616193771362, "learning_rate": 7.650486755330616e-05, "loss": 0.0094, "num_input_tokens_seen": 222297072, "step": 102915 }, { "epoch": 16.789559543230016, "grad_norm": 0.0010426411172375083, "learning_rate": 7.646703205310718e-05, "loss": 0.1021, "num_input_tokens_seen": 222309328, "step": 102920 }, { "epoch": 16.790375203915172, "grad_norm": 0.0029606884345412254, "learning_rate": 7.642920513634138e-05, "loss": 0.0032, "num_input_tokens_seen": 222320272, "step": 102925 }, { "epoch": 16.791190864600328, "grad_norm": 0.03106829896569252, "learning_rate": 7.639138680377478e-05, "loss": 0.0023, "num_input_tokens_seen": 222331248, "step": 102930 }, { "epoch": 16.79200652528548, "grad_norm": 0.013690001331269741, "learning_rate": 7.63535770561744e-05, "loss": 0.0032, "num_input_tokens_seen": 222342160, "step": 102935 }, { "epoch": 16.792822185970635, "grad_norm": 0.0004479142662603408, "learning_rate": 7.631577589430593e-05, "loss": 0.0007, "num_input_tokens_seen": 222353008, "step": 102940 }, { "epoch": 16.79363784665579, "grad_norm": 0.0002369188005104661, "learning_rate": 7.627798331893604e-05, "loss": 0.0006, "num_input_tokens_seen": 222362864, "step": 102945 }, { "epoch": 16.794453507340947, "grad_norm": 0.17374515533447266, "learning_rate": 7.62401993308301e-05, "loss": 0.0057, "num_input_tokens_seen": 222373776, "step": 102950 }, { "epoch": 16.795269168026103, "grad_norm": 0.0009877387201413512, "learning_rate": 7.620242393075432e-05, "loss": 0.1063, "num_input_tokens_seen": 222382640, "step": 102955 }, { "epoch": 16.796084828711255, "grad_norm": 0.017264485359191895, "learning_rate": 7.61646571194738e-05, "loss": 0.0024, "num_input_tokens_seen": 222395056, "step": 102960 }, { "epoch": 16.79690048939641, "grad_norm": 0.0007135859923437238, "learning_rate": 7.612689889775443e-05, "loss": 0.0008, "num_input_tokens_seen": 222404304, "step": 102965 }, { "epoch": 16.797716150081566, "grad_norm": 0.0024973410181701183, "learning_rate": 7.60891492663609e-05, "loss": 0.0013, "num_input_tokens_seen": 222414448, "step": 102970 }, { "epoch": 16.798531810766722, "grad_norm": 0.059601426124572754, "learning_rate": 7.605140822605883e-05, "loss": 0.0252, "num_input_tokens_seen": 222424624, "step": 102975 }, { "epoch": 16.799347471451878, "grad_norm": 0.0006366174784488976, "learning_rate": 7.601367577761248e-05, "loss": 0.033, "num_input_tokens_seen": 222434576, "step": 102980 }, { "epoch": 16.80016313213703, "grad_norm": 0.003634232562035322, "learning_rate": 7.597595192178702e-05, "loss": 0.0011, "num_input_tokens_seen": 222444624, "step": 102985 }, { "epoch": 16.800978792822185, "grad_norm": 0.6686355471611023, "learning_rate": 7.59382366593468e-05, "loss": 0.0108, "num_input_tokens_seen": 222453552, "step": 102990 }, { "epoch": 16.80179445350734, "grad_norm": 0.0005412403261289, "learning_rate": 7.590052999105618e-05, "loss": 0.0027, "num_input_tokens_seen": 222465072, "step": 102995 }, { "epoch": 16.802610114192497, "grad_norm": 0.02159789204597473, "learning_rate": 7.586283191767929e-05, "loss": 0.0104, "num_input_tokens_seen": 222476496, "step": 103000 }, { "epoch": 16.803425774877653, "grad_norm": 0.000245524977799505, "learning_rate": 7.582514243998023e-05, "loss": 0.0066, "num_input_tokens_seen": 222487408, "step": 103005 }, { "epoch": 16.804241435562805, "grad_norm": 0.00048794830217957497, "learning_rate": 7.578746155872268e-05, "loss": 0.0003, "num_input_tokens_seen": 222499184, "step": 103010 }, { "epoch": 16.80505709624796, "grad_norm": 0.009726781398057938, "learning_rate": 7.574978927467046e-05, "loss": 0.0056, "num_input_tokens_seen": 222511120, "step": 103015 }, { "epoch": 16.805872756933116, "grad_norm": 0.0008596468833275139, "learning_rate": 7.571212558858692e-05, "loss": 0.0013, "num_input_tokens_seen": 222522096, "step": 103020 }, { "epoch": 16.806688417618272, "grad_norm": 0.0003236684715375304, "learning_rate": 7.567447050123538e-05, "loss": 0.0009, "num_input_tokens_seen": 222534416, "step": 103025 }, { "epoch": 16.807504078303424, "grad_norm": 0.22375887632369995, "learning_rate": 7.563682401337901e-05, "loss": 0.0048, "num_input_tokens_seen": 222546096, "step": 103030 }, { "epoch": 16.80831973898858, "grad_norm": 0.0021393627393990755, "learning_rate": 7.559918612578065e-05, "loss": 0.0008, "num_input_tokens_seen": 222556848, "step": 103035 }, { "epoch": 16.809135399673735, "grad_norm": 0.011619689874351025, "learning_rate": 7.55615568392034e-05, "loss": 0.0873, "num_input_tokens_seen": 222568272, "step": 103040 }, { "epoch": 16.80995106035889, "grad_norm": 0.017800265923142433, "learning_rate": 7.552393615440939e-05, "loss": 0.001, "num_input_tokens_seen": 222579248, "step": 103045 }, { "epoch": 16.810766721044047, "grad_norm": 0.000227114578592591, "learning_rate": 7.548632407216155e-05, "loss": 0.0048, "num_input_tokens_seen": 222590800, "step": 103050 }, { "epoch": 16.8115823817292, "grad_norm": 0.0071326131001114845, "learning_rate": 7.544872059322161e-05, "loss": 0.0005, "num_input_tokens_seen": 222601680, "step": 103055 }, { "epoch": 16.812398042414355, "grad_norm": 0.3597804605960846, "learning_rate": 7.541112571835218e-05, "loss": 0.0067, "num_input_tokens_seen": 222610352, "step": 103060 }, { "epoch": 16.81321370309951, "grad_norm": 0.00383292930200696, "learning_rate": 7.537353944831471e-05, "loss": 0.043, "num_input_tokens_seen": 222621648, "step": 103065 }, { "epoch": 16.814029363784666, "grad_norm": 0.0040711634792387486, "learning_rate": 7.533596178387136e-05, "loss": 0.0041, "num_input_tokens_seen": 222632528, "step": 103070 }, { "epoch": 16.81484502446982, "grad_norm": 0.011220073327422142, "learning_rate": 7.529839272578326e-05, "loss": 0.0011, "num_input_tokens_seen": 222642768, "step": 103075 }, { "epoch": 16.815660685154974, "grad_norm": 0.7358731031417847, "learning_rate": 7.526083227481223e-05, "loss": 0.1654, "num_input_tokens_seen": 222653392, "step": 103080 }, { "epoch": 16.81647634584013, "grad_norm": 0.0004308177740313113, "learning_rate": 7.522328043171899e-05, "loss": 0.0012, "num_input_tokens_seen": 222664112, "step": 103085 }, { "epoch": 16.817292006525285, "grad_norm": 0.010764655657112598, "learning_rate": 7.518573719726507e-05, "loss": 0.009, "num_input_tokens_seen": 222675408, "step": 103090 }, { "epoch": 16.81810766721044, "grad_norm": 0.001309889485128224, "learning_rate": 7.514820257221088e-05, "loss": 0.0014, "num_input_tokens_seen": 222685520, "step": 103095 }, { "epoch": 16.818923327895597, "grad_norm": 0.009377938695251942, "learning_rate": 7.511067655731757e-05, "loss": 0.0075, "num_input_tokens_seen": 222696592, "step": 103100 }, { "epoch": 16.81973898858075, "grad_norm": 0.03656245023012161, "learning_rate": 7.507315915334517e-05, "loss": 0.01, "num_input_tokens_seen": 222707888, "step": 103105 }, { "epoch": 16.820554649265905, "grad_norm": 0.012941381894052029, "learning_rate": 7.503565036105447e-05, "loss": 0.0023, "num_input_tokens_seen": 222718896, "step": 103110 }, { "epoch": 16.82137030995106, "grad_norm": 0.0073400200344622135, "learning_rate": 7.49981501812052e-05, "loss": 0.0073, "num_input_tokens_seen": 222729904, "step": 103115 }, { "epoch": 16.822185970636216, "grad_norm": 0.02853931486606598, "learning_rate": 7.496065861455786e-05, "loss": 0.0019, "num_input_tokens_seen": 222739984, "step": 103120 }, { "epoch": 16.82300163132137, "grad_norm": 0.0017178819980472326, "learning_rate": 7.492317566187167e-05, "loss": 0.1302, "num_input_tokens_seen": 222751280, "step": 103125 }, { "epoch": 16.823817292006524, "grad_norm": 0.0007930384599603713, "learning_rate": 7.48857013239067e-05, "loss": 0.0028, "num_input_tokens_seen": 222760656, "step": 103130 }, { "epoch": 16.82463295269168, "grad_norm": 0.019918566569685936, "learning_rate": 7.484823560142235e-05, "loss": 0.0105, "num_input_tokens_seen": 222772976, "step": 103135 }, { "epoch": 16.825448613376835, "grad_norm": 0.004923573229461908, "learning_rate": 7.481077849517776e-05, "loss": 0.0048, "num_input_tokens_seen": 222782896, "step": 103140 }, { "epoch": 16.82626427406199, "grad_norm": 0.0015327023575082421, "learning_rate": 7.477333000593218e-05, "loss": 0.0007, "num_input_tokens_seen": 222793808, "step": 103145 }, { "epoch": 16.827079934747147, "grad_norm": 0.08445288985967636, "learning_rate": 7.473589013444449e-05, "loss": 0.005, "num_input_tokens_seen": 222805200, "step": 103150 }, { "epoch": 16.8278955954323, "grad_norm": 0.001118175801821053, "learning_rate": 7.469845888147348e-05, "loss": 0.0117, "num_input_tokens_seen": 222816816, "step": 103155 }, { "epoch": 16.828711256117455, "grad_norm": 0.20687885582447052, "learning_rate": 7.466103624777776e-05, "loss": 0.0045, "num_input_tokens_seen": 222827376, "step": 103160 }, { "epoch": 16.82952691680261, "grad_norm": 0.007650259882211685, "learning_rate": 7.462362223411568e-05, "loss": 0.0816, "num_input_tokens_seen": 222838320, "step": 103165 }, { "epoch": 16.830342577487766, "grad_norm": 0.0002957629330921918, "learning_rate": 7.458621684124556e-05, "loss": 0.0005, "num_input_tokens_seen": 222849104, "step": 103170 }, { "epoch": 16.83115823817292, "grad_norm": 0.006406435277312994, "learning_rate": 7.454882006992541e-05, "loss": 0.001, "num_input_tokens_seen": 222860688, "step": 103175 }, { "epoch": 16.831973898858074, "grad_norm": 0.00206724158488214, "learning_rate": 7.451143192091304e-05, "loss": 0.0024, "num_input_tokens_seen": 222872208, "step": 103180 }, { "epoch": 16.83278955954323, "grad_norm": 0.014522617682814598, "learning_rate": 7.447405239496646e-05, "loss": 0.0011, "num_input_tokens_seen": 222881776, "step": 103185 }, { "epoch": 16.833605220228385, "grad_norm": 0.006389922928065062, "learning_rate": 7.443668149284289e-05, "loss": 0.001, "num_input_tokens_seen": 222892144, "step": 103190 }, { "epoch": 16.83442088091354, "grad_norm": 0.020866891369223595, "learning_rate": 7.439931921529996e-05, "loss": 0.0867, "num_input_tokens_seen": 222902608, "step": 103195 }, { "epoch": 16.835236541598697, "grad_norm": 0.00029015709878876805, "learning_rate": 7.436196556309454e-05, "loss": 0.0008, "num_input_tokens_seen": 222913648, "step": 103200 }, { "epoch": 16.83605220228385, "grad_norm": 0.031761713325977325, "learning_rate": 7.432462053698413e-05, "loss": 0.0039, "num_input_tokens_seen": 222924336, "step": 103205 }, { "epoch": 16.836867862969005, "grad_norm": 0.0018911163788288832, "learning_rate": 7.428728413772502e-05, "loss": 0.0087, "num_input_tokens_seen": 222934800, "step": 103210 }, { "epoch": 16.83768352365416, "grad_norm": 0.034703124314546585, "learning_rate": 7.42499563660744e-05, "loss": 0.0156, "num_input_tokens_seen": 222945488, "step": 103215 }, { "epoch": 16.838499184339316, "grad_norm": 0.04697426036000252, "learning_rate": 7.421263722278826e-05, "loss": 0.0027, "num_input_tokens_seen": 222954608, "step": 103220 }, { "epoch": 16.839314845024468, "grad_norm": 0.9602540135383606, "learning_rate": 7.417532670862343e-05, "loss": 0.0421, "num_input_tokens_seen": 222965712, "step": 103225 }, { "epoch": 16.840130505709624, "grad_norm": 0.0008537370013073087, "learning_rate": 7.413802482433557e-05, "loss": 0.0017, "num_input_tokens_seen": 222976400, "step": 103230 }, { "epoch": 16.84094616639478, "grad_norm": 0.0003730040625669062, "learning_rate": 7.41007315706811e-05, "loss": 0.0023, "num_input_tokens_seen": 222987184, "step": 103235 }, { "epoch": 16.841761827079935, "grad_norm": 0.002069843467324972, "learning_rate": 7.406344694841538e-05, "loss": 0.0007, "num_input_tokens_seen": 222999056, "step": 103240 }, { "epoch": 16.84257748776509, "grad_norm": 0.0002810598525684327, "learning_rate": 7.402617095829434e-05, "loss": 0.0023, "num_input_tokens_seen": 223008944, "step": 103245 }, { "epoch": 16.843393148450243, "grad_norm": 0.010194149799644947, "learning_rate": 7.398890360107336e-05, "loss": 0.0027, "num_input_tokens_seen": 223020464, "step": 103250 }, { "epoch": 16.8442088091354, "grad_norm": 0.011366844177246094, "learning_rate": 7.395164487750766e-05, "loss": 0.0049, "num_input_tokens_seen": 223031632, "step": 103255 }, { "epoch": 16.845024469820554, "grad_norm": 0.002313476288691163, "learning_rate": 7.391439478835233e-05, "loss": 0.0026, "num_input_tokens_seen": 223041840, "step": 103260 }, { "epoch": 16.84584013050571, "grad_norm": 0.05588260293006897, "learning_rate": 7.387715333436235e-05, "loss": 0.0019, "num_input_tokens_seen": 223052144, "step": 103265 }, { "epoch": 16.846655791190866, "grad_norm": 0.0007918982882983983, "learning_rate": 7.383992051629246e-05, "loss": 0.0008, "num_input_tokens_seen": 223062704, "step": 103270 }, { "epoch": 16.847471451876018, "grad_norm": 0.004887265618890524, "learning_rate": 7.380269633489717e-05, "loss": 0.0007, "num_input_tokens_seen": 223072720, "step": 103275 }, { "epoch": 16.848287112561174, "grad_norm": 0.0006118956953287125, "learning_rate": 7.376548079093087e-05, "loss": 0.0033, "num_input_tokens_seen": 223082480, "step": 103280 }, { "epoch": 16.84910277324633, "grad_norm": 0.002460494404658675, "learning_rate": 7.372827388514792e-05, "loss": 0.0047, "num_input_tokens_seen": 223092432, "step": 103285 }, { "epoch": 16.849918433931485, "grad_norm": 0.0028719434048980474, "learning_rate": 7.369107561830218e-05, "loss": 0.0069, "num_input_tokens_seen": 223102992, "step": 103290 }, { "epoch": 16.85073409461664, "grad_norm": 0.0015683751553297043, "learning_rate": 7.365388599114764e-05, "loss": 0.0015, "num_input_tokens_seen": 223113520, "step": 103295 }, { "epoch": 16.851549755301793, "grad_norm": 0.0028194987680763006, "learning_rate": 7.361670500443796e-05, "loss": 0.0012, "num_input_tokens_seen": 223125872, "step": 103300 }, { "epoch": 16.85236541598695, "grad_norm": 0.01556948758661747, "learning_rate": 7.357953265892665e-05, "loss": 0.0013, "num_input_tokens_seen": 223137936, "step": 103305 }, { "epoch": 16.853181076672104, "grad_norm": 0.00034893525298684835, "learning_rate": 7.354236895536704e-05, "loss": 0.001, "num_input_tokens_seen": 223148592, "step": 103310 }, { "epoch": 16.85399673735726, "grad_norm": 0.045288246124982834, "learning_rate": 7.350521389451231e-05, "loss": 0.002, "num_input_tokens_seen": 223159056, "step": 103315 }, { "epoch": 16.854812398042416, "grad_norm": 0.0001437898463336751, "learning_rate": 7.346806747711554e-05, "loss": 0.001, "num_input_tokens_seen": 223170096, "step": 103320 }, { "epoch": 16.855628058727568, "grad_norm": 0.006058032624423504, "learning_rate": 7.343092970392929e-05, "loss": 0.0014, "num_input_tokens_seen": 223179248, "step": 103325 }, { "epoch": 16.856443719412724, "grad_norm": 0.02314470149576664, "learning_rate": 7.339380057570666e-05, "loss": 0.0055, "num_input_tokens_seen": 223190384, "step": 103330 }, { "epoch": 16.85725938009788, "grad_norm": 0.0012422037543728948, "learning_rate": 7.335668009319962e-05, "loss": 0.0115, "num_input_tokens_seen": 223200752, "step": 103335 }, { "epoch": 16.858075040783035, "grad_norm": 0.27252358198165894, "learning_rate": 7.331956825716091e-05, "loss": 0.0034, "num_input_tokens_seen": 223212400, "step": 103340 }, { "epoch": 16.85889070146819, "grad_norm": 0.7663934826850891, "learning_rate": 7.328246506834224e-05, "loss": 0.0342, "num_input_tokens_seen": 223223888, "step": 103345 }, { "epoch": 16.859706362153343, "grad_norm": 0.002288650721311569, "learning_rate": 7.32453705274958e-05, "loss": 0.0044, "num_input_tokens_seen": 223234384, "step": 103350 }, { "epoch": 16.8605220228385, "grad_norm": 0.04717063903808594, "learning_rate": 7.320828463537333e-05, "loss": 0.0056, "num_input_tokens_seen": 223245392, "step": 103355 }, { "epoch": 16.861337683523654, "grad_norm": 0.01188843697309494, "learning_rate": 7.317120739272643e-05, "loss": 0.0772, "num_input_tokens_seen": 223255664, "step": 103360 }, { "epoch": 16.86215334420881, "grad_norm": 0.006205637939274311, "learning_rate": 7.313413880030645e-05, "loss": 0.0006, "num_input_tokens_seen": 223267024, "step": 103365 }, { "epoch": 16.862969004893966, "grad_norm": 0.01393813919275999, "learning_rate": 7.309707885886462e-05, "loss": 0.001, "num_input_tokens_seen": 223278192, "step": 103370 }, { "epoch": 16.863784665579118, "grad_norm": 0.009371892549097538, "learning_rate": 7.306002756915214e-05, "loss": 0.0032, "num_input_tokens_seen": 223288912, "step": 103375 }, { "epoch": 16.864600326264274, "grad_norm": 0.0004683208535425365, "learning_rate": 7.302298493191972e-05, "loss": 0.0005, "num_input_tokens_seen": 223300400, "step": 103380 }, { "epoch": 16.86541598694943, "grad_norm": 0.059036415070295334, "learning_rate": 7.298595094791826e-05, "loss": 0.0727, "num_input_tokens_seen": 223311856, "step": 103385 }, { "epoch": 16.866231647634585, "grad_norm": 0.004718313459306955, "learning_rate": 7.294892561789817e-05, "loss": 0.138, "num_input_tokens_seen": 223323248, "step": 103390 }, { "epoch": 16.86704730831974, "grad_norm": 0.005941477138549089, "learning_rate": 7.291190894260985e-05, "loss": 0.0011, "num_input_tokens_seen": 223335216, "step": 103395 }, { "epoch": 16.867862969004893, "grad_norm": 0.007990571670234203, "learning_rate": 7.287490092280346e-05, "loss": 0.0116, "num_input_tokens_seen": 223345808, "step": 103400 }, { "epoch": 16.86867862969005, "grad_norm": 0.005993335507810116, "learning_rate": 7.28379015592291e-05, "loss": 0.0011, "num_input_tokens_seen": 223357584, "step": 103405 }, { "epoch": 16.869494290375204, "grad_norm": 0.019427742809057236, "learning_rate": 7.280091085263657e-05, "loss": 0.027, "num_input_tokens_seen": 223367248, "step": 103410 }, { "epoch": 16.87030995106036, "grad_norm": 0.6797969937324524, "learning_rate": 7.276392880377548e-05, "loss": 0.0292, "num_input_tokens_seen": 223377584, "step": 103415 }, { "epoch": 16.871125611745512, "grad_norm": 0.007113391533493996, "learning_rate": 7.27269554133954e-05, "loss": 0.0082, "num_input_tokens_seen": 223389104, "step": 103420 }, { "epoch": 16.871941272430668, "grad_norm": 0.0298901479691267, "learning_rate": 7.268999068224557e-05, "loss": 0.0033, "num_input_tokens_seen": 223400112, "step": 103425 }, { "epoch": 16.872756933115824, "grad_norm": 0.002857472514733672, "learning_rate": 7.265303461107519e-05, "loss": 0.0063, "num_input_tokens_seen": 223411216, "step": 103430 }, { "epoch": 16.87357259380098, "grad_norm": 0.001847639330662787, "learning_rate": 7.261608720063317e-05, "loss": 0.0025, "num_input_tokens_seen": 223423440, "step": 103435 }, { "epoch": 16.874388254486135, "grad_norm": 0.0019470597617328167, "learning_rate": 7.25791484516683e-05, "loss": 0.003, "num_input_tokens_seen": 223432944, "step": 103440 }, { "epoch": 16.875203915171287, "grad_norm": 0.002979752840474248, "learning_rate": 7.254221836492925e-05, "loss": 0.1159, "num_input_tokens_seen": 223443824, "step": 103445 }, { "epoch": 16.876019575856443, "grad_norm": 0.05077700689435005, "learning_rate": 7.250529694116436e-05, "loss": 0.0033, "num_input_tokens_seen": 223455024, "step": 103450 }, { "epoch": 16.8768352365416, "grad_norm": 0.0010402423795312643, "learning_rate": 7.246838418112189e-05, "loss": 0.0084, "num_input_tokens_seen": 223464720, "step": 103455 }, { "epoch": 16.877650897226754, "grad_norm": 0.007393400650471449, "learning_rate": 7.243148008555017e-05, "loss": 0.003, "num_input_tokens_seen": 223475216, "step": 103460 }, { "epoch": 16.87846655791191, "grad_norm": 0.00041530292946845293, "learning_rate": 7.239458465519672e-05, "loss": 0.0021, "num_input_tokens_seen": 223485232, "step": 103465 }, { "epoch": 16.879282218597062, "grad_norm": 0.00204356387257576, "learning_rate": 7.235769789080954e-05, "loss": 0.1144, "num_input_tokens_seen": 223495696, "step": 103470 }, { "epoch": 16.880097879282218, "grad_norm": 0.0011162912705913186, "learning_rate": 7.232081979313615e-05, "loss": 0.0077, "num_input_tokens_seen": 223508336, "step": 103475 }, { "epoch": 16.880913539967374, "grad_norm": 0.00022204949345905334, "learning_rate": 7.228395036292384e-05, "loss": 0.0016, "num_input_tokens_seen": 223517680, "step": 103480 }, { "epoch": 16.88172920065253, "grad_norm": 0.0025065632071346045, "learning_rate": 7.224708960091992e-05, "loss": 0.0014, "num_input_tokens_seen": 223528240, "step": 103485 }, { "epoch": 16.882544861337685, "grad_norm": 0.0005242445622570813, "learning_rate": 7.221023750787136e-05, "loss": 0.0004, "num_input_tokens_seen": 223539248, "step": 103490 }, { "epoch": 16.883360522022837, "grad_norm": 0.005537915974855423, "learning_rate": 7.217339408452505e-05, "loss": 0.0026, "num_input_tokens_seen": 223549968, "step": 103495 }, { "epoch": 16.884176182707993, "grad_norm": 0.0028699340764433146, "learning_rate": 7.21365593316276e-05, "loss": 0.0005, "num_input_tokens_seen": 223561040, "step": 103500 }, { "epoch": 16.88499184339315, "grad_norm": 0.0010814238339662552, "learning_rate": 7.209973324992558e-05, "loss": 0.0042, "num_input_tokens_seen": 223571152, "step": 103505 }, { "epoch": 16.885807504078304, "grad_norm": 0.04626893624663353, "learning_rate": 7.206291584016533e-05, "loss": 0.0028, "num_input_tokens_seen": 223582032, "step": 103510 }, { "epoch": 16.88662316476346, "grad_norm": 0.07623946666717529, "learning_rate": 7.202610710309293e-05, "loss": 0.0025, "num_input_tokens_seen": 223594224, "step": 103515 }, { "epoch": 16.887438825448612, "grad_norm": 0.008306603878736496, "learning_rate": 7.198930703945439e-05, "loss": 0.0021, "num_input_tokens_seen": 223604144, "step": 103520 }, { "epoch": 16.888254486133768, "grad_norm": 0.0008515430381521583, "learning_rate": 7.19525156499955e-05, "loss": 0.0033, "num_input_tokens_seen": 223613616, "step": 103525 }, { "epoch": 16.889070146818923, "grad_norm": 0.005168965086340904, "learning_rate": 7.191573293546195e-05, "loss": 0.0034, "num_input_tokens_seen": 223625744, "step": 103530 }, { "epoch": 16.88988580750408, "grad_norm": 0.04074247553944588, "learning_rate": 7.187895889659906e-05, "loss": 0.0018, "num_input_tokens_seen": 223635632, "step": 103535 }, { "epoch": 16.890701468189235, "grad_norm": 0.04196935519576073, "learning_rate": 7.184219353415228e-05, "loss": 0.0592, "num_input_tokens_seen": 223647120, "step": 103540 }, { "epoch": 16.891517128874387, "grad_norm": 0.005630762781947851, "learning_rate": 7.180543684886654e-05, "loss": 0.0018, "num_input_tokens_seen": 223658416, "step": 103545 }, { "epoch": 16.892332789559543, "grad_norm": 0.004152704030275345, "learning_rate": 7.176868884148679e-05, "loss": 0.0043, "num_input_tokens_seen": 223669072, "step": 103550 }, { "epoch": 16.8931484502447, "grad_norm": 0.1723194271326065, "learning_rate": 7.173194951275786e-05, "loss": 0.0052, "num_input_tokens_seen": 223678096, "step": 103555 }, { "epoch": 16.893964110929854, "grad_norm": 0.1672668755054474, "learning_rate": 7.169521886342417e-05, "loss": 0.0097, "num_input_tokens_seen": 223688624, "step": 103560 }, { "epoch": 16.894779771615006, "grad_norm": 0.017755307257175446, "learning_rate": 7.165849689423043e-05, "loss": 0.0029, "num_input_tokens_seen": 223699216, "step": 103565 }, { "epoch": 16.895595432300162, "grad_norm": 0.0071003190241754055, "learning_rate": 7.162178360592037e-05, "loss": 0.0035, "num_input_tokens_seen": 223709904, "step": 103570 }, { "epoch": 16.896411092985318, "grad_norm": 0.0005213333643041551, "learning_rate": 7.15850789992386e-05, "loss": 0.001, "num_input_tokens_seen": 223721072, "step": 103575 }, { "epoch": 16.897226753670473, "grad_norm": 0.012438364326953888, "learning_rate": 7.154838307492839e-05, "loss": 0.0216, "num_input_tokens_seen": 223732784, "step": 103580 }, { "epoch": 16.89804241435563, "grad_norm": 0.00046717922668904066, "learning_rate": 7.151169583373402e-05, "loss": 0.001, "num_input_tokens_seen": 223743792, "step": 103585 }, { "epoch": 16.898858075040785, "grad_norm": 0.08749958872795105, "learning_rate": 7.147501727639844e-05, "loss": 0.0107, "num_input_tokens_seen": 223755344, "step": 103590 }, { "epoch": 16.899673735725937, "grad_norm": 0.0009969095699489117, "learning_rate": 7.14383474036655e-05, "loss": 0.0079, "num_input_tokens_seen": 223766032, "step": 103595 }, { "epoch": 16.900489396411093, "grad_norm": 0.008744543418288231, "learning_rate": 7.140168621627786e-05, "loss": 0.0014, "num_input_tokens_seen": 223775440, "step": 103600 }, { "epoch": 16.90130505709625, "grad_norm": 0.0034252856858074665, "learning_rate": 7.136503371497888e-05, "loss": 0.0155, "num_input_tokens_seen": 223785712, "step": 103605 }, { "epoch": 16.902120717781404, "grad_norm": 0.0015179646434262395, "learning_rate": 7.132838990051132e-05, "loss": 0.001, "num_input_tokens_seen": 223795312, "step": 103610 }, { "epoch": 16.902936378466556, "grad_norm": 0.0007867804961279035, "learning_rate": 7.129175477361766e-05, "loss": 0.0014, "num_input_tokens_seen": 223806736, "step": 103615 }, { "epoch": 16.903752039151712, "grad_norm": 0.6858383417129517, "learning_rate": 7.125512833504049e-05, "loss": 0.0167, "num_input_tokens_seen": 223817424, "step": 103620 }, { "epoch": 16.904567699836868, "grad_norm": 0.004548117518424988, "learning_rate": 7.121851058552209e-05, "loss": 0.0011, "num_input_tokens_seen": 223827856, "step": 103625 }, { "epoch": 16.905383360522023, "grad_norm": 0.01413120049983263, "learning_rate": 7.118190152580444e-05, "loss": 0.0771, "num_input_tokens_seen": 223837104, "step": 103630 }, { "epoch": 16.90619902120718, "grad_norm": 0.0013508544070646167, "learning_rate": 7.114530115662959e-05, "loss": 0.0889, "num_input_tokens_seen": 223848720, "step": 103635 }, { "epoch": 16.90701468189233, "grad_norm": 0.006228696089237928, "learning_rate": 7.110870947873926e-05, "loss": 0.0014, "num_input_tokens_seen": 223859216, "step": 103640 }, { "epoch": 16.907830342577487, "grad_norm": 0.0016030416591092944, "learning_rate": 7.107212649287497e-05, "loss": 0.001, "num_input_tokens_seen": 223870448, "step": 103645 }, { "epoch": 16.908646003262643, "grad_norm": 0.005661866627633572, "learning_rate": 7.103555219977825e-05, "loss": 0.0006, "num_input_tokens_seen": 223880720, "step": 103650 }, { "epoch": 16.9094616639478, "grad_norm": 0.008992401883006096, "learning_rate": 7.099898660019016e-05, "loss": 0.0096, "num_input_tokens_seen": 223891856, "step": 103655 }, { "epoch": 16.910277324632954, "grad_norm": 0.0017499992391094565, "learning_rate": 7.096242969485189e-05, "loss": 0.0006, "num_input_tokens_seen": 223903280, "step": 103660 }, { "epoch": 16.911092985318106, "grad_norm": 0.016757963225245476, "learning_rate": 7.092588148450413e-05, "loss": 0.0029, "num_input_tokens_seen": 223914160, "step": 103665 }, { "epoch": 16.911908646003262, "grad_norm": 0.0017213866813108325, "learning_rate": 7.088934196988795e-05, "loss": 0.0004, "num_input_tokens_seen": 223925424, "step": 103670 }, { "epoch": 16.912724306688418, "grad_norm": 0.12674477696418762, "learning_rate": 7.085281115174335e-05, "loss": 0.0066, "num_input_tokens_seen": 223936880, "step": 103675 }, { "epoch": 16.913539967373573, "grad_norm": 0.07425151765346527, "learning_rate": 7.081628903081116e-05, "loss": 0.003, "num_input_tokens_seen": 223947120, "step": 103680 }, { "epoch": 16.91435562805873, "grad_norm": 0.0015521092573180795, "learning_rate": 7.077977560783117e-05, "loss": 0.0013, "num_input_tokens_seen": 223957936, "step": 103685 }, { "epoch": 16.91517128874388, "grad_norm": 0.003834107890725136, "learning_rate": 7.074327088354371e-05, "loss": 0.0017, "num_input_tokens_seen": 223966544, "step": 103690 }, { "epoch": 16.915986949429037, "grad_norm": 0.0003727386356331408, "learning_rate": 7.070677485868821e-05, "loss": 0.0007, "num_input_tokens_seen": 223978640, "step": 103695 }, { "epoch": 16.916802610114193, "grad_norm": 0.005801330786198378, "learning_rate": 7.067028753400473e-05, "loss": 0.0086, "num_input_tokens_seen": 223988752, "step": 103700 }, { "epoch": 16.91761827079935, "grad_norm": 0.264864981174469, "learning_rate": 7.06338089102323e-05, "loss": 0.0039, "num_input_tokens_seen": 223999536, "step": 103705 }, { "epoch": 16.918433931484504, "grad_norm": 0.00687580369412899, "learning_rate": 7.05973389881106e-05, "loss": 0.0169, "num_input_tokens_seen": 224011088, "step": 103710 }, { "epoch": 16.919249592169656, "grad_norm": 0.008090431801974773, "learning_rate": 7.056087776837838e-05, "loss": 0.0043, "num_input_tokens_seen": 224021488, "step": 103715 }, { "epoch": 16.920065252854812, "grad_norm": 0.10078676789999008, "learning_rate": 7.052442525177499e-05, "loss": 0.0035, "num_input_tokens_seen": 224032528, "step": 103720 }, { "epoch": 16.920880913539968, "grad_norm": 0.015244879759848118, "learning_rate": 7.048798143903873e-05, "loss": 0.0012, "num_input_tokens_seen": 224044432, "step": 103725 }, { "epoch": 16.921696574225123, "grad_norm": 0.007751537952572107, "learning_rate": 7.045154633090861e-05, "loss": 0.0162, "num_input_tokens_seen": 224054992, "step": 103730 }, { "epoch": 16.92251223491028, "grad_norm": 0.0004403532948344946, "learning_rate": 7.041511992812255e-05, "loss": 0.0827, "num_input_tokens_seen": 224065136, "step": 103735 }, { "epoch": 16.92332789559543, "grad_norm": 0.040228236466646194, "learning_rate": 7.037870223141935e-05, "loss": 0.0069, "num_input_tokens_seen": 224077392, "step": 103740 }, { "epoch": 16.924143556280587, "grad_norm": 0.05721784383058548, "learning_rate": 7.034229324153652e-05, "loss": 0.0022, "num_input_tokens_seen": 224089040, "step": 103745 }, { "epoch": 16.924959216965743, "grad_norm": 0.02705197036266327, "learning_rate": 7.030589295921224e-05, "loss": 0.0349, "num_input_tokens_seen": 224100976, "step": 103750 }, { "epoch": 16.9257748776509, "grad_norm": 0.0023379966150969267, "learning_rate": 7.026950138518423e-05, "loss": 0.0046, "num_input_tokens_seen": 224112528, "step": 103755 }, { "epoch": 16.92659053833605, "grad_norm": 0.002170360879972577, "learning_rate": 7.023311852018988e-05, "loss": 0.0008, "num_input_tokens_seen": 224123024, "step": 103760 }, { "epoch": 16.927406199021206, "grad_norm": 0.0021907107438892126, "learning_rate": 7.019674436496653e-05, "loss": 0.0013, "num_input_tokens_seen": 224135216, "step": 103765 }, { "epoch": 16.928221859706362, "grad_norm": 0.6404808759689331, "learning_rate": 7.01603789202515e-05, "loss": 0.0145, "num_input_tokens_seen": 224147056, "step": 103770 }, { "epoch": 16.929037520391518, "grad_norm": 0.0004183925047982484, "learning_rate": 7.01240221867816e-05, "loss": 0.0006, "num_input_tokens_seen": 224158096, "step": 103775 }, { "epoch": 16.929853181076673, "grad_norm": 0.00027834964566864073, "learning_rate": 7.008767416529376e-05, "loss": 0.0007, "num_input_tokens_seen": 224168080, "step": 103780 }, { "epoch": 16.930668841761825, "grad_norm": 0.7886783480644226, "learning_rate": 7.00513348565246e-05, "loss": 0.0603, "num_input_tokens_seen": 224179248, "step": 103785 }, { "epoch": 16.93148450244698, "grad_norm": 0.0031499317847192287, "learning_rate": 7.001500426121055e-05, "loss": 0.0295, "num_input_tokens_seen": 224190960, "step": 103790 }, { "epoch": 16.932300163132137, "grad_norm": 0.01849014312028885, "learning_rate": 6.997868238008793e-05, "loss": 0.0008, "num_input_tokens_seen": 224200656, "step": 103795 }, { "epoch": 16.933115823817293, "grad_norm": 0.007087129633873701, "learning_rate": 6.994236921389268e-05, "loss": 0.0026, "num_input_tokens_seen": 224212016, "step": 103800 }, { "epoch": 16.93393148450245, "grad_norm": 0.09033387899398804, "learning_rate": 6.990606476336114e-05, "loss": 0.0034, "num_input_tokens_seen": 224222448, "step": 103805 }, { "epoch": 16.9347471451876, "grad_norm": 0.011910875327885151, "learning_rate": 6.98697690292286e-05, "loss": 0.0434, "num_input_tokens_seen": 224233008, "step": 103810 }, { "epoch": 16.935562805872756, "grad_norm": 0.002567737130448222, "learning_rate": 6.983348201223105e-05, "loss": 0.0023, "num_input_tokens_seen": 224245200, "step": 103815 }, { "epoch": 16.936378466557912, "grad_norm": 0.0010787706123664975, "learning_rate": 6.97972037131035e-05, "loss": 0.0005, "num_input_tokens_seen": 224255888, "step": 103820 }, { "epoch": 16.937194127243067, "grad_norm": 0.010754414834082127, "learning_rate": 6.976093413258156e-05, "loss": 0.0013, "num_input_tokens_seen": 224265904, "step": 103825 }, { "epoch": 16.938009787928223, "grad_norm": 0.0027486232575029135, "learning_rate": 6.972467327139987e-05, "loss": 0.0028, "num_input_tokens_seen": 224275664, "step": 103830 }, { "epoch": 16.938825448613375, "grad_norm": 0.0008375774486921728, "learning_rate": 6.968842113029372e-05, "loss": 0.0018, "num_input_tokens_seen": 224286768, "step": 103835 }, { "epoch": 16.93964110929853, "grad_norm": 0.02906578592956066, "learning_rate": 6.965217770999738e-05, "loss": 0.0031, "num_input_tokens_seen": 224297008, "step": 103840 }, { "epoch": 16.940456769983687, "grad_norm": 0.005131200421601534, "learning_rate": 6.961594301124585e-05, "loss": 0.0058, "num_input_tokens_seen": 224307216, "step": 103845 }, { "epoch": 16.941272430668842, "grad_norm": 0.01488333661109209, "learning_rate": 6.957971703477301e-05, "loss": 0.0146, "num_input_tokens_seen": 224317424, "step": 103850 }, { "epoch": 16.942088091353998, "grad_norm": 0.12265031039714813, "learning_rate": 6.954349978131342e-05, "loss": 0.0061, "num_input_tokens_seen": 224328688, "step": 103855 }, { "epoch": 16.94290375203915, "grad_norm": 0.0015900880098342896, "learning_rate": 6.950729125160066e-05, "loss": 0.0003, "num_input_tokens_seen": 224339312, "step": 103860 }, { "epoch": 16.943719412724306, "grad_norm": 0.03072093427181244, "learning_rate": 6.947109144636898e-05, "loss": 0.0038, "num_input_tokens_seen": 224351280, "step": 103865 }, { "epoch": 16.94453507340946, "grad_norm": 0.002643781015649438, "learning_rate": 6.943490036635158e-05, "loss": 0.0006, "num_input_tokens_seen": 224362064, "step": 103870 }, { "epoch": 16.945350734094617, "grad_norm": 0.005371812731027603, "learning_rate": 6.939871801228236e-05, "loss": 0.0027, "num_input_tokens_seen": 224372976, "step": 103875 }, { "epoch": 16.946166394779773, "grad_norm": 0.007469065487384796, "learning_rate": 6.936254438489414e-05, "loss": 0.002, "num_input_tokens_seen": 224384880, "step": 103880 }, { "epoch": 16.946982055464925, "grad_norm": 0.012764902785420418, "learning_rate": 6.932637948492038e-05, "loss": 0.001, "num_input_tokens_seen": 224396496, "step": 103885 }, { "epoch": 16.94779771615008, "grad_norm": 0.07486124336719513, "learning_rate": 6.929022331309392e-05, "loss": 0.0041, "num_input_tokens_seen": 224408208, "step": 103890 }, { "epoch": 16.948613376835237, "grad_norm": 0.6385883688926697, "learning_rate": 6.925407587014743e-05, "loss": 0.0952, "num_input_tokens_seen": 224418288, "step": 103895 }, { "epoch": 16.949429037520392, "grad_norm": 0.5001348257064819, "learning_rate": 6.921793715681358e-05, "loss": 0.1072, "num_input_tokens_seen": 224428688, "step": 103900 }, { "epoch": 16.950244698205548, "grad_norm": 0.013521653600037098, "learning_rate": 6.918180717382466e-05, "loss": 0.0017, "num_input_tokens_seen": 224439216, "step": 103905 }, { "epoch": 16.9510603588907, "grad_norm": 0.003049998078495264, "learning_rate": 6.914568592191301e-05, "loss": 0.005, "num_input_tokens_seen": 224450256, "step": 103910 }, { "epoch": 16.951876019575856, "grad_norm": 0.020504631102085114, "learning_rate": 6.910957340181056e-05, "loss": 0.0039, "num_input_tokens_seen": 224461872, "step": 103915 }, { "epoch": 16.95269168026101, "grad_norm": 0.0012223842786625028, "learning_rate": 6.907346961424926e-05, "loss": 0.0754, "num_input_tokens_seen": 224473712, "step": 103920 }, { "epoch": 16.953507340946167, "grad_norm": 0.009138503111898899, "learning_rate": 6.903737455996073e-05, "loss": 0.0026, "num_input_tokens_seen": 224484560, "step": 103925 }, { "epoch": 16.954323001631323, "grad_norm": 0.005319240037351847, "learning_rate": 6.900128823967655e-05, "loss": 0.0008, "num_input_tokens_seen": 224494608, "step": 103930 }, { "epoch": 16.955138662316475, "grad_norm": 0.0023457880597561598, "learning_rate": 6.896521065412803e-05, "loss": 0.0019, "num_input_tokens_seen": 224505488, "step": 103935 }, { "epoch": 16.95595432300163, "grad_norm": 0.0007057767361402512, "learning_rate": 6.89291418040463e-05, "loss": 0.006, "num_input_tokens_seen": 224516304, "step": 103940 }, { "epoch": 16.956769983686787, "grad_norm": 0.0032573130447417498, "learning_rate": 6.889308169016229e-05, "loss": 0.0007, "num_input_tokens_seen": 224527056, "step": 103945 }, { "epoch": 16.957585644371942, "grad_norm": 0.05521783605217934, "learning_rate": 6.885703031320706e-05, "loss": 0.0149, "num_input_tokens_seen": 224537360, "step": 103950 }, { "epoch": 16.958401305057095, "grad_norm": 0.00016859463357832283, "learning_rate": 6.882098767391087e-05, "loss": 0.0048, "num_input_tokens_seen": 224548240, "step": 103955 }, { "epoch": 16.95921696574225, "grad_norm": 0.0033800648525357246, "learning_rate": 6.878495377300453e-05, "loss": 0.0009, "num_input_tokens_seen": 224560336, "step": 103960 }, { "epoch": 16.960032626427406, "grad_norm": 0.01001080870628357, "learning_rate": 6.874892861121795e-05, "loss": 0.0011, "num_input_tokens_seen": 224572496, "step": 103965 }, { "epoch": 16.96084828711256, "grad_norm": 0.004089967347681522, "learning_rate": 6.871291218928166e-05, "loss": 0.0012, "num_input_tokens_seen": 224583216, "step": 103970 }, { "epoch": 16.961663947797717, "grad_norm": 0.0007215813966467977, "learning_rate": 6.867690450792508e-05, "loss": 0.0007, "num_input_tokens_seen": 224595696, "step": 103975 }, { "epoch": 16.96247960848287, "grad_norm": 0.0013670484768226743, "learning_rate": 6.864090556787838e-05, "loss": 0.0016, "num_input_tokens_seen": 224606288, "step": 103980 }, { "epoch": 16.963295269168025, "grad_norm": 0.011604733765125275, "learning_rate": 6.860491536987079e-05, "loss": 0.0015, "num_input_tokens_seen": 224617488, "step": 103985 }, { "epoch": 16.96411092985318, "grad_norm": 0.04235439747571945, "learning_rate": 6.856893391463192e-05, "loss": 0.0052, "num_input_tokens_seen": 224628720, "step": 103990 }, { "epoch": 16.964926590538337, "grad_norm": 0.0010337868006899953, "learning_rate": 6.853296120289094e-05, "loss": 0.0016, "num_input_tokens_seen": 224640432, "step": 103995 }, { "epoch": 16.965742251223492, "grad_norm": 0.02236298657953739, "learning_rate": 6.849699723537684e-05, "loss": 0.0067, "num_input_tokens_seen": 224651024, "step": 104000 }, { "epoch": 16.966557911908644, "grad_norm": 0.00834821816533804, "learning_rate": 6.84610420128185e-05, "loss": 0.0031, "num_input_tokens_seen": 224661904, "step": 104005 }, { "epoch": 16.9673735725938, "grad_norm": 0.0009754026541486382, "learning_rate": 6.842509553594462e-05, "loss": 0.0054, "num_input_tokens_seen": 224672592, "step": 104010 }, { "epoch": 16.968189233278956, "grad_norm": 0.07079813629388809, "learning_rate": 6.83891578054836e-05, "loss": 0.0036, "num_input_tokens_seen": 224683600, "step": 104015 }, { "epoch": 16.96900489396411, "grad_norm": 0.0004905558307655156, "learning_rate": 6.835322882216388e-05, "loss": 0.0014, "num_input_tokens_seen": 224694032, "step": 104020 }, { "epoch": 16.969820554649267, "grad_norm": 0.00019701993733178824, "learning_rate": 6.831730858671353e-05, "loss": 0.0125, "num_input_tokens_seen": 224705712, "step": 104025 }, { "epoch": 16.97063621533442, "grad_norm": 0.018763495609164238, "learning_rate": 6.828139709986058e-05, "loss": 0.0194, "num_input_tokens_seen": 224714768, "step": 104030 }, { "epoch": 16.971451876019575, "grad_norm": 0.009430291131138802, "learning_rate": 6.824549436233279e-05, "loss": 0.0012, "num_input_tokens_seen": 224725808, "step": 104035 }, { "epoch": 16.97226753670473, "grad_norm": 0.003956033382564783, "learning_rate": 6.820960037485779e-05, "loss": 0.0015, "num_input_tokens_seen": 224736752, "step": 104040 }, { "epoch": 16.973083197389887, "grad_norm": 0.0390721932053566, "learning_rate": 6.8173715138163e-05, "loss": 0.002, "num_input_tokens_seen": 224747728, "step": 104045 }, { "epoch": 16.973898858075042, "grad_norm": 0.0026890782173722982, "learning_rate": 6.813783865297563e-05, "loss": 0.0016, "num_input_tokens_seen": 224758320, "step": 104050 }, { "epoch": 16.974714518760194, "grad_norm": 0.00021689318236894906, "learning_rate": 6.810197092002285e-05, "loss": 0.0009, "num_input_tokens_seen": 224769232, "step": 104055 }, { "epoch": 16.97553017944535, "grad_norm": 0.008539892733097076, "learning_rate": 6.806611194003154e-05, "loss": 0.0053, "num_input_tokens_seen": 224780016, "step": 104060 }, { "epoch": 16.976345840130506, "grad_norm": 0.002406371058896184, "learning_rate": 6.803026171372845e-05, "loss": 0.001, "num_input_tokens_seen": 224792560, "step": 104065 }, { "epoch": 16.97716150081566, "grad_norm": 0.011961296200752258, "learning_rate": 6.799442024184005e-05, "loss": 0.0015, "num_input_tokens_seen": 224803216, "step": 104070 }, { "epoch": 16.977977161500817, "grad_norm": 0.0037891874089837074, "learning_rate": 6.795858752509276e-05, "loss": 0.0011, "num_input_tokens_seen": 224814768, "step": 104075 }, { "epoch": 16.97879282218597, "grad_norm": 0.006421142257750034, "learning_rate": 6.792276356421278e-05, "loss": 0.001, "num_input_tokens_seen": 224826128, "step": 104080 }, { "epoch": 16.979608482871125, "grad_norm": 0.0007180146058090031, "learning_rate": 6.788694835992615e-05, "loss": 0.0108, "num_input_tokens_seen": 224836848, "step": 104085 }, { "epoch": 16.98042414355628, "grad_norm": 0.0014143181033432484, "learning_rate": 6.785114191295854e-05, "loss": 0.0017, "num_input_tokens_seen": 224848816, "step": 104090 }, { "epoch": 16.981239804241437, "grad_norm": 0.012540574185550213, "learning_rate": 6.78153442240359e-05, "loss": 0.0022, "num_input_tokens_seen": 224858992, "step": 104095 }, { "epoch": 16.982055464926592, "grad_norm": 0.045478545129299164, "learning_rate": 6.777955529388358e-05, "loss": 0.0078, "num_input_tokens_seen": 224869616, "step": 104100 }, { "epoch": 16.982871125611744, "grad_norm": 0.005564156919717789, "learning_rate": 6.774377512322688e-05, "loss": 0.0043, "num_input_tokens_seen": 224880880, "step": 104105 }, { "epoch": 16.9836867862969, "grad_norm": 0.004332674667239189, "learning_rate": 6.77080037127909e-05, "loss": 0.0024, "num_input_tokens_seen": 224890896, "step": 104110 }, { "epoch": 16.984502446982056, "grad_norm": 0.004451078828424215, "learning_rate": 6.767224106330067e-05, "loss": 0.0009, "num_input_tokens_seen": 224901168, "step": 104115 }, { "epoch": 16.98531810766721, "grad_norm": 0.0015942800091579556, "learning_rate": 6.763648717548088e-05, "loss": 0.1505, "num_input_tokens_seen": 224912048, "step": 104120 }, { "epoch": 16.986133768352367, "grad_norm": 0.00040785997407510877, "learning_rate": 6.760074205005617e-05, "loss": 0.0015, "num_input_tokens_seen": 224922768, "step": 104125 }, { "epoch": 16.98694942903752, "grad_norm": 0.00912250392138958, "learning_rate": 6.756500568775098e-05, "loss": 0.0708, "num_input_tokens_seen": 224934064, "step": 104130 }, { "epoch": 16.987765089722675, "grad_norm": 0.0014891140162944794, "learning_rate": 6.752927808928955e-05, "loss": 0.002, "num_input_tokens_seen": 224944720, "step": 104135 }, { "epoch": 16.98858075040783, "grad_norm": 0.01289412658661604, "learning_rate": 6.749355925539591e-05, "loss": 0.0025, "num_input_tokens_seen": 224956144, "step": 104140 }, { "epoch": 16.989396411092986, "grad_norm": 0.10449906438589096, "learning_rate": 6.745784918679399e-05, "loss": 0.1343, "num_input_tokens_seen": 224966960, "step": 104145 }, { "epoch": 16.99021207177814, "grad_norm": 0.0009628917323425412, "learning_rate": 6.742214788420742e-05, "loss": 0.0271, "num_input_tokens_seen": 224976528, "step": 104150 }, { "epoch": 16.991027732463294, "grad_norm": 0.0012101201573386788, "learning_rate": 6.73864553483598e-05, "loss": 0.0006, "num_input_tokens_seen": 224989584, "step": 104155 }, { "epoch": 16.99184339314845, "grad_norm": 0.0003157875908073038, "learning_rate": 6.735077157997448e-05, "loss": 0.0019, "num_input_tokens_seen": 225000272, "step": 104160 }, { "epoch": 16.992659053833606, "grad_norm": 0.1327991783618927, "learning_rate": 6.731509657977464e-05, "loss": 0.007, "num_input_tokens_seen": 225011440, "step": 104165 }, { "epoch": 16.99347471451876, "grad_norm": 0.009232837706804276, "learning_rate": 6.727943034848327e-05, "loss": 0.0019, "num_input_tokens_seen": 225022096, "step": 104170 }, { "epoch": 16.994290375203914, "grad_norm": 0.001391786616295576, "learning_rate": 6.72437728868232e-05, "loss": 0.0006, "num_input_tokens_seen": 225033232, "step": 104175 }, { "epoch": 16.99510603588907, "grad_norm": 0.0021746442653238773, "learning_rate": 6.720812419551703e-05, "loss": 0.0037, "num_input_tokens_seen": 225043664, "step": 104180 }, { "epoch": 16.995921696574225, "grad_norm": 0.0031335726380348206, "learning_rate": 6.717248427528727e-05, "loss": 0.0139, "num_input_tokens_seen": 225053840, "step": 104185 }, { "epoch": 16.99673735725938, "grad_norm": 0.002999882446601987, "learning_rate": 6.713685312685619e-05, "loss": 0.0019, "num_input_tokens_seen": 225064688, "step": 104190 }, { "epoch": 16.997553017944536, "grad_norm": 0.17880254983901978, "learning_rate": 6.710123075094593e-05, "loss": 0.0087, "num_input_tokens_seen": 225075696, "step": 104195 }, { "epoch": 16.99836867862969, "grad_norm": 0.0011067570885643363, "learning_rate": 6.70656171482783e-05, "loss": 0.0007, "num_input_tokens_seen": 225084624, "step": 104200 }, { "epoch": 16.999184339314844, "grad_norm": 0.9609259963035583, "learning_rate": 6.703001231957535e-05, "loss": 0.0853, "num_input_tokens_seen": 225095632, "step": 104205 }, { "epoch": 17.0, "grad_norm": 0.22522194683551788, "learning_rate": 6.699441626555824e-05, "loss": 0.0111, "num_input_tokens_seen": 225105296, "step": 104210 }, { "epoch": 17.0, "eval_loss": 0.29674801230430603, "eval_runtime": 104.219, "eval_samples_per_second": 26.147, "eval_steps_per_second": 6.544, "num_input_tokens_seen": 225105296, "step": 104210 }, { "epoch": 17.000815660685156, "grad_norm": 0.0023162413854151964, "learning_rate": 6.695882898694883e-05, "loss": 0.0008, "num_input_tokens_seen": 225116784, "step": 104215 }, { "epoch": 17.00163132137031, "grad_norm": 0.0006858249544166028, "learning_rate": 6.692325048446784e-05, "loss": 0.0014, "num_input_tokens_seen": 225129328, "step": 104220 }, { "epoch": 17.002446982055464, "grad_norm": 0.7215015888214111, "learning_rate": 6.688768075883683e-05, "loss": 0.0454, "num_input_tokens_seen": 225138544, "step": 104225 }, { "epoch": 17.00326264274062, "grad_norm": 0.015421743504703045, "learning_rate": 6.685211981077616e-05, "loss": 0.0055, "num_input_tokens_seen": 225149072, "step": 104230 }, { "epoch": 17.004078303425775, "grad_norm": 0.001810228219255805, "learning_rate": 6.68165676410069e-05, "loss": 0.0718, "num_input_tokens_seen": 225159344, "step": 104235 }, { "epoch": 17.00489396411093, "grad_norm": 0.004929349757730961, "learning_rate": 6.678102425024946e-05, "loss": 0.0034, "num_input_tokens_seen": 225170096, "step": 104240 }, { "epoch": 17.005709624796086, "grad_norm": 0.030193855985999107, "learning_rate": 6.674548963922412e-05, "loss": 0.0033, "num_input_tokens_seen": 225181104, "step": 104245 }, { "epoch": 17.00652528548124, "grad_norm": 0.0012685685651376843, "learning_rate": 6.670996380865101e-05, "loss": 0.0022, "num_input_tokens_seen": 225191376, "step": 104250 }, { "epoch": 17.007340946166394, "grad_norm": 0.037792641669511795, "learning_rate": 6.667444675925022e-05, "loss": 0.0028, "num_input_tokens_seen": 225202000, "step": 104255 }, { "epoch": 17.00815660685155, "grad_norm": 0.008928779512643814, "learning_rate": 6.663893849174147e-05, "loss": 0.002, "num_input_tokens_seen": 225212592, "step": 104260 }, { "epoch": 17.008972267536706, "grad_norm": 0.02849671244621277, "learning_rate": 6.660343900684434e-05, "loss": 0.0034, "num_input_tokens_seen": 225224912, "step": 104265 }, { "epoch": 17.00978792822186, "grad_norm": 0.0014940741239115596, "learning_rate": 6.656794830527835e-05, "loss": 0.0019, "num_input_tokens_seen": 225235856, "step": 104270 }, { "epoch": 17.010603588907014, "grad_norm": 0.004284200258553028, "learning_rate": 6.653246638776273e-05, "loss": 0.0025, "num_input_tokens_seen": 225246640, "step": 104275 }, { "epoch": 17.01141924959217, "grad_norm": 0.15875405073165894, "learning_rate": 6.649699325501657e-05, "loss": 0.008, "num_input_tokens_seen": 225258864, "step": 104280 }, { "epoch": 17.012234910277325, "grad_norm": 0.1828366369009018, "learning_rate": 6.64615289077588e-05, "loss": 0.0078, "num_input_tokens_seen": 225270416, "step": 104285 }, { "epoch": 17.01305057096248, "grad_norm": 0.02164081484079361, "learning_rate": 6.642607334670808e-05, "loss": 0.0041, "num_input_tokens_seen": 225280560, "step": 104290 }, { "epoch": 17.013866231647636, "grad_norm": 0.01167318969964981, "learning_rate": 6.639062657258305e-05, "loss": 0.0009, "num_input_tokens_seen": 225290864, "step": 104295 }, { "epoch": 17.01468189233279, "grad_norm": 0.009521464817225933, "learning_rate": 6.635518858610207e-05, "loss": 0.0031, "num_input_tokens_seen": 225302576, "step": 104300 }, { "epoch": 17.015497553017944, "grad_norm": 0.02223174087703228, "learning_rate": 6.631975938798312e-05, "loss": 0.0019, "num_input_tokens_seen": 225314000, "step": 104305 }, { "epoch": 17.0163132137031, "grad_norm": 0.0020480211824178696, "learning_rate": 6.62843389789447e-05, "loss": 0.0011, "num_input_tokens_seen": 225323952, "step": 104310 }, { "epoch": 17.017128874388256, "grad_norm": 0.031527843326330185, "learning_rate": 6.624892735970412e-05, "loss": 0.0033, "num_input_tokens_seen": 225334416, "step": 104315 }, { "epoch": 17.017944535073408, "grad_norm": 0.018897900357842445, "learning_rate": 6.621352453097951e-05, "loss": 0.0012, "num_input_tokens_seen": 225345008, "step": 104320 }, { "epoch": 17.018760195758563, "grad_norm": 0.004922399763017893, "learning_rate": 6.617813049348787e-05, "loss": 0.0071, "num_input_tokens_seen": 225357008, "step": 104325 }, { "epoch": 17.01957585644372, "grad_norm": 0.003675678512081504, "learning_rate": 6.6142745247947e-05, "loss": 0.0025, "num_input_tokens_seen": 225367312, "step": 104330 }, { "epoch": 17.020391517128875, "grad_norm": 0.0006571310805156827, "learning_rate": 6.610736879507356e-05, "loss": 0.0049, "num_input_tokens_seen": 225378224, "step": 104335 }, { "epoch": 17.02120717781403, "grad_norm": 0.008297720924019814, "learning_rate": 6.607200113558493e-05, "loss": 0.001, "num_input_tokens_seen": 225389936, "step": 104340 }, { "epoch": 17.022022838499183, "grad_norm": 0.0017699478194117546, "learning_rate": 6.603664227019745e-05, "loss": 0.001, "num_input_tokens_seen": 225400592, "step": 104345 }, { "epoch": 17.02283849918434, "grad_norm": 0.0011303251376375556, "learning_rate": 6.600129219962819e-05, "loss": 0.0008, "num_input_tokens_seen": 225411632, "step": 104350 }, { "epoch": 17.023654159869494, "grad_norm": 0.031992051750421524, "learning_rate": 6.596595092459307e-05, "loss": 0.0027, "num_input_tokens_seen": 225423088, "step": 104355 }, { "epoch": 17.02446982055465, "grad_norm": 0.00496671674773097, "learning_rate": 6.593061844580878e-05, "loss": 0.0034, "num_input_tokens_seen": 225434448, "step": 104360 }, { "epoch": 17.025285481239806, "grad_norm": 0.014466633088886738, "learning_rate": 6.589529476399097e-05, "loss": 0.0022, "num_input_tokens_seen": 225445456, "step": 104365 }, { "epoch": 17.026101141924958, "grad_norm": 0.13525429368019104, "learning_rate": 6.585997987985592e-05, "loss": 0.043, "num_input_tokens_seen": 225456400, "step": 104370 }, { "epoch": 17.026916802610113, "grad_norm": 0.5867858529090881, "learning_rate": 6.582467379411889e-05, "loss": 0.0628, "num_input_tokens_seen": 225466256, "step": 104375 }, { "epoch": 17.02773246329527, "grad_norm": 0.0019362906459718943, "learning_rate": 6.578937650749573e-05, "loss": 0.0012, "num_input_tokens_seen": 225477008, "step": 104380 }, { "epoch": 17.028548123980425, "grad_norm": 0.006260257679969072, "learning_rate": 6.575408802070171e-05, "loss": 0.0039, "num_input_tokens_seen": 225488592, "step": 104385 }, { "epoch": 17.02936378466558, "grad_norm": 0.021634496748447418, "learning_rate": 6.571880833445198e-05, "loss": 0.0118, "num_input_tokens_seen": 225499248, "step": 104390 }, { "epoch": 17.030179445350733, "grad_norm": 0.0008673843694850802, "learning_rate": 6.568353744946154e-05, "loss": 0.0025, "num_input_tokens_seen": 225509968, "step": 104395 }, { "epoch": 17.03099510603589, "grad_norm": 0.0009401759598404169, "learning_rate": 6.564827536644519e-05, "loss": 0.0013, "num_input_tokens_seen": 225520752, "step": 104400 }, { "epoch": 17.031810766721044, "grad_norm": 0.014809912070631981, "learning_rate": 6.561302208611752e-05, "loss": 0.0016, "num_input_tokens_seen": 225530736, "step": 104405 }, { "epoch": 17.0326264274062, "grad_norm": 0.03141998499631882, "learning_rate": 6.557777760919303e-05, "loss": 0.0033, "num_input_tokens_seen": 225542864, "step": 104410 }, { "epoch": 17.033442088091356, "grad_norm": 0.001158158527687192, "learning_rate": 6.554254193638598e-05, "loss": 0.004, "num_input_tokens_seen": 225553296, "step": 104415 }, { "epoch": 17.034257748776508, "grad_norm": 0.49954646825790405, "learning_rate": 6.550731506841046e-05, "loss": 0.0202, "num_input_tokens_seen": 225564016, "step": 104420 }, { "epoch": 17.035073409461663, "grad_norm": 0.0024979086592793465, "learning_rate": 6.54720970059804e-05, "loss": 0.0082, "num_input_tokens_seen": 225575024, "step": 104425 }, { "epoch": 17.03588907014682, "grad_norm": 0.004868679214268923, "learning_rate": 6.543688774980944e-05, "loss": 0.0118, "num_input_tokens_seen": 225586608, "step": 104430 }, { "epoch": 17.036704730831975, "grad_norm": 0.008296649903059006, "learning_rate": 6.540168730061141e-05, "loss": 0.0011, "num_input_tokens_seen": 225596592, "step": 104435 }, { "epoch": 17.03752039151713, "grad_norm": 0.024006053805351257, "learning_rate": 6.53664956590993e-05, "loss": 0.0015, "num_input_tokens_seen": 225607024, "step": 104440 }, { "epoch": 17.038336052202283, "grad_norm": 0.008441988378763199, "learning_rate": 6.533131282598676e-05, "loss": 0.0037, "num_input_tokens_seen": 225617968, "step": 104445 }, { "epoch": 17.03915171288744, "grad_norm": 0.03972849249839783, "learning_rate": 6.529613880198638e-05, "loss": 0.0035, "num_input_tokens_seen": 225628976, "step": 104450 }, { "epoch": 17.039967373572594, "grad_norm": 0.01612004078924656, "learning_rate": 6.526097358781141e-05, "loss": 0.0494, "num_input_tokens_seen": 225639088, "step": 104455 }, { "epoch": 17.04078303425775, "grad_norm": 0.0036437015514820814, "learning_rate": 6.522581718417409e-05, "loss": 0.0086, "num_input_tokens_seen": 225649008, "step": 104460 }, { "epoch": 17.041598694942905, "grad_norm": 0.08305442333221436, "learning_rate": 6.519066959178738e-05, "loss": 0.0044, "num_input_tokens_seen": 225659632, "step": 104465 }, { "epoch": 17.042414355628058, "grad_norm": 0.000809130840934813, "learning_rate": 6.515553081136311e-05, "loss": 0.08, "num_input_tokens_seen": 225670320, "step": 104470 }, { "epoch": 17.043230016313213, "grad_norm": 0.01146237924695015, "learning_rate": 6.512040084361388e-05, "loss": 0.0071, "num_input_tokens_seen": 225680080, "step": 104475 }, { "epoch": 17.04404567699837, "grad_norm": 0.04106791317462921, "learning_rate": 6.508527968925115e-05, "loss": 0.0034, "num_input_tokens_seen": 225691024, "step": 104480 }, { "epoch": 17.044861337683525, "grad_norm": 0.00045756070176139474, "learning_rate": 6.505016734898722e-05, "loss": 0.0125, "num_input_tokens_seen": 225701840, "step": 104485 }, { "epoch": 17.045676998368677, "grad_norm": 0.007039316929876804, "learning_rate": 6.501506382353317e-05, "loss": 0.0033, "num_input_tokens_seen": 225713680, "step": 104490 }, { "epoch": 17.046492659053833, "grad_norm": 0.447841078042984, "learning_rate": 6.497996911360093e-05, "loss": 0.0284, "num_input_tokens_seen": 225724336, "step": 104495 }, { "epoch": 17.04730831973899, "grad_norm": 0.001014457899145782, "learning_rate": 6.494488321990122e-05, "loss": 0.0045, "num_input_tokens_seen": 225735376, "step": 104500 }, { "epoch": 17.048123980424144, "grad_norm": 0.06084701418876648, "learning_rate": 6.490980614314556e-05, "loss": 0.0037, "num_input_tokens_seen": 225746736, "step": 104505 }, { "epoch": 17.0489396411093, "grad_norm": 0.008512042462825775, "learning_rate": 6.487473788404446e-05, "loss": 0.0013, "num_input_tokens_seen": 225757264, "step": 104510 }, { "epoch": 17.049755301794452, "grad_norm": 0.003893906017765403, "learning_rate": 6.483967844330901e-05, "loss": 0.0015, "num_input_tokens_seen": 225769104, "step": 104515 }, { "epoch": 17.050570962479608, "grad_norm": 0.004594842437654734, "learning_rate": 6.480462782164925e-05, "loss": 0.003, "num_input_tokens_seen": 225780464, "step": 104520 }, { "epoch": 17.051386623164763, "grad_norm": 1.0909298658370972, "learning_rate": 6.476958601977595e-05, "loss": 0.0641, "num_input_tokens_seen": 225790736, "step": 104525 }, { "epoch": 17.05220228384992, "grad_norm": 0.001894684974104166, "learning_rate": 6.473455303839909e-05, "loss": 0.002, "num_input_tokens_seen": 225801584, "step": 104530 }, { "epoch": 17.053017944535075, "grad_norm": 0.0030393460765480995, "learning_rate": 6.469952887822866e-05, "loss": 0.0006, "num_input_tokens_seen": 225811920, "step": 104535 }, { "epoch": 17.053833605220227, "grad_norm": 0.007903835736215115, "learning_rate": 6.466451353997455e-05, "loss": 0.003, "num_input_tokens_seen": 225822000, "step": 104540 }, { "epoch": 17.054649265905383, "grad_norm": 0.012811913155019283, "learning_rate": 6.462950702434633e-05, "loss": 0.0116, "num_input_tokens_seen": 225833456, "step": 104545 }, { "epoch": 17.05546492659054, "grad_norm": 0.0014708518283441663, "learning_rate": 6.459450933205346e-05, "loss": 0.002, "num_input_tokens_seen": 225845072, "step": 104550 }, { "epoch": 17.056280587275694, "grad_norm": 0.0006639196653850377, "learning_rate": 6.455952046380514e-05, "loss": 0.0022, "num_input_tokens_seen": 225854960, "step": 104555 }, { "epoch": 17.05709624796085, "grad_norm": 0.02281191386282444, "learning_rate": 6.452454042031059e-05, "loss": 0.0023, "num_input_tokens_seen": 225866544, "step": 104560 }, { "epoch": 17.057911908646002, "grad_norm": 0.00603465223684907, "learning_rate": 6.448956920227867e-05, "loss": 0.0024, "num_input_tokens_seen": 225876016, "step": 104565 }, { "epoch": 17.058727569331158, "grad_norm": 0.007597712334245443, "learning_rate": 6.445460681041815e-05, "loss": 0.005, "num_input_tokens_seen": 225888176, "step": 104570 }, { "epoch": 17.059543230016313, "grad_norm": 0.0021226252429187298, "learning_rate": 6.441965324543737e-05, "loss": 0.0094, "num_input_tokens_seen": 225898672, "step": 104575 }, { "epoch": 17.06035889070147, "grad_norm": 0.005578478332608938, "learning_rate": 6.438470850804512e-05, "loss": 0.0033, "num_input_tokens_seen": 225910256, "step": 104580 }, { "epoch": 17.061174551386625, "grad_norm": 0.0020552859641611576, "learning_rate": 6.43497725989492e-05, "loss": 0.0037, "num_input_tokens_seen": 225921968, "step": 104585 }, { "epoch": 17.061990212071777, "grad_norm": 0.016926616430282593, "learning_rate": 6.431484551885797e-05, "loss": 0.0042, "num_input_tokens_seen": 225932848, "step": 104590 }, { "epoch": 17.062805872756933, "grad_norm": 0.6727308034896851, "learning_rate": 6.427992726847892e-05, "loss": 0.1606, "num_input_tokens_seen": 225944240, "step": 104595 }, { "epoch": 17.063621533442088, "grad_norm": 0.003617391223087907, "learning_rate": 6.424501784852004e-05, "loss": 0.003, "num_input_tokens_seen": 225955856, "step": 104600 }, { "epoch": 17.064437194127244, "grad_norm": 0.0032554580830037594, "learning_rate": 6.421011725968856e-05, "loss": 0.0086, "num_input_tokens_seen": 225966160, "step": 104605 }, { "epoch": 17.0652528548124, "grad_norm": 0.0010663648135960102, "learning_rate": 6.4175225502692e-05, "loss": 0.0017, "num_input_tokens_seen": 225977328, "step": 104610 }, { "epoch": 17.06606851549755, "grad_norm": 0.003959180787205696, "learning_rate": 6.414034257823725e-05, "loss": 0.003, "num_input_tokens_seen": 225986480, "step": 104615 }, { "epoch": 17.066884176182707, "grad_norm": 0.0037438899744302034, "learning_rate": 6.410546848703153e-05, "loss": 0.0056, "num_input_tokens_seen": 225997840, "step": 104620 }, { "epoch": 17.067699836867863, "grad_norm": 0.010389694944024086, "learning_rate": 6.407060322978131e-05, "loss": 0.0045, "num_input_tokens_seen": 226009360, "step": 104625 }, { "epoch": 17.06851549755302, "grad_norm": 0.0010346529306843877, "learning_rate": 6.403574680719343e-05, "loss": 0.0026, "num_input_tokens_seen": 226020176, "step": 104630 }, { "epoch": 17.069331158238175, "grad_norm": 0.00428399071097374, "learning_rate": 6.400089921997415e-05, "loss": 0.0009, "num_input_tokens_seen": 226031216, "step": 104635 }, { "epoch": 17.070146818923327, "grad_norm": 0.007730559445917606, "learning_rate": 6.39660604688298e-05, "loss": 0.0014, "num_input_tokens_seen": 226041680, "step": 104640 }, { "epoch": 17.070962479608482, "grad_norm": 0.018052903935313225, "learning_rate": 6.393123055446637e-05, "loss": 0.0013, "num_input_tokens_seen": 226052784, "step": 104645 }, { "epoch": 17.071778140293638, "grad_norm": 0.092511385679245, "learning_rate": 6.389640947758973e-05, "loss": 0.0125, "num_input_tokens_seen": 226063824, "step": 104650 }, { "epoch": 17.072593800978794, "grad_norm": 0.005565250292420387, "learning_rate": 6.38615972389056e-05, "loss": 0.0027, "num_input_tokens_seen": 226074896, "step": 104655 }, { "epoch": 17.07340946166395, "grad_norm": 0.1231377050280571, "learning_rate": 6.382679383911949e-05, "loss": 0.0029, "num_input_tokens_seen": 226084816, "step": 104660 }, { "epoch": 17.0742251223491, "grad_norm": 0.001401619054377079, "learning_rate": 6.37919992789367e-05, "loss": 0.0017, "num_input_tokens_seen": 226095376, "step": 104665 }, { "epoch": 17.075040783034257, "grad_norm": 0.012654143385589123, "learning_rate": 6.375721355906245e-05, "loss": 0.0195, "num_input_tokens_seen": 226105936, "step": 104670 }, { "epoch": 17.075856443719413, "grad_norm": 0.000800961337517947, "learning_rate": 6.372243668020167e-05, "loss": 0.0029, "num_input_tokens_seen": 226116368, "step": 104675 }, { "epoch": 17.07667210440457, "grad_norm": 0.00023649254580959678, "learning_rate": 6.368766864305914e-05, "loss": 0.0043, "num_input_tokens_seen": 226127056, "step": 104680 }, { "epoch": 17.07748776508972, "grad_norm": 0.001044937875121832, "learning_rate": 6.365290944833952e-05, "loss": 0.0022, "num_input_tokens_seen": 226137200, "step": 104685 }, { "epoch": 17.078303425774877, "grad_norm": 0.17576022446155548, "learning_rate": 6.361815909674722e-05, "loss": 0.005, "num_input_tokens_seen": 226147600, "step": 104690 }, { "epoch": 17.079119086460032, "grad_norm": 0.002019402338191867, "learning_rate": 6.358341758898656e-05, "loss": 0.0009, "num_input_tokens_seen": 226156784, "step": 104695 }, { "epoch": 17.079934747145188, "grad_norm": 0.012892481870949268, "learning_rate": 6.354868492576154e-05, "loss": 0.0011, "num_input_tokens_seen": 226167600, "step": 104700 }, { "epoch": 17.080750407830344, "grad_norm": 0.024450616911053658, "learning_rate": 6.351396110777613e-05, "loss": 0.0041, "num_input_tokens_seen": 226179088, "step": 104705 }, { "epoch": 17.081566068515496, "grad_norm": 0.00017877235950436443, "learning_rate": 6.347924613573402e-05, "loss": 0.0021, "num_input_tokens_seen": 226190928, "step": 104710 }, { "epoch": 17.08238172920065, "grad_norm": 0.0333293154835701, "learning_rate": 6.344454001033873e-05, "loss": 0.0037, "num_input_tokens_seen": 226201072, "step": 104715 }, { "epoch": 17.083197389885807, "grad_norm": 0.009895344264805317, "learning_rate": 6.340984273229355e-05, "loss": 0.0013, "num_input_tokens_seen": 226212688, "step": 104720 }, { "epoch": 17.084013050570963, "grad_norm": 0.0060686697252094746, "learning_rate": 6.337515430230196e-05, "loss": 0.0024, "num_input_tokens_seen": 226223760, "step": 104725 }, { "epoch": 17.08482871125612, "grad_norm": 0.00028824826586060226, "learning_rate": 6.334047472106657e-05, "loss": 0.0022, "num_input_tokens_seen": 226233264, "step": 104730 }, { "epoch": 17.08564437194127, "grad_norm": 0.0006094975979067385, "learning_rate": 6.330580398929047e-05, "loss": 0.0006, "num_input_tokens_seen": 226243440, "step": 104735 }, { "epoch": 17.086460032626427, "grad_norm": 0.010639664717018604, "learning_rate": 6.327114210767632e-05, "loss": 0.0015, "num_input_tokens_seen": 226255504, "step": 104740 }, { "epoch": 17.087275693311582, "grad_norm": 0.5435303449630737, "learning_rate": 6.323648907692642e-05, "loss": 0.0707, "num_input_tokens_seen": 226266768, "step": 104745 }, { "epoch": 17.088091353996738, "grad_norm": 0.04907776787877083, "learning_rate": 6.320184489774317e-05, "loss": 0.0057, "num_input_tokens_seen": 226277616, "step": 104750 }, { "epoch": 17.088907014681894, "grad_norm": 0.0444052629172802, "learning_rate": 6.316720957082867e-05, "loss": 0.0028, "num_input_tokens_seen": 226288144, "step": 104755 }, { "epoch": 17.089722675367046, "grad_norm": 0.008682828396558762, "learning_rate": 6.31325830968848e-05, "loss": 0.0018, "num_input_tokens_seen": 226300560, "step": 104760 }, { "epoch": 17.0905383360522, "grad_norm": 0.01425588596612215, "learning_rate": 6.30979654766134e-05, "loss": 0.0016, "num_input_tokens_seen": 226311248, "step": 104765 }, { "epoch": 17.091353996737357, "grad_norm": 0.05291926860809326, "learning_rate": 6.306335671071589e-05, "loss": 0.0026, "num_input_tokens_seen": 226323056, "step": 104770 }, { "epoch": 17.092169657422513, "grad_norm": 0.004806335549801588, "learning_rate": 6.302875679989384e-05, "loss": 0.0005, "num_input_tokens_seen": 226333072, "step": 104775 }, { "epoch": 17.09298531810767, "grad_norm": 0.0032293670810759068, "learning_rate": 6.299416574484828e-05, "loss": 0.0017, "num_input_tokens_seen": 226343920, "step": 104780 }, { "epoch": 17.09380097879282, "grad_norm": 0.008256429806351662, "learning_rate": 6.29595835462804e-05, "loss": 0.0042, "num_input_tokens_seen": 226354288, "step": 104785 }, { "epoch": 17.094616639477977, "grad_norm": 0.030211608856916428, "learning_rate": 6.2925010204891e-05, "loss": 0.0055, "num_input_tokens_seen": 226365520, "step": 104790 }, { "epoch": 17.095432300163132, "grad_norm": 0.000742889940738678, "learning_rate": 6.289044572138069e-05, "loss": 0.0009, "num_input_tokens_seen": 226376944, "step": 104795 }, { "epoch": 17.096247960848288, "grad_norm": 0.00248258956708014, "learning_rate": 6.285589009644999e-05, "loss": 0.0345, "num_input_tokens_seen": 226387824, "step": 104800 }, { "epoch": 17.097063621533444, "grad_norm": 0.00329033937305212, "learning_rate": 6.282134333079926e-05, "loss": 0.0042, "num_input_tokens_seen": 226399056, "step": 104805 }, { "epoch": 17.097879282218596, "grad_norm": 0.47946974635124207, "learning_rate": 6.278680542512866e-05, "loss": 0.0237, "num_input_tokens_seen": 226408624, "step": 104810 }, { "epoch": 17.09869494290375, "grad_norm": 0.0005470985197462142, "learning_rate": 6.275227638013803e-05, "loss": 0.0034, "num_input_tokens_seen": 226418640, "step": 104815 }, { "epoch": 17.099510603588907, "grad_norm": 0.0025842119939625263, "learning_rate": 6.271775619652719e-05, "loss": 0.0012, "num_input_tokens_seen": 226430576, "step": 104820 }, { "epoch": 17.100326264274063, "grad_norm": 0.002082726452499628, "learning_rate": 6.268324487499583e-05, "loss": 0.0068, "num_input_tokens_seen": 226441456, "step": 104825 }, { "epoch": 17.10114192495922, "grad_norm": 0.0005375830223783851, "learning_rate": 6.264874241624324e-05, "loss": 0.0008, "num_input_tokens_seen": 226453040, "step": 104830 }, { "epoch": 17.10195758564437, "grad_norm": 0.0021206443198025227, "learning_rate": 6.261424882096866e-05, "loss": 0.0015, "num_input_tokens_seen": 226464624, "step": 104835 }, { "epoch": 17.102773246329527, "grad_norm": 0.0012728559086099267, "learning_rate": 6.257976408987115e-05, "loss": 0.0006, "num_input_tokens_seen": 226476048, "step": 104840 }, { "epoch": 17.103588907014682, "grad_norm": 0.0008652537362650037, "learning_rate": 6.254528822364985e-05, "loss": 0.0035, "num_input_tokens_seen": 226487408, "step": 104845 }, { "epoch": 17.104404567699838, "grad_norm": 0.001977971289306879, "learning_rate": 6.2510821223003e-05, "loss": 0.0152, "num_input_tokens_seen": 226498896, "step": 104850 }, { "epoch": 17.10522022838499, "grad_norm": 0.00024319304793607444, "learning_rate": 6.247636308862953e-05, "loss": 0.0007, "num_input_tokens_seen": 226509456, "step": 104855 }, { "epoch": 17.106035889070146, "grad_norm": 0.0021559372544288635, "learning_rate": 6.244191382122744e-05, "loss": 0.0014, "num_input_tokens_seen": 226518640, "step": 104860 }, { "epoch": 17.1068515497553, "grad_norm": 0.013561434112489223, "learning_rate": 6.240747342149511e-05, "loss": 0.0011, "num_input_tokens_seen": 226528944, "step": 104865 }, { "epoch": 17.107667210440457, "grad_norm": 0.01310847606509924, "learning_rate": 6.237304189013049e-05, "loss": 0.0011, "num_input_tokens_seen": 226540528, "step": 104870 }, { "epoch": 17.108482871125613, "grad_norm": 0.05319130793213844, "learning_rate": 6.233861922783135e-05, "loss": 0.0035, "num_input_tokens_seen": 226551024, "step": 104875 }, { "epoch": 17.109298531810765, "grad_norm": 0.005616676993668079, "learning_rate": 6.230420543529525e-05, "loss": 0.0015, "num_input_tokens_seen": 226561616, "step": 104880 }, { "epoch": 17.11011419249592, "grad_norm": 0.002177638467401266, "learning_rate": 6.226980051321973e-05, "loss": 0.0006, "num_input_tokens_seen": 226572240, "step": 104885 }, { "epoch": 17.110929853181077, "grad_norm": 0.0035934222396463156, "learning_rate": 6.223540446230202e-05, "loss": 0.0009, "num_input_tokens_seen": 226583728, "step": 104890 }, { "epoch": 17.111745513866232, "grad_norm": 0.00031812474480830133, "learning_rate": 6.220101728323913e-05, "loss": 0.0012, "num_input_tokens_seen": 226593392, "step": 104895 }, { "epoch": 17.112561174551388, "grad_norm": 0.08002560585737228, "learning_rate": 6.216663897672803e-05, "loss": 0.0038, "num_input_tokens_seen": 226605040, "step": 104900 }, { "epoch": 17.11337683523654, "grad_norm": 0.0005137875559739769, "learning_rate": 6.213226954346546e-05, "loss": 0.0949, "num_input_tokens_seen": 226614992, "step": 104905 }, { "epoch": 17.114192495921696, "grad_norm": 0.00699897576123476, "learning_rate": 6.209790898414785e-05, "loss": 0.0015, "num_input_tokens_seen": 226624432, "step": 104910 }, { "epoch": 17.11500815660685, "grad_norm": 0.0014027615543454885, "learning_rate": 6.206355729947171e-05, "loss": 0.0036, "num_input_tokens_seen": 226634928, "step": 104915 }, { "epoch": 17.115823817292007, "grad_norm": 0.038768503814935684, "learning_rate": 6.20292144901331e-05, "loss": 0.002, "num_input_tokens_seen": 226646736, "step": 104920 }, { "epoch": 17.116639477977163, "grad_norm": 0.09800397604703903, "learning_rate": 6.199488055682806e-05, "loss": 0.0085, "num_input_tokens_seen": 226657040, "step": 104925 }, { "epoch": 17.117455138662315, "grad_norm": 0.0012403081636875868, "learning_rate": 6.196055550025243e-05, "loss": 0.0076, "num_input_tokens_seen": 226668112, "step": 104930 }, { "epoch": 17.11827079934747, "grad_norm": 0.00031859471346251667, "learning_rate": 6.192623932110187e-05, "loss": 0.0014, "num_input_tokens_seen": 226679152, "step": 104935 }, { "epoch": 17.119086460032626, "grad_norm": 0.0007942294469103217, "learning_rate": 6.189193202007176e-05, "loss": 0.001, "num_input_tokens_seen": 226690224, "step": 104940 }, { "epoch": 17.119902120717782, "grad_norm": 0.0008386078989133239, "learning_rate": 6.185763359785729e-05, "loss": 0.0002, "num_input_tokens_seen": 226701264, "step": 104945 }, { "epoch": 17.120717781402938, "grad_norm": 0.013233168050646782, "learning_rate": 6.182334405515399e-05, "loss": 0.0021, "num_input_tokens_seen": 226711600, "step": 104950 }, { "epoch": 17.12153344208809, "grad_norm": 0.005685046315193176, "learning_rate": 6.178906339265622e-05, "loss": 0.0009, "num_input_tokens_seen": 226722672, "step": 104955 }, { "epoch": 17.122349102773246, "grad_norm": 0.002959656063467264, "learning_rate": 6.175479161105923e-05, "loss": 0.0063, "num_input_tokens_seen": 226734064, "step": 104960 }, { "epoch": 17.1231647634584, "grad_norm": 0.14378587901592255, "learning_rate": 6.17205287110571e-05, "loss": 0.0028, "num_input_tokens_seen": 226745392, "step": 104965 }, { "epoch": 17.123980424143557, "grad_norm": 0.0013684089062735438, "learning_rate": 6.16862746933447e-05, "loss": 0.0456, "num_input_tokens_seen": 226754512, "step": 104970 }, { "epoch": 17.124796084828713, "grad_norm": 0.0015672908630222082, "learning_rate": 6.165202955861577e-05, "loss": 0.0004, "num_input_tokens_seen": 226765328, "step": 104975 }, { "epoch": 17.125611745513865, "grad_norm": 0.0004411570553202182, "learning_rate": 6.161779330756473e-05, "loss": 0.0005, "num_input_tokens_seen": 226775888, "step": 104980 }, { "epoch": 17.12642740619902, "grad_norm": 0.00609734607860446, "learning_rate": 6.158356594088504e-05, "loss": 0.0011, "num_input_tokens_seen": 226787728, "step": 104985 }, { "epoch": 17.127243066884176, "grad_norm": 0.0029845749959349632, "learning_rate": 6.154934745927076e-05, "loss": 0.0018, "num_input_tokens_seen": 226798736, "step": 104990 }, { "epoch": 17.128058727569332, "grad_norm": 0.021898532286286354, "learning_rate": 6.151513786341495e-05, "loss": 0.0025, "num_input_tokens_seen": 226808880, "step": 104995 }, { "epoch": 17.128874388254488, "grad_norm": 0.0029983953572809696, "learning_rate": 6.148093715401138e-05, "loss": 0.0043, "num_input_tokens_seen": 226819696, "step": 105000 }, { "epoch": 17.12969004893964, "grad_norm": 0.001115454942919314, "learning_rate": 6.144674533175265e-05, "loss": 0.0011, "num_input_tokens_seen": 226830608, "step": 105005 }, { "epoch": 17.130505709624796, "grad_norm": 0.0005920961848460138, "learning_rate": 6.141256239733212e-05, "loss": 0.0014, "num_input_tokens_seen": 226841712, "step": 105010 }, { "epoch": 17.13132137030995, "grad_norm": 0.00042530731298029423, "learning_rate": 6.137838835144239e-05, "loss": 0.0014, "num_input_tokens_seen": 226852432, "step": 105015 }, { "epoch": 17.132137030995107, "grad_norm": 0.001647250261157751, "learning_rate": 6.1344223194776e-05, "loss": 0.0005, "num_input_tokens_seen": 226861552, "step": 105020 }, { "epoch": 17.13295269168026, "grad_norm": 0.013181711547076702, "learning_rate": 6.13100669280255e-05, "loss": 0.0023, "num_input_tokens_seen": 226873392, "step": 105025 }, { "epoch": 17.133768352365415, "grad_norm": 0.0009910253575071692, "learning_rate": 6.127591955188295e-05, "loss": 0.0029, "num_input_tokens_seen": 226884368, "step": 105030 }, { "epoch": 17.13458401305057, "grad_norm": 0.009880750440061092, "learning_rate": 6.124178106704042e-05, "loss": 0.0017, "num_input_tokens_seen": 226895088, "step": 105035 }, { "epoch": 17.135399673735726, "grad_norm": 0.00189596030395478, "learning_rate": 6.120765147418989e-05, "loss": 0.0019, "num_input_tokens_seen": 226905872, "step": 105040 }, { "epoch": 17.136215334420882, "grad_norm": 0.037216730415821075, "learning_rate": 6.117353077402288e-05, "loss": 0.003, "num_input_tokens_seen": 226917072, "step": 105045 }, { "epoch": 17.137030995106034, "grad_norm": 0.006446500774472952, "learning_rate": 6.113941896723097e-05, "loss": 0.0048, "num_input_tokens_seen": 226929584, "step": 105050 }, { "epoch": 17.13784665579119, "grad_norm": 0.03469831869006157, "learning_rate": 6.110531605450548e-05, "loss": 0.0021, "num_input_tokens_seen": 226941456, "step": 105055 }, { "epoch": 17.138662316476346, "grad_norm": 0.14603827893733978, "learning_rate": 6.107122203653742e-05, "loss": 0.0047, "num_input_tokens_seen": 226950672, "step": 105060 }, { "epoch": 17.1394779771615, "grad_norm": 0.0005901344702579081, "learning_rate": 6.103713691401813e-05, "loss": 0.0036, "num_input_tokens_seen": 226961008, "step": 105065 }, { "epoch": 17.140293637846657, "grad_norm": 0.001494377851486206, "learning_rate": 6.1003060687637836e-05, "loss": 0.0052, "num_input_tokens_seen": 226971632, "step": 105070 }, { "epoch": 17.14110929853181, "grad_norm": 0.0025044973008334637, "learning_rate": 6.09689933580877e-05, "loss": 0.002, "num_input_tokens_seen": 226982192, "step": 105075 }, { "epoch": 17.141924959216965, "grad_norm": 0.0009858324192464352, "learning_rate": 6.0934934926057616e-05, "loss": 0.0199, "num_input_tokens_seen": 226992752, "step": 105080 }, { "epoch": 17.14274061990212, "grad_norm": 0.008440673351287842, "learning_rate": 6.0900885392238316e-05, "loss": 0.0011, "num_input_tokens_seen": 227003152, "step": 105085 }, { "epoch": 17.143556280587276, "grad_norm": 0.023581720888614655, "learning_rate": 6.086684475731935e-05, "loss": 0.0122, "num_input_tokens_seen": 227014128, "step": 105090 }, { "epoch": 17.144371941272432, "grad_norm": 7.596343040466309, "learning_rate": 6.083281302199112e-05, "loss": 0.0715, "num_input_tokens_seen": 227024336, "step": 105095 }, { "epoch": 17.145187601957584, "grad_norm": 0.0012364864815026522, "learning_rate": 6.0798790186942784e-05, "loss": 0.0035, "num_input_tokens_seen": 227035632, "step": 105100 }, { "epoch": 17.14600326264274, "grad_norm": 0.004918968304991722, "learning_rate": 6.0764776252864365e-05, "loss": 0.0023, "num_input_tokens_seen": 227046288, "step": 105105 }, { "epoch": 17.146818923327896, "grad_norm": 0.04475180432200432, "learning_rate": 6.073077122044479e-05, "loss": 0.1003, "num_input_tokens_seen": 227056624, "step": 105110 }, { "epoch": 17.14763458401305, "grad_norm": 0.0032711310777813196, "learning_rate": 6.069677509037358e-05, "loss": 0.0035, "num_input_tokens_seen": 227067984, "step": 105115 }, { "epoch": 17.148450244698207, "grad_norm": 0.004042398650199175, "learning_rate": 6.066278786333928e-05, "loss": 0.0006, "num_input_tokens_seen": 227078800, "step": 105120 }, { "epoch": 17.14926590538336, "grad_norm": 0.00016070179117377847, "learning_rate": 6.062880954003114e-05, "loss": 0.0012, "num_input_tokens_seen": 227091120, "step": 105125 }, { "epoch": 17.150081566068515, "grad_norm": 0.0017621108563616872, "learning_rate": 6.059484012113736e-05, "loss": 0.0025, "num_input_tokens_seen": 227100752, "step": 105130 }, { "epoch": 17.15089722675367, "grad_norm": 0.024079062044620514, "learning_rate": 6.0560879607346795e-05, "loss": 0.0089, "num_input_tokens_seen": 227112176, "step": 105135 }, { "epoch": 17.151712887438826, "grad_norm": 0.010822267271578312, "learning_rate": 6.0526927999347224e-05, "loss": 0.0036, "num_input_tokens_seen": 227123504, "step": 105140 }, { "epoch": 17.152528548123982, "grad_norm": 0.04497012123465538, "learning_rate": 6.049298529782721e-05, "loss": 0.0014, "num_input_tokens_seen": 227133712, "step": 105145 }, { "epoch": 17.153344208809134, "grad_norm": 0.017783276736736298, "learning_rate": 6.045905150347419e-05, "loss": 0.0007, "num_input_tokens_seen": 227144816, "step": 105150 }, { "epoch": 17.15415986949429, "grad_norm": 0.004039814695715904, "learning_rate": 6.0425126616976186e-05, "loss": 0.0032, "num_input_tokens_seen": 227155152, "step": 105155 }, { "epoch": 17.154975530179446, "grad_norm": 0.0007679007248952985, "learning_rate": 6.039121063902064e-05, "loss": 0.006, "num_input_tokens_seen": 227165680, "step": 105160 }, { "epoch": 17.1557911908646, "grad_norm": 0.0743962973356247, "learning_rate": 6.03573035702949e-05, "loss": 0.004, "num_input_tokens_seen": 227177168, "step": 105165 }, { "epoch": 17.156606851549757, "grad_norm": 0.005738751031458378, "learning_rate": 6.032340541148612e-05, "loss": 0.0047, "num_input_tokens_seen": 227186576, "step": 105170 }, { "epoch": 17.15742251223491, "grad_norm": 0.0028034579008817673, "learning_rate": 6.0289516163281264e-05, "loss": 0.0023, "num_input_tokens_seen": 227197264, "step": 105175 }, { "epoch": 17.158238172920065, "grad_norm": 0.06388754397630692, "learning_rate": 6.025563582636723e-05, "loss": 0.0022, "num_input_tokens_seen": 227207984, "step": 105180 }, { "epoch": 17.15905383360522, "grad_norm": 0.00031546890386380255, "learning_rate": 6.0221764401430565e-05, "loss": 0.0045, "num_input_tokens_seen": 227219568, "step": 105185 }, { "epoch": 17.159869494290376, "grad_norm": 0.0043876804411411285, "learning_rate": 6.0187901889157735e-05, "loss": 0.0056, "num_input_tokens_seen": 227231056, "step": 105190 }, { "epoch": 17.160685154975532, "grad_norm": 0.016384651884436607, "learning_rate": 6.015404829023502e-05, "loss": 0.001, "num_input_tokens_seen": 227243184, "step": 105195 }, { "epoch": 17.161500815660684, "grad_norm": 0.0020360194612294436, "learning_rate": 6.012020360534853e-05, "loss": 0.0006, "num_input_tokens_seen": 227255024, "step": 105200 }, { "epoch": 17.16231647634584, "grad_norm": 0.0018645500531420112, "learning_rate": 6.008636783518401e-05, "loss": 0.001, "num_input_tokens_seen": 227266576, "step": 105205 }, { "epoch": 17.163132137030995, "grad_norm": 0.017587218433618546, "learning_rate": 6.005254098042751e-05, "loss": 0.0515, "num_input_tokens_seen": 227277072, "step": 105210 }, { "epoch": 17.16394779771615, "grad_norm": 0.0003726345603354275, "learning_rate": 6.00187230417642e-05, "loss": 0.0028, "num_input_tokens_seen": 227287280, "step": 105215 }, { "epoch": 17.164763458401303, "grad_norm": 0.015266234055161476, "learning_rate": 5.998491401987982e-05, "loss": 0.0011, "num_input_tokens_seen": 227299632, "step": 105220 }, { "epoch": 17.16557911908646, "grad_norm": 0.0002786066324915737, "learning_rate": 5.9951113915459154e-05, "loss": 0.0005, "num_input_tokens_seen": 227311024, "step": 105225 }, { "epoch": 17.166394779771615, "grad_norm": 0.037656597793102264, "learning_rate": 5.9917322729187594e-05, "loss": 0.0017, "num_input_tokens_seen": 227321424, "step": 105230 }, { "epoch": 17.16721044045677, "grad_norm": 0.0023824572563171387, "learning_rate": 5.9883540461749596e-05, "loss": 0.1298, "num_input_tokens_seen": 227330864, "step": 105235 }, { "epoch": 17.168026101141926, "grad_norm": 0.01473549846559763, "learning_rate": 5.984976711383017e-05, "loss": 0.0052, "num_input_tokens_seen": 227342640, "step": 105240 }, { "epoch": 17.16884176182708, "grad_norm": 0.46272391080856323, "learning_rate": 5.981600268611337e-05, "loss": 0.0114, "num_input_tokens_seen": 227354864, "step": 105245 }, { "epoch": 17.169657422512234, "grad_norm": 0.014566629193723202, "learning_rate": 5.9782247179283875e-05, "loss": 0.0105, "num_input_tokens_seen": 227366000, "step": 105250 }, { "epoch": 17.17047308319739, "grad_norm": 0.0011301173362880945, "learning_rate": 5.9748500594025425e-05, "loss": 0.0083, "num_input_tokens_seen": 227375696, "step": 105255 }, { "epoch": 17.171288743882545, "grad_norm": 0.01436126884073019, "learning_rate": 5.971476293102229e-05, "loss": 0.0459, "num_input_tokens_seen": 227385936, "step": 105260 }, { "epoch": 17.1721044045677, "grad_norm": 0.016061117872595787, "learning_rate": 5.9681034190957886e-05, "loss": 0.0011, "num_input_tokens_seen": 227396720, "step": 105265 }, { "epoch": 17.172920065252853, "grad_norm": 0.0018371932674199343, "learning_rate": 5.964731437451593e-05, "loss": 0.0013, "num_input_tokens_seen": 227407472, "step": 105270 }, { "epoch": 17.17373572593801, "grad_norm": 0.0007460727938450873, "learning_rate": 5.961360348237982e-05, "loss": 0.0033, "num_input_tokens_seen": 227417616, "step": 105275 }, { "epoch": 17.174551386623165, "grad_norm": 0.007591134402900934, "learning_rate": 5.9579901515232684e-05, "loss": 0.0052, "num_input_tokens_seen": 227428816, "step": 105280 }, { "epoch": 17.17536704730832, "grad_norm": 0.022664356976747513, "learning_rate": 5.954620847375758e-05, "loss": 0.0025, "num_input_tokens_seen": 227438160, "step": 105285 }, { "epoch": 17.176182707993476, "grad_norm": 0.004871509037911892, "learning_rate": 5.9512524358637296e-05, "loss": 0.001, "num_input_tokens_seen": 227448144, "step": 105290 }, { "epoch": 17.17699836867863, "grad_norm": 0.017250265926122665, "learning_rate": 5.9478849170554513e-05, "loss": 0.0011, "num_input_tokens_seen": 227459856, "step": 105295 }, { "epoch": 17.177814029363784, "grad_norm": 0.0011680923635140061, "learning_rate": 5.944518291019168e-05, "loss": 0.001, "num_input_tokens_seen": 227470896, "step": 105300 }, { "epoch": 17.17862969004894, "grad_norm": 9.958234295481816e-05, "learning_rate": 5.9411525578231094e-05, "loss": 0.0013, "num_input_tokens_seen": 227481328, "step": 105305 }, { "epoch": 17.179445350734095, "grad_norm": 0.006742374040186405, "learning_rate": 5.9377877175354865e-05, "loss": 0.0023, "num_input_tokens_seen": 227493104, "step": 105310 }, { "epoch": 17.18026101141925, "grad_norm": 0.027583172544836998, "learning_rate": 5.934423770224495e-05, "loss": 0.0021, "num_input_tokens_seen": 227504400, "step": 105315 }, { "epoch": 17.181076672104403, "grad_norm": 0.0003342593845445663, "learning_rate": 5.931060715958309e-05, "loss": 0.0065, "num_input_tokens_seen": 227516272, "step": 105320 }, { "epoch": 17.18189233278956, "grad_norm": 0.0027394210919737816, "learning_rate": 5.9276985548050775e-05, "loss": 0.0023, "num_input_tokens_seen": 227526704, "step": 105325 }, { "epoch": 17.182707993474715, "grad_norm": 0.008948412723839283, "learning_rate": 5.924337286832948e-05, "loss": 0.0009, "num_input_tokens_seen": 227537520, "step": 105330 }, { "epoch": 17.18352365415987, "grad_norm": 0.0016631630714982748, "learning_rate": 5.9209769121100374e-05, "loss": 0.0023, "num_input_tokens_seen": 227548688, "step": 105335 }, { "epoch": 17.184339314845026, "grad_norm": 0.0002861691755242646, "learning_rate": 5.917617430704447e-05, "loss": 0.0008, "num_input_tokens_seen": 227558320, "step": 105340 }, { "epoch": 17.18515497553018, "grad_norm": 0.00043132706196047366, "learning_rate": 5.9142588426842615e-05, "loss": 0.0028, "num_input_tokens_seen": 227569392, "step": 105345 }, { "epoch": 17.185970636215334, "grad_norm": 0.0013702671276405454, "learning_rate": 5.9109011481175364e-05, "loss": 0.0183, "num_input_tokens_seen": 227580976, "step": 105350 }, { "epoch": 17.18678629690049, "grad_norm": 0.009048002772033215, "learning_rate": 5.907544347072352e-05, "loss": 0.0019, "num_input_tokens_seen": 227591216, "step": 105355 }, { "epoch": 17.187601957585645, "grad_norm": 0.009810620918869972, "learning_rate": 5.904188439616692e-05, "loss": 0.001, "num_input_tokens_seen": 227602288, "step": 105360 }, { "epoch": 17.1884176182708, "grad_norm": 0.7481173276901245, "learning_rate": 5.9008334258186195e-05, "loss": 0.062, "num_input_tokens_seen": 227614064, "step": 105365 }, { "epoch": 17.189233278955953, "grad_norm": 0.007403769996017218, "learning_rate": 5.897479305746079e-05, "loss": 0.0053, "num_input_tokens_seen": 227624432, "step": 105370 }, { "epoch": 17.19004893964111, "grad_norm": 0.001949156867340207, "learning_rate": 5.894126079467077e-05, "loss": 0.0016, "num_input_tokens_seen": 227635792, "step": 105375 }, { "epoch": 17.190864600326265, "grad_norm": 0.0001903936208691448, "learning_rate": 5.890773747049566e-05, "loss": 0.0003, "num_input_tokens_seen": 227645552, "step": 105380 }, { "epoch": 17.19168026101142, "grad_norm": 0.019825341179966927, "learning_rate": 5.88742230856148e-05, "loss": 0.0017, "num_input_tokens_seen": 227656304, "step": 105385 }, { "epoch": 17.192495921696572, "grad_norm": 0.010199088603258133, "learning_rate": 5.884071764070736e-05, "loss": 0.0013, "num_input_tokens_seen": 227666416, "step": 105390 }, { "epoch": 17.193311582381728, "grad_norm": 0.007535295560956001, "learning_rate": 5.880722113645248e-05, "loss": 0.0056, "num_input_tokens_seen": 227677424, "step": 105395 }, { "epoch": 17.194127243066884, "grad_norm": 0.005390184931457043, "learning_rate": 5.877373357352894e-05, "loss": 0.0006, "num_input_tokens_seen": 227688784, "step": 105400 }, { "epoch": 17.19494290375204, "grad_norm": 0.0014657375868409872, "learning_rate": 5.874025495261548e-05, "loss": 0.0003, "num_input_tokens_seen": 227698800, "step": 105405 }, { "epoch": 17.195758564437195, "grad_norm": 0.0014141921419650316, "learning_rate": 5.870678527439049e-05, "loss": 0.0042, "num_input_tokens_seen": 227708688, "step": 105410 }, { "epoch": 17.196574225122347, "grad_norm": 0.0031896489672362804, "learning_rate": 5.867332453953228e-05, "loss": 0.1541, "num_input_tokens_seen": 227719120, "step": 105415 }, { "epoch": 17.197389885807503, "grad_norm": 0.5455310940742493, "learning_rate": 5.863987274871907e-05, "loss": 0.0074, "num_input_tokens_seen": 227730128, "step": 105420 }, { "epoch": 17.19820554649266, "grad_norm": 0.0009574625873938203, "learning_rate": 5.860642990262871e-05, "loss": 0.003, "num_input_tokens_seen": 227742608, "step": 105425 }, { "epoch": 17.199021207177815, "grad_norm": 0.010655845515429974, "learning_rate": 5.857299600193899e-05, "loss": 0.0007, "num_input_tokens_seen": 227753680, "step": 105430 }, { "epoch": 17.19983686786297, "grad_norm": 0.0006226776167750359, "learning_rate": 5.853957104732749e-05, "loss": 0.0049, "num_input_tokens_seen": 227764592, "step": 105435 }, { "epoch": 17.200652528548122, "grad_norm": 0.011091392487287521, "learning_rate": 5.850615503947166e-05, "loss": 0.0009, "num_input_tokens_seen": 227776048, "step": 105440 }, { "epoch": 17.201468189233278, "grad_norm": 0.00044521092786453664, "learning_rate": 5.8472747979048665e-05, "loss": 0.0007, "num_input_tokens_seen": 227787024, "step": 105445 }, { "epoch": 17.202283849918434, "grad_norm": 0.2557276785373688, "learning_rate": 5.843934986673549e-05, "loss": 0.0067, "num_input_tokens_seen": 227799056, "step": 105450 }, { "epoch": 17.20309951060359, "grad_norm": 0.0029599005356431007, "learning_rate": 5.840596070320914e-05, "loss": 0.0052, "num_input_tokens_seen": 227809296, "step": 105455 }, { "epoch": 17.203915171288745, "grad_norm": 0.00032506947172805667, "learning_rate": 5.837258048914612e-05, "loss": 0.0046, "num_input_tokens_seen": 227820624, "step": 105460 }, { "epoch": 17.204730831973897, "grad_norm": 0.00039597388240508735, "learning_rate": 5.833920922522301e-05, "loss": 0.0019, "num_input_tokens_seen": 227830640, "step": 105465 }, { "epoch": 17.205546492659053, "grad_norm": 0.0009330728207714856, "learning_rate": 5.830584691211615e-05, "loss": 0.0005, "num_input_tokens_seen": 227841904, "step": 105470 }, { "epoch": 17.20636215334421, "grad_norm": 0.0004881395725533366, "learning_rate": 5.827249355050163e-05, "loss": 0.0017, "num_input_tokens_seen": 227851536, "step": 105475 }, { "epoch": 17.207177814029365, "grad_norm": 0.0007340696756727993, "learning_rate": 5.823914914105527e-05, "loss": 0.0005, "num_input_tokens_seen": 227862288, "step": 105480 }, { "epoch": 17.20799347471452, "grad_norm": 0.008161459118127823, "learning_rate": 5.820581368445316e-05, "loss": 0.0044, "num_input_tokens_seen": 227872720, "step": 105485 }, { "epoch": 17.208809135399672, "grad_norm": 0.008229502476751804, "learning_rate": 5.817248718137053e-05, "loss": 0.0008, "num_input_tokens_seen": 227884336, "step": 105490 }, { "epoch": 17.209624796084828, "grad_norm": 0.0020968979224562645, "learning_rate": 5.8139169632483e-05, "loss": 0.0005, "num_input_tokens_seen": 227895248, "step": 105495 }, { "epoch": 17.210440456769984, "grad_norm": 0.0001975457853404805, "learning_rate": 5.810586103846577e-05, "loss": 0.0035, "num_input_tokens_seen": 227906064, "step": 105500 }, { "epoch": 17.21125611745514, "grad_norm": 0.0006039740983396769, "learning_rate": 5.807256139999384e-05, "loss": 0.0008, "num_input_tokens_seen": 227917552, "step": 105505 }, { "epoch": 17.212071778140295, "grad_norm": 0.0028502692002803087, "learning_rate": 5.8039270717742065e-05, "loss": 0.0009, "num_input_tokens_seen": 227927792, "step": 105510 }, { "epoch": 17.212887438825447, "grad_norm": 0.0009251784649677575, "learning_rate": 5.8005988992385184e-05, "loss": 0.0043, "num_input_tokens_seen": 227938800, "step": 105515 }, { "epoch": 17.213703099510603, "grad_norm": 0.011867698282003403, "learning_rate": 5.79727162245976e-05, "loss": 0.0007, "num_input_tokens_seen": 227950160, "step": 105520 }, { "epoch": 17.21451876019576, "grad_norm": 0.00040899330633692443, "learning_rate": 5.7939452415053664e-05, "loss": 0.0009, "num_input_tokens_seen": 227961904, "step": 105525 }, { "epoch": 17.215334420880914, "grad_norm": 0.0014331662096083164, "learning_rate": 5.7906197564427557e-05, "loss": 0.0168, "num_input_tokens_seen": 227972944, "step": 105530 }, { "epoch": 17.21615008156607, "grad_norm": 0.024769123643636703, "learning_rate": 5.7872951673393184e-05, "loss": 0.0035, "num_input_tokens_seen": 227982800, "step": 105535 }, { "epoch": 17.216965742251222, "grad_norm": 0.0008173759561032057, "learning_rate": 5.7839714742624284e-05, "loss": 0.0008, "num_input_tokens_seen": 227993104, "step": 105540 }, { "epoch": 17.217781402936378, "grad_norm": 0.0005532324430532753, "learning_rate": 5.780648677279454e-05, "loss": 0.0015, "num_input_tokens_seen": 228004208, "step": 105545 }, { "epoch": 17.218597063621534, "grad_norm": 0.008099708706140518, "learning_rate": 5.777326776457725e-05, "loss": 0.0006, "num_input_tokens_seen": 228014864, "step": 105550 }, { "epoch": 17.21941272430669, "grad_norm": 0.011459157802164555, "learning_rate": 5.774005771864571e-05, "loss": 0.0009, "num_input_tokens_seen": 228026480, "step": 105555 }, { "epoch": 17.22022838499184, "grad_norm": 0.0037186089903116226, "learning_rate": 5.7706856635672986e-05, "loss": 0.0006, "num_input_tokens_seen": 228037328, "step": 105560 }, { "epoch": 17.221044045676997, "grad_norm": 0.08368998020887375, "learning_rate": 5.767366451633188e-05, "loss": 0.0018, "num_input_tokens_seen": 228048944, "step": 105565 }, { "epoch": 17.221859706362153, "grad_norm": 0.0037311904598027468, "learning_rate": 5.764048136129507e-05, "loss": 0.0009, "num_input_tokens_seen": 228058832, "step": 105570 }, { "epoch": 17.22267536704731, "grad_norm": 0.003354444168508053, "learning_rate": 5.760730717123508e-05, "loss": 0.0008, "num_input_tokens_seen": 228069424, "step": 105575 }, { "epoch": 17.223491027732464, "grad_norm": 0.00479935435578227, "learning_rate": 5.757414194682426e-05, "loss": 0.026, "num_input_tokens_seen": 228080464, "step": 105580 }, { "epoch": 17.224306688417617, "grad_norm": 0.013407070189714432, "learning_rate": 5.754098568873456e-05, "loss": 0.0021, "num_input_tokens_seen": 228091696, "step": 105585 }, { "epoch": 17.225122349102772, "grad_norm": 0.511169970035553, "learning_rate": 5.7507838397638346e-05, "loss": 0.0432, "num_input_tokens_seen": 228102800, "step": 105590 }, { "epoch": 17.225938009787928, "grad_norm": 0.0041065155528485775, "learning_rate": 5.7474700074206856e-05, "loss": 0.0055, "num_input_tokens_seen": 228113744, "step": 105595 }, { "epoch": 17.226753670473084, "grad_norm": 0.004399343393743038, "learning_rate": 5.7441570719112216e-05, "loss": 0.0104, "num_input_tokens_seen": 228124336, "step": 105600 }, { "epoch": 17.22756933115824, "grad_norm": 0.018818650394678116, "learning_rate": 5.740845033302533e-05, "loss": 0.0077, "num_input_tokens_seen": 228134576, "step": 105605 }, { "epoch": 17.22838499184339, "grad_norm": 0.06135449558496475, "learning_rate": 5.737533891661789e-05, "loss": 0.0034, "num_input_tokens_seen": 228145200, "step": 105610 }, { "epoch": 17.229200652528547, "grad_norm": 0.025219673290848732, "learning_rate": 5.734223647056053e-05, "loss": 0.0023, "num_input_tokens_seen": 228156048, "step": 105615 }, { "epoch": 17.230016313213703, "grad_norm": 0.023373369127511978, "learning_rate": 5.7309142995524475e-05, "loss": 0.0034, "num_input_tokens_seen": 228165808, "step": 105620 }, { "epoch": 17.23083197389886, "grad_norm": 0.08417593687772751, "learning_rate": 5.7276058492179984e-05, "loss": 0.0059, "num_input_tokens_seen": 228176592, "step": 105625 }, { "epoch": 17.231647634584014, "grad_norm": 0.01127390656620264, "learning_rate": 5.724298296119796e-05, "loss": 0.0018, "num_input_tokens_seen": 228187696, "step": 105630 }, { "epoch": 17.232463295269167, "grad_norm": 0.03395693004131317, "learning_rate": 5.7209916403248574e-05, "loss": 0.0029, "num_input_tokens_seen": 228198224, "step": 105635 }, { "epoch": 17.233278955954322, "grad_norm": 0.005188298411667347, "learning_rate": 5.717685881900192e-05, "loss": 0.0085, "num_input_tokens_seen": 228209040, "step": 105640 }, { "epoch": 17.234094616639478, "grad_norm": 0.0007153578335419297, "learning_rate": 5.714381020912801e-05, "loss": 0.0043, "num_input_tokens_seen": 228219984, "step": 105645 }, { "epoch": 17.234910277324634, "grad_norm": 0.0007897784234955907, "learning_rate": 5.711077057429659e-05, "loss": 0.0018, "num_input_tokens_seen": 228230544, "step": 105650 }, { "epoch": 17.23572593800979, "grad_norm": 0.005037500057369471, "learning_rate": 5.7077739915177226e-05, "loss": 0.0008, "num_input_tokens_seen": 228240880, "step": 105655 }, { "epoch": 17.23654159869494, "grad_norm": 0.015429419465363026, "learning_rate": 5.704471823243934e-05, "loss": 0.0014, "num_input_tokens_seen": 228250384, "step": 105660 }, { "epoch": 17.237357259380097, "grad_norm": 0.001333926455117762, "learning_rate": 5.701170552675217e-05, "loss": 0.0006, "num_input_tokens_seen": 228261968, "step": 105665 }, { "epoch": 17.238172920065253, "grad_norm": 0.008555333130061626, "learning_rate": 5.6978701798784785e-05, "loss": 0.0025, "num_input_tokens_seen": 228271664, "step": 105670 }, { "epoch": 17.23898858075041, "grad_norm": 0.1655566245317459, "learning_rate": 5.6945707049205985e-05, "loss": 0.0066, "num_input_tokens_seen": 228282704, "step": 105675 }, { "epoch": 17.239804241435564, "grad_norm": 0.0028315861709415913, "learning_rate": 5.691272127868452e-05, "loss": 0.0021, "num_input_tokens_seen": 228294032, "step": 105680 }, { "epoch": 17.240619902120716, "grad_norm": 0.007239778526127338, "learning_rate": 5.6879744487888854e-05, "loss": 0.0013, "num_input_tokens_seen": 228305648, "step": 105685 }, { "epoch": 17.241435562805872, "grad_norm": 0.00043697163346223533, "learning_rate": 5.684677667748717e-05, "loss": 0.0003, "num_input_tokens_seen": 228316304, "step": 105690 }, { "epoch": 17.242251223491028, "grad_norm": 0.0034264670684933662, "learning_rate": 5.681381784814799e-05, "loss": 0.0017, "num_input_tokens_seen": 228326608, "step": 105695 }, { "epoch": 17.243066884176184, "grad_norm": 0.0002579323190730065, "learning_rate": 5.678086800053878e-05, "loss": 0.002, "num_input_tokens_seen": 228337168, "step": 105700 }, { "epoch": 17.24388254486134, "grad_norm": 0.003010801738128066, "learning_rate": 5.674792713532772e-05, "loss": 0.0018, "num_input_tokens_seen": 228347632, "step": 105705 }, { "epoch": 17.24469820554649, "grad_norm": 0.0034991370048373938, "learning_rate": 5.671499525318208e-05, "loss": 0.0256, "num_input_tokens_seen": 228359408, "step": 105710 }, { "epoch": 17.245513866231647, "grad_norm": 0.0008481157710775733, "learning_rate": 5.668207235476957e-05, "loss": 0.0003, "num_input_tokens_seen": 228370768, "step": 105715 }, { "epoch": 17.246329526916803, "grad_norm": 0.004653229843825102, "learning_rate": 5.664915844075702e-05, "loss": 0.004, "num_input_tokens_seen": 228382000, "step": 105720 }, { "epoch": 17.24714518760196, "grad_norm": 0.0016604745760560036, "learning_rate": 5.6616253511811934e-05, "loss": 0.0016, "num_input_tokens_seen": 228392688, "step": 105725 }, { "epoch": 17.247960848287114, "grad_norm": 0.000807464646641165, "learning_rate": 5.6583357568600776e-05, "loss": 0.0008, "num_input_tokens_seen": 228404240, "step": 105730 }, { "epoch": 17.248776508972266, "grad_norm": 0.004214088898152113, "learning_rate": 5.6550470611790584e-05, "loss": 0.0027, "num_input_tokens_seen": 228414992, "step": 105735 }, { "epoch": 17.249592169657422, "grad_norm": 0.002003757981583476, "learning_rate": 5.6517592642047424e-05, "loss": 0.0013, "num_input_tokens_seen": 228426224, "step": 105740 }, { "epoch": 17.250407830342578, "grad_norm": 0.00016738157137297094, "learning_rate": 5.648472366003804e-05, "loss": 0.0055, "num_input_tokens_seen": 228437552, "step": 105745 }, { "epoch": 17.251223491027734, "grad_norm": 0.04558353126049042, "learning_rate": 5.6451863666428236e-05, "loss": 0.0024, "num_input_tokens_seen": 228449072, "step": 105750 }, { "epoch": 17.252039151712886, "grad_norm": 0.0008563185692764819, "learning_rate": 5.6419012661884206e-05, "loss": 0.0007, "num_input_tokens_seen": 228460240, "step": 105755 }, { "epoch": 17.25285481239804, "grad_norm": 0.0002939916157629341, "learning_rate": 5.6386170647071464e-05, "loss": 0.0151, "num_input_tokens_seen": 228471088, "step": 105760 }, { "epoch": 17.253670473083197, "grad_norm": 0.005507839843630791, "learning_rate": 5.6353337622655935e-05, "loss": 0.0009, "num_input_tokens_seen": 228482224, "step": 105765 }, { "epoch": 17.254486133768353, "grad_norm": 0.0018616120796650648, "learning_rate": 5.632051358930263e-05, "loss": 0.054, "num_input_tokens_seen": 228493104, "step": 105770 }, { "epoch": 17.25530179445351, "grad_norm": 0.06906536966562271, "learning_rate": 5.628769854767707e-05, "loss": 0.0026, "num_input_tokens_seen": 228502800, "step": 105775 }, { "epoch": 17.25611745513866, "grad_norm": 0.0015966150676831603, "learning_rate": 5.6254892498444175e-05, "loss": 0.0018, "num_input_tokens_seen": 228511952, "step": 105780 }, { "epoch": 17.256933115823816, "grad_norm": 0.000293926423182711, "learning_rate": 5.6222095442268805e-05, "loss": 0.0052, "num_input_tokens_seen": 228522576, "step": 105785 }, { "epoch": 17.257748776508972, "grad_norm": 0.006921887863427401, "learning_rate": 5.6189307379815645e-05, "loss": 0.0011, "num_input_tokens_seen": 228532336, "step": 105790 }, { "epoch": 17.258564437194128, "grad_norm": 0.0023210467770695686, "learning_rate": 5.615652831174917e-05, "loss": 0.0009, "num_input_tokens_seen": 228544304, "step": 105795 }, { "epoch": 17.259380097879284, "grad_norm": 0.004994812421500683, "learning_rate": 5.612375823873373e-05, "loss": 0.0014, "num_input_tokens_seen": 228555664, "step": 105800 }, { "epoch": 17.260195758564436, "grad_norm": 0.004979619290679693, "learning_rate": 5.60909971614334e-05, "loss": 0.0012, "num_input_tokens_seen": 228565840, "step": 105805 }, { "epoch": 17.26101141924959, "grad_norm": 0.004859395790845156, "learning_rate": 5.605824508051216e-05, "loss": 0.001, "num_input_tokens_seen": 228577936, "step": 105810 }, { "epoch": 17.261827079934747, "grad_norm": 0.21579575538635254, "learning_rate": 5.602550199663381e-05, "loss": 0.0092, "num_input_tokens_seen": 228587760, "step": 105815 }, { "epoch": 17.262642740619903, "grad_norm": 0.0003847281914204359, "learning_rate": 5.599276791046182e-05, "loss": 0.0007, "num_input_tokens_seen": 228598192, "step": 105820 }, { "epoch": 17.26345840130506, "grad_norm": 0.003210867289453745, "learning_rate": 5.5960042822659596e-05, "loss": 0.0025, "num_input_tokens_seen": 228609744, "step": 105825 }, { "epoch": 17.26427406199021, "grad_norm": 0.0023815941531211138, "learning_rate": 5.592732673389056e-05, "loss": 0.0004, "num_input_tokens_seen": 228620816, "step": 105830 }, { "epoch": 17.265089722675366, "grad_norm": 0.005627058446407318, "learning_rate": 5.5894619644817455e-05, "loss": 0.0006, "num_input_tokens_seen": 228631088, "step": 105835 }, { "epoch": 17.265905383360522, "grad_norm": 0.045385442674160004, "learning_rate": 5.586192155610342e-05, "loss": 0.0016, "num_input_tokens_seen": 228640368, "step": 105840 }, { "epoch": 17.266721044045678, "grad_norm": 0.01856350153684616, "learning_rate": 5.582923246841082e-05, "loss": 0.0016, "num_input_tokens_seen": 228651024, "step": 105845 }, { "epoch": 17.267536704730833, "grad_norm": 0.01033297274261713, "learning_rate": 5.5796552382402446e-05, "loss": 0.0014, "num_input_tokens_seen": 228661680, "step": 105850 }, { "epoch": 17.268352365415986, "grad_norm": 0.0004607291193678975, "learning_rate": 5.576388129874027e-05, "loss": 0.0006, "num_input_tokens_seen": 228671664, "step": 105855 }, { "epoch": 17.26916802610114, "grad_norm": 0.010909834876656532, "learning_rate": 5.5731219218086824e-05, "loss": 0.017, "num_input_tokens_seen": 228681968, "step": 105860 }, { "epoch": 17.269983686786297, "grad_norm": 0.0018032594816759229, "learning_rate": 5.569856614110358e-05, "loss": 0.002, "num_input_tokens_seen": 228693616, "step": 105865 }, { "epoch": 17.270799347471453, "grad_norm": 0.0004068401758559048, "learning_rate": 5.566592206845272e-05, "loss": 0.0012, "num_input_tokens_seen": 228704400, "step": 105870 }, { "epoch": 17.27161500815661, "grad_norm": 0.025432869791984558, "learning_rate": 5.563328700079545e-05, "loss": 0.0035, "num_input_tokens_seen": 228714704, "step": 105875 }, { "epoch": 17.27243066884176, "grad_norm": 0.003037866437807679, "learning_rate": 5.560066093879351e-05, "loss": 0.0006, "num_input_tokens_seen": 228724656, "step": 105880 }, { "epoch": 17.273246329526916, "grad_norm": 0.00031391988159157336, "learning_rate": 5.556804388310777e-05, "loss": 0.0005, "num_input_tokens_seen": 228735280, "step": 105885 }, { "epoch": 17.274061990212072, "grad_norm": 0.0008085937006399035, "learning_rate": 5.5535435834399626e-05, "loss": 0.0047, "num_input_tokens_seen": 228746288, "step": 105890 }, { "epoch": 17.274877650897228, "grad_norm": 0.0009067684295587242, "learning_rate": 5.550283679332951e-05, "loss": 0.0014, "num_input_tokens_seen": 228757200, "step": 105895 }, { "epoch": 17.275693311582383, "grad_norm": 0.0005091895000077784, "learning_rate": 5.5470246760558455e-05, "loss": 0.0009, "num_input_tokens_seen": 228768720, "step": 105900 }, { "epoch": 17.276508972267536, "grad_norm": 0.00601581484079361, "learning_rate": 5.543766573674663e-05, "loss": 0.0006, "num_input_tokens_seen": 228778640, "step": 105905 }, { "epoch": 17.27732463295269, "grad_norm": 0.00027671127463690937, "learning_rate": 5.5405093722554534e-05, "loss": 0.0483, "num_input_tokens_seen": 228790256, "step": 105910 }, { "epoch": 17.278140293637847, "grad_norm": 0.08887345343828201, "learning_rate": 5.5372530718642235e-05, "loss": 0.0021, "num_input_tokens_seen": 228801232, "step": 105915 }, { "epoch": 17.278955954323003, "grad_norm": 0.006647658068686724, "learning_rate": 5.533997672566965e-05, "loss": 0.0006, "num_input_tokens_seen": 228812528, "step": 105920 }, { "epoch": 17.27977161500816, "grad_norm": 0.047590646892786026, "learning_rate": 5.5307431744296534e-05, "loss": 0.006, "num_input_tokens_seen": 228822800, "step": 105925 }, { "epoch": 17.28058727569331, "grad_norm": 0.002654826734215021, "learning_rate": 5.5274895775182464e-05, "loss": 0.0044, "num_input_tokens_seen": 228833584, "step": 105930 }, { "epoch": 17.281402936378466, "grad_norm": 0.004782018251717091, "learning_rate": 5.524236881898681e-05, "loss": 0.0032, "num_input_tokens_seen": 228843984, "step": 105935 }, { "epoch": 17.282218597063622, "grad_norm": 0.009541511535644531, "learning_rate": 5.5209850876368705e-05, "loss": 0.0007, "num_input_tokens_seen": 228855824, "step": 105940 }, { "epoch": 17.283034257748778, "grad_norm": 0.003033567452803254, "learning_rate": 5.517734194798729e-05, "loss": 0.0005, "num_input_tokens_seen": 228866384, "step": 105945 }, { "epoch": 17.28384991843393, "grad_norm": 0.017595946788787842, "learning_rate": 5.514484203450132e-05, "loss": 0.0296, "num_input_tokens_seen": 228878000, "step": 105950 }, { "epoch": 17.284665579119086, "grad_norm": 0.00025085004745051265, "learning_rate": 5.511235113656943e-05, "loss": 0.0023, "num_input_tokens_seen": 228888240, "step": 105955 }, { "epoch": 17.28548123980424, "grad_norm": 0.0006416584365069866, "learning_rate": 5.50798692548502e-05, "loss": 0.0134, "num_input_tokens_seen": 228899920, "step": 105960 }, { "epoch": 17.286296900489397, "grad_norm": 0.004831426776945591, "learning_rate": 5.504739639000178e-05, "loss": 0.0011, "num_input_tokens_seen": 228911760, "step": 105965 }, { "epoch": 17.287112561174553, "grad_norm": 0.0004655012162402272, "learning_rate": 5.501493254268225e-05, "loss": 0.0026, "num_input_tokens_seen": 228922192, "step": 105970 }, { "epoch": 17.287928221859705, "grad_norm": 0.0009831018978729844, "learning_rate": 5.4982477713549806e-05, "loss": 0.0009, "num_input_tokens_seen": 228932848, "step": 105975 }, { "epoch": 17.28874388254486, "grad_norm": 0.002096477197483182, "learning_rate": 5.495003190326181e-05, "loss": 0.0022, "num_input_tokens_seen": 228944560, "step": 105980 }, { "epoch": 17.289559543230016, "grad_norm": 0.0002526229072827846, "learning_rate": 5.491759511247618e-05, "loss": 0.0034, "num_input_tokens_seen": 228955824, "step": 105985 }, { "epoch": 17.290375203915172, "grad_norm": 0.0006310658063739538, "learning_rate": 5.488516734184995e-05, "loss": 0.0006, "num_input_tokens_seen": 228966672, "step": 105990 }, { "epoch": 17.291190864600328, "grad_norm": 0.01685495860874653, "learning_rate": 5.485274859204065e-05, "loss": 0.0009, "num_input_tokens_seen": 228978224, "step": 105995 }, { "epoch": 17.29200652528548, "grad_norm": 0.013227180577814579, "learning_rate": 5.482033886370491e-05, "loss": 0.0009, "num_input_tokens_seen": 228988880, "step": 106000 }, { "epoch": 17.292822185970635, "grad_norm": 0.05939861759543419, "learning_rate": 5.478793815749994e-05, "loss": 0.0024, "num_input_tokens_seen": 229000944, "step": 106005 }, { "epoch": 17.29363784665579, "grad_norm": 0.011226335540413857, "learning_rate": 5.4755546474082044e-05, "loss": 0.0013, "num_input_tokens_seen": 229011472, "step": 106010 }, { "epoch": 17.294453507340947, "grad_norm": 0.00020121457055211067, "learning_rate": 5.472316381410786e-05, "loss": 0.0002, "num_input_tokens_seen": 229022032, "step": 106015 }, { "epoch": 17.295269168026103, "grad_norm": 0.002397694159299135, "learning_rate": 5.46907901782337e-05, "loss": 0.0004, "num_input_tokens_seen": 229033328, "step": 106020 }, { "epoch": 17.296084828711255, "grad_norm": 0.002774233929812908, "learning_rate": 5.4658425567115535e-05, "loss": 0.0004, "num_input_tokens_seen": 229044656, "step": 106025 }, { "epoch": 17.29690048939641, "grad_norm": 0.0003554042486939579, "learning_rate": 5.4626069981409395e-05, "loss": 0.0039, "num_input_tokens_seen": 229056656, "step": 106030 }, { "epoch": 17.297716150081566, "grad_norm": 0.06345248967409134, "learning_rate": 5.459372342177088e-05, "loss": 0.0025, "num_input_tokens_seen": 229067440, "step": 106035 }, { "epoch": 17.298531810766722, "grad_norm": 0.01404933538287878, "learning_rate": 5.456138588885562e-05, "loss": 0.0013, "num_input_tokens_seen": 229077168, "step": 106040 }, { "epoch": 17.299347471451878, "grad_norm": 0.0008357339538633823, "learning_rate": 5.452905738331898e-05, "loss": 0.0006, "num_input_tokens_seen": 229087984, "step": 106045 }, { "epoch": 17.30016313213703, "grad_norm": 0.03588249534368515, "learning_rate": 5.449673790581611e-05, "loss": 0.0018, "num_input_tokens_seen": 229098448, "step": 106050 }, { "epoch": 17.300978792822185, "grad_norm": 0.009078881703317165, "learning_rate": 5.446442745700198e-05, "loss": 0.0031, "num_input_tokens_seen": 229110160, "step": 106055 }, { "epoch": 17.30179445350734, "grad_norm": 0.11321654915809631, "learning_rate": 5.443212603753145e-05, "loss": 0.0039, "num_input_tokens_seen": 229119024, "step": 106060 }, { "epoch": 17.302610114192497, "grad_norm": 0.00385329220443964, "learning_rate": 5.439983364805912e-05, "loss": 0.0027, "num_input_tokens_seen": 229130128, "step": 106065 }, { "epoch": 17.303425774877653, "grad_norm": 0.0014652871759608388, "learning_rate": 5.436755028923945e-05, "loss": 0.0007, "num_input_tokens_seen": 229141552, "step": 106070 }, { "epoch": 17.304241435562805, "grad_norm": 0.0064964075572788715, "learning_rate": 5.433527596172666e-05, "loss": 0.0034, "num_input_tokens_seen": 229152208, "step": 106075 }, { "epoch": 17.30505709624796, "grad_norm": 0.002419215627014637, "learning_rate": 5.430301066617493e-05, "loss": 0.0009, "num_input_tokens_seen": 229163504, "step": 106080 }, { "epoch": 17.305872756933116, "grad_norm": 0.00044188229367136955, "learning_rate": 5.4270754403238034e-05, "loss": 0.0015, "num_input_tokens_seen": 229174864, "step": 106085 }, { "epoch": 17.306688417618272, "grad_norm": 0.12870584428310394, "learning_rate": 5.4238507173569816e-05, "loss": 0.0022, "num_input_tokens_seen": 229185584, "step": 106090 }, { "epoch": 17.307504078303428, "grad_norm": 0.0003019646101165563, "learning_rate": 5.420626897782366e-05, "loss": 0.0563, "num_input_tokens_seen": 229196432, "step": 106095 }, { "epoch": 17.30831973898858, "grad_norm": 0.002201332477852702, "learning_rate": 5.417403981665309e-05, "loss": 0.0031, "num_input_tokens_seen": 229207088, "step": 106100 }, { "epoch": 17.309135399673735, "grad_norm": 0.006552582141011953, "learning_rate": 5.414181969071108e-05, "loss": 0.0006, "num_input_tokens_seen": 229216560, "step": 106105 }, { "epoch": 17.30995106035889, "grad_norm": 0.0005127699696458876, "learning_rate": 5.410960860065073e-05, "loss": 0.0004, "num_input_tokens_seen": 229227600, "step": 106110 }, { "epoch": 17.310766721044047, "grad_norm": 0.0056101856753230095, "learning_rate": 5.407740654712473e-05, "loss": 0.001, "num_input_tokens_seen": 229236816, "step": 106115 }, { "epoch": 17.3115823817292, "grad_norm": 0.03478972986340523, "learning_rate": 5.4045213530785896e-05, "loss": 0.0252, "num_input_tokens_seen": 229247248, "step": 106120 }, { "epoch": 17.312398042414355, "grad_norm": 0.0011379508068785071, "learning_rate": 5.401302955228654e-05, "loss": 0.0013, "num_input_tokens_seen": 229257264, "step": 106125 }, { "epoch": 17.31321370309951, "grad_norm": 0.00601399689912796, "learning_rate": 5.398085461227886e-05, "loss": 0.0029, "num_input_tokens_seen": 229266416, "step": 106130 }, { "epoch": 17.314029363784666, "grad_norm": 0.004694198723882437, "learning_rate": 5.394868871141506e-05, "loss": 0.0071, "num_input_tokens_seen": 229275984, "step": 106135 }, { "epoch": 17.31484502446982, "grad_norm": 0.000795087544247508, "learning_rate": 5.3916531850346895e-05, "loss": 0.0004, "num_input_tokens_seen": 229287568, "step": 106140 }, { "epoch": 17.315660685154974, "grad_norm": 0.027289612218737602, "learning_rate": 5.388438402972612e-05, "loss": 0.0022, "num_input_tokens_seen": 229297904, "step": 106145 }, { "epoch": 17.31647634584013, "grad_norm": 0.0041520558297634125, "learning_rate": 5.385224525020421e-05, "loss": 0.0032, "num_input_tokens_seen": 229309264, "step": 106150 }, { "epoch": 17.317292006525285, "grad_norm": 0.004298088140785694, "learning_rate": 5.382011551243254e-05, "loss": 0.001, "num_input_tokens_seen": 229319760, "step": 106155 }, { "epoch": 17.31810766721044, "grad_norm": 0.00606426689773798, "learning_rate": 5.3787994817062256e-05, "loss": 0.0015, "num_input_tokens_seen": 229330832, "step": 106160 }, { "epoch": 17.318923327895597, "grad_norm": 0.000536845822352916, "learning_rate": 5.3755883164744335e-05, "loss": 0.0011, "num_input_tokens_seen": 229341040, "step": 106165 }, { "epoch": 17.31973898858075, "grad_norm": 0.11563540250062943, "learning_rate": 5.372378055612953e-05, "loss": 0.0039, "num_input_tokens_seen": 229350992, "step": 106170 }, { "epoch": 17.320554649265905, "grad_norm": 0.00938224047422409, "learning_rate": 5.369168699186844e-05, "loss": 0.0019, "num_input_tokens_seen": 229361488, "step": 106175 }, { "epoch": 17.32137030995106, "grad_norm": 0.002466683741658926, "learning_rate": 5.365960247261148e-05, "loss": 0.054, "num_input_tokens_seen": 229371472, "step": 106180 }, { "epoch": 17.322185970636216, "grad_norm": 0.726901113986969, "learning_rate": 5.3627526999008966e-05, "loss": 0.0237, "num_input_tokens_seen": 229383216, "step": 106185 }, { "epoch": 17.32300163132137, "grad_norm": 0.03403662145137787, "learning_rate": 5.359546057171083e-05, "loss": 0.0052, "num_input_tokens_seen": 229393072, "step": 106190 }, { "epoch": 17.323817292006524, "grad_norm": 0.0012271327432245016, "learning_rate": 5.356340319136699e-05, "loss": 0.0016, "num_input_tokens_seen": 229403120, "step": 106195 }, { "epoch": 17.32463295269168, "grad_norm": 0.002099724020808935, "learning_rate": 5.353135485862715e-05, "loss": 0.0013, "num_input_tokens_seen": 229413872, "step": 106200 }, { "epoch": 17.325448613376835, "grad_norm": 0.0009085267083719373, "learning_rate": 5.3499315574140784e-05, "loss": 0.1136, "num_input_tokens_seen": 229424624, "step": 106205 }, { "epoch": 17.32626427406199, "grad_norm": 0.008246471174061298, "learning_rate": 5.3467285338557213e-05, "loss": 0.0013, "num_input_tokens_seen": 229435504, "step": 106210 }, { "epoch": 17.327079934747147, "grad_norm": 0.03950975835323334, "learning_rate": 5.343526415252553e-05, "loss": 0.0026, "num_input_tokens_seen": 229445456, "step": 106215 }, { "epoch": 17.3278955954323, "grad_norm": 0.06826602667570114, "learning_rate": 5.340325201669477e-05, "loss": 0.0022, "num_input_tokens_seen": 229454960, "step": 106220 }, { "epoch": 17.328711256117455, "grad_norm": 0.0021825393196195364, "learning_rate": 5.337124893171358e-05, "loss": 0.0009, "num_input_tokens_seen": 229466000, "step": 106225 }, { "epoch": 17.32952691680261, "grad_norm": 0.002758385380730033, "learning_rate": 5.333925489823077e-05, "loss": 0.0021, "num_input_tokens_seen": 229476912, "step": 106230 }, { "epoch": 17.330342577487766, "grad_norm": 0.0008691848488524556, "learning_rate": 5.330726991689439e-05, "loss": 0.0003, "num_input_tokens_seen": 229486928, "step": 106235 }, { "epoch": 17.33115823817292, "grad_norm": 0.06600486487150192, "learning_rate": 5.327529398835307e-05, "loss": 0.0087, "num_input_tokens_seen": 229497072, "step": 106240 }, { "epoch": 17.331973898858074, "grad_norm": 0.0016669132746756077, "learning_rate": 5.324332711325447e-05, "loss": 0.0025, "num_input_tokens_seen": 229509232, "step": 106245 }, { "epoch": 17.33278955954323, "grad_norm": 0.009230856783688068, "learning_rate": 5.3211369292246735e-05, "loss": 0.0027, "num_input_tokens_seen": 229519952, "step": 106250 }, { "epoch": 17.333605220228385, "grad_norm": 0.01821569725871086, "learning_rate": 5.317942052597724e-05, "loss": 0.0368, "num_input_tokens_seen": 229530096, "step": 106255 }, { "epoch": 17.33442088091354, "grad_norm": 0.029215874150395393, "learning_rate": 5.3147480815093684e-05, "loss": 0.0019, "num_input_tokens_seen": 229540656, "step": 106260 }, { "epoch": 17.335236541598697, "grad_norm": 0.04781011864542961, "learning_rate": 5.311555016024328e-05, "loss": 0.0021, "num_input_tokens_seen": 229549968, "step": 106265 }, { "epoch": 17.33605220228385, "grad_norm": 0.000586999929510057, "learning_rate": 5.308362856207322e-05, "loss": 0.0008, "num_input_tokens_seen": 229561264, "step": 106270 }, { "epoch": 17.336867862969005, "grad_norm": 0.0008068184251897037, "learning_rate": 5.3051716021230375e-05, "loss": 0.0007, "num_input_tokens_seen": 229572272, "step": 106275 }, { "epoch": 17.33768352365416, "grad_norm": 0.12466217577457428, "learning_rate": 5.3019812538361466e-05, "loss": 0.0032, "num_input_tokens_seen": 229583408, "step": 106280 }, { "epoch": 17.338499184339316, "grad_norm": 0.0025564394891262054, "learning_rate": 5.298791811411313e-05, "loss": 0.0008, "num_input_tokens_seen": 229593200, "step": 106285 }, { "epoch": 17.339314845024468, "grad_norm": 0.0067188916727900505, "learning_rate": 5.295603274913169e-05, "loss": 0.0015, "num_input_tokens_seen": 229603280, "step": 106290 }, { "epoch": 17.340130505709624, "grad_norm": 0.044277604669332504, "learning_rate": 5.292415644406334e-05, "loss": 0.0032, "num_input_tokens_seen": 229613840, "step": 106295 }, { "epoch": 17.34094616639478, "grad_norm": 0.0005162619636394083, "learning_rate": 5.289228919955413e-05, "loss": 0.0039, "num_input_tokens_seen": 229624784, "step": 106300 }, { "epoch": 17.341761827079935, "grad_norm": 0.0008603575988672674, "learning_rate": 5.286043101624988e-05, "loss": 0.0045, "num_input_tokens_seen": 229636240, "step": 106305 }, { "epoch": 17.34257748776509, "grad_norm": 0.00944200623780489, "learning_rate": 5.2828581894796226e-05, "loss": 0.0009, "num_input_tokens_seen": 229647696, "step": 106310 }, { "epoch": 17.343393148450243, "grad_norm": 0.01924579218029976, "learning_rate": 5.2796741835838656e-05, "loss": 0.0027, "num_input_tokens_seen": 229657808, "step": 106315 }, { "epoch": 17.3442088091354, "grad_norm": 0.0027068655472248793, "learning_rate": 5.276491084002238e-05, "loss": 0.0005, "num_input_tokens_seen": 229668592, "step": 106320 }, { "epoch": 17.345024469820554, "grad_norm": 0.0045636678114533424, "learning_rate": 5.273308890799261e-05, "loss": 0.0015, "num_input_tokens_seen": 229679632, "step": 106325 }, { "epoch": 17.34584013050571, "grad_norm": 0.010973965749144554, "learning_rate": 5.270127604039404e-05, "loss": 0.0029, "num_input_tokens_seen": 229691408, "step": 106330 }, { "epoch": 17.346655791190866, "grad_norm": 0.2658245861530304, "learning_rate": 5.266947223787177e-05, "loss": 0.0074, "num_input_tokens_seen": 229701776, "step": 106335 }, { "epoch": 17.347471451876018, "grad_norm": 0.02542303130030632, "learning_rate": 5.263767750106996e-05, "loss": 0.0022, "num_input_tokens_seen": 229712720, "step": 106340 }, { "epoch": 17.348287112561174, "grad_norm": 0.0022277773823589087, "learning_rate": 5.2605891830633304e-05, "loss": 0.0046, "num_input_tokens_seen": 229722480, "step": 106345 }, { "epoch": 17.34910277324633, "grad_norm": 0.005143773276358843, "learning_rate": 5.257411522720562e-05, "loss": 0.0019, "num_input_tokens_seen": 229733456, "step": 106350 }, { "epoch": 17.349918433931485, "grad_norm": 0.009331930428743362, "learning_rate": 5.2542347691431235e-05, "loss": 0.0007, "num_input_tokens_seen": 229743600, "step": 106355 }, { "epoch": 17.35073409461664, "grad_norm": 0.001864943071268499, "learning_rate": 5.251058922395368e-05, "loss": 0.0005, "num_input_tokens_seen": 229754544, "step": 106360 }, { "epoch": 17.351549755301793, "grad_norm": 0.00036566847120411694, "learning_rate": 5.24788398254169e-05, "loss": 0.004, "num_input_tokens_seen": 229765200, "step": 106365 }, { "epoch": 17.35236541598695, "grad_norm": 0.006779797375202179, "learning_rate": 5.2447099496463925e-05, "loss": 0.005, "num_input_tokens_seen": 229775568, "step": 106370 }, { "epoch": 17.353181076672104, "grad_norm": 0.0017445924459025264, "learning_rate": 5.241536823773846e-05, "loss": 0.0009, "num_input_tokens_seen": 229787152, "step": 106375 }, { "epoch": 17.35399673735726, "grad_norm": 0.022132201120257378, "learning_rate": 5.238364604988316e-05, "loss": 0.0015, "num_input_tokens_seen": 229796560, "step": 106380 }, { "epoch": 17.354812398042416, "grad_norm": 0.38006922602653503, "learning_rate": 5.235193293354129e-05, "loss": 0.0359, "num_input_tokens_seen": 229806512, "step": 106385 }, { "epoch": 17.355628058727568, "grad_norm": 0.0037477388978004456, "learning_rate": 5.2320228889355224e-05, "loss": 0.0028, "num_input_tokens_seen": 229816816, "step": 106390 }, { "epoch": 17.356443719412724, "grad_norm": 0.004561097361147404, "learning_rate": 5.228853391796784e-05, "loss": 0.0007, "num_input_tokens_seen": 229826544, "step": 106395 }, { "epoch": 17.35725938009788, "grad_norm": 0.015991326421499252, "learning_rate": 5.225684802002106e-05, "loss": 0.0009, "num_input_tokens_seen": 229838064, "step": 106400 }, { "epoch": 17.358075040783035, "grad_norm": 0.023101402446627617, "learning_rate": 5.222517119615733e-05, "loss": 0.0012, "num_input_tokens_seen": 229848880, "step": 106405 }, { "epoch": 17.35889070146819, "grad_norm": 0.000387304782634601, "learning_rate": 5.2193503447018564e-05, "loss": 0.0005, "num_input_tokens_seen": 229860720, "step": 106410 }, { "epoch": 17.359706362153343, "grad_norm": 0.0031212973408401012, "learning_rate": 5.216184477324659e-05, "loss": 0.0008, "num_input_tokens_seen": 229871984, "step": 106415 }, { "epoch": 17.3605220228385, "grad_norm": 0.0009755408391356468, "learning_rate": 5.2130195175482896e-05, "loss": 0.0005, "num_input_tokens_seen": 229881808, "step": 106420 }, { "epoch": 17.361337683523654, "grad_norm": 0.00044328568037599325, "learning_rate": 5.209855465436897e-05, "loss": 0.0003, "num_input_tokens_seen": 229891728, "step": 106425 }, { "epoch": 17.36215334420881, "grad_norm": 0.015467526391148567, "learning_rate": 5.2066923210546015e-05, "loss": 0.0166, "num_input_tokens_seen": 229902288, "step": 106430 }, { "epoch": 17.362969004893966, "grad_norm": 0.0005860764067620039, "learning_rate": 5.203530084465513e-05, "loss": 0.0004, "num_input_tokens_seen": 229911824, "step": 106435 }, { "epoch": 17.363784665579118, "grad_norm": 0.0004039146879222244, "learning_rate": 5.20036875573372e-05, "loss": 0.0085, "num_input_tokens_seen": 229923472, "step": 106440 }, { "epoch": 17.364600326264274, "grad_norm": 0.0005128368502482772, "learning_rate": 5.197208334923281e-05, "loss": 0.0011, "num_input_tokens_seen": 229934064, "step": 106445 }, { "epoch": 17.36541598694943, "grad_norm": 0.0003911785315722227, "learning_rate": 5.1940488220982516e-05, "loss": 0.0022, "num_input_tokens_seen": 229944496, "step": 106450 }, { "epoch": 17.366231647634585, "grad_norm": 0.7548543214797974, "learning_rate": 5.1908902173226524e-05, "loss": 0.0669, "num_input_tokens_seen": 229955504, "step": 106455 }, { "epoch": 17.36704730831974, "grad_norm": 0.008681437000632286, "learning_rate": 5.1877325206605316e-05, "loss": 0.0024, "num_input_tokens_seen": 229966544, "step": 106460 }, { "epoch": 17.367862969004893, "grad_norm": 0.0003187756519764662, "learning_rate": 5.1845757321758394e-05, "loss": 0.001, "num_input_tokens_seen": 229977520, "step": 106465 }, { "epoch": 17.36867862969005, "grad_norm": 0.0001434768782928586, "learning_rate": 5.181419851932589e-05, "loss": 0.0005, "num_input_tokens_seen": 229989072, "step": 106470 }, { "epoch": 17.369494290375204, "grad_norm": 0.0026647213380783796, "learning_rate": 5.178264879994704e-05, "loss": 0.002, "num_input_tokens_seen": 229999984, "step": 106475 }, { "epoch": 17.37030995106036, "grad_norm": 0.0023641285952180624, "learning_rate": 5.17511081642616e-05, "loss": 0.0026, "num_input_tokens_seen": 230010704, "step": 106480 }, { "epoch": 17.371125611745512, "grad_norm": 0.7835355997085571, "learning_rate": 5.171957661290838e-05, "loss": 0.0747, "num_input_tokens_seen": 230022224, "step": 106485 }, { "epoch": 17.371941272430668, "grad_norm": 0.003112988080829382, "learning_rate": 5.1688054146526886e-05, "loss": 0.0016, "num_input_tokens_seen": 230033360, "step": 106490 }, { "epoch": 17.372756933115824, "grad_norm": 0.0008296226733364165, "learning_rate": 5.165654076575543e-05, "loss": 0.0033, "num_input_tokens_seen": 230044336, "step": 106495 }, { "epoch": 17.37357259380098, "grad_norm": 0.010152243077754974, "learning_rate": 5.162503647123318e-05, "loss": 0.0008, "num_input_tokens_seen": 230054288, "step": 106500 }, { "epoch": 17.374388254486135, "grad_norm": 0.0011838224017992616, "learning_rate": 5.159354126359816e-05, "loss": 0.001, "num_input_tokens_seen": 230066000, "step": 106505 }, { "epoch": 17.375203915171287, "grad_norm": 0.0012060764711350203, "learning_rate": 5.156205514348905e-05, "loss": 0.0006, "num_input_tokens_seen": 230075344, "step": 106510 }, { "epoch": 17.376019575856443, "grad_norm": 0.003541109850630164, "learning_rate": 5.1530578111543605e-05, "loss": 0.0016, "num_input_tokens_seen": 230086192, "step": 106515 }, { "epoch": 17.3768352365416, "grad_norm": 0.001406823517754674, "learning_rate": 5.149911016840009e-05, "loss": 0.0058, "num_input_tokens_seen": 230097808, "step": 106520 }, { "epoch": 17.377650897226754, "grad_norm": 0.006291515659540892, "learning_rate": 5.146765131469594e-05, "loss": 0.005, "num_input_tokens_seen": 230108176, "step": 106525 }, { "epoch": 17.37846655791191, "grad_norm": 0.00061332545010373, "learning_rate": 5.1436201551068987e-05, "loss": 0.0009, "num_input_tokens_seen": 230118672, "step": 106530 }, { "epoch": 17.379282218597062, "grad_norm": 0.0031427890062332153, "learning_rate": 5.140476087815621e-05, "loss": 0.0013, "num_input_tokens_seen": 230129712, "step": 106535 }, { "epoch": 17.380097879282218, "grad_norm": 0.002123972401022911, "learning_rate": 5.137332929659522e-05, "loss": 0.0005, "num_input_tokens_seen": 230141648, "step": 106540 }, { "epoch": 17.380913539967374, "grad_norm": 0.000174164364580065, "learning_rate": 5.134190680702278e-05, "loss": 0.0152, "num_input_tokens_seen": 230151504, "step": 106545 }, { "epoch": 17.38172920065253, "grad_norm": 0.021573202684521675, "learning_rate": 5.1310493410075765e-05, "loss": 0.0011, "num_input_tokens_seen": 230162512, "step": 106550 }, { "epoch": 17.382544861337685, "grad_norm": 0.017056437209248543, "learning_rate": 5.127908910639084e-05, "loss": 0.031, "num_input_tokens_seen": 230173968, "step": 106555 }, { "epoch": 17.383360522022837, "grad_norm": 0.0004546472628135234, "learning_rate": 5.1247693896604386e-05, "loss": 0.021, "num_input_tokens_seen": 230184016, "step": 106560 }, { "epoch": 17.384176182707993, "grad_norm": 0.009080810472369194, "learning_rate": 5.1216307781352724e-05, "loss": 0.0009, "num_input_tokens_seen": 230193648, "step": 106565 }, { "epoch": 17.38499184339315, "grad_norm": 0.22605130076408386, "learning_rate": 5.11849307612719e-05, "loss": 0.0077, "num_input_tokens_seen": 230204912, "step": 106570 }, { "epoch": 17.385807504078304, "grad_norm": 0.04267728328704834, "learning_rate": 5.115356283699779e-05, "loss": 0.0012, "num_input_tokens_seen": 230216368, "step": 106575 }, { "epoch": 17.38662316476346, "grad_norm": 0.012378888204693794, "learning_rate": 5.112220400916617e-05, "loss": 0.0009, "num_input_tokens_seen": 230227664, "step": 106580 }, { "epoch": 17.387438825448612, "grad_norm": 0.005728128831833601, "learning_rate": 5.109085427841248e-05, "loss": 0.0012, "num_input_tokens_seen": 230239312, "step": 106585 }, { "epoch": 17.388254486133768, "grad_norm": 0.0034778222907334566, "learning_rate": 5.1059513645372146e-05, "loss": 0.0006, "num_input_tokens_seen": 230249616, "step": 106590 }, { "epoch": 17.389070146818923, "grad_norm": 0.000555566162802279, "learning_rate": 5.1028182110680275e-05, "loss": 0.0007, "num_input_tokens_seen": 230260400, "step": 106595 }, { "epoch": 17.38988580750408, "grad_norm": 0.001118313753977418, "learning_rate": 5.0996859674971805e-05, "loss": 0.0038, "num_input_tokens_seen": 230271184, "step": 106600 }, { "epoch": 17.390701468189235, "grad_norm": 0.002053825417533517, "learning_rate": 5.096554633888173e-05, "loss": 0.0006, "num_input_tokens_seen": 230282576, "step": 106605 }, { "epoch": 17.391517128874387, "grad_norm": 0.008048903197050095, "learning_rate": 5.093424210304426e-05, "loss": 0.0011, "num_input_tokens_seen": 230293392, "step": 106610 }, { "epoch": 17.392332789559543, "grad_norm": 0.003288182895630598, "learning_rate": 5.090294696809428e-05, "loss": 0.0004, "num_input_tokens_seen": 230303216, "step": 106615 }, { "epoch": 17.3931484502447, "grad_norm": 0.005758529528975487, "learning_rate": 5.087166093466566e-05, "loss": 0.0006, "num_input_tokens_seen": 230315152, "step": 106620 }, { "epoch": 17.393964110929854, "grad_norm": 0.6126771569252014, "learning_rate": 5.0840384003392745e-05, "loss": 0.0095, "num_input_tokens_seen": 230325328, "step": 106625 }, { "epoch": 17.39477977161501, "grad_norm": 0.001895575551316142, "learning_rate": 5.080911617490902e-05, "loss": 0.0013, "num_input_tokens_seen": 230335152, "step": 106630 }, { "epoch": 17.395595432300162, "grad_norm": 0.00044205409358255565, "learning_rate": 5.0777857449848644e-05, "loss": 0.0009, "num_input_tokens_seen": 230346224, "step": 106635 }, { "epoch": 17.396411092985318, "grad_norm": 0.005649374332278967, "learning_rate": 5.074660782884461e-05, "loss": 0.0008, "num_input_tokens_seen": 230357744, "step": 106640 }, { "epoch": 17.397226753670473, "grad_norm": 0.0003312532207928598, "learning_rate": 5.071536731253074e-05, "loss": 0.0019, "num_input_tokens_seen": 230368912, "step": 106645 }, { "epoch": 17.39804241435563, "grad_norm": 0.0009800927946344018, "learning_rate": 5.0684135901539694e-05, "loss": 0.0004, "num_input_tokens_seen": 230379600, "step": 106650 }, { "epoch": 17.39885807504078, "grad_norm": 0.002080594189465046, "learning_rate": 5.0652913596504704e-05, "loss": 0.001, "num_input_tokens_seen": 230390704, "step": 106655 }, { "epoch": 17.399673735725937, "grad_norm": 0.03588714450597763, "learning_rate": 5.062170039805847e-05, "loss": 0.0947, "num_input_tokens_seen": 230402416, "step": 106660 }, { "epoch": 17.400489396411093, "grad_norm": 0.0012130021350458264, "learning_rate": 5.05904963068336e-05, "loss": 0.0085, "num_input_tokens_seen": 230413360, "step": 106665 }, { "epoch": 17.40130505709625, "grad_norm": 0.044631388038396835, "learning_rate": 5.055930132346237e-05, "loss": 0.0032, "num_input_tokens_seen": 230423792, "step": 106670 }, { "epoch": 17.402120717781404, "grad_norm": 0.001390898018144071, "learning_rate": 5.0528115448577105e-05, "loss": 0.0007, "num_input_tokens_seen": 230434352, "step": 106675 }, { "epoch": 17.402936378466556, "grad_norm": 0.5931783318519592, "learning_rate": 5.0496938682809744e-05, "loss": 0.0803, "num_input_tokens_seen": 230445680, "step": 106680 }, { "epoch": 17.403752039151712, "grad_norm": 0.0012526774080470204, "learning_rate": 5.0465771026792175e-05, "loss": 0.001, "num_input_tokens_seen": 230456336, "step": 106685 }, { "epoch": 17.404567699836868, "grad_norm": 0.0003373456420376897, "learning_rate": 5.043461248115605e-05, "loss": 0.0042, "num_input_tokens_seen": 230468368, "step": 106690 }, { "epoch": 17.405383360522023, "grad_norm": 0.05024491995573044, "learning_rate": 5.040346304653276e-05, "loss": 0.057, "num_input_tokens_seen": 230479408, "step": 106695 }, { "epoch": 17.40619902120718, "grad_norm": 0.0005603748722933233, "learning_rate": 5.037232272355369e-05, "loss": 0.0018, "num_input_tokens_seen": 230490800, "step": 106700 }, { "epoch": 17.40701468189233, "grad_norm": 0.00028684749850071967, "learning_rate": 5.034119151284988e-05, "loss": 0.0014, "num_input_tokens_seen": 230502832, "step": 106705 }, { "epoch": 17.407830342577487, "grad_norm": 0.007525811903178692, "learning_rate": 5.031006941505228e-05, "loss": 0.0007, "num_input_tokens_seen": 230513744, "step": 106710 }, { "epoch": 17.408646003262643, "grad_norm": 0.00031510432017967105, "learning_rate": 5.0278956430791555e-05, "loss": 0.0008, "num_input_tokens_seen": 230524336, "step": 106715 }, { "epoch": 17.4094616639478, "grad_norm": 0.0023549527395516634, "learning_rate": 5.0247852560698304e-05, "loss": 0.0004, "num_input_tokens_seen": 230535440, "step": 106720 }, { "epoch": 17.410277324632954, "grad_norm": 0.0041503324173390865, "learning_rate": 5.0216757805402856e-05, "loss": 0.0012, "num_input_tokens_seen": 230544336, "step": 106725 }, { "epoch": 17.411092985318106, "grad_norm": 0.0039215851575136185, "learning_rate": 5.018567216553543e-05, "loss": 0.001, "num_input_tokens_seen": 230555792, "step": 106730 }, { "epoch": 17.411908646003262, "grad_norm": 0.005930093117058277, "learning_rate": 5.015459564172597e-05, "loss": 0.0129, "num_input_tokens_seen": 230567536, "step": 106735 }, { "epoch": 17.412724306688418, "grad_norm": 0.0019989213906228542, "learning_rate": 5.0123528234604307e-05, "loss": 0.0511, "num_input_tokens_seen": 230578736, "step": 106740 }, { "epoch": 17.413539967373573, "grad_norm": 0.049455612897872925, "learning_rate": 5.009246994479999e-05, "loss": 0.0012, "num_input_tokens_seen": 230588432, "step": 106745 }, { "epoch": 17.41435562805873, "grad_norm": 0.0006300982204265893, "learning_rate": 5.006142077294268e-05, "loss": 0.0125, "num_input_tokens_seen": 230599120, "step": 106750 }, { "epoch": 17.41517128874388, "grad_norm": 0.0014582215808331966, "learning_rate": 5.003038071966126e-05, "loss": 0.0017, "num_input_tokens_seen": 230610960, "step": 106755 }, { "epoch": 17.415986949429037, "grad_norm": 0.015856770798563957, "learning_rate": 4.999934978558513e-05, "loss": 0.08, "num_input_tokens_seen": 230621136, "step": 106760 }, { "epoch": 17.416802610114193, "grad_norm": 0.007289855740964413, "learning_rate": 4.996832797134299e-05, "loss": 0.0006, "num_input_tokens_seen": 230632368, "step": 106765 }, { "epoch": 17.41761827079935, "grad_norm": 0.002110978588461876, "learning_rate": 4.9937315277563625e-05, "loss": 0.0008, "num_input_tokens_seen": 230643600, "step": 106770 }, { "epoch": 17.418433931484504, "grad_norm": 0.012086848728358746, "learning_rate": 4.990631170487553e-05, "loss": 0.0022, "num_input_tokens_seen": 230655696, "step": 106775 }, { "epoch": 17.419249592169656, "grad_norm": 0.08439251780509949, "learning_rate": 4.987531725390698e-05, "loss": 0.0095, "num_input_tokens_seen": 230667920, "step": 106780 }, { "epoch": 17.420065252854812, "grad_norm": 0.1708887368440628, "learning_rate": 4.9844331925286145e-05, "loss": 0.0077, "num_input_tokens_seen": 230678800, "step": 106785 }, { "epoch": 17.420880913539968, "grad_norm": 0.009966623969376087, "learning_rate": 4.981335571964102e-05, "loss": 0.0005, "num_input_tokens_seen": 230689360, "step": 106790 }, { "epoch": 17.421696574225123, "grad_norm": 0.007955643348395824, "learning_rate": 4.978238863759932e-05, "loss": 0.0005, "num_input_tokens_seen": 230698672, "step": 106795 }, { "epoch": 17.42251223491028, "grad_norm": 0.00029836222529411316, "learning_rate": 4.975143067978866e-05, "loss": 0.0013, "num_input_tokens_seen": 230709360, "step": 106800 }, { "epoch": 17.42332789559543, "grad_norm": 0.0006177327013574541, "learning_rate": 4.9720481846836416e-05, "loss": 0.0015, "num_input_tokens_seen": 230721008, "step": 106805 }, { "epoch": 17.424143556280587, "grad_norm": 0.013303990475833416, "learning_rate": 4.968954213936988e-05, "loss": 0.0028, "num_input_tokens_seen": 230733872, "step": 106810 }, { "epoch": 17.424959216965743, "grad_norm": 0.003197494661435485, "learning_rate": 4.9658611558015984e-05, "loss": 0.0006, "num_input_tokens_seen": 230744880, "step": 106815 }, { "epoch": 17.4257748776509, "grad_norm": 0.00020710163516923785, "learning_rate": 4.962769010340163e-05, "loss": 0.0011, "num_input_tokens_seen": 230755504, "step": 106820 }, { "epoch": 17.42659053833605, "grad_norm": 0.00011777384497690946, "learning_rate": 4.959677777615351e-05, "loss": 0.0023, "num_input_tokens_seen": 230765968, "step": 106825 }, { "epoch": 17.427406199021206, "grad_norm": 0.03826236352324486, "learning_rate": 4.956587457689804e-05, "loss": 0.0132, "num_input_tokens_seen": 230777296, "step": 106830 }, { "epoch": 17.428221859706362, "grad_norm": 0.000653933675494045, "learning_rate": 4.953498050626154e-05, "loss": 0.0007, "num_input_tokens_seen": 230788944, "step": 106835 }, { "epoch": 17.429037520391518, "grad_norm": 0.0026093318592756987, "learning_rate": 4.9504095564870124e-05, "loss": 0.0008, "num_input_tokens_seen": 230799984, "step": 106840 }, { "epoch": 17.429853181076673, "grad_norm": 0.0002395760966464877, "learning_rate": 4.947321975334967e-05, "loss": 0.0008, "num_input_tokens_seen": 230810096, "step": 106845 }, { "epoch": 17.430668841761825, "grad_norm": 0.0016788537614047527, "learning_rate": 4.944235307232597e-05, "loss": 0.0008, "num_input_tokens_seen": 230820688, "step": 106850 }, { "epoch": 17.43148450244698, "grad_norm": 0.008425015024840832, "learning_rate": 4.941149552242458e-05, "loss": 0.0012, "num_input_tokens_seen": 230832848, "step": 106855 }, { "epoch": 17.432300163132137, "grad_norm": 0.0009443744784221053, "learning_rate": 4.9380647104270814e-05, "loss": 0.002, "num_input_tokens_seen": 230843088, "step": 106860 }, { "epoch": 17.433115823817293, "grad_norm": 0.0007148012518882751, "learning_rate": 4.93498078184898e-05, "loss": 0.001, "num_input_tokens_seen": 230853616, "step": 106865 }, { "epoch": 17.43393148450245, "grad_norm": 0.0006740608369000256, "learning_rate": 4.9318977665706866e-05, "loss": 0.0043, "num_input_tokens_seen": 230864336, "step": 106870 }, { "epoch": 17.4347471451876, "grad_norm": 0.0037371008656919003, "learning_rate": 4.928815664654635e-05, "loss": 0.0015, "num_input_tokens_seen": 230875312, "step": 106875 }, { "epoch": 17.435562805872756, "grad_norm": 0.036037005484104156, "learning_rate": 4.9257344761633236e-05, "loss": 0.0014, "num_input_tokens_seen": 230885936, "step": 106880 }, { "epoch": 17.436378466557912, "grad_norm": 0.0017335086595267057, "learning_rate": 4.9226542011591716e-05, "loss": 0.0003, "num_input_tokens_seen": 230896688, "step": 106885 }, { "epoch": 17.437194127243067, "grad_norm": 0.015234340913593769, "learning_rate": 4.919574839704627e-05, "loss": 0.0158, "num_input_tokens_seen": 230907408, "step": 106890 }, { "epoch": 17.438009787928223, "grad_norm": 0.005732585676014423, "learning_rate": 4.916496391862085e-05, "loss": 0.0337, "num_input_tokens_seen": 230918256, "step": 106895 }, { "epoch": 17.438825448613375, "grad_norm": 0.0019450652180239558, "learning_rate": 4.913418857693936e-05, "loss": 0.0017, "num_input_tokens_seen": 230928944, "step": 106900 }, { "epoch": 17.43964110929853, "grad_norm": 0.051235880702733994, "learning_rate": 4.9103422372625496e-05, "loss": 0.0013, "num_input_tokens_seen": 230939408, "step": 106905 }, { "epoch": 17.440456769983687, "grad_norm": 0.10393361747264862, "learning_rate": 4.907266530630278e-05, "loss": 0.0035, "num_input_tokens_seen": 230949296, "step": 106910 }, { "epoch": 17.441272430668842, "grad_norm": 0.001110541052184999, "learning_rate": 4.904191737859454e-05, "loss": 0.0005, "num_input_tokens_seen": 230960592, "step": 106915 }, { "epoch": 17.442088091353998, "grad_norm": 0.013865980319678783, "learning_rate": 4.901117859012394e-05, "loss": 0.0014, "num_input_tokens_seen": 230971504, "step": 106920 }, { "epoch": 17.44290375203915, "grad_norm": 0.0016207977896556258, "learning_rate": 4.898044894151393e-05, "loss": 0.0003, "num_input_tokens_seen": 230981200, "step": 106925 }, { "epoch": 17.443719412724306, "grad_norm": 0.00044690087088383734, "learning_rate": 4.894972843338724e-05, "loss": 0.0041, "num_input_tokens_seen": 230991920, "step": 106930 }, { "epoch": 17.44453507340946, "grad_norm": 0.00033451549825258553, "learning_rate": 4.891901706636653e-05, "loss": 0.0007, "num_input_tokens_seen": 231002480, "step": 106935 }, { "epoch": 17.445350734094617, "grad_norm": 0.0024510840885341167, "learning_rate": 4.88883148410742e-05, "loss": 0.0008, "num_input_tokens_seen": 231013904, "step": 106940 }, { "epoch": 17.446166394779773, "grad_norm": 0.0023836391046643257, "learning_rate": 4.885762175813241e-05, "loss": 0.0009, "num_input_tokens_seen": 231024528, "step": 106945 }, { "epoch": 17.446982055464925, "grad_norm": 0.005944438744336367, "learning_rate": 4.882693781816327e-05, "loss": 0.0015, "num_input_tokens_seen": 231036048, "step": 106950 }, { "epoch": 17.44779771615008, "grad_norm": 0.0009455296094529331, "learning_rate": 4.8796263021788524e-05, "loss": 0.0003, "num_input_tokens_seen": 231047344, "step": 106955 }, { "epoch": 17.448613376835237, "grad_norm": 0.026120547205209732, "learning_rate": 4.876559736962999e-05, "loss": 0.0009, "num_input_tokens_seen": 231059440, "step": 106960 }, { "epoch": 17.449429037520392, "grad_norm": 0.002475123852491379, "learning_rate": 4.8734940862309006e-05, "loss": 0.0003, "num_input_tokens_seen": 231071088, "step": 106965 }, { "epoch": 17.450244698205548, "grad_norm": 0.045191384851932526, "learning_rate": 4.8704293500446806e-05, "loss": 0.0101, "num_input_tokens_seen": 231082576, "step": 106970 }, { "epoch": 17.4510603588907, "grad_norm": 0.007040271535515785, "learning_rate": 4.867365528466477e-05, "loss": 0.0007, "num_input_tokens_seen": 231092816, "step": 106975 }, { "epoch": 17.451876019575856, "grad_norm": 0.0007439907640218735, "learning_rate": 4.864302621558353e-05, "loss": 0.003, "num_input_tokens_seen": 231103760, "step": 106980 }, { "epoch": 17.45269168026101, "grad_norm": 0.2785327434539795, "learning_rate": 4.861240629382413e-05, "loss": 0.0085, "num_input_tokens_seen": 231114800, "step": 106985 }, { "epoch": 17.453507340946167, "grad_norm": 0.030314238741993904, "learning_rate": 4.858179552000674e-05, "loss": 0.003, "num_input_tokens_seen": 231126064, "step": 106990 }, { "epoch": 17.454323001631323, "grad_norm": 0.0005355747998692095, "learning_rate": 4.85511938947521e-05, "loss": 0.0007, "num_input_tokens_seen": 231137424, "step": 106995 }, { "epoch": 17.455138662316475, "grad_norm": 0.006188563071191311, "learning_rate": 4.8520601418680085e-05, "loss": 0.0011, "num_input_tokens_seen": 231148464, "step": 107000 }, { "epoch": 17.45595432300163, "grad_norm": 0.00108015863224864, "learning_rate": 4.849001809241099e-05, "loss": 0.0026, "num_input_tokens_seen": 231159696, "step": 107005 }, { "epoch": 17.456769983686787, "grad_norm": 0.0003742810513358563, "learning_rate": 4.845944391656426e-05, "loss": 0.0007, "num_input_tokens_seen": 231170096, "step": 107010 }, { "epoch": 17.457585644371942, "grad_norm": 0.0034818367566913366, "learning_rate": 4.84288788917599e-05, "loss": 0.0004, "num_input_tokens_seen": 231181232, "step": 107015 }, { "epoch": 17.458401305057095, "grad_norm": 0.0006757063092663884, "learning_rate": 4.839832301861696e-05, "loss": 0.0005, "num_input_tokens_seen": 231192848, "step": 107020 }, { "epoch": 17.45921696574225, "grad_norm": 0.0012268743012100458, "learning_rate": 4.836777629775513e-05, "loss": 0.0044, "num_input_tokens_seen": 231204144, "step": 107025 }, { "epoch": 17.460032626427406, "grad_norm": 0.0005029301391914487, "learning_rate": 4.833723872979306e-05, "loss": 0.0003, "num_input_tokens_seen": 231215344, "step": 107030 }, { "epoch": 17.46084828711256, "grad_norm": 0.0007763210451230407, "learning_rate": 4.830671031534989e-05, "loss": 0.0008, "num_input_tokens_seen": 231226416, "step": 107035 }, { "epoch": 17.461663947797717, "grad_norm": 0.010013229213654995, "learning_rate": 4.827619105504427e-05, "loss": 0.007, "num_input_tokens_seen": 231237840, "step": 107040 }, { "epoch": 17.46247960848287, "grad_norm": 0.00031955906888470054, "learning_rate": 4.8245680949494664e-05, "loss": 0.0003, "num_input_tokens_seen": 231247312, "step": 107045 }, { "epoch": 17.463295269168025, "grad_norm": 0.00021452225337270647, "learning_rate": 4.821517999931946e-05, "loss": 0.0004, "num_input_tokens_seen": 231258320, "step": 107050 }, { "epoch": 17.46411092985318, "grad_norm": 0.08768285810947418, "learning_rate": 4.8184688205136716e-05, "loss": 0.0026, "num_input_tokens_seen": 231268080, "step": 107055 }, { "epoch": 17.464926590538337, "grad_norm": 0.0003200488572474569, "learning_rate": 4.8154205567564503e-05, "loss": 0.0014, "num_input_tokens_seen": 231277552, "step": 107060 }, { "epoch": 17.465742251223492, "grad_norm": 0.023660294711589813, "learning_rate": 4.812373208722048e-05, "loss": 0.0016, "num_input_tokens_seen": 231289360, "step": 107065 }, { "epoch": 17.466557911908644, "grad_norm": 0.002158315386623144, "learning_rate": 4.809326776472228e-05, "loss": 0.0026, "num_input_tokens_seen": 231299888, "step": 107070 }, { "epoch": 17.4673735725938, "grad_norm": 0.0004791323735844344, "learning_rate": 4.806281260068729e-05, "loss": 0.0007, "num_input_tokens_seen": 231311568, "step": 107075 }, { "epoch": 17.468189233278956, "grad_norm": 0.0012294030748307705, "learning_rate": 4.803236659573274e-05, "loss": 0.0007, "num_input_tokens_seen": 231322032, "step": 107080 }, { "epoch": 17.46900489396411, "grad_norm": 1.8501989841461182, "learning_rate": 4.800192975047551e-05, "loss": 0.0749, "num_input_tokens_seen": 231332880, "step": 107085 }, { "epoch": 17.469820554649267, "grad_norm": 0.016211597248911858, "learning_rate": 4.79715020655328e-05, "loss": 0.0043, "num_input_tokens_seen": 231344560, "step": 107090 }, { "epoch": 17.47063621533442, "grad_norm": 0.0010530364234000444, "learning_rate": 4.794108354152082e-05, "loss": 0.0008, "num_input_tokens_seen": 231354896, "step": 107095 }, { "epoch": 17.471451876019575, "grad_norm": 0.0028281863778829575, "learning_rate": 4.791067417905648e-05, "loss": 0.0016, "num_input_tokens_seen": 231365648, "step": 107100 }, { "epoch": 17.47226753670473, "grad_norm": 0.5425127744674683, "learning_rate": 4.7880273978755606e-05, "loss": 0.164, "num_input_tokens_seen": 231375728, "step": 107105 }, { "epoch": 17.473083197389887, "grad_norm": 0.00036838767118752003, "learning_rate": 4.784988294123477e-05, "loss": 0.0012, "num_input_tokens_seen": 231387376, "step": 107110 }, { "epoch": 17.473898858075042, "grad_norm": 0.022055508568882942, "learning_rate": 4.781950106710942e-05, "loss": 0.002, "num_input_tokens_seen": 231397776, "step": 107115 }, { "epoch": 17.474714518760194, "grad_norm": 0.0015256558544933796, "learning_rate": 4.7789128356995727e-05, "loss": 0.002, "num_input_tokens_seen": 231408464, "step": 107120 }, { "epoch": 17.47553017944535, "grad_norm": 0.8088774681091309, "learning_rate": 4.775876481150887e-05, "loss": 0.1319, "num_input_tokens_seen": 231419312, "step": 107125 }, { "epoch": 17.476345840130506, "grad_norm": 0.0034476907458156347, "learning_rate": 4.772841043126447e-05, "loss": 0.0004, "num_input_tokens_seen": 231429648, "step": 107130 }, { "epoch": 17.47716150081566, "grad_norm": 0.0049369195476174355, "learning_rate": 4.769806521687742e-05, "loss": 0.0005, "num_input_tokens_seen": 231440208, "step": 107135 }, { "epoch": 17.477977161500817, "grad_norm": 0.004842468071728945, "learning_rate": 4.766772916896306e-05, "loss": 0.021, "num_input_tokens_seen": 231451856, "step": 107140 }, { "epoch": 17.47879282218597, "grad_norm": 0.014109466224908829, "learning_rate": 4.763740228813579e-05, "loss": 0.0014, "num_input_tokens_seen": 231463888, "step": 107145 }, { "epoch": 17.479608482871125, "grad_norm": 0.008914794772863388, "learning_rate": 4.760708457501062e-05, "loss": 0.0042, "num_input_tokens_seen": 231475184, "step": 107150 }, { "epoch": 17.48042414355628, "grad_norm": 0.0373489186167717, "learning_rate": 4.7576776030201606e-05, "loss": 0.0017, "num_input_tokens_seen": 231487312, "step": 107155 }, { "epoch": 17.481239804241437, "grad_norm": 0.01696755364537239, "learning_rate": 4.754647665432338e-05, "loss": 0.0108, "num_input_tokens_seen": 231497488, "step": 107160 }, { "epoch": 17.482055464926592, "grad_norm": 0.0018327207071706653, "learning_rate": 4.751618644798955e-05, "loss": 0.0017, "num_input_tokens_seen": 231508080, "step": 107165 }, { "epoch": 17.482871125611744, "grad_norm": 0.00462839612737298, "learning_rate": 4.7485905411814414e-05, "loss": 0.0016, "num_input_tokens_seen": 231518480, "step": 107170 }, { "epoch": 17.4836867862969, "grad_norm": 0.00042224518256261945, "learning_rate": 4.745563354641125e-05, "loss": 0.0007, "num_input_tokens_seen": 231528496, "step": 107175 }, { "epoch": 17.484502446982056, "grad_norm": 0.0018490392249077559, "learning_rate": 4.74253708523939e-05, "loss": 0.0008, "num_input_tokens_seen": 231538128, "step": 107180 }, { "epoch": 17.48531810766721, "grad_norm": 0.0025340570136904716, "learning_rate": 4.7395117330375494e-05, "loss": 0.0138, "num_input_tokens_seen": 231548688, "step": 107185 }, { "epoch": 17.486133768352367, "grad_norm": 0.024287715554237366, "learning_rate": 4.7364872980969254e-05, "loss": 0.0073, "num_input_tokens_seen": 231560080, "step": 107190 }, { "epoch": 17.48694942903752, "grad_norm": 0.0004691890790127218, "learning_rate": 4.733463780478808e-05, "loss": 0.02, "num_input_tokens_seen": 231571952, "step": 107195 }, { "epoch": 17.487765089722675, "grad_norm": 0.001160036539658904, "learning_rate": 4.7304411802444656e-05, "loss": 0.0012, "num_input_tokens_seen": 231582736, "step": 107200 }, { "epoch": 17.48858075040783, "grad_norm": 0.9447407126426697, "learning_rate": 4.7274194974551656e-05, "loss": 0.0331, "num_input_tokens_seen": 231593872, "step": 107205 }, { "epoch": 17.489396411092986, "grad_norm": 0.0005999091663397849, "learning_rate": 4.724398732172142e-05, "loss": 0.0007, "num_input_tokens_seen": 231603216, "step": 107210 }, { "epoch": 17.49021207177814, "grad_norm": 0.006160971242934465, "learning_rate": 4.721378884456612e-05, "loss": 0.0251, "num_input_tokens_seen": 231614544, "step": 107215 }, { "epoch": 17.491027732463294, "grad_norm": 0.0011769005795940757, "learning_rate": 4.718359954369783e-05, "loss": 0.0015, "num_input_tokens_seen": 231624048, "step": 107220 }, { "epoch": 17.49184339314845, "grad_norm": 0.0006996638257987797, "learning_rate": 4.7153419419728285e-05, "loss": 0.0007, "num_input_tokens_seen": 231634800, "step": 107225 }, { "epoch": 17.492659053833606, "grad_norm": 0.005700491834431887, "learning_rate": 4.7123248473269096e-05, "loss": 0.0013, "num_input_tokens_seen": 231645456, "step": 107230 }, { "epoch": 17.49347471451876, "grad_norm": 0.009123490191996098, "learning_rate": 4.7093086704931955e-05, "loss": 0.0028, "num_input_tokens_seen": 231657136, "step": 107235 }, { "epoch": 17.494290375203914, "grad_norm": 0.91878342628479, "learning_rate": 4.7062934115327804e-05, "loss": 0.005, "num_input_tokens_seen": 231668368, "step": 107240 }, { "epoch": 17.49510603588907, "grad_norm": 0.0010388504015281796, "learning_rate": 4.7032790705068105e-05, "loss": 0.0005, "num_input_tokens_seen": 231679088, "step": 107245 }, { "epoch": 17.495921696574225, "grad_norm": 0.0025188778527081013, "learning_rate": 4.700265647476332e-05, "loss": 0.001, "num_input_tokens_seen": 231690480, "step": 107250 }, { "epoch": 17.49673735725938, "grad_norm": 0.03646330535411835, "learning_rate": 4.69725314250245e-05, "loss": 0.0037, "num_input_tokens_seen": 231701744, "step": 107255 }, { "epoch": 17.497553017944536, "grad_norm": 0.004327393136918545, "learning_rate": 4.6942415556461894e-05, "loss": 0.0021, "num_input_tokens_seen": 231712944, "step": 107260 }, { "epoch": 17.49836867862969, "grad_norm": 0.007958785630762577, "learning_rate": 4.691230886968617e-05, "loss": 0.0007, "num_input_tokens_seen": 231723088, "step": 107265 }, { "epoch": 17.499184339314844, "grad_norm": 0.0002876732323784381, "learning_rate": 4.688221136530712e-05, "loss": 0.0009, "num_input_tokens_seen": 231733072, "step": 107270 }, { "epoch": 17.5, "grad_norm": 0.015398185700178146, "learning_rate": 4.6852123043935044e-05, "loss": 0.0016, "num_input_tokens_seen": 231743824, "step": 107275 }, { "epoch": 17.500815660685156, "grad_norm": 0.0010171079775318503, "learning_rate": 4.682204390617939e-05, "loss": 0.0005, "num_input_tokens_seen": 231754192, "step": 107280 }, { "epoch": 17.50163132137031, "grad_norm": 0.01936420053243637, "learning_rate": 4.6791973952650056e-05, "loss": 0.001, "num_input_tokens_seen": 231765200, "step": 107285 }, { "epoch": 17.502446982055464, "grad_norm": 0.38633960485458374, "learning_rate": 4.6761913183956175e-05, "loss": 0.0156, "num_input_tokens_seen": 231776368, "step": 107290 }, { "epoch": 17.50326264274062, "grad_norm": 0.004747708793729544, "learning_rate": 4.673186160070714e-05, "loss": 0.0036, "num_input_tokens_seen": 231788112, "step": 107295 }, { "epoch": 17.504078303425775, "grad_norm": 0.02657368592917919, "learning_rate": 4.6701819203511964e-05, "loss": 0.0773, "num_input_tokens_seen": 231798480, "step": 107300 }, { "epoch": 17.50489396411093, "grad_norm": 0.00034374097594991326, "learning_rate": 4.667178599297944e-05, "loss": 0.1074, "num_input_tokens_seen": 231810256, "step": 107305 }, { "epoch": 17.505709624796086, "grad_norm": 0.002067964058369398, "learning_rate": 4.664176196971831e-05, "loss": 0.0007, "num_input_tokens_seen": 231821168, "step": 107310 }, { "epoch": 17.50652528548124, "grad_norm": 0.0008781051146797836, "learning_rate": 4.661174713433697e-05, "loss": 0.0154, "num_input_tokens_seen": 231831536, "step": 107315 }, { "epoch": 17.507340946166394, "grad_norm": 0.006266510114073753, "learning_rate": 4.6581741487443765e-05, "loss": 0.002, "num_input_tokens_seen": 231841520, "step": 107320 }, { "epoch": 17.50815660685155, "grad_norm": 0.00011919608368771151, "learning_rate": 4.655174502964676e-05, "loss": 0.0013, "num_input_tokens_seen": 231851184, "step": 107325 }, { "epoch": 17.508972267536706, "grad_norm": 0.0016120661748573184, "learning_rate": 4.6521757761553873e-05, "loss": 0.0016, "num_input_tokens_seen": 231862864, "step": 107330 }, { "epoch": 17.50978792822186, "grad_norm": 0.0043755825608968735, "learning_rate": 4.6491779683772825e-05, "loss": 0.0008, "num_input_tokens_seen": 231873456, "step": 107335 }, { "epoch": 17.510603588907014, "grad_norm": 0.0005683773779310286, "learning_rate": 4.64618107969112e-05, "loss": 0.0249, "num_input_tokens_seen": 231883792, "step": 107340 }, { "epoch": 17.51141924959217, "grad_norm": 0.0006177395698614419, "learning_rate": 4.643185110157633e-05, "loss": 0.002, "num_input_tokens_seen": 231894640, "step": 107345 }, { "epoch": 17.512234910277325, "grad_norm": 0.010703234001994133, "learning_rate": 4.640190059837535e-05, "loss": 0.0071, "num_input_tokens_seen": 231906000, "step": 107350 }, { "epoch": 17.51305057096248, "grad_norm": 0.0019364446634426713, "learning_rate": 4.637195928791532e-05, "loss": 0.0029, "num_input_tokens_seen": 231917232, "step": 107355 }, { "epoch": 17.513866231647633, "grad_norm": 0.0028983517549932003, "learning_rate": 4.634202717080305e-05, "loss": 0.0261, "num_input_tokens_seen": 231927120, "step": 107360 }, { "epoch": 17.51468189233279, "grad_norm": 0.0004596057115122676, "learning_rate": 4.6312104247645035e-05, "loss": 0.0015, "num_input_tokens_seen": 231937872, "step": 107365 }, { "epoch": 17.515497553017944, "grad_norm": 0.0075641958974301815, "learning_rate": 4.6282190519047805e-05, "loss": 0.0016, "num_input_tokens_seen": 231947984, "step": 107370 }, { "epoch": 17.5163132137031, "grad_norm": 0.0040355888195335865, "learning_rate": 4.625228598561748e-05, "loss": 0.0005, "num_input_tokens_seen": 231959248, "step": 107375 }, { "epoch": 17.517128874388256, "grad_norm": 0.0004805214412044734, "learning_rate": 4.6222390647960356e-05, "loss": 0.0006, "num_input_tokens_seen": 231970064, "step": 107380 }, { "epoch": 17.517944535073408, "grad_norm": 0.0017403739038854837, "learning_rate": 4.619250450668194e-05, "loss": 0.0032, "num_input_tokens_seen": 231980944, "step": 107385 }, { "epoch": 17.518760195758563, "grad_norm": 0.010331138968467712, "learning_rate": 4.616262756238837e-05, "loss": 0.0011, "num_input_tokens_seen": 231991088, "step": 107390 }, { "epoch": 17.51957585644372, "grad_norm": 0.0025749632623046637, "learning_rate": 4.613275981568465e-05, "loss": 0.0012, "num_input_tokens_seen": 232001968, "step": 107395 }, { "epoch": 17.520391517128875, "grad_norm": 0.011596056632697582, "learning_rate": 4.610290126717642e-05, "loss": 0.0031, "num_input_tokens_seen": 232012400, "step": 107400 }, { "epoch": 17.52120717781403, "grad_norm": 0.0006934392149560153, "learning_rate": 4.607305191746874e-05, "loss": 0.0004, "num_input_tokens_seen": 232023088, "step": 107405 }, { "epoch": 17.522022838499183, "grad_norm": 0.0007819249876774848, "learning_rate": 4.604321176716647e-05, "loss": 0.009, "num_input_tokens_seen": 232035024, "step": 107410 }, { "epoch": 17.52283849918434, "grad_norm": 0.000496567867230624, "learning_rate": 4.6013380816874394e-05, "loss": 0.0032, "num_input_tokens_seen": 232045904, "step": 107415 }, { "epoch": 17.523654159869494, "grad_norm": 0.021955974400043488, "learning_rate": 4.598355906719709e-05, "loss": 0.0014, "num_input_tokens_seen": 232057872, "step": 107420 }, { "epoch": 17.52446982055465, "grad_norm": 0.01769077777862549, "learning_rate": 4.595374651873896e-05, "loss": 0.0094, "num_input_tokens_seen": 232069072, "step": 107425 }, { "epoch": 17.525285481239806, "grad_norm": 0.0007907668477855623, "learning_rate": 4.592394317210413e-05, "loss": 0.0004, "num_input_tokens_seen": 232079728, "step": 107430 }, { "epoch": 17.526101141924958, "grad_norm": 0.02364833652973175, "learning_rate": 4.589414902789662e-05, "loss": 0.0054, "num_input_tokens_seen": 232090800, "step": 107435 }, { "epoch": 17.526916802610113, "grad_norm": 0.0035746158100664616, "learning_rate": 4.586436408672023e-05, "loss": 0.0029, "num_input_tokens_seen": 232101360, "step": 107440 }, { "epoch": 17.52773246329527, "grad_norm": 0.19809827208518982, "learning_rate": 4.583458834917864e-05, "loss": 0.0083, "num_input_tokens_seen": 232112336, "step": 107445 }, { "epoch": 17.528548123980425, "grad_norm": 0.14923065900802612, "learning_rate": 4.580482181587531e-05, "loss": 0.0035, "num_input_tokens_seen": 232123152, "step": 107450 }, { "epoch": 17.52936378466558, "grad_norm": 0.7618481516838074, "learning_rate": 4.5775064487413424e-05, "loss": 0.0509, "num_input_tokens_seen": 232133328, "step": 107455 }, { "epoch": 17.530179445350733, "grad_norm": 0.003851557383313775, "learning_rate": 4.574531636439605e-05, "loss": 0.0036, "num_input_tokens_seen": 232143824, "step": 107460 }, { "epoch": 17.53099510603589, "grad_norm": 0.007336875889450312, "learning_rate": 4.57155774474261e-05, "loss": 0.0008, "num_input_tokens_seen": 232156080, "step": 107465 }, { "epoch": 17.531810766721044, "grad_norm": 0.002769289305433631, "learning_rate": 4.568584773710632e-05, "loss": 0.0034, "num_input_tokens_seen": 232165968, "step": 107470 }, { "epoch": 17.5326264274062, "grad_norm": 0.1800115555524826, "learning_rate": 4.565612723403911e-05, "loss": 0.0078, "num_input_tokens_seen": 232176752, "step": 107475 }, { "epoch": 17.533442088091356, "grad_norm": 0.0010139649966731668, "learning_rate": 4.562641593882694e-05, "loss": 0.0004, "num_input_tokens_seen": 232187856, "step": 107480 }, { "epoch": 17.534257748776508, "grad_norm": 0.0031493548303842545, "learning_rate": 4.5596713852071816e-05, "loss": 0.0013, "num_input_tokens_seen": 232199696, "step": 107485 }, { "epoch": 17.535073409461663, "grad_norm": 0.31990453600883484, "learning_rate": 4.556702097437576e-05, "loss": 0.0078, "num_input_tokens_seen": 232210832, "step": 107490 }, { "epoch": 17.53588907014682, "grad_norm": 0.005481668282300234, "learning_rate": 4.5537337306340466e-05, "loss": 0.0011, "num_input_tokens_seen": 232222000, "step": 107495 }, { "epoch": 17.536704730831975, "grad_norm": 0.00037945323856547475, "learning_rate": 4.550766284856761e-05, "loss": 0.0003, "num_input_tokens_seen": 232233584, "step": 107500 }, { "epoch": 17.53752039151713, "grad_norm": 0.013014235533773899, "learning_rate": 4.5477997601658384e-05, "loss": 0.0033, "num_input_tokens_seen": 232244464, "step": 107505 }, { "epoch": 17.538336052202283, "grad_norm": 0.0016114584868773818, "learning_rate": 4.5448341566214354e-05, "loss": 0.0023, "num_input_tokens_seen": 232255184, "step": 107510 }, { "epoch": 17.53915171288744, "grad_norm": 0.001107938471250236, "learning_rate": 4.541869474283616e-05, "loss": 0.0005, "num_input_tokens_seen": 232266896, "step": 107515 }, { "epoch": 17.539967373572594, "grad_norm": 0.04263180494308472, "learning_rate": 4.538905713212488e-05, "loss": 0.0023, "num_input_tokens_seen": 232276816, "step": 107520 }, { "epoch": 17.54078303425775, "grad_norm": 0.0007417293963953853, "learning_rate": 4.535942873468102e-05, "loss": 0.0007, "num_input_tokens_seen": 232287664, "step": 107525 }, { "epoch": 17.541598694942905, "grad_norm": 0.0029360156040638685, "learning_rate": 4.532980955110516e-05, "loss": 0.0034, "num_input_tokens_seen": 232298512, "step": 107530 }, { "epoch": 17.542414355628058, "grad_norm": 0.009608111344277859, "learning_rate": 4.530019958199744e-05, "loss": 0.0012, "num_input_tokens_seen": 232309520, "step": 107535 }, { "epoch": 17.543230016313213, "grad_norm": 0.006099861580878496, "learning_rate": 4.527059882795803e-05, "loss": 0.0014, "num_input_tokens_seen": 232320272, "step": 107540 }, { "epoch": 17.54404567699837, "grad_norm": 0.002252019476145506, "learning_rate": 4.52410072895868e-05, "loss": 0.0011, "num_input_tokens_seen": 232329808, "step": 107545 }, { "epoch": 17.544861337683525, "grad_norm": 0.09089305996894836, "learning_rate": 4.521142496748348e-05, "loss": 0.0019, "num_input_tokens_seen": 232340592, "step": 107550 }, { "epoch": 17.545676998368677, "grad_norm": 0.03456374630331993, "learning_rate": 4.5181851862247544e-05, "loss": 0.0022, "num_input_tokens_seen": 232350960, "step": 107555 }, { "epoch": 17.546492659053833, "grad_norm": 0.0005953084328211844, "learning_rate": 4.51522879744784e-05, "loss": 0.0029, "num_input_tokens_seen": 232362416, "step": 107560 }, { "epoch": 17.54730831973899, "grad_norm": 0.00016484300431329757, "learning_rate": 4.5122733304775124e-05, "loss": 0.002, "num_input_tokens_seen": 232373168, "step": 107565 }, { "epoch": 17.548123980424144, "grad_norm": 0.012281266041100025, "learning_rate": 4.509318785373667e-05, "loss": 0.0007, "num_input_tokens_seen": 232382352, "step": 107570 }, { "epoch": 17.5489396411093, "grad_norm": 0.002700270852074027, "learning_rate": 4.506365162196191e-05, "loss": 0.0011, "num_input_tokens_seen": 232392304, "step": 107575 }, { "epoch": 17.549755301794452, "grad_norm": 0.00035837123868986964, "learning_rate": 4.503412461004935e-05, "loss": 0.0099, "num_input_tokens_seen": 232403152, "step": 107580 }, { "epoch": 17.550570962479608, "grad_norm": 0.0021404859144240618, "learning_rate": 4.500460681859742e-05, "loss": 0.0009, "num_input_tokens_seen": 232414960, "step": 107585 }, { "epoch": 17.551386623164763, "grad_norm": 0.00026893772883340716, "learning_rate": 4.4975098248204394e-05, "loss": 0.0003, "num_input_tokens_seen": 232425904, "step": 107590 }, { "epoch": 17.55220228384992, "grad_norm": 0.6711235046386719, "learning_rate": 4.494559889946814e-05, "loss": 0.0272, "num_input_tokens_seen": 232437328, "step": 107595 }, { "epoch": 17.553017944535075, "grad_norm": 0.00047256724792532623, "learning_rate": 4.4916108772986686e-05, "loss": 0.0008, "num_input_tokens_seen": 232448720, "step": 107600 }, { "epoch": 17.553833605220227, "grad_norm": 0.0007653324282728136, "learning_rate": 4.48866278693576e-05, "loss": 0.0214, "num_input_tokens_seen": 232459184, "step": 107605 }, { "epoch": 17.554649265905383, "grad_norm": 0.0006618410698138177, "learning_rate": 4.485715618917818e-05, "loss": 0.0078, "num_input_tokens_seen": 232468880, "step": 107610 }, { "epoch": 17.55546492659054, "grad_norm": 0.001561740762554109, "learning_rate": 4.482769373304613e-05, "loss": 0.0009, "num_input_tokens_seen": 232479312, "step": 107615 }, { "epoch": 17.556280587275694, "grad_norm": 0.7048082947731018, "learning_rate": 4.4798240501558115e-05, "loss": 0.0768, "num_input_tokens_seen": 232489712, "step": 107620 }, { "epoch": 17.55709624796085, "grad_norm": 0.05319645628333092, "learning_rate": 4.4768796495311406e-05, "loss": 0.0021, "num_input_tokens_seen": 232500368, "step": 107625 }, { "epoch": 17.557911908646002, "grad_norm": 0.0017085699364542961, "learning_rate": 4.473936171490228e-05, "loss": 0.0025, "num_input_tokens_seen": 232509616, "step": 107630 }, { "epoch": 17.558727569331158, "grad_norm": 0.2203325629234314, "learning_rate": 4.470993616092778e-05, "loss": 0.0061, "num_input_tokens_seen": 232521072, "step": 107635 }, { "epoch": 17.559543230016313, "grad_norm": 0.014941530302166939, "learning_rate": 4.46805198339838e-05, "loss": 0.0025, "num_input_tokens_seen": 232531984, "step": 107640 }, { "epoch": 17.56035889070147, "grad_norm": 0.002396009163931012, "learning_rate": 4.4651112734666874e-05, "loss": 0.0004, "num_input_tokens_seen": 232543728, "step": 107645 }, { "epoch": 17.561174551386625, "grad_norm": 0.010223069228231907, "learning_rate": 4.462171486357264e-05, "loss": 0.0009, "num_input_tokens_seen": 232553456, "step": 107650 }, { "epoch": 17.561990212071777, "grad_norm": 0.00022783187159802765, "learning_rate": 4.459232622129722e-05, "loss": 0.001, "num_input_tokens_seen": 232564432, "step": 107655 }, { "epoch": 17.562805872756933, "grad_norm": 0.001957368105649948, "learning_rate": 4.4562946808435864e-05, "loss": 0.0005, "num_input_tokens_seen": 232574992, "step": 107660 }, { "epoch": 17.563621533442088, "grad_norm": 1.1116943359375, "learning_rate": 4.453357662558422e-05, "loss": 0.1027, "num_input_tokens_seen": 232585392, "step": 107665 }, { "epoch": 17.564437194127244, "grad_norm": 0.0035373906139284372, "learning_rate": 4.450421567333746e-05, "loss": 0.0015, "num_input_tokens_seen": 232596528, "step": 107670 }, { "epoch": 17.5652528548124, "grad_norm": 0.004156868439167738, "learning_rate": 4.447486395229061e-05, "loss": 0.0008, "num_input_tokens_seen": 232607344, "step": 107675 }, { "epoch": 17.56606851549755, "grad_norm": 0.004597888793796301, "learning_rate": 4.4445521463038486e-05, "loss": 0.0004, "num_input_tokens_seen": 232617584, "step": 107680 }, { "epoch": 17.566884176182707, "grad_norm": 0.005564799532294273, "learning_rate": 4.441618820617582e-05, "loss": 0.0009, "num_input_tokens_seen": 232628592, "step": 107685 }, { "epoch": 17.567699836867863, "grad_norm": 0.014463113620877266, "learning_rate": 4.438686418229698e-05, "loss": 0.0015, "num_input_tokens_seen": 232640080, "step": 107690 }, { "epoch": 17.56851549755302, "grad_norm": 0.007256032433360815, "learning_rate": 4.4357549391996376e-05, "loss": 0.0019, "num_input_tokens_seen": 232651920, "step": 107695 }, { "epoch": 17.569331158238175, "grad_norm": 0.008663265034556389, "learning_rate": 4.432824383586809e-05, "loss": 0.0043, "num_input_tokens_seen": 232663600, "step": 107700 }, { "epoch": 17.570146818923327, "grad_norm": 0.04024729132652283, "learning_rate": 4.429894751450597e-05, "loss": 0.0033, "num_input_tokens_seen": 232673808, "step": 107705 }, { "epoch": 17.570962479608482, "grad_norm": 0.11653250455856323, "learning_rate": 4.4269660428503774e-05, "loss": 0.0042, "num_input_tokens_seen": 232684816, "step": 107710 }, { "epoch": 17.571778140293638, "grad_norm": 0.002741542411968112, "learning_rate": 4.4240382578454915e-05, "loss": 0.0004, "num_input_tokens_seen": 232695728, "step": 107715 }, { "epoch": 17.572593800978794, "grad_norm": 0.2372535616159439, "learning_rate": 4.4211113964953144e-05, "loss": 0.0104, "num_input_tokens_seen": 232705840, "step": 107720 }, { "epoch": 17.57340946166395, "grad_norm": 0.0019646419677883387, "learning_rate": 4.4181854588591085e-05, "loss": 0.0023, "num_input_tokens_seen": 232716720, "step": 107725 }, { "epoch": 17.5742251223491, "grad_norm": 0.0009456843254156411, "learning_rate": 4.415260444996222e-05, "loss": 0.0012, "num_input_tokens_seen": 232728944, "step": 107730 }, { "epoch": 17.575040783034257, "grad_norm": 0.004937485791742802, "learning_rate": 4.4123363549658955e-05, "loss": 0.0027, "num_input_tokens_seen": 232738576, "step": 107735 }, { "epoch": 17.575856443719413, "grad_norm": 0.0002907993330154568, "learning_rate": 4.409413188827416e-05, "loss": 0.0008, "num_input_tokens_seen": 232749616, "step": 107740 }, { "epoch": 17.57667210440457, "grad_norm": 0.006005709525197744, "learning_rate": 4.4064909466400014e-05, "loss": 0.002, "num_input_tokens_seen": 232760496, "step": 107745 }, { "epoch": 17.57748776508972, "grad_norm": 0.007954063825309277, "learning_rate": 4.4035696284629e-05, "loss": 0.0057, "num_input_tokens_seen": 232771632, "step": 107750 }, { "epoch": 17.578303425774877, "grad_norm": 0.0007008819957263768, "learning_rate": 4.4006492343552915e-05, "loss": 0.0011, "num_input_tokens_seen": 232781936, "step": 107755 }, { "epoch": 17.579119086460032, "grad_norm": 0.004294464364647865, "learning_rate": 4.39772976437639e-05, "loss": 0.0017, "num_input_tokens_seen": 232793040, "step": 107760 }, { "epoch": 17.579934747145188, "grad_norm": 0.0009277364588342607, "learning_rate": 4.394811218585326e-05, "loss": 0.001, "num_input_tokens_seen": 232803216, "step": 107765 }, { "epoch": 17.580750407830344, "grad_norm": 0.022793620824813843, "learning_rate": 4.3918935970412796e-05, "loss": 0.0013, "num_input_tokens_seen": 232814544, "step": 107770 }, { "epoch": 17.581566068515496, "grad_norm": 0.01750621385872364, "learning_rate": 4.38897689980336e-05, "loss": 0.0008, "num_input_tokens_seen": 232825872, "step": 107775 }, { "epoch": 17.58238172920065, "grad_norm": 0.00042929017217829823, "learning_rate": 4.386061126930696e-05, "loss": 0.0009, "num_input_tokens_seen": 232837136, "step": 107780 }, { "epoch": 17.583197389885807, "grad_norm": 0.0012211805442348123, "learning_rate": 4.3831462784823525e-05, "loss": 0.0005, "num_input_tokens_seen": 232848208, "step": 107785 }, { "epoch": 17.584013050570963, "grad_norm": 0.07960768789052963, "learning_rate": 4.380232354517433e-05, "loss": 0.0063, "num_input_tokens_seen": 232859248, "step": 107790 }, { "epoch": 17.58482871125612, "grad_norm": 0.0004789176455233246, "learning_rate": 4.3773193550949664e-05, "loss": 0.0004, "num_input_tokens_seen": 232870224, "step": 107795 }, { "epoch": 17.58564437194127, "grad_norm": 0.038380883634090424, "learning_rate": 4.374407280274007e-05, "loss": 0.0748, "num_input_tokens_seen": 232880464, "step": 107800 }, { "epoch": 17.586460032626427, "grad_norm": 0.007192968390882015, "learning_rate": 4.371496130113561e-05, "loss": 0.0016, "num_input_tokens_seen": 232889936, "step": 107805 }, { "epoch": 17.587275693311582, "grad_norm": 0.0016420006286352873, "learning_rate": 4.3685859046726284e-05, "loss": 0.0006, "num_input_tokens_seen": 232900752, "step": 107810 }, { "epoch": 17.588091353996738, "grad_norm": 0.0026500627864152193, "learning_rate": 4.3656766040101933e-05, "loss": 0.0013, "num_input_tokens_seen": 232911600, "step": 107815 }, { "epoch": 17.588907014681894, "grad_norm": 0.0004607281007338315, "learning_rate": 4.362768228185216e-05, "loss": 0.0092, "num_input_tokens_seen": 232922352, "step": 107820 }, { "epoch": 17.589722675367046, "grad_norm": 0.01853001117706299, "learning_rate": 4.35986077725663e-05, "loss": 0.0026, "num_input_tokens_seen": 232931920, "step": 107825 }, { "epoch": 17.5905383360522, "grad_norm": 0.0008818014757707715, "learning_rate": 4.3569542512833684e-05, "loss": 0.0059, "num_input_tokens_seen": 232942160, "step": 107830 }, { "epoch": 17.591353996737357, "grad_norm": 0.005279912613332272, "learning_rate": 4.354048650324327e-05, "loss": 0.0793, "num_input_tokens_seen": 232953424, "step": 107835 }, { "epoch": 17.592169657422513, "grad_norm": 0.001021630479954183, "learning_rate": 4.3511439744383984e-05, "loss": 0.0004, "num_input_tokens_seen": 232964240, "step": 107840 }, { "epoch": 17.59298531810767, "grad_norm": 0.002027069916948676, "learning_rate": 4.348240223684447e-05, "loss": 0.0032, "num_input_tokens_seen": 232974576, "step": 107845 }, { "epoch": 17.59380097879282, "grad_norm": 0.0007032614084891975, "learning_rate": 4.3453373981213184e-05, "loss": 0.0004, "num_input_tokens_seen": 232986032, "step": 107850 }, { "epoch": 17.594616639477977, "grad_norm": 0.002503114752471447, "learning_rate": 4.342435497807845e-05, "loss": 0.0011, "num_input_tokens_seen": 232997104, "step": 107855 }, { "epoch": 17.595432300163132, "grad_norm": 0.0064894710667431355, "learning_rate": 4.3395345228028294e-05, "loss": 0.0008, "num_input_tokens_seen": 233008208, "step": 107860 }, { "epoch": 17.596247960848288, "grad_norm": 0.000594555342104286, "learning_rate": 4.336634473165091e-05, "loss": 0.0028, "num_input_tokens_seen": 233019888, "step": 107865 }, { "epoch": 17.597063621533444, "grad_norm": 0.5004954934120178, "learning_rate": 4.3337353489533606e-05, "loss": 0.0346, "num_input_tokens_seen": 233029872, "step": 107870 }, { "epoch": 17.597879282218596, "grad_norm": 0.00310205458663404, "learning_rate": 4.3308371502264355e-05, "loss": 0.0007, "num_input_tokens_seen": 233041136, "step": 107875 }, { "epoch": 17.59869494290375, "grad_norm": 0.010869835503399372, "learning_rate": 4.327939877043013e-05, "loss": 0.0012, "num_input_tokens_seen": 233051216, "step": 107880 }, { "epoch": 17.599510603588907, "grad_norm": 0.8642117977142334, "learning_rate": 4.3250435294618473e-05, "loss": 0.0312, "num_input_tokens_seen": 233062064, "step": 107885 }, { "epoch": 17.600326264274063, "grad_norm": 0.0018593736458569765, "learning_rate": 4.322148107541596e-05, "loss": 0.0011, "num_input_tokens_seen": 233072208, "step": 107890 }, { "epoch": 17.601141924959215, "grad_norm": 0.008606432005763054, "learning_rate": 4.3192536113409785e-05, "loss": 0.0014, "num_input_tokens_seen": 233082224, "step": 107895 }, { "epoch": 17.60195758564437, "grad_norm": 0.006274912506341934, "learning_rate": 4.316360040918621e-05, "loss": 0.0048, "num_input_tokens_seen": 233092944, "step": 107900 }, { "epoch": 17.602773246329527, "grad_norm": 0.0020932599436491728, "learning_rate": 4.3134673963331985e-05, "loss": 0.0009, "num_input_tokens_seen": 233102576, "step": 107905 }, { "epoch": 17.603588907014682, "grad_norm": 0.0031999878119677305, "learning_rate": 4.310575677643297e-05, "loss": 0.0004, "num_input_tokens_seen": 233112944, "step": 107910 }, { "epoch": 17.604404567699838, "grad_norm": 0.050068553537130356, "learning_rate": 4.307684884907559e-05, "loss": 0.0041, "num_input_tokens_seen": 233123536, "step": 107915 }, { "epoch": 17.605220228384994, "grad_norm": 0.0012258957140147686, "learning_rate": 4.304795018184537e-05, "loss": 0.0007, "num_input_tokens_seen": 233133744, "step": 107920 }, { "epoch": 17.606035889070146, "grad_norm": 0.02541196160018444, "learning_rate": 4.3019060775328186e-05, "loss": 0.0008, "num_input_tokens_seen": 233144720, "step": 107925 }, { "epoch": 17.6068515497553, "grad_norm": 0.004853926599025726, "learning_rate": 4.2990180630109455e-05, "loss": 0.0009, "num_input_tokens_seen": 233154576, "step": 107930 }, { "epoch": 17.607667210440457, "grad_norm": 0.012128345668315887, "learning_rate": 4.296130974677448e-05, "loss": 0.0015, "num_input_tokens_seen": 233164976, "step": 107935 }, { "epoch": 17.608482871125613, "grad_norm": 0.006950197741389275, "learning_rate": 4.293244812590835e-05, "loss": 0.0029, "num_input_tokens_seen": 233176528, "step": 107940 }, { "epoch": 17.609298531810765, "grad_norm": 0.0022767765913158655, "learning_rate": 4.2903595768095995e-05, "loss": 0.0017, "num_input_tokens_seen": 233187248, "step": 107945 }, { "epoch": 17.61011419249592, "grad_norm": 0.0008330377168022096, "learning_rate": 4.28747526739221e-05, "loss": 0.0007, "num_input_tokens_seen": 233197136, "step": 107950 }, { "epoch": 17.610929853181077, "grad_norm": 0.00958797987550497, "learning_rate": 4.284591884397132e-05, "loss": 0.0006, "num_input_tokens_seen": 233208720, "step": 107955 }, { "epoch": 17.611745513866232, "grad_norm": 0.024692602455615997, "learning_rate": 4.281709427882791e-05, "loss": 0.0048, "num_input_tokens_seen": 233218704, "step": 107960 }, { "epoch": 17.612561174551388, "grad_norm": 0.017007293179631233, "learning_rate": 4.2788278979076003e-05, "loss": 0.0026, "num_input_tokens_seen": 233229616, "step": 107965 }, { "epoch": 17.61337683523654, "grad_norm": 0.0005782050429843366, "learning_rate": 4.275947294529969e-05, "loss": 0.0004, "num_input_tokens_seen": 233240080, "step": 107970 }, { "epoch": 17.614192495921696, "grad_norm": 0.0035270596854388714, "learning_rate": 4.2730676178082736e-05, "loss": 0.0013, "num_input_tokens_seen": 233250480, "step": 107975 }, { "epoch": 17.61500815660685, "grad_norm": 0.0002987095504067838, "learning_rate": 4.2701888678008674e-05, "loss": 0.0012, "num_input_tokens_seen": 233261456, "step": 107980 }, { "epoch": 17.615823817292007, "grad_norm": 0.004291311372071505, "learning_rate": 4.267311044566097e-05, "loss": 0.001, "num_input_tokens_seen": 233272144, "step": 107985 }, { "epoch": 17.616639477977163, "grad_norm": 0.001111071789637208, "learning_rate": 4.2644341481622825e-05, "loss": 0.0039, "num_input_tokens_seen": 233281200, "step": 107990 }, { "epoch": 17.617455138662315, "grad_norm": 0.0010424138745293021, "learning_rate": 4.2615581786477234e-05, "loss": 0.0003, "num_input_tokens_seen": 233291888, "step": 107995 }, { "epoch": 17.61827079934747, "grad_norm": 0.0060048531740903854, "learning_rate": 4.2586831360807265e-05, "loss": 0.0005, "num_input_tokens_seen": 233302704, "step": 108000 }, { "epoch": 17.619086460032626, "grad_norm": 0.011316410265862942, "learning_rate": 4.25580902051953e-05, "loss": 0.0026, "num_input_tokens_seen": 233314192, "step": 108005 }, { "epoch": 17.619902120717782, "grad_norm": 0.08108188211917877, "learning_rate": 4.252935832022409e-05, "loss": 0.0041, "num_input_tokens_seen": 233322992, "step": 108010 }, { "epoch": 17.620717781402938, "grad_norm": 0.0003150638658553362, "learning_rate": 4.250063570647561e-05, "loss": 0.0004, "num_input_tokens_seen": 233334352, "step": 108015 }, { "epoch": 17.62153344208809, "grad_norm": 0.008647634647786617, "learning_rate": 4.247192236453229e-05, "loss": 0.0004, "num_input_tokens_seen": 233344784, "step": 108020 }, { "epoch": 17.622349102773246, "grad_norm": 0.015910470858216286, "learning_rate": 4.244321829497566e-05, "loss": 0.0042, "num_input_tokens_seen": 233356080, "step": 108025 }, { "epoch": 17.6231647634584, "grad_norm": 0.001022401382215321, "learning_rate": 4.2414523498387926e-05, "loss": 0.0009, "num_input_tokens_seen": 233367440, "step": 108030 }, { "epoch": 17.623980424143557, "grad_norm": 0.0027995568234473467, "learning_rate": 4.2385837975350115e-05, "loss": 0.0122, "num_input_tokens_seen": 233378640, "step": 108035 }, { "epoch": 17.624796084828713, "grad_norm": 0.013003799133002758, "learning_rate": 4.235716172644394e-05, "loss": 0.0843, "num_input_tokens_seen": 233388112, "step": 108040 }, { "epoch": 17.625611745513865, "grad_norm": 0.002790980041027069, "learning_rate": 4.232849475225048e-05, "loss": 0.002, "num_input_tokens_seen": 233398992, "step": 108045 }, { "epoch": 17.62642740619902, "grad_norm": 0.0019258302636444569, "learning_rate": 4.2299837053350606e-05, "loss": 0.001, "num_input_tokens_seen": 233409072, "step": 108050 }, { "epoch": 17.627243066884176, "grad_norm": 0.002759368624538183, "learning_rate": 4.2271188630325195e-05, "loss": 0.0068, "num_input_tokens_seen": 233419664, "step": 108055 }, { "epoch": 17.628058727569332, "grad_norm": 0.0022524246014654636, "learning_rate": 4.2242549483754836e-05, "loss": 0.0038, "num_input_tokens_seen": 233430480, "step": 108060 }, { "epoch": 17.628874388254488, "grad_norm": 0.003963864874094725, "learning_rate": 4.221391961421989e-05, "loss": 0.0021, "num_input_tokens_seen": 233440400, "step": 108065 }, { "epoch": 17.62969004893964, "grad_norm": 0.00029403064399957657, "learning_rate": 4.218529902230062e-05, "loss": 0.0119, "num_input_tokens_seen": 233450896, "step": 108070 }, { "epoch": 17.630505709624796, "grad_norm": 0.003268659580498934, "learning_rate": 4.2156687708577e-05, "loss": 0.0006, "num_input_tokens_seen": 233460048, "step": 108075 }, { "epoch": 17.63132137030995, "grad_norm": 0.0030870982445776463, "learning_rate": 4.212808567362897e-05, "loss": 0.0006, "num_input_tokens_seen": 233471568, "step": 108080 }, { "epoch": 17.632137030995107, "grad_norm": 0.031759873032569885, "learning_rate": 4.209949291803611e-05, "loss": 0.0028, "num_input_tokens_seen": 233481904, "step": 108085 }, { "epoch": 17.63295269168026, "grad_norm": 0.0062539586797356606, "learning_rate": 4.207090944237796e-05, "loss": 0.0017, "num_input_tokens_seen": 233491408, "step": 108090 }, { "epoch": 17.633768352365415, "grad_norm": 0.010095684789121151, "learning_rate": 4.204233524723372e-05, "loss": 0.0016, "num_input_tokens_seen": 233501168, "step": 108095 }, { "epoch": 17.63458401305057, "grad_norm": 0.0003338803071528673, "learning_rate": 4.201377033318249e-05, "loss": 0.0004, "num_input_tokens_seen": 233512336, "step": 108100 }, { "epoch": 17.635399673735726, "grad_norm": 0.05965923145413399, "learning_rate": 4.198521470080324e-05, "loss": 0.0024, "num_input_tokens_seen": 233522256, "step": 108105 }, { "epoch": 17.636215334420882, "grad_norm": 0.0004708467167802155, "learning_rate": 4.195666835067463e-05, "loss": 0.0247, "num_input_tokens_seen": 233531824, "step": 108110 }, { "epoch": 17.637030995106034, "grad_norm": 0.1438266485929489, "learning_rate": 4.1928131283375246e-05, "loss": 0.005, "num_input_tokens_seen": 233544368, "step": 108115 }, { "epoch": 17.63784665579119, "grad_norm": 0.0019570267759263515, "learning_rate": 4.189960349948335e-05, "loss": 0.0006, "num_input_tokens_seen": 233555440, "step": 108120 }, { "epoch": 17.638662316476346, "grad_norm": 0.002199590904638171, "learning_rate": 4.1871084999577146e-05, "loss": 0.0003, "num_input_tokens_seen": 233565840, "step": 108125 }, { "epoch": 17.6394779771615, "grad_norm": 0.013834419660270214, "learning_rate": 4.184257578423456e-05, "loss": 0.0008, "num_input_tokens_seen": 233575856, "step": 108130 }, { "epoch": 17.640293637846657, "grad_norm": 0.0027103605680167675, "learning_rate": 4.1814075854033405e-05, "loss": 0.0011, "num_input_tokens_seen": 233588624, "step": 108135 }, { "epoch": 17.64110929853181, "grad_norm": 0.0007338287541642785, "learning_rate": 4.178558520955117e-05, "loss": 0.0013, "num_input_tokens_seen": 233599696, "step": 108140 }, { "epoch": 17.641924959216965, "grad_norm": 0.004689326509833336, "learning_rate": 4.175710385136539e-05, "loss": 0.0329, "num_input_tokens_seen": 233610928, "step": 108145 }, { "epoch": 17.64274061990212, "grad_norm": 0.0011317178141325712, "learning_rate": 4.172863178005326e-05, "loss": 0.0003, "num_input_tokens_seen": 233622160, "step": 108150 }, { "epoch": 17.643556280587276, "grad_norm": 0.0009731571190059185, "learning_rate": 4.1700168996191726e-05, "loss": 0.0011, "num_input_tokens_seen": 233632208, "step": 108155 }, { "epoch": 17.644371941272432, "grad_norm": 0.0007394000422209501, "learning_rate": 4.16717155003577e-05, "loss": 0.0027, "num_input_tokens_seen": 233642480, "step": 108160 }, { "epoch": 17.645187601957584, "grad_norm": 0.00035144094727002084, "learning_rate": 4.164327129312778e-05, "loss": 0.0015, "num_input_tokens_seen": 233653680, "step": 108165 }, { "epoch": 17.64600326264274, "grad_norm": 0.004731602966785431, "learning_rate": 4.161483637507846e-05, "loss": 0.0005, "num_input_tokens_seen": 233665712, "step": 108170 }, { "epoch": 17.646818923327896, "grad_norm": 0.023019464686512947, "learning_rate": 4.1586410746785927e-05, "loss": 0.0017, "num_input_tokens_seen": 233674896, "step": 108175 }, { "epoch": 17.64763458401305, "grad_norm": 0.0024400795809924603, "learning_rate": 4.155799440882635e-05, "loss": 0.0031, "num_input_tokens_seen": 233685424, "step": 108180 }, { "epoch": 17.648450244698207, "grad_norm": 0.005999886896461248, "learning_rate": 4.152958736177559e-05, "loss": 0.0005, "num_input_tokens_seen": 233696688, "step": 108185 }, { "epoch": 17.64926590538336, "grad_norm": 0.0010004190262407064, "learning_rate": 4.1501189606209356e-05, "loss": 0.0008, "num_input_tokens_seen": 233708304, "step": 108190 }, { "epoch": 17.650081566068515, "grad_norm": 0.007027873769402504, "learning_rate": 4.147280114270319e-05, "loss": 0.0017, "num_input_tokens_seen": 233720304, "step": 108195 }, { "epoch": 17.65089722675367, "grad_norm": 0.007540034130215645, "learning_rate": 4.1444421971832346e-05, "loss": 0.0008, "num_input_tokens_seen": 233729328, "step": 108200 }, { "epoch": 17.651712887438826, "grad_norm": 0.004675657954066992, "learning_rate": 4.1416052094171985e-05, "loss": 0.0014, "num_input_tokens_seen": 233739408, "step": 108205 }, { "epoch": 17.652528548123982, "grad_norm": 0.0004989549051970243, "learning_rate": 4.1387691510297146e-05, "loss": 0.0015, "num_input_tokens_seen": 233750544, "step": 108210 }, { "epoch": 17.653344208809134, "grad_norm": 0.00018444911984261125, "learning_rate": 4.1359340220782524e-05, "loss": 0.0011, "num_input_tokens_seen": 233761328, "step": 108215 }, { "epoch": 17.65415986949429, "grad_norm": 0.004228262230753899, "learning_rate": 4.133099822620268e-05, "loss": 0.0022, "num_input_tokens_seen": 233772176, "step": 108220 }, { "epoch": 17.654975530179446, "grad_norm": 0.002176476875320077, "learning_rate": 4.130266552713202e-05, "loss": 0.0405, "num_input_tokens_seen": 233782992, "step": 108225 }, { "epoch": 17.6557911908646, "grad_norm": 0.0019155082991346717, "learning_rate": 4.1274342124144713e-05, "loss": 0.0012, "num_input_tokens_seen": 233793936, "step": 108230 }, { "epoch": 17.656606851549757, "grad_norm": 0.002272221725434065, "learning_rate": 4.124602801781485e-05, "loss": 0.0004, "num_input_tokens_seen": 233804336, "step": 108235 }, { "epoch": 17.65742251223491, "grad_norm": 0.0009390924824401736, "learning_rate": 4.1217723208716196e-05, "loss": 0.001, "num_input_tokens_seen": 233816304, "step": 108240 }, { "epoch": 17.658238172920065, "grad_norm": 0.00035862938966602087, "learning_rate": 4.118942769742234e-05, "loss": 0.0002, "num_input_tokens_seen": 233826864, "step": 108245 }, { "epoch": 17.65905383360522, "grad_norm": 0.6004579663276672, "learning_rate": 4.116114148450673e-05, "loss": 0.031, "num_input_tokens_seen": 233838288, "step": 108250 }, { "epoch": 17.659869494290376, "grad_norm": 0.0022010619286447763, "learning_rate": 4.113286457054283e-05, "loss": 0.0008, "num_input_tokens_seen": 233848784, "step": 108255 }, { "epoch": 17.660685154975532, "grad_norm": 0.0006814883090555668, "learning_rate": 4.1104596956103356e-05, "loss": 0.001, "num_input_tokens_seen": 233859472, "step": 108260 }, { "epoch": 17.661500815660684, "grad_norm": 0.06318142265081406, "learning_rate": 4.107633864176158e-05, "loss": 0.0045, "num_input_tokens_seen": 233871280, "step": 108265 }, { "epoch": 17.66231647634584, "grad_norm": 0.022026631981134415, "learning_rate": 4.104808962808976e-05, "loss": 0.001, "num_input_tokens_seen": 233882160, "step": 108270 }, { "epoch": 17.663132137030995, "grad_norm": 0.01632312871515751, "learning_rate": 4.101984991566082e-05, "loss": 0.0013, "num_input_tokens_seen": 233892816, "step": 108275 }, { "epoch": 17.66394779771615, "grad_norm": 0.00041980453534051776, "learning_rate": 4.0991619505046764e-05, "loss": 0.0007, "num_input_tokens_seen": 233903984, "step": 108280 }, { "epoch": 17.664763458401303, "grad_norm": 0.0003276610223110765, "learning_rate": 4.096339839681984e-05, "loss": 0.0003, "num_input_tokens_seen": 233914096, "step": 108285 }, { "epoch": 17.66557911908646, "grad_norm": 0.021783655509352684, "learning_rate": 4.0935186591552044e-05, "loss": 0.0051, "num_input_tokens_seen": 233925392, "step": 108290 }, { "epoch": 17.666394779771615, "grad_norm": 0.0006872511585243046, "learning_rate": 4.0906984089815026e-05, "loss": 0.0056, "num_input_tokens_seen": 233935888, "step": 108295 }, { "epoch": 17.66721044045677, "grad_norm": 0.03984300047159195, "learning_rate": 4.087879089218033e-05, "loss": 0.0017, "num_input_tokens_seen": 233946672, "step": 108300 }, { "epoch": 17.668026101141926, "grad_norm": 0.0005269440589472651, "learning_rate": 4.085060699921944e-05, "loss": 0.0017, "num_input_tokens_seen": 233958256, "step": 108305 }, { "epoch": 17.66884176182708, "grad_norm": 0.0010156655916944146, "learning_rate": 4.0822432411503464e-05, "loss": 0.0004, "num_input_tokens_seen": 233969520, "step": 108310 }, { "epoch": 17.669657422512234, "grad_norm": 0.0011098864488303661, "learning_rate": 4.079426712960338e-05, "loss": 0.0042, "num_input_tokens_seen": 233979632, "step": 108315 }, { "epoch": 17.67047308319739, "grad_norm": 0.006729105953127146, "learning_rate": 4.076611115409001e-05, "loss": 0.0247, "num_input_tokens_seen": 233990416, "step": 108320 }, { "epoch": 17.671288743882545, "grad_norm": 0.014925811439752579, "learning_rate": 4.073796448553402e-05, "loss": 0.002, "num_input_tokens_seen": 234000624, "step": 108325 }, { "epoch": 17.6721044045677, "grad_norm": 0.0018322813557460904, "learning_rate": 4.070982712450571e-05, "loss": 0.0016, "num_input_tokens_seen": 234010928, "step": 108330 }, { "epoch": 17.672920065252853, "grad_norm": 0.0005330965504981577, "learning_rate": 4.068169907157548e-05, "loss": 0.0003, "num_input_tokens_seen": 234021776, "step": 108335 }, { "epoch": 17.67373572593801, "grad_norm": 1.1490570306777954, "learning_rate": 4.065358032731331e-05, "loss": 0.2548, "num_input_tokens_seen": 234033168, "step": 108340 }, { "epoch": 17.674551386623165, "grad_norm": 0.0013266679598018527, "learning_rate": 4.062547089228902e-05, "loss": 0.0003, "num_input_tokens_seen": 234044368, "step": 108345 }, { "epoch": 17.67536704730832, "grad_norm": 0.0019488186808302999, "learning_rate": 4.0597370767072315e-05, "loss": 0.0014, "num_input_tokens_seen": 234054544, "step": 108350 }, { "epoch": 17.676182707993476, "grad_norm": 0.002594847697764635, "learning_rate": 4.056927995223264e-05, "loss": 0.0009, "num_input_tokens_seen": 234065808, "step": 108355 }, { "epoch": 17.67699836867863, "grad_norm": 0.04415489733219147, "learning_rate": 4.054119844833948e-05, "loss": 0.0021, "num_input_tokens_seen": 234075920, "step": 108360 }, { "epoch": 17.677814029363784, "grad_norm": 0.008037679828703403, "learning_rate": 4.0513126255961594e-05, "loss": 0.0006, "num_input_tokens_seen": 234086608, "step": 108365 }, { "epoch": 17.67862969004894, "grad_norm": 0.005952348466962576, "learning_rate": 4.0485063375668316e-05, "loss": 0.1419, "num_input_tokens_seen": 234096560, "step": 108370 }, { "epoch": 17.679445350734095, "grad_norm": 0.0006694883340969682, "learning_rate": 4.045700980802802e-05, "loss": 0.0012, "num_input_tokens_seen": 234108528, "step": 108375 }, { "epoch": 17.68026101141925, "grad_norm": 0.00230517890304327, "learning_rate": 4.042896555360953e-05, "loss": 0.0004, "num_input_tokens_seen": 234118416, "step": 108380 }, { "epoch": 17.681076672104403, "grad_norm": 0.05230387672781944, "learning_rate": 4.040093061298089e-05, "loss": 0.004, "num_input_tokens_seen": 234128688, "step": 108385 }, { "epoch": 17.68189233278956, "grad_norm": 0.005648719146847725, "learning_rate": 4.037290498671059e-05, "loss": 0.0005, "num_input_tokens_seen": 234139376, "step": 108390 }, { "epoch": 17.682707993474715, "grad_norm": 0.035768892616033554, "learning_rate": 4.0344888675366285e-05, "loss": 0.0066, "num_input_tokens_seen": 234149232, "step": 108395 }, { "epoch": 17.68352365415987, "grad_norm": 0.010486296378076077, "learning_rate": 4.031688167951614e-05, "loss": 0.0038, "num_input_tokens_seen": 234159248, "step": 108400 }, { "epoch": 17.684339314845026, "grad_norm": 0.0009895983384922147, "learning_rate": 4.02888839997273e-05, "loss": 0.0093, "num_input_tokens_seen": 234168208, "step": 108405 }, { "epoch": 17.68515497553018, "grad_norm": 0.007241661660373211, "learning_rate": 4.0260895636567654e-05, "loss": 0.0007, "num_input_tokens_seen": 234178768, "step": 108410 }, { "epoch": 17.685970636215334, "grad_norm": 0.006733461283147335, "learning_rate": 4.0232916590603964e-05, "loss": 0.0011, "num_input_tokens_seen": 234190096, "step": 108415 }, { "epoch": 17.68678629690049, "grad_norm": 0.017089251428842545, "learning_rate": 4.020494686240361e-05, "loss": 0.122, "num_input_tokens_seen": 234200592, "step": 108420 }, { "epoch": 17.687601957585645, "grad_norm": 0.15010204911231995, "learning_rate": 4.017698645253321e-05, "loss": 0.004, "num_input_tokens_seen": 234211120, "step": 108425 }, { "epoch": 17.6884176182708, "grad_norm": 0.05645974352955818, "learning_rate": 4.0149035361559504e-05, "loss": 0.001, "num_input_tokens_seen": 234222576, "step": 108430 }, { "epoch": 17.689233278955953, "grad_norm": 0.006962623447179794, "learning_rate": 4.0121093590049004e-05, "loss": 0.0013, "num_input_tokens_seen": 234233808, "step": 108435 }, { "epoch": 17.69004893964111, "grad_norm": 0.0016587387071922421, "learning_rate": 4.009316113856798e-05, "loss": 0.0009, "num_input_tokens_seen": 234244688, "step": 108440 }, { "epoch": 17.690864600326265, "grad_norm": 0.0021991583053022623, "learning_rate": 4.0065238007682414e-05, "loss": 0.0019, "num_input_tokens_seen": 234255024, "step": 108445 }, { "epoch": 17.69168026101142, "grad_norm": 0.04479004442691803, "learning_rate": 4.0037324197958304e-05, "loss": 0.0021, "num_input_tokens_seen": 234266480, "step": 108450 }, { "epoch": 17.692495921696576, "grad_norm": 0.0026538416277617216, "learning_rate": 4.00094197099613e-05, "loss": 0.0011, "num_input_tokens_seen": 234277456, "step": 108455 }, { "epoch": 17.693311582381728, "grad_norm": 0.003920292016118765, "learning_rate": 3.9981524544256964e-05, "loss": 0.0593, "num_input_tokens_seen": 234287536, "step": 108460 }, { "epoch": 17.694127243066884, "grad_norm": 0.001578305964358151, "learning_rate": 3.995363870141061e-05, "loss": 0.001, "num_input_tokens_seen": 234298352, "step": 108465 }, { "epoch": 17.69494290375204, "grad_norm": 0.013417751528322697, "learning_rate": 3.9925762181987345e-05, "loss": 0.0026, "num_input_tokens_seen": 234309488, "step": 108470 }, { "epoch": 17.695758564437195, "grad_norm": 0.0006595024606212974, "learning_rate": 3.9897894986552216e-05, "loss": 0.0009, "num_input_tokens_seen": 234319216, "step": 108475 }, { "epoch": 17.696574225122347, "grad_norm": 0.005656297784298658, "learning_rate": 3.987003711566978e-05, "loss": 0.0275, "num_input_tokens_seen": 234329776, "step": 108480 }, { "epoch": 17.697389885807503, "grad_norm": 0.005397633649408817, "learning_rate": 3.984218856990496e-05, "loss": 0.0017, "num_input_tokens_seen": 234341488, "step": 108485 }, { "epoch": 17.69820554649266, "grad_norm": 0.0029238967690616846, "learning_rate": 3.981434934982176e-05, "loss": 0.001, "num_input_tokens_seen": 234352144, "step": 108490 }, { "epoch": 17.699021207177815, "grad_norm": 0.0007100607035681605, "learning_rate": 3.978651945598472e-05, "loss": 0.0681, "num_input_tokens_seen": 234364208, "step": 108495 }, { "epoch": 17.69983686786297, "grad_norm": 0.002662144135683775, "learning_rate": 3.975869888895756e-05, "loss": 0.0008, "num_input_tokens_seen": 234374320, "step": 108500 }, { "epoch": 17.700652528548122, "grad_norm": 0.000382350233849138, "learning_rate": 3.973088764930433e-05, "loss": 0.0054, "num_input_tokens_seen": 234384368, "step": 108505 }, { "epoch": 17.701468189233278, "grad_norm": 0.0005653170519508421, "learning_rate": 3.9703085737588405e-05, "loss": 0.0072, "num_input_tokens_seen": 234395856, "step": 108510 }, { "epoch": 17.702283849918434, "grad_norm": 0.11164949834346771, "learning_rate": 3.967529315437357e-05, "loss": 0.0031, "num_input_tokens_seen": 234406576, "step": 108515 }, { "epoch": 17.70309951060359, "grad_norm": 0.00023540828260593116, "learning_rate": 3.96475099002227e-05, "loss": 0.0034, "num_input_tokens_seen": 234417904, "step": 108520 }, { "epoch": 17.703915171288745, "grad_norm": 0.0013595453929156065, "learning_rate": 3.9619735975699236e-05, "loss": 0.0016, "num_input_tokens_seen": 234428240, "step": 108525 }, { "epoch": 17.704730831973897, "grad_norm": 0.004526201635599136, "learning_rate": 3.9591971381365665e-05, "loss": 0.0019, "num_input_tokens_seen": 234439152, "step": 108530 }, { "epoch": 17.705546492659053, "grad_norm": 0.07590554654598236, "learning_rate": 3.956421611778499e-05, "loss": 0.0046, "num_input_tokens_seen": 234449744, "step": 108535 }, { "epoch": 17.70636215334421, "grad_norm": 0.005136074032634497, "learning_rate": 3.953647018551948e-05, "loss": 0.001, "num_input_tokens_seen": 234461520, "step": 108540 }, { "epoch": 17.707177814029365, "grad_norm": 0.0015011136420071125, "learning_rate": 3.950873358513168e-05, "loss": 0.0014, "num_input_tokens_seen": 234473232, "step": 108545 }, { "epoch": 17.70799347471452, "grad_norm": 0.023294158279895782, "learning_rate": 3.948100631718338e-05, "loss": 0.0009, "num_input_tokens_seen": 234483504, "step": 108550 }, { "epoch": 17.708809135399672, "grad_norm": 0.0010147414868697524, "learning_rate": 3.945328838223688e-05, "loss": 0.0016, "num_input_tokens_seen": 234494512, "step": 108555 }, { "epoch": 17.709624796084828, "grad_norm": 0.12862026691436768, "learning_rate": 3.942557978085354e-05, "loss": 0.0126, "num_input_tokens_seen": 234505008, "step": 108560 }, { "epoch": 17.710440456769984, "grad_norm": 0.00031221084645949304, "learning_rate": 3.939788051359522e-05, "loss": 0.0023, "num_input_tokens_seen": 234515664, "step": 108565 }, { "epoch": 17.71125611745514, "grad_norm": 0.0004358456062618643, "learning_rate": 3.93701905810232e-05, "loss": 0.0004, "num_input_tokens_seen": 234523856, "step": 108570 }, { "epoch": 17.712071778140295, "grad_norm": 0.0015146576333791018, "learning_rate": 3.934250998369859e-05, "loss": 0.0023, "num_input_tokens_seen": 234534480, "step": 108575 }, { "epoch": 17.712887438825447, "grad_norm": 0.2730502784252167, "learning_rate": 3.931483872218239e-05, "loss": 0.0069, "num_input_tokens_seen": 234545040, "step": 108580 }, { "epoch": 17.713703099510603, "grad_norm": 0.0036683762446045876, "learning_rate": 3.928717679703542e-05, "loss": 0.0008, "num_input_tokens_seen": 234556176, "step": 108585 }, { "epoch": 17.71451876019576, "grad_norm": 0.5490561127662659, "learning_rate": 3.925952420881823e-05, "loss": 0.0327, "num_input_tokens_seen": 234567568, "step": 108590 }, { "epoch": 17.715334420880914, "grad_norm": 0.0008095699595287442, "learning_rate": 3.9231880958091325e-05, "loss": 0.0007, "num_input_tokens_seen": 234579792, "step": 108595 }, { "epoch": 17.71615008156607, "grad_norm": 0.0022214106284081936, "learning_rate": 3.920424704541481e-05, "loss": 0.0417, "num_input_tokens_seen": 234590448, "step": 108600 }, { "epoch": 17.716965742251222, "grad_norm": 0.0017520349938422441, "learning_rate": 3.9176622471348845e-05, "loss": 0.0012, "num_input_tokens_seen": 234602992, "step": 108605 }, { "epoch": 17.717781402936378, "grad_norm": 0.006847703829407692, "learning_rate": 3.9149007236453204e-05, "loss": 0.0625, "num_input_tokens_seen": 234613840, "step": 108610 }, { "epoch": 17.718597063621534, "grad_norm": 0.02551284059882164, "learning_rate": 3.912140134128761e-05, "loss": 0.002, "num_input_tokens_seen": 234624976, "step": 108615 }, { "epoch": 17.71941272430669, "grad_norm": 0.03996856510639191, "learning_rate": 3.909380478641139e-05, "loss": 0.003, "num_input_tokens_seen": 234635760, "step": 108620 }, { "epoch": 17.72022838499184, "grad_norm": 0.014086296781897545, "learning_rate": 3.906621757238393e-05, "loss": 0.0031, "num_input_tokens_seen": 234646832, "step": 108625 }, { "epoch": 17.721044045676997, "grad_norm": 0.11001487076282501, "learning_rate": 3.90386396997644e-05, "loss": 0.0038, "num_input_tokens_seen": 234657200, "step": 108630 }, { "epoch": 17.721859706362153, "grad_norm": 0.0010492857545614243, "learning_rate": 3.901107116911145e-05, "loss": 0.0018, "num_input_tokens_seen": 234666928, "step": 108635 }, { "epoch": 17.72267536704731, "grad_norm": 0.0010918010957539082, "learning_rate": 3.8983511980984154e-05, "loss": 0.0006, "num_input_tokens_seen": 234677424, "step": 108640 }, { "epoch": 17.723491027732464, "grad_norm": 0.002043887274339795, "learning_rate": 3.895596213594066e-05, "loss": 0.0006, "num_input_tokens_seen": 234688400, "step": 108645 }, { "epoch": 17.724306688417617, "grad_norm": 0.010542036034166813, "learning_rate": 3.892842163453964e-05, "loss": 0.0021, "num_input_tokens_seen": 234699792, "step": 108650 }, { "epoch": 17.725122349102772, "grad_norm": 0.006847497075796127, "learning_rate": 3.8900890477338856e-05, "loss": 0.0021, "num_input_tokens_seen": 234710736, "step": 108655 }, { "epoch": 17.725938009787928, "grad_norm": 0.010414250195026398, "learning_rate": 3.887336866489666e-05, "loss": 0.0019, "num_input_tokens_seen": 234722000, "step": 108660 }, { "epoch": 17.726753670473084, "grad_norm": 0.009203067980706692, "learning_rate": 3.884585619777048e-05, "loss": 0.0024, "num_input_tokens_seen": 234733328, "step": 108665 }, { "epoch": 17.72756933115824, "grad_norm": 0.005820513237267733, "learning_rate": 3.881835307651816e-05, "loss": 0.0017, "num_input_tokens_seen": 234743792, "step": 108670 }, { "epoch": 17.72838499184339, "grad_norm": 0.0019853932317346334, "learning_rate": 3.879085930169685e-05, "loss": 0.0011, "num_input_tokens_seen": 234755440, "step": 108675 }, { "epoch": 17.729200652528547, "grad_norm": 0.01240463275462389, "learning_rate": 3.8763374873863886e-05, "loss": 0.0056, "num_input_tokens_seen": 234765776, "step": 108680 }, { "epoch": 17.730016313213703, "grad_norm": 0.06792809069156647, "learning_rate": 3.873589979357633e-05, "loss": 0.0013, "num_input_tokens_seen": 234777008, "step": 108685 }, { "epoch": 17.73083197389886, "grad_norm": 0.01229359582066536, "learning_rate": 3.870843406139085e-05, "loss": 0.0009, "num_input_tokens_seen": 234787152, "step": 108690 }, { "epoch": 17.731647634584014, "grad_norm": 0.00027565364143811166, "learning_rate": 3.868097767786416e-05, "loss": 0.002, "num_input_tokens_seen": 234799472, "step": 108695 }, { "epoch": 17.732463295269167, "grad_norm": 0.015476987697184086, "learning_rate": 3.86535306435527e-05, "loss": 0.0017, "num_input_tokens_seen": 234809296, "step": 108700 }, { "epoch": 17.733278955954322, "grad_norm": 0.0027482211589813232, "learning_rate": 3.8626092959012706e-05, "loss": 0.0013, "num_input_tokens_seen": 234821296, "step": 108705 }, { "epoch": 17.734094616639478, "grad_norm": 0.001372481812722981, "learning_rate": 3.8598664624800215e-05, "loss": 0.0004, "num_input_tokens_seen": 234831792, "step": 108710 }, { "epoch": 17.734910277324634, "grad_norm": 0.008024675771594048, "learning_rate": 3.857124564147113e-05, "loss": 0.0012, "num_input_tokens_seen": 234843408, "step": 108715 }, { "epoch": 17.73572593800979, "grad_norm": 0.0030358731746673584, "learning_rate": 3.8543836009581115e-05, "loss": 0.0024, "num_input_tokens_seen": 234854960, "step": 108720 }, { "epoch": 17.73654159869494, "grad_norm": 0.018493857234716415, "learning_rate": 3.851643572968566e-05, "loss": 0.0041, "num_input_tokens_seen": 234866224, "step": 108725 }, { "epoch": 17.737357259380097, "grad_norm": 0.006060306448489428, "learning_rate": 3.848904480234006e-05, "loss": 0.0251, "num_input_tokens_seen": 234875760, "step": 108730 }, { "epoch": 17.738172920065253, "grad_norm": 0.008324912749230862, "learning_rate": 3.846166322809941e-05, "loss": 0.0051, "num_input_tokens_seen": 234886768, "step": 108735 }, { "epoch": 17.73898858075041, "grad_norm": 0.00037973938742652535, "learning_rate": 3.8434291007518665e-05, "loss": 0.0008, "num_input_tokens_seen": 234897392, "step": 108740 }, { "epoch": 17.739804241435564, "grad_norm": 0.0335724800825119, "learning_rate": 3.8406928141152596e-05, "loss": 0.0017, "num_input_tokens_seen": 234909776, "step": 108745 }, { "epoch": 17.740619902120716, "grad_norm": 0.0029921771492809057, "learning_rate": 3.8379574629555656e-05, "loss": 0.0054, "num_input_tokens_seen": 234921360, "step": 108750 }, { "epoch": 17.741435562805872, "grad_norm": 0.007268199231475592, "learning_rate": 3.835223047328229e-05, "loss": 0.0017, "num_input_tokens_seen": 234931280, "step": 108755 }, { "epoch": 17.742251223491028, "grad_norm": 0.0002096295211231336, "learning_rate": 3.8324895672886554e-05, "loss": 0.1414, "num_input_tokens_seen": 234941584, "step": 108760 }, { "epoch": 17.743066884176184, "grad_norm": 0.16069717705249786, "learning_rate": 3.829757022892255e-05, "loss": 0.0029, "num_input_tokens_seen": 234951728, "step": 108765 }, { "epoch": 17.74388254486134, "grad_norm": 0.05204736813902855, "learning_rate": 3.827025414194385e-05, "loss": 0.0024, "num_input_tokens_seen": 234962000, "step": 108770 }, { "epoch": 17.74469820554649, "grad_norm": 0.004038558341562748, "learning_rate": 3.824294741250439e-05, "loss": 0.0009, "num_input_tokens_seen": 234973488, "step": 108775 }, { "epoch": 17.745513866231647, "grad_norm": 0.0019828190561383963, "learning_rate": 3.821565004115723e-05, "loss": 0.0013, "num_input_tokens_seen": 234984016, "step": 108780 }, { "epoch": 17.746329526916803, "grad_norm": 0.00056139484513551, "learning_rate": 3.8188362028455826e-05, "loss": 0.0027, "num_input_tokens_seen": 234994832, "step": 108785 }, { "epoch": 17.74714518760196, "grad_norm": 1.5413379669189453, "learning_rate": 3.8161083374953056e-05, "loss": 0.0312, "num_input_tokens_seen": 235004816, "step": 108790 }, { "epoch": 17.747960848287114, "grad_norm": 0.026222562417387962, "learning_rate": 3.8133814081201866e-05, "loss": 0.0024, "num_input_tokens_seen": 235015504, "step": 108795 }, { "epoch": 17.748776508972266, "grad_norm": 0.0037302477285265923, "learning_rate": 3.810655414775482e-05, "loss": 0.0466, "num_input_tokens_seen": 235027728, "step": 108800 }, { "epoch": 17.749592169657422, "grad_norm": 0.00023018640058580786, "learning_rate": 3.807930357516448e-05, "loss": 0.0081, "num_input_tokens_seen": 235039344, "step": 108805 }, { "epoch": 17.750407830342578, "grad_norm": 0.004767855163663626, "learning_rate": 3.8052062363982957e-05, "loss": 0.0023, "num_input_tokens_seen": 235050480, "step": 108810 }, { "epoch": 17.751223491027734, "grad_norm": 0.004195967223495245, "learning_rate": 3.8024830514762465e-05, "loss": 0.0009, "num_input_tokens_seen": 235062960, "step": 108815 }, { "epoch": 17.752039151712886, "grad_norm": 0.05753675475716591, "learning_rate": 3.79976080280548e-05, "loss": 0.0172, "num_input_tokens_seen": 235074416, "step": 108820 }, { "epoch": 17.75285481239804, "grad_norm": 0.0006173241999931633, "learning_rate": 3.7970394904411733e-05, "loss": 0.0008, "num_input_tokens_seen": 235084112, "step": 108825 }, { "epoch": 17.753670473083197, "grad_norm": 0.694864809513092, "learning_rate": 3.7943191144384716e-05, "loss": 0.012, "num_input_tokens_seen": 235094128, "step": 108830 }, { "epoch": 17.754486133768353, "grad_norm": 0.09449499845504761, "learning_rate": 3.7915996748525086e-05, "loss": 0.0036, "num_input_tokens_seen": 235106320, "step": 108835 }, { "epoch": 17.75530179445351, "grad_norm": 0.003336785826832056, "learning_rate": 3.788881171738401e-05, "loss": 0.0031, "num_input_tokens_seen": 235117584, "step": 108840 }, { "epoch": 17.75611745513866, "grad_norm": 0.002545825904235244, "learning_rate": 3.7861636051512385e-05, "loss": 0.0159, "num_input_tokens_seen": 235128528, "step": 108845 }, { "epoch": 17.756933115823816, "grad_norm": 0.0005718961474485695, "learning_rate": 3.783446975146099e-05, "loss": 0.0008, "num_input_tokens_seen": 235139984, "step": 108850 }, { "epoch": 17.757748776508972, "grad_norm": 0.05380915477871895, "learning_rate": 3.7807312817780325e-05, "loss": 0.0029, "num_input_tokens_seen": 235151408, "step": 108855 }, { "epoch": 17.758564437194128, "grad_norm": 0.010704193264245987, "learning_rate": 3.7780165251020794e-05, "loss": 0.0022, "num_input_tokens_seen": 235163056, "step": 108860 }, { "epoch": 17.759380097879284, "grad_norm": 0.0006450397195294499, "learning_rate": 3.7753027051732615e-05, "loss": 0.0101, "num_input_tokens_seen": 235174032, "step": 108865 }, { "epoch": 17.760195758564436, "grad_norm": 0.004813667386770248, "learning_rate": 3.772589822046568e-05, "loss": 0.0033, "num_input_tokens_seen": 235184848, "step": 108870 }, { "epoch": 17.76101141924959, "grad_norm": 0.0434638075530529, "learning_rate": 3.7698778757769944e-05, "loss": 0.002, "num_input_tokens_seen": 235195344, "step": 108875 }, { "epoch": 17.761827079934747, "grad_norm": 0.002826629439368844, "learning_rate": 3.767166866419486e-05, "loss": 0.0029, "num_input_tokens_seen": 235206064, "step": 108880 }, { "epoch": 17.762642740619903, "grad_norm": 0.0023013916797935963, "learning_rate": 3.764456794028992e-05, "loss": 0.001, "num_input_tokens_seen": 235215632, "step": 108885 }, { "epoch": 17.76345840130506, "grad_norm": 0.00613722950220108, "learning_rate": 3.7617476586604304e-05, "loss": 0.0009, "num_input_tokens_seen": 235226192, "step": 108890 }, { "epoch": 17.76427406199021, "grad_norm": 0.0007096104673109949, "learning_rate": 3.759039460368724e-05, "loss": 0.0742, "num_input_tokens_seen": 235236464, "step": 108895 }, { "epoch": 17.765089722675366, "grad_norm": 0.0005393307656049728, "learning_rate": 3.756332199208728e-05, "loss": 0.0034, "num_input_tokens_seen": 235246704, "step": 108900 }, { "epoch": 17.765905383360522, "grad_norm": 0.9690297842025757, "learning_rate": 3.753625875235345e-05, "loss": 0.1084, "num_input_tokens_seen": 235257008, "step": 108905 }, { "epoch": 17.766721044045678, "grad_norm": 0.0005110432975925505, "learning_rate": 3.750920488503379e-05, "loss": 0.0134, "num_input_tokens_seen": 235268976, "step": 108910 }, { "epoch": 17.767536704730833, "grad_norm": 0.00060313317226246, "learning_rate": 3.7482160390676866e-05, "loss": 0.0049, "num_input_tokens_seen": 235278768, "step": 108915 }, { "epoch": 17.768352365415986, "grad_norm": 0.019515041261911392, "learning_rate": 3.745512526983075e-05, "loss": 0.0024, "num_input_tokens_seen": 235288784, "step": 108920 }, { "epoch": 17.76916802610114, "grad_norm": 0.13256117701530457, "learning_rate": 3.7428099523043325e-05, "loss": 0.1224, "num_input_tokens_seen": 235299280, "step": 108925 }, { "epoch": 17.769983686786297, "grad_norm": 0.0040601822547614574, "learning_rate": 3.7401083150862216e-05, "loss": 0.0041, "num_input_tokens_seen": 235310032, "step": 108930 }, { "epoch": 17.770799347471453, "grad_norm": 0.008540820330381393, "learning_rate": 3.7374076153835033e-05, "loss": 0.0053, "num_input_tokens_seen": 235319952, "step": 108935 }, { "epoch": 17.77161500815661, "grad_norm": 0.07201741635799408, "learning_rate": 3.734707853250907e-05, "loss": 0.0031, "num_input_tokens_seen": 235331504, "step": 108940 }, { "epoch": 17.77243066884176, "grad_norm": 0.00032880945946089923, "learning_rate": 3.73200902874315e-05, "loss": 0.004, "num_input_tokens_seen": 235341488, "step": 108945 }, { "epoch": 17.773246329526916, "grad_norm": 0.011850826442241669, "learning_rate": 3.729311141914926e-05, "loss": 0.0014, "num_input_tokens_seen": 235352592, "step": 108950 }, { "epoch": 17.774061990212072, "grad_norm": 0.004222301300615072, "learning_rate": 3.72661419282091e-05, "loss": 0.005, "num_input_tokens_seen": 235362224, "step": 108955 }, { "epoch": 17.774877650897228, "grad_norm": 0.014178330078721046, "learning_rate": 3.723918181515756e-05, "loss": 0.0013, "num_input_tokens_seen": 235372688, "step": 108960 }, { "epoch": 17.775693311582383, "grad_norm": 0.0010670581832528114, "learning_rate": 3.721223108054106e-05, "loss": 0.0039, "num_input_tokens_seen": 235383440, "step": 108965 }, { "epoch": 17.776508972267536, "grad_norm": 0.026346096768975258, "learning_rate": 3.7185289724905814e-05, "loss": 0.0012, "num_input_tokens_seen": 235394640, "step": 108970 }, { "epoch": 17.77732463295269, "grad_norm": 0.025496676564216614, "learning_rate": 3.7158357748797775e-05, "loss": 0.0028, "num_input_tokens_seen": 235406736, "step": 108975 }, { "epoch": 17.778140293637847, "grad_norm": 0.043573055416345596, "learning_rate": 3.7131435152762735e-05, "loss": 0.0013, "num_input_tokens_seen": 235415600, "step": 108980 }, { "epoch": 17.778955954323003, "grad_norm": 0.26204147934913635, "learning_rate": 3.710452193734643e-05, "loss": 0.005, "num_input_tokens_seen": 235425776, "step": 108985 }, { "epoch": 17.77977161500816, "grad_norm": 0.0021411122288554907, "learning_rate": 3.707761810309418e-05, "loss": 0.036, "num_input_tokens_seen": 235436752, "step": 108990 }, { "epoch": 17.78058727569331, "grad_norm": 0.0380295105278492, "learning_rate": 3.705072365055112e-05, "loss": 0.0039, "num_input_tokens_seen": 235446608, "step": 108995 }, { "epoch": 17.781402936378466, "grad_norm": 0.02980238012969494, "learning_rate": 3.7023838580262706e-05, "loss": 0.003, "num_input_tokens_seen": 235457264, "step": 109000 }, { "epoch": 17.782218597063622, "grad_norm": 0.010661580599844456, "learning_rate": 3.699696289277327e-05, "loss": 0.028, "num_input_tokens_seen": 235468784, "step": 109005 }, { "epoch": 17.783034257748778, "grad_norm": 0.009770001284778118, "learning_rate": 3.697009658862793e-05, "loss": 0.0009, "num_input_tokens_seen": 235479952, "step": 109010 }, { "epoch": 17.78384991843393, "grad_norm": 0.00064946518978104, "learning_rate": 3.694323966837088e-05, "loss": 0.0006, "num_input_tokens_seen": 235491664, "step": 109015 }, { "epoch": 17.784665579119086, "grad_norm": 0.010038640350103378, "learning_rate": 3.6916392132546605e-05, "loss": 0.0081, "num_input_tokens_seen": 235502416, "step": 109020 }, { "epoch": 17.78548123980424, "grad_norm": 0.0002301902131875977, "learning_rate": 3.6889553981698966e-05, "loss": 0.001, "num_input_tokens_seen": 235514032, "step": 109025 }, { "epoch": 17.786296900489397, "grad_norm": 0.010864865966141224, "learning_rate": 3.6862725216372185e-05, "loss": 0.0027, "num_input_tokens_seen": 235525680, "step": 109030 }, { "epoch": 17.787112561174553, "grad_norm": 0.046079590916633606, "learning_rate": 3.683590583710961e-05, "loss": 0.0014, "num_input_tokens_seen": 235535056, "step": 109035 }, { "epoch": 17.787928221859705, "grad_norm": 0.0027483527082949877, "learning_rate": 3.6809095844455134e-05, "loss": 0.003, "num_input_tokens_seen": 235545968, "step": 109040 }, { "epoch": 17.78874388254486, "grad_norm": 0.023552676662802696, "learning_rate": 3.678229523895177e-05, "loss": 0.0016, "num_input_tokens_seen": 235555600, "step": 109045 }, { "epoch": 17.789559543230016, "grad_norm": 0.0004201144038233906, "learning_rate": 3.675550402114303e-05, "loss": 0.0011, "num_input_tokens_seen": 235566576, "step": 109050 }, { "epoch": 17.790375203915172, "grad_norm": 0.018458297476172447, "learning_rate": 3.6728722191571476e-05, "loss": 0.0007, "num_input_tokens_seen": 235576784, "step": 109055 }, { "epoch": 17.791190864600328, "grad_norm": 0.007056164089590311, "learning_rate": 3.670194975078017e-05, "loss": 0.0011, "num_input_tokens_seen": 235588400, "step": 109060 }, { "epoch": 17.79200652528548, "grad_norm": 0.009251178242266178, "learning_rate": 3.667518669931158e-05, "loss": 0.001, "num_input_tokens_seen": 235599024, "step": 109065 }, { "epoch": 17.792822185970635, "grad_norm": 0.003141851397231221, "learning_rate": 3.6648433037708094e-05, "loss": 0.0038, "num_input_tokens_seen": 235608944, "step": 109070 }, { "epoch": 17.79363784665579, "grad_norm": 0.00041024110396392643, "learning_rate": 3.66216887665119e-05, "loss": 0.0029, "num_input_tokens_seen": 235618896, "step": 109075 }, { "epoch": 17.794453507340947, "grad_norm": 0.001330662053078413, "learning_rate": 3.659495388626505e-05, "loss": 0.0007, "num_input_tokens_seen": 235629616, "step": 109080 }, { "epoch": 17.795269168026103, "grad_norm": 0.018950335681438446, "learning_rate": 3.6568228397509286e-05, "loss": 0.0013, "num_input_tokens_seen": 235641008, "step": 109085 }, { "epoch": 17.796084828711255, "grad_norm": 0.044273439794778824, "learning_rate": 3.654151230078628e-05, "loss": 0.0111, "num_input_tokens_seen": 235651408, "step": 109090 }, { "epoch": 17.79690048939641, "grad_norm": 0.0021813130006194115, "learning_rate": 3.6514805596637504e-05, "loss": 0.0005, "num_input_tokens_seen": 235661648, "step": 109095 }, { "epoch": 17.797716150081566, "grad_norm": 0.013074532151222229, "learning_rate": 3.648810828560417e-05, "loss": 0.0008, "num_input_tokens_seen": 235672880, "step": 109100 }, { "epoch": 17.798531810766722, "grad_norm": 0.002100384095683694, "learning_rate": 3.6461420368227304e-05, "loss": 0.1017, "num_input_tokens_seen": 235683408, "step": 109105 }, { "epoch": 17.799347471451878, "grad_norm": 0.0026194609235972166, "learning_rate": 3.643474184504775e-05, "loss": 0.0029, "num_input_tokens_seen": 235694608, "step": 109110 }, { "epoch": 17.80016313213703, "grad_norm": 0.04093625396490097, "learning_rate": 3.6408072716606344e-05, "loss": 0.0027, "num_input_tokens_seen": 235703984, "step": 109115 }, { "epoch": 17.800978792822185, "grad_norm": 0.016903575509786606, "learning_rate": 3.6381412983443277e-05, "loss": 0.0063, "num_input_tokens_seen": 235714704, "step": 109120 }, { "epoch": 17.80179445350734, "grad_norm": 0.012498315423727036, "learning_rate": 3.635476264609922e-05, "loss": 0.0025, "num_input_tokens_seen": 235726800, "step": 109125 }, { "epoch": 17.802610114192497, "grad_norm": 0.01264413632452488, "learning_rate": 3.6328121705113905e-05, "loss": 0.0032, "num_input_tokens_seen": 235738864, "step": 109130 }, { "epoch": 17.803425774877653, "grad_norm": 0.8477015495300293, "learning_rate": 3.6301490161027574e-05, "loss": 0.0125, "num_input_tokens_seen": 235750288, "step": 109135 }, { "epoch": 17.804241435562805, "grad_norm": 0.01968962326645851, "learning_rate": 3.6274868014379624e-05, "loss": 0.0011, "num_input_tokens_seen": 235761488, "step": 109140 }, { "epoch": 17.80505709624796, "grad_norm": 0.02571706660091877, "learning_rate": 3.6248255265709906e-05, "loss": 0.0053, "num_input_tokens_seen": 235772400, "step": 109145 }, { "epoch": 17.805872756933116, "grad_norm": 0.001857051276601851, "learning_rate": 3.6221651915557484e-05, "loss": 0.015, "num_input_tokens_seen": 235783376, "step": 109150 }, { "epoch": 17.806688417618272, "grad_norm": 0.0020788402762264013, "learning_rate": 3.6195057964461764e-05, "loss": 0.0126, "num_input_tokens_seen": 235794736, "step": 109155 }, { "epoch": 17.807504078303424, "grad_norm": 0.0018391057383269072, "learning_rate": 3.616847341296137e-05, "loss": 0.0012, "num_input_tokens_seen": 235805104, "step": 109160 }, { "epoch": 17.80831973898858, "grad_norm": 0.0004060390347149223, "learning_rate": 3.6141898261595475e-05, "loss": 0.0049, "num_input_tokens_seen": 235816912, "step": 109165 }, { "epoch": 17.809135399673735, "grad_norm": 0.021962953731417656, "learning_rate": 3.611533251090232e-05, "loss": 0.0012, "num_input_tokens_seen": 235826992, "step": 109170 }, { "epoch": 17.80995106035889, "grad_norm": 0.013397028669714928, "learning_rate": 3.608877616142053e-05, "loss": 0.0025, "num_input_tokens_seen": 235837136, "step": 109175 }, { "epoch": 17.810766721044047, "grad_norm": 0.0003482665924821049, "learning_rate": 3.606222921368807e-05, "loss": 0.0022, "num_input_tokens_seen": 235847536, "step": 109180 }, { "epoch": 17.8115823817292, "grad_norm": 0.03608010709285736, "learning_rate": 3.603569166824327e-05, "loss": 0.039, "num_input_tokens_seen": 235857712, "step": 109185 }, { "epoch": 17.812398042414355, "grad_norm": 0.010204904712736607, "learning_rate": 3.600916352562356e-05, "loss": 0.0075, "num_input_tokens_seen": 235866704, "step": 109190 }, { "epoch": 17.81321370309951, "grad_norm": 0.0035822775680571795, "learning_rate": 3.598264478636698e-05, "loss": 0.0047, "num_input_tokens_seen": 235877840, "step": 109195 }, { "epoch": 17.814029363784666, "grad_norm": 0.0004829168610740453, "learning_rate": 3.595613545101056e-05, "loss": 0.0561, "num_input_tokens_seen": 235888368, "step": 109200 }, { "epoch": 17.81484502446982, "grad_norm": 0.0008832847815938294, "learning_rate": 3.592963552009182e-05, "loss": 0.0024, "num_input_tokens_seen": 235898224, "step": 109205 }, { "epoch": 17.815660685154974, "grad_norm": 0.03870289772748947, "learning_rate": 3.590314499414771e-05, "loss": 0.008, "num_input_tokens_seen": 235909584, "step": 109210 }, { "epoch": 17.81647634584013, "grad_norm": 0.0008303043432533741, "learning_rate": 3.587666387371513e-05, "loss": 0.0045, "num_input_tokens_seen": 235921168, "step": 109215 }, { "epoch": 17.817292006525285, "grad_norm": 0.007308437488973141, "learning_rate": 3.585019215933072e-05, "loss": 0.0012, "num_input_tokens_seen": 235932240, "step": 109220 }, { "epoch": 17.81810766721044, "grad_norm": 0.0020328452810645103, "learning_rate": 3.5823729851530983e-05, "loss": 0.0539, "num_input_tokens_seen": 235943280, "step": 109225 }, { "epoch": 17.818923327895597, "grad_norm": 0.0017934415955096483, "learning_rate": 3.5797276950852276e-05, "loss": 0.0013, "num_input_tokens_seen": 235954192, "step": 109230 }, { "epoch": 17.81973898858075, "grad_norm": 0.019754432141780853, "learning_rate": 3.5770833457830554e-05, "loss": 0.0014, "num_input_tokens_seen": 235964784, "step": 109235 }, { "epoch": 17.820554649265905, "grad_norm": 0.007611890789121389, "learning_rate": 3.5744399373001834e-05, "loss": 0.0008, "num_input_tokens_seen": 235975536, "step": 109240 }, { "epoch": 17.82137030995106, "grad_norm": 0.0005911207990720868, "learning_rate": 3.57179746969018e-05, "loss": 0.0024, "num_input_tokens_seen": 235986576, "step": 109245 }, { "epoch": 17.822185970636216, "grad_norm": 0.0020840333309024572, "learning_rate": 3.569155943006602e-05, "loss": 0.0091, "num_input_tokens_seen": 235996944, "step": 109250 }, { "epoch": 17.82300163132137, "grad_norm": 0.017575936391949654, "learning_rate": 3.566515357302974e-05, "loss": 0.0009, "num_input_tokens_seen": 236008144, "step": 109255 }, { "epoch": 17.823817292006524, "grad_norm": 0.0008498613606207073, "learning_rate": 3.56387571263283e-05, "loss": 0.0003, "num_input_tokens_seen": 236019952, "step": 109260 }, { "epoch": 17.82463295269168, "grad_norm": 0.0718621090054512, "learning_rate": 3.561237009049639e-05, "loss": 0.0163, "num_input_tokens_seen": 236031568, "step": 109265 }, { "epoch": 17.825448613376835, "grad_norm": 0.01773866079747677, "learning_rate": 3.558599246606903e-05, "loss": 0.0035, "num_input_tokens_seen": 236042864, "step": 109270 }, { "epoch": 17.82626427406199, "grad_norm": 0.016890283674001694, "learning_rate": 3.555962425358056e-05, "loss": 0.0025, "num_input_tokens_seen": 236053296, "step": 109275 }, { "epoch": 17.827079934747147, "grad_norm": 0.001565889222547412, "learning_rate": 3.5533265453565664e-05, "loss": 0.0066, "num_input_tokens_seen": 236063760, "step": 109280 }, { "epoch": 17.8278955954323, "grad_norm": 0.012458628974854946, "learning_rate": 3.55069160665582e-05, "loss": 0.0056, "num_input_tokens_seen": 236075472, "step": 109285 }, { "epoch": 17.828711256117455, "grad_norm": 0.019188063219189644, "learning_rate": 3.5480576093092466e-05, "loss": 0.0016, "num_input_tokens_seen": 236086384, "step": 109290 }, { "epoch": 17.82952691680261, "grad_norm": 0.0013830027310177684, "learning_rate": 3.545424553370202e-05, "loss": 0.001, "num_input_tokens_seen": 236096720, "step": 109295 }, { "epoch": 17.830342577487766, "grad_norm": 0.0006741559482179582, "learning_rate": 3.5427924388920727e-05, "loss": 0.0002, "num_input_tokens_seen": 236107696, "step": 109300 }, { "epoch": 17.83115823817292, "grad_norm": 0.000454758177511394, "learning_rate": 3.540161265928177e-05, "loss": 0.0059, "num_input_tokens_seen": 236118576, "step": 109305 }, { "epoch": 17.831973898858074, "grad_norm": 0.0011603111634030938, "learning_rate": 3.537531034531855e-05, "loss": 0.0707, "num_input_tokens_seen": 236129136, "step": 109310 }, { "epoch": 17.83278955954323, "grad_norm": 0.004742736462503672, "learning_rate": 3.5349017447564135e-05, "loss": 0.0006, "num_input_tokens_seen": 236139952, "step": 109315 }, { "epoch": 17.833605220228385, "grad_norm": 0.013719492591917515, "learning_rate": 3.532273396655128e-05, "loss": 0.0009, "num_input_tokens_seen": 236150352, "step": 109320 }, { "epoch": 17.83442088091354, "grad_norm": 0.0017408907879143953, "learning_rate": 3.5296459902812775e-05, "loss": 0.0018, "num_input_tokens_seen": 236160464, "step": 109325 }, { "epoch": 17.835236541598697, "grad_norm": 0.0012018510606139898, "learning_rate": 3.527019525688097e-05, "loss": 0.0024, "num_input_tokens_seen": 236170800, "step": 109330 }, { "epoch": 17.83605220228385, "grad_norm": 0.0053267451003193855, "learning_rate": 3.524394002928821e-05, "loss": 0.0007, "num_input_tokens_seen": 236181680, "step": 109335 }, { "epoch": 17.836867862969005, "grad_norm": 0.00016377547581214458, "learning_rate": 3.5217694220566644e-05, "loss": 0.0009, "num_input_tokens_seen": 236191952, "step": 109340 }, { "epoch": 17.83768352365416, "grad_norm": 0.006869807373732328, "learning_rate": 3.5191457831248054e-05, "loss": 0.0715, "num_input_tokens_seen": 236201936, "step": 109345 }, { "epoch": 17.838499184339316, "grad_norm": 0.020623216405510902, "learning_rate": 3.516523086186429e-05, "loss": 0.0021, "num_input_tokens_seen": 236212272, "step": 109350 }, { "epoch": 17.839314845024468, "grad_norm": 0.0009061881573870778, "learning_rate": 3.513901331294678e-05, "loss": 0.0013, "num_input_tokens_seen": 236221712, "step": 109355 }, { "epoch": 17.840130505709624, "grad_norm": 0.0009150461410172284, "learning_rate": 3.5112805185026853e-05, "loss": 0.0019, "num_input_tokens_seen": 236232400, "step": 109360 }, { "epoch": 17.84094616639478, "grad_norm": 0.02719375491142273, "learning_rate": 3.5086606478635706e-05, "loss": 0.0024, "num_input_tokens_seen": 236242512, "step": 109365 }, { "epoch": 17.841761827079935, "grad_norm": 0.0005360327195376158, "learning_rate": 3.506041719430425e-05, "loss": 0.0006, "num_input_tokens_seen": 236252752, "step": 109370 }, { "epoch": 17.84257748776509, "grad_norm": 0.012039005756378174, "learning_rate": 3.503423733256328e-05, "loss": 0.0101, "num_input_tokens_seen": 236263344, "step": 109375 }, { "epoch": 17.843393148450243, "grad_norm": 0.002144909929484129, "learning_rate": 3.500806689394337e-05, "loss": 0.0043, "num_input_tokens_seen": 236274704, "step": 109380 }, { "epoch": 17.8442088091354, "grad_norm": 0.0027891851495951414, "learning_rate": 3.4981905878974815e-05, "loss": 0.0008, "num_input_tokens_seen": 236284944, "step": 109385 }, { "epoch": 17.845024469820554, "grad_norm": 0.004484755452722311, "learning_rate": 3.495575428818787e-05, "loss": 0.0008, "num_input_tokens_seen": 236295856, "step": 109390 }, { "epoch": 17.84584013050571, "grad_norm": 0.5081759691238403, "learning_rate": 3.492961212211249e-05, "loss": 0.0177, "num_input_tokens_seen": 236306512, "step": 109395 }, { "epoch": 17.846655791190866, "grad_norm": 0.0007591186440549791, "learning_rate": 3.490347938127847e-05, "loss": 0.0008, "num_input_tokens_seen": 236317872, "step": 109400 }, { "epoch": 17.847471451876018, "grad_norm": 0.03884272277355194, "learning_rate": 3.4877356066215614e-05, "loss": 0.0669, "num_input_tokens_seen": 236327536, "step": 109405 }, { "epoch": 17.848287112561174, "grad_norm": 0.0006161820492707193, "learning_rate": 3.4851242177453e-05, "loss": 0.0009, "num_input_tokens_seen": 236339408, "step": 109410 }, { "epoch": 17.84910277324633, "grad_norm": 0.013513866811990738, "learning_rate": 3.482513771552021e-05, "loss": 0.0023, "num_input_tokens_seen": 236349904, "step": 109415 }, { "epoch": 17.849918433931485, "grad_norm": 0.009950819425284863, "learning_rate": 3.4799042680945966e-05, "loss": 0.0016, "num_input_tokens_seen": 236361296, "step": 109420 }, { "epoch": 17.85073409461664, "grad_norm": 0.0011644375044852495, "learning_rate": 3.477295707425937e-05, "loss": 0.0005, "num_input_tokens_seen": 236370928, "step": 109425 }, { "epoch": 17.851549755301793, "grad_norm": 0.0018306487472727895, "learning_rate": 3.474688089598893e-05, "loss": 0.0017, "num_input_tokens_seen": 236381200, "step": 109430 }, { "epoch": 17.85236541598695, "grad_norm": 0.0012890893267467618, "learning_rate": 3.4720814146663226e-05, "loss": 0.0017, "num_input_tokens_seen": 236391760, "step": 109435 }, { "epoch": 17.853181076672104, "grad_norm": 0.014044544659554958, "learning_rate": 3.469475682681045e-05, "loss": 0.0018, "num_input_tokens_seen": 236403472, "step": 109440 }, { "epoch": 17.85399673735726, "grad_norm": 0.00026541019906289876, "learning_rate": 3.466870893695867e-05, "loss": 0.0007, "num_input_tokens_seen": 236415088, "step": 109445 }, { "epoch": 17.854812398042416, "grad_norm": 0.001360461232252419, "learning_rate": 3.4642670477635866e-05, "loss": 0.0008, "num_input_tokens_seen": 236427216, "step": 109450 }, { "epoch": 17.855628058727568, "grad_norm": 0.018865276128053665, "learning_rate": 3.4616641449369656e-05, "loss": 0.0022, "num_input_tokens_seen": 236437232, "step": 109455 }, { "epoch": 17.856443719412724, "grad_norm": 0.0013094799360260367, "learning_rate": 3.459062185268763e-05, "loss": 0.001, "num_input_tokens_seen": 236447056, "step": 109460 }, { "epoch": 17.85725938009788, "grad_norm": 0.20267651975154877, "learning_rate": 3.456461168811703e-05, "loss": 0.0058, "num_input_tokens_seen": 236459120, "step": 109465 }, { "epoch": 17.858075040783035, "grad_norm": 0.009335266426205635, "learning_rate": 3.4538610956185044e-05, "loss": 0.001, "num_input_tokens_seen": 236471568, "step": 109470 }, { "epoch": 17.85889070146819, "grad_norm": 0.0011765094241127372, "learning_rate": 3.451261965741859e-05, "loss": 0.0025, "num_input_tokens_seen": 236483024, "step": 109475 }, { "epoch": 17.859706362153343, "grad_norm": 0.06980552524328232, "learning_rate": 3.44866377923444e-05, "loss": 0.0036, "num_input_tokens_seen": 236495248, "step": 109480 }, { "epoch": 17.8605220228385, "grad_norm": 0.0011967658065259457, "learning_rate": 3.446066536148901e-05, "loss": 0.0018, "num_input_tokens_seen": 236505296, "step": 109485 }, { "epoch": 17.861337683523654, "grad_norm": 0.004047077614814043, "learning_rate": 3.4434702365378825e-05, "loss": 0.0078, "num_input_tokens_seen": 236516272, "step": 109490 }, { "epoch": 17.86215334420881, "grad_norm": 0.002524849260225892, "learning_rate": 3.4408748804540034e-05, "loss": 0.0021, "num_input_tokens_seen": 236526160, "step": 109495 }, { "epoch": 17.862969004893966, "grad_norm": 0.003883121768012643, "learning_rate": 3.4382804679498616e-05, "loss": 0.001, "num_input_tokens_seen": 236535696, "step": 109500 }, { "epoch": 17.863784665579118, "grad_norm": 0.0491781048476696, "learning_rate": 3.4356869990780305e-05, "loss": 0.0019, "num_input_tokens_seen": 236547152, "step": 109505 }, { "epoch": 17.864600326264274, "grad_norm": 0.005752094089984894, "learning_rate": 3.4330944738910744e-05, "loss": 0.0015, "num_input_tokens_seen": 236559088, "step": 109510 }, { "epoch": 17.86541598694943, "grad_norm": 0.008952261880040169, "learning_rate": 3.430502892441528e-05, "loss": 0.0656, "num_input_tokens_seen": 236569744, "step": 109515 }, { "epoch": 17.866231647634585, "grad_norm": 0.07735848426818848, "learning_rate": 3.427912254781923e-05, "loss": 0.0036, "num_input_tokens_seen": 236580432, "step": 109520 }, { "epoch": 17.86704730831974, "grad_norm": 0.01697462424635887, "learning_rate": 3.425322560964761e-05, "loss": 0.001, "num_input_tokens_seen": 236590832, "step": 109525 }, { "epoch": 17.867862969004893, "grad_norm": 0.0002120243152603507, "learning_rate": 3.422733811042506e-05, "loss": 0.0006, "num_input_tokens_seen": 236602192, "step": 109530 }, { "epoch": 17.86867862969005, "grad_norm": 0.012320012785494328, "learning_rate": 3.420146005067659e-05, "loss": 0.001, "num_input_tokens_seen": 236614512, "step": 109535 }, { "epoch": 17.869494290375204, "grad_norm": 0.010592760518193245, "learning_rate": 3.4175591430926244e-05, "loss": 0.0064, "num_input_tokens_seen": 236624272, "step": 109540 }, { "epoch": 17.87030995106036, "grad_norm": 0.02042955532670021, "learning_rate": 3.414973225169854e-05, "loss": 0.0042, "num_input_tokens_seen": 236635152, "step": 109545 }, { "epoch": 17.871125611745512, "grad_norm": 0.008258404210209846, "learning_rate": 3.412388251351756e-05, "loss": 0.0009, "num_input_tokens_seen": 236646256, "step": 109550 }, { "epoch": 17.871941272430668, "grad_norm": 0.0008917743107303977, "learning_rate": 3.4098042216907045e-05, "loss": 0.001, "num_input_tokens_seen": 236657520, "step": 109555 }, { "epoch": 17.872756933115824, "grad_norm": 0.05271517485380173, "learning_rate": 3.4072211362390746e-05, "loss": 0.0018, "num_input_tokens_seen": 236668816, "step": 109560 }, { "epoch": 17.87357259380098, "grad_norm": 0.01361636072397232, "learning_rate": 3.40463899504922e-05, "loss": 0.0016, "num_input_tokens_seen": 236679280, "step": 109565 }, { "epoch": 17.874388254486135, "grad_norm": 0.0656086653470993, "learning_rate": 3.402057798173463e-05, "loss": 0.0023, "num_input_tokens_seen": 236689424, "step": 109570 }, { "epoch": 17.875203915171287, "grad_norm": 0.000795271247625351, "learning_rate": 3.39947754566412e-05, "loss": 0.0068, "num_input_tokens_seen": 236699856, "step": 109575 }, { "epoch": 17.876019575856443, "grad_norm": 0.003363175317645073, "learning_rate": 3.3968982375734813e-05, "loss": 0.0022, "num_input_tokens_seen": 236711280, "step": 109580 }, { "epoch": 17.8768352365416, "grad_norm": 0.004550011362880468, "learning_rate": 3.394319873953816e-05, "loss": 0.0006, "num_input_tokens_seen": 236723024, "step": 109585 }, { "epoch": 17.877650897226754, "grad_norm": 0.003007990773767233, "learning_rate": 3.391742454857388e-05, "loss": 0.0046, "num_input_tokens_seen": 236733072, "step": 109590 }, { "epoch": 17.87846655791191, "grad_norm": 0.4620771110057831, "learning_rate": 3.3891659803364225e-05, "loss": 0.1076, "num_input_tokens_seen": 236743056, "step": 109595 }, { "epoch": 17.879282218597062, "grad_norm": 0.0010317835258319974, "learning_rate": 3.386590450443139e-05, "loss": 0.0013, "num_input_tokens_seen": 236754512, "step": 109600 }, { "epoch": 17.880097879282218, "grad_norm": 0.019852880388498306, "learning_rate": 3.3840158652297335e-05, "loss": 0.0009, "num_input_tokens_seen": 236765936, "step": 109605 }, { "epoch": 17.880913539967374, "grad_norm": 0.02316543459892273, "learning_rate": 3.381442224748382e-05, "loss": 0.0012, "num_input_tokens_seen": 236776720, "step": 109610 }, { "epoch": 17.88172920065253, "grad_norm": 0.0006642960361205041, "learning_rate": 3.378869529051243e-05, "loss": 0.0014, "num_input_tokens_seen": 236787696, "step": 109615 }, { "epoch": 17.882544861337685, "grad_norm": 0.07310860604047775, "learning_rate": 3.376297778190457e-05, "loss": 0.0035, "num_input_tokens_seen": 236799056, "step": 109620 }, { "epoch": 17.883360522022837, "grad_norm": 0.010377427563071251, "learning_rate": 3.373726972218144e-05, "loss": 0.0061, "num_input_tokens_seen": 236810576, "step": 109625 }, { "epoch": 17.884176182707993, "grad_norm": 0.005838080309331417, "learning_rate": 3.3711571111864014e-05, "loss": 0.0058, "num_input_tokens_seen": 236821264, "step": 109630 }, { "epoch": 17.88499184339315, "grad_norm": 0.003536843927577138, "learning_rate": 3.3685881951473096e-05, "loss": 0.0004, "num_input_tokens_seen": 236833328, "step": 109635 }, { "epoch": 17.885807504078304, "grad_norm": 0.006238589994609356, "learning_rate": 3.366020224152949e-05, "loss": 0.0006, "num_input_tokens_seen": 236843600, "step": 109640 }, { "epoch": 17.88662316476346, "grad_norm": 0.0026695330161601305, "learning_rate": 3.363453198255328e-05, "loss": 0.0009, "num_input_tokens_seen": 236854800, "step": 109645 }, { "epoch": 17.887438825448612, "grad_norm": 0.055036984384059906, "learning_rate": 3.360887117506506e-05, "loss": 0.0027, "num_input_tokens_seen": 236865456, "step": 109650 }, { "epoch": 17.888254486133768, "grad_norm": 0.0004414636641740799, "learning_rate": 3.358321981958462e-05, "loss": 0.001, "num_input_tokens_seen": 236876208, "step": 109655 }, { "epoch": 17.889070146818923, "grad_norm": 0.0011676463764160872, "learning_rate": 3.3557577916632055e-05, "loss": 0.0017, "num_input_tokens_seen": 236887536, "step": 109660 }, { "epoch": 17.88988580750408, "grad_norm": 0.0007017211173661053, "learning_rate": 3.353194546672672e-05, "loss": 0.0031, "num_input_tokens_seen": 236898032, "step": 109665 }, { "epoch": 17.890701468189235, "grad_norm": 0.0006231152801774442, "learning_rate": 3.3506322470388426e-05, "loss": 0.0114, "num_input_tokens_seen": 236908656, "step": 109670 }, { "epoch": 17.891517128874387, "grad_norm": 0.02516918070614338, "learning_rate": 3.3480708928136204e-05, "loss": 0.0015, "num_input_tokens_seen": 236920400, "step": 109675 }, { "epoch": 17.892332789559543, "grad_norm": 0.0003355523804202676, "learning_rate": 3.34551048404893e-05, "loss": 0.0023, "num_input_tokens_seen": 236931024, "step": 109680 }, { "epoch": 17.8931484502447, "grad_norm": 0.014823941513895988, "learning_rate": 3.342951020796647e-05, "loss": 0.0019, "num_input_tokens_seen": 236940976, "step": 109685 }, { "epoch": 17.893964110929854, "grad_norm": 0.0024333603214472532, "learning_rate": 3.3403925031086525e-05, "loss": 0.0012, "num_input_tokens_seen": 236951984, "step": 109690 }, { "epoch": 17.894779771615006, "grad_norm": 0.051741719245910645, "learning_rate": 3.337834931036798e-05, "loss": 0.007, "num_input_tokens_seen": 236963472, "step": 109695 }, { "epoch": 17.895595432300162, "grad_norm": 0.025518298149108887, "learning_rate": 3.335278304632916e-05, "loss": 0.0028, "num_input_tokens_seen": 236973264, "step": 109700 }, { "epoch": 17.896411092985318, "grad_norm": 0.00023219654394779354, "learning_rate": 3.332722623948814e-05, "loss": 0.0019, "num_input_tokens_seen": 236983440, "step": 109705 }, { "epoch": 17.897226753670473, "grad_norm": 0.0038019700441509485, "learning_rate": 3.330167889036295e-05, "loss": 0.0011, "num_input_tokens_seen": 236994928, "step": 109710 }, { "epoch": 17.89804241435563, "grad_norm": 0.0009693879983387887, "learning_rate": 3.327614099947124e-05, "loss": 0.0029, "num_input_tokens_seen": 237006224, "step": 109715 }, { "epoch": 17.898858075040785, "grad_norm": 0.0016091925790533423, "learning_rate": 3.325061256733058e-05, "loss": 0.0039, "num_input_tokens_seen": 237017488, "step": 109720 }, { "epoch": 17.899673735725937, "grad_norm": 0.07085609436035156, "learning_rate": 3.3225093594458465e-05, "loss": 0.0029, "num_input_tokens_seen": 237027696, "step": 109725 }, { "epoch": 17.900489396411093, "grad_norm": 0.00461050309240818, "learning_rate": 3.319958408137192e-05, "loss": 0.0038, "num_input_tokens_seen": 237038896, "step": 109730 }, { "epoch": 17.90130505709625, "grad_norm": 0.005827105604112148, "learning_rate": 3.317408402858796e-05, "loss": 0.1704, "num_input_tokens_seen": 237049040, "step": 109735 }, { "epoch": 17.902120717781404, "grad_norm": 0.015067823231220245, "learning_rate": 3.314859343662335e-05, "loss": 0.0012, "num_input_tokens_seen": 237060048, "step": 109740 }, { "epoch": 17.902936378466556, "grad_norm": 0.0008944774162955582, "learning_rate": 3.312311230599491e-05, "loss": 0.0031, "num_input_tokens_seen": 237071472, "step": 109745 }, { "epoch": 17.903752039151712, "grad_norm": 0.00048781235818751156, "learning_rate": 3.3097640637218654e-05, "loss": 0.001, "num_input_tokens_seen": 237082896, "step": 109750 }, { "epoch": 17.904567699836868, "grad_norm": 0.07903767377138138, "learning_rate": 3.307217843081123e-05, "loss": 0.0026, "num_input_tokens_seen": 237093776, "step": 109755 }, { "epoch": 17.905383360522023, "grad_norm": 0.00035915974876843393, "learning_rate": 3.3046725687288285e-05, "loss": 0.0022, "num_input_tokens_seen": 237103440, "step": 109760 }, { "epoch": 17.90619902120718, "grad_norm": 0.002234160201624036, "learning_rate": 3.302128240716595e-05, "loss": 0.0088, "num_input_tokens_seen": 237113008, "step": 109765 }, { "epoch": 17.90701468189233, "grad_norm": 0.03307786211371422, "learning_rate": 3.299584859095961e-05, "loss": 0.0023, "num_input_tokens_seen": 237124016, "step": 109770 }, { "epoch": 17.907830342577487, "grad_norm": 0.0002506078453734517, "learning_rate": 3.297042423918495e-05, "loss": 0.0031, "num_input_tokens_seen": 237135888, "step": 109775 }, { "epoch": 17.908646003262643, "grad_norm": 0.0012812531786039472, "learning_rate": 3.2945009352357e-05, "loss": 0.0009, "num_input_tokens_seen": 237145968, "step": 109780 }, { "epoch": 17.9094616639478, "grad_norm": 0.0002897421072702855, "learning_rate": 3.291960393099108e-05, "loss": 0.0003, "num_input_tokens_seen": 237155024, "step": 109785 }, { "epoch": 17.910277324632954, "grad_norm": 0.0034674883354455233, "learning_rate": 3.289420797560172e-05, "loss": 0.0017, "num_input_tokens_seen": 237165616, "step": 109790 }, { "epoch": 17.911092985318106, "grad_norm": 0.018525205552577972, "learning_rate": 3.2868821486704003e-05, "loss": 0.007, "num_input_tokens_seen": 237177200, "step": 109795 }, { "epoch": 17.911908646003262, "grad_norm": 0.007207350339740515, "learning_rate": 3.284344446481208e-05, "loss": 0.0257, "num_input_tokens_seen": 237188592, "step": 109800 }, { "epoch": 17.912724306688418, "grad_norm": 0.0070750233717262745, "learning_rate": 3.2818076910440476e-05, "loss": 0.0028, "num_input_tokens_seen": 237198320, "step": 109805 }, { "epoch": 17.913539967373573, "grad_norm": 0.00534399040043354, "learning_rate": 3.279271882410312e-05, "loss": 0.001, "num_input_tokens_seen": 237209616, "step": 109810 }, { "epoch": 17.91435562805873, "grad_norm": 0.0017620498547330499, "learning_rate": 3.27673702063141e-05, "loss": 0.0007, "num_input_tokens_seen": 237220720, "step": 109815 }, { "epoch": 17.91517128874388, "grad_norm": 0.002240038476884365, "learning_rate": 3.274203105758694e-05, "loss": 0.0023, "num_input_tokens_seen": 237231440, "step": 109820 }, { "epoch": 17.915986949429037, "grad_norm": 0.0004176609800197184, "learning_rate": 3.2716701378435355e-05, "loss": 0.0003, "num_input_tokens_seen": 237242640, "step": 109825 }, { "epoch": 17.916802610114193, "grad_norm": 0.004200051072984934, "learning_rate": 3.269138116937259e-05, "loss": 0.0016, "num_input_tokens_seen": 237253744, "step": 109830 }, { "epoch": 17.91761827079935, "grad_norm": 0.00025301595451310277, "learning_rate": 3.2666070430911796e-05, "loss": 0.0011, "num_input_tokens_seen": 237265168, "step": 109835 }, { "epoch": 17.918433931484504, "grad_norm": 0.0005692046834155917, "learning_rate": 3.264076916356601e-05, "loss": 0.0013, "num_input_tokens_seen": 237276336, "step": 109840 }, { "epoch": 17.919249592169656, "grad_norm": 0.004916089586913586, "learning_rate": 3.2615477367847866e-05, "loss": 0.0014, "num_input_tokens_seen": 237288784, "step": 109845 }, { "epoch": 17.920065252854812, "grad_norm": 0.001998396823182702, "learning_rate": 3.2590195044269965e-05, "loss": 0.0039, "num_input_tokens_seen": 237300112, "step": 109850 }, { "epoch": 17.920880913539968, "grad_norm": 0.002172433538362384, "learning_rate": 3.256492219334478e-05, "loss": 0.0048, "num_input_tokens_seen": 237311248, "step": 109855 }, { "epoch": 17.921696574225123, "grad_norm": 0.022449221462011337, "learning_rate": 3.2539658815584404e-05, "loss": 0.0008, "num_input_tokens_seen": 237321552, "step": 109860 }, { "epoch": 17.92251223491028, "grad_norm": 0.003850628389045596, "learning_rate": 3.2514404911500814e-05, "loss": 0.0006, "num_input_tokens_seen": 237331248, "step": 109865 }, { "epoch": 17.92332789559543, "grad_norm": 0.46473076939582825, "learning_rate": 3.248916048160588e-05, "loss": 0.028, "num_input_tokens_seen": 237342096, "step": 109870 }, { "epoch": 17.924143556280587, "grad_norm": 0.0005388594581745565, "learning_rate": 3.246392552641125e-05, "loss": 0.0047, "num_input_tokens_seen": 237352048, "step": 109875 }, { "epoch": 17.924959216965743, "grad_norm": 0.0011852516327053308, "learning_rate": 3.2438700046428185e-05, "loss": 0.0022, "num_input_tokens_seen": 237362992, "step": 109880 }, { "epoch": 17.9257748776509, "grad_norm": 0.0035580755211412907, "learning_rate": 3.2413484042167984e-05, "loss": 0.0362, "num_input_tokens_seen": 237373392, "step": 109885 }, { "epoch": 17.92659053833605, "grad_norm": 0.0026270966045558453, "learning_rate": 3.2388277514141864e-05, "loss": 0.0005, "num_input_tokens_seen": 237384432, "step": 109890 }, { "epoch": 17.927406199021206, "grad_norm": 0.04227229207754135, "learning_rate": 3.236308046286035e-05, "loss": 0.0021, "num_input_tokens_seen": 237395344, "step": 109895 }, { "epoch": 17.928221859706362, "grad_norm": 0.194888174533844, "learning_rate": 3.2337892888834375e-05, "loss": 0.0175, "num_input_tokens_seen": 237406192, "step": 109900 }, { "epoch": 17.929037520391518, "grad_norm": 0.000787916244007647, "learning_rate": 3.231271479257414e-05, "loss": 0.0025, "num_input_tokens_seen": 237416880, "step": 109905 }, { "epoch": 17.929853181076673, "grad_norm": 0.00029855401953682303, "learning_rate": 3.228754617459023e-05, "loss": 0.0002, "num_input_tokens_seen": 237427440, "step": 109910 }, { "epoch": 17.930668841761825, "grad_norm": 0.01982448808848858, "learning_rate": 3.2262387035392305e-05, "loss": 0.0045, "num_input_tokens_seen": 237437328, "step": 109915 }, { "epoch": 17.93148450244698, "grad_norm": 0.019642792642116547, "learning_rate": 3.2237237375490666e-05, "loss": 0.0016, "num_input_tokens_seen": 237448048, "step": 109920 }, { "epoch": 17.932300163132137, "grad_norm": 0.0037098608445376158, "learning_rate": 3.221209719539469e-05, "loss": 0.0048, "num_input_tokens_seen": 237459376, "step": 109925 }, { "epoch": 17.933115823817293, "grad_norm": 0.0006825768505223095, "learning_rate": 3.218696649561409e-05, "loss": 0.1397, "num_input_tokens_seen": 237470384, "step": 109930 }, { "epoch": 17.93393148450245, "grad_norm": 0.0034533455036580563, "learning_rate": 3.2161845276658e-05, "loss": 0.0005, "num_input_tokens_seen": 237480784, "step": 109935 }, { "epoch": 17.9347471451876, "grad_norm": 0.00120734260417521, "learning_rate": 3.213673353903568e-05, "loss": 0.0007, "num_input_tokens_seen": 237490864, "step": 109940 }, { "epoch": 17.935562805872756, "grad_norm": 0.009501025080680847, "learning_rate": 3.211163128325589e-05, "loss": 0.0009, "num_input_tokens_seen": 237502352, "step": 109945 }, { "epoch": 17.936378466557912, "grad_norm": 0.0702584832906723, "learning_rate": 3.208653850982746e-05, "loss": 0.0028, "num_input_tokens_seen": 237512432, "step": 109950 }, { "epoch": 17.937194127243067, "grad_norm": 0.0038338962476700544, "learning_rate": 3.206145521925896e-05, "loss": 0.0004, "num_input_tokens_seen": 237521744, "step": 109955 }, { "epoch": 17.938009787928223, "grad_norm": 0.16808141767978668, "learning_rate": 3.2036381412058725e-05, "loss": 0.0044, "num_input_tokens_seen": 237531920, "step": 109960 }, { "epoch": 17.938825448613375, "grad_norm": 0.006209705490618944, "learning_rate": 3.2011317088734836e-05, "loss": 0.0006, "num_input_tokens_seen": 237542640, "step": 109965 }, { "epoch": 17.93964110929853, "grad_norm": 0.0027591967955231667, "learning_rate": 3.1986262249795286e-05, "loss": 0.0048, "num_input_tokens_seen": 237552976, "step": 109970 }, { "epoch": 17.940456769983687, "grad_norm": 0.10435988754034042, "learning_rate": 3.196121689574782e-05, "loss": 0.0028, "num_input_tokens_seen": 237563408, "step": 109975 }, { "epoch": 17.941272430668842, "grad_norm": 0.00376000814139843, "learning_rate": 3.193618102710011e-05, "loss": 0.0009, "num_input_tokens_seen": 237574000, "step": 109980 }, { "epoch": 17.942088091353998, "grad_norm": 0.6628631949424744, "learning_rate": 3.191115464435945e-05, "loss": 0.0781, "num_input_tokens_seen": 237584976, "step": 109985 }, { "epoch": 17.94290375203915, "grad_norm": 0.0003267792926635593, "learning_rate": 3.188613774803306e-05, "loss": 0.0003, "num_input_tokens_seen": 237595600, "step": 109990 }, { "epoch": 17.943719412724306, "grad_norm": 0.0014076323714107275, "learning_rate": 3.186113033862792e-05, "loss": 0.0007, "num_input_tokens_seen": 237606512, "step": 109995 }, { "epoch": 17.94453507340946, "grad_norm": 0.002126255538314581, "learning_rate": 3.1836132416650844e-05, "loss": 0.0062, "num_input_tokens_seen": 237616816, "step": 110000 }, { "epoch": 17.945350734094617, "grad_norm": 0.004582113586366177, "learning_rate": 3.1811143982608426e-05, "loss": 0.0024, "num_input_tokens_seen": 237627920, "step": 110005 }, { "epoch": 17.946166394779773, "grad_norm": 0.004174842499196529, "learning_rate": 3.1786165037007156e-05, "loss": 0.0006, "num_input_tokens_seen": 237638800, "step": 110010 }, { "epoch": 17.946982055464925, "grad_norm": 0.0007544121472164989, "learning_rate": 3.176119558035323e-05, "loss": 0.0008, "num_input_tokens_seen": 237648304, "step": 110015 }, { "epoch": 17.94779771615008, "grad_norm": 0.008781618438661098, "learning_rate": 3.173623561315259e-05, "loss": 0.0008, "num_input_tokens_seen": 237659984, "step": 110020 }, { "epoch": 17.948613376835237, "grad_norm": 0.007624879479408264, "learning_rate": 3.171128513591132e-05, "loss": 0.001, "num_input_tokens_seen": 237670832, "step": 110025 }, { "epoch": 17.949429037520392, "grad_norm": 0.00031044858042150736, "learning_rate": 3.1686344149134735e-05, "loss": 0.0034, "num_input_tokens_seen": 237682672, "step": 110030 }, { "epoch": 17.950244698205548, "grad_norm": 0.0014524642610922456, "learning_rate": 3.1661412653328724e-05, "loss": 0.0007, "num_input_tokens_seen": 237694000, "step": 110035 }, { "epoch": 17.9510603588907, "grad_norm": 0.0019769843202084303, "learning_rate": 3.1636490648998095e-05, "loss": 0.0026, "num_input_tokens_seen": 237704336, "step": 110040 }, { "epoch": 17.951876019575856, "grad_norm": 0.0006612506695091724, "learning_rate": 3.1611578136648336e-05, "loss": 0.0006, "num_input_tokens_seen": 237715216, "step": 110045 }, { "epoch": 17.95269168026101, "grad_norm": 0.003691247198730707, "learning_rate": 3.158667511678393e-05, "loss": 0.0004, "num_input_tokens_seen": 237725744, "step": 110050 }, { "epoch": 17.953507340946167, "grad_norm": 0.02169613167643547, "learning_rate": 3.156178158990991e-05, "loss": 0.0015, "num_input_tokens_seen": 237736688, "step": 110055 }, { "epoch": 17.954323001631323, "grad_norm": 0.004092794377356768, "learning_rate": 3.153689755653061e-05, "loss": 0.0095, "num_input_tokens_seen": 237746640, "step": 110060 }, { "epoch": 17.955138662316475, "grad_norm": 0.004916083998978138, "learning_rate": 3.151202301715034e-05, "loss": 0.0045, "num_input_tokens_seen": 237757488, "step": 110065 }, { "epoch": 17.95595432300163, "grad_norm": 0.005175785627216101, "learning_rate": 3.148715797227331e-05, "loss": 0.0021, "num_input_tokens_seen": 237768016, "step": 110070 }, { "epoch": 17.956769983686787, "grad_norm": 0.000567434064578265, "learning_rate": 3.1462302422403334e-05, "loss": 0.0006, "num_input_tokens_seen": 237779792, "step": 110075 }, { "epoch": 17.957585644371942, "grad_norm": 0.0003794727090280503, "learning_rate": 3.143745636804418e-05, "loss": 0.0146, "num_input_tokens_seen": 237790640, "step": 110080 }, { "epoch": 17.958401305057095, "grad_norm": 0.00449990713968873, "learning_rate": 3.14126198096994e-05, "loss": 0.0022, "num_input_tokens_seen": 237800240, "step": 110085 }, { "epoch": 17.95921696574225, "grad_norm": 0.0035965435672551394, "learning_rate": 3.138779274787235e-05, "loss": 0.0005, "num_input_tokens_seen": 237811216, "step": 110090 }, { "epoch": 17.960032626427406, "grad_norm": 0.0003991451230831444, "learning_rate": 3.136297518306614e-05, "loss": 0.0008, "num_input_tokens_seen": 237820528, "step": 110095 }, { "epoch": 17.96084828711256, "grad_norm": 0.003139512613415718, "learning_rate": 3.133816711578369e-05, "loss": 0.0007, "num_input_tokens_seen": 237831504, "step": 110100 }, { "epoch": 17.961663947797717, "grad_norm": 0.00021856573584955186, "learning_rate": 3.131336854652789e-05, "loss": 0.0894, "num_input_tokens_seen": 237842224, "step": 110105 }, { "epoch": 17.96247960848287, "grad_norm": 0.0007067588157951832, "learning_rate": 3.1288579475801215e-05, "loss": 0.0012, "num_input_tokens_seen": 237853680, "step": 110110 }, { "epoch": 17.963295269168025, "grad_norm": 0.057319898158311844, "learning_rate": 3.12637999041061e-05, "loss": 0.0043, "num_input_tokens_seen": 237863856, "step": 110115 }, { "epoch": 17.96411092985318, "grad_norm": 0.027158288285136223, "learning_rate": 3.123902983194471e-05, "loss": 0.0013, "num_input_tokens_seen": 237874800, "step": 110120 }, { "epoch": 17.964926590538337, "grad_norm": 0.007293624337762594, "learning_rate": 3.1214269259819014e-05, "loss": 0.0047, "num_input_tokens_seen": 237885616, "step": 110125 }, { "epoch": 17.965742251223492, "grad_norm": 0.00030968463397584856, "learning_rate": 3.11895181882309e-05, "loss": 0.0003, "num_input_tokens_seen": 237896784, "step": 110130 }, { "epoch": 17.966557911908644, "grad_norm": 0.044866591691970825, "learning_rate": 3.116477661768191e-05, "loss": 0.0017, "num_input_tokens_seen": 237906768, "step": 110135 }, { "epoch": 17.9673735725938, "grad_norm": 0.3906330466270447, "learning_rate": 3.1140044548673476e-05, "loss": 0.0076, "num_input_tokens_seen": 237917456, "step": 110140 }, { "epoch": 17.968189233278956, "grad_norm": 0.0008933874778449535, "learning_rate": 3.11153219817068e-05, "loss": 0.0012, "num_input_tokens_seen": 237928848, "step": 110145 }, { "epoch": 17.96900489396411, "grad_norm": 0.008592470549046993, "learning_rate": 3.109060891728299e-05, "loss": 0.0007, "num_input_tokens_seen": 237938960, "step": 110150 }, { "epoch": 17.969820554649267, "grad_norm": 0.008075353689491749, "learning_rate": 3.1065905355902865e-05, "loss": 0.0023, "num_input_tokens_seen": 237949680, "step": 110155 }, { "epoch": 17.97063621533442, "grad_norm": 0.005265532527118921, "learning_rate": 3.104121129806697e-05, "loss": 0.0007, "num_input_tokens_seen": 237961424, "step": 110160 }, { "epoch": 17.971451876019575, "grad_norm": 0.0018472732044756413, "learning_rate": 3.101652674427585e-05, "loss": 0.0005, "num_input_tokens_seen": 237973232, "step": 110165 }, { "epoch": 17.97226753670473, "grad_norm": 0.0158048328012228, "learning_rate": 3.0991851695029825e-05, "loss": 0.0064, "num_input_tokens_seen": 237985296, "step": 110170 }, { "epoch": 17.973083197389887, "grad_norm": 0.000548655865713954, "learning_rate": 3.0967186150828886e-05, "loss": 0.0014, "num_input_tokens_seen": 237994096, "step": 110175 }, { "epoch": 17.973898858075042, "grad_norm": 0.028464488685131073, "learning_rate": 3.0942530112172905e-05, "loss": 0.0023, "num_input_tokens_seen": 238005392, "step": 110180 }, { "epoch": 17.974714518760194, "grad_norm": 0.011310449801385403, "learning_rate": 3.0917883579561604e-05, "loss": 0.0013, "num_input_tokens_seen": 238017072, "step": 110185 }, { "epoch": 17.97553017944535, "grad_norm": 0.012373429723083973, "learning_rate": 3.0893246553494516e-05, "loss": 0.0015, "num_input_tokens_seen": 238028112, "step": 110190 }, { "epoch": 17.976345840130506, "grad_norm": 0.0010668218601495028, "learning_rate": 3.08686190344708e-05, "loss": 0.0271, "num_input_tokens_seen": 238040240, "step": 110195 }, { "epoch": 17.97716150081566, "grad_norm": 0.007802395615726709, "learning_rate": 3.084400102298973e-05, "loss": 0.0772, "num_input_tokens_seen": 238051408, "step": 110200 }, { "epoch": 17.977977161500817, "grad_norm": 0.011837205849587917, "learning_rate": 3.0819392519550125e-05, "loss": 0.001, "num_input_tokens_seen": 238062160, "step": 110205 }, { "epoch": 17.97879282218597, "grad_norm": 0.002454120898619294, "learning_rate": 3.079479352465076e-05, "loss": 0.0048, "num_input_tokens_seen": 238073520, "step": 110210 }, { "epoch": 17.979608482871125, "grad_norm": 0.0006663525127805769, "learning_rate": 3.077020403879005e-05, "loss": 0.0003, "num_input_tokens_seen": 238083952, "step": 110215 }, { "epoch": 17.98042414355628, "grad_norm": 0.0008392453892156482, "learning_rate": 3.07456240624665e-05, "loss": 0.0012, "num_input_tokens_seen": 238095120, "step": 110220 }, { "epoch": 17.981239804241437, "grad_norm": 0.008441498503088951, "learning_rate": 3.072105359617811e-05, "loss": 0.0011, "num_input_tokens_seen": 238105072, "step": 110225 }, { "epoch": 17.982055464926592, "grad_norm": 0.000342810177244246, "learning_rate": 3.0696492640422954e-05, "loss": 0.0007, "num_input_tokens_seen": 238114928, "step": 110230 }, { "epoch": 17.982871125611744, "grad_norm": 0.0003783302381634712, "learning_rate": 3.067194119569866e-05, "loss": 0.001, "num_input_tokens_seen": 238124944, "step": 110235 }, { "epoch": 17.9836867862969, "grad_norm": 0.16300013661384583, "learning_rate": 3.064739926250293e-05, "loss": 0.0079, "num_input_tokens_seen": 238135888, "step": 110240 }, { "epoch": 17.984502446982056, "grad_norm": 0.0008707176893949509, "learning_rate": 3.062286684133303e-05, "loss": 0.0005, "num_input_tokens_seen": 238146448, "step": 110245 }, { "epoch": 17.98531810766721, "grad_norm": 0.008672765456140041, "learning_rate": 3.059834393268618e-05, "loss": 0.0005, "num_input_tokens_seen": 238156976, "step": 110250 }, { "epoch": 17.986133768352367, "grad_norm": 0.018309568986296654, "learning_rate": 3.057383053705937e-05, "loss": 0.0082, "num_input_tokens_seen": 238168528, "step": 110255 }, { "epoch": 17.98694942903752, "grad_norm": 0.030882669612765312, "learning_rate": 3.054932665494936e-05, "loss": 0.0028, "num_input_tokens_seen": 238179952, "step": 110260 }, { "epoch": 17.987765089722675, "grad_norm": 0.0036772945895791054, "learning_rate": 3.052483228685282e-05, "loss": 0.0029, "num_input_tokens_seen": 238191312, "step": 110265 }, { "epoch": 17.98858075040783, "grad_norm": 0.008443798869848251, "learning_rate": 3.050034743326613e-05, "loss": 0.0008, "num_input_tokens_seen": 238203088, "step": 110270 }, { "epoch": 17.989396411092986, "grad_norm": 0.0029752785339951515, "learning_rate": 3.0475872094685443e-05, "loss": 0.0004, "num_input_tokens_seen": 238213968, "step": 110275 }, { "epoch": 17.99021207177814, "grad_norm": 0.005904734134674072, "learning_rate": 3.0451406271606974e-05, "loss": 0.0012, "num_input_tokens_seen": 238224752, "step": 110280 }, { "epoch": 17.991027732463294, "grad_norm": 0.0010160219389945269, "learning_rate": 3.0426949964526272e-05, "loss": 0.0028, "num_input_tokens_seen": 238235536, "step": 110285 }, { "epoch": 17.99184339314845, "grad_norm": 0.12286140024662018, "learning_rate": 3.0402503173939277e-05, "loss": 0.002, "num_input_tokens_seen": 238246448, "step": 110290 }, { "epoch": 17.992659053833606, "grad_norm": 0.002489682286977768, "learning_rate": 3.0378065900341146e-05, "loss": 0.0006, "num_input_tokens_seen": 238257744, "step": 110295 }, { "epoch": 17.99347471451876, "grad_norm": 0.0010223733261227608, "learning_rate": 3.035363814422737e-05, "loss": 0.0135, "num_input_tokens_seen": 238267952, "step": 110300 }, { "epoch": 17.994290375203914, "grad_norm": 0.00035545893479138613, "learning_rate": 3.0329219906092776e-05, "loss": 0.0017, "num_input_tokens_seen": 238278288, "step": 110305 }, { "epoch": 17.99510603588907, "grad_norm": 0.0034552181605249643, "learning_rate": 3.030481118643247e-05, "loss": 0.0009, "num_input_tokens_seen": 238288752, "step": 110310 }, { "epoch": 17.995921696574225, "grad_norm": 0.0017270646058022976, "learning_rate": 3.0280411985740995e-05, "loss": 0.0008, "num_input_tokens_seen": 238299600, "step": 110315 }, { "epoch": 17.99673735725938, "grad_norm": 0.0005409326404333115, "learning_rate": 3.0256022304512854e-05, "loss": 0.0037, "num_input_tokens_seen": 238310128, "step": 110320 }, { "epoch": 17.997553017944536, "grad_norm": 0.05676782503724098, "learning_rate": 3.023164214324231e-05, "loss": 0.0019, "num_input_tokens_seen": 238321808, "step": 110325 }, { "epoch": 17.99836867862969, "grad_norm": 0.0008575510582886636, "learning_rate": 3.0207271502423527e-05, "loss": 0.0053, "num_input_tokens_seen": 238332176, "step": 110330 }, { "epoch": 17.999184339314844, "grad_norm": 0.00040171988075599074, "learning_rate": 3.018291038255033e-05, "loss": 0.0031, "num_input_tokens_seen": 238343280, "step": 110335 }, { "epoch": 18.0, "grad_norm": 0.0528254434466362, "learning_rate": 3.0158558784116442e-05, "loss": 0.0617, "num_input_tokens_seen": 238352272, "step": 110340 }, { "epoch": 18.0, "eval_loss": 0.32154321670532227, "eval_runtime": 103.778, "eval_samples_per_second": 26.258, "eval_steps_per_second": 6.572, "num_input_tokens_seen": 238352272, "step": 110340 }, { "epoch": 18.000815660685156, "grad_norm": 0.0019651330076158047, "learning_rate": 3.0134216707615404e-05, "loss": 0.0008, "num_input_tokens_seen": 238364784, "step": 110345 }, { "epoch": 18.00163132137031, "grad_norm": 0.0010777993593364954, "learning_rate": 3.0109884153540545e-05, "loss": 0.0012, "num_input_tokens_seen": 238372880, "step": 110350 }, { "epoch": 18.002446982055464, "grad_norm": 0.021584536880254745, "learning_rate": 3.0085561122384974e-05, "loss": 0.0026, "num_input_tokens_seen": 238383504, "step": 110355 }, { "epoch": 18.00326264274062, "grad_norm": 0.004007100127637386, "learning_rate": 3.0061247614641684e-05, "loss": 0.0026, "num_input_tokens_seen": 238394192, "step": 110360 }, { "epoch": 18.004078303425775, "grad_norm": 0.0018758628284558654, "learning_rate": 3.0036943630803282e-05, "loss": 0.0006, "num_input_tokens_seen": 238405584, "step": 110365 }, { "epoch": 18.00489396411093, "grad_norm": 0.0010444171493873, "learning_rate": 3.0012649171362482e-05, "loss": 0.0006, "num_input_tokens_seen": 238417232, "step": 110370 }, { "epoch": 18.005709624796086, "grad_norm": 0.0003816418757196516, "learning_rate": 2.998836423681156e-05, "loss": 0.0297, "num_input_tokens_seen": 238429552, "step": 110375 }, { "epoch": 18.00652528548124, "grad_norm": 0.009662347845733166, "learning_rate": 2.9964088827642564e-05, "loss": 0.0078, "num_input_tokens_seen": 238440304, "step": 110380 }, { "epoch": 18.007340946166394, "grad_norm": 0.027501266449689865, "learning_rate": 2.993982294434777e-05, "loss": 0.0017, "num_input_tokens_seen": 238449552, "step": 110385 }, { "epoch": 18.00815660685155, "grad_norm": 0.033807143568992615, "learning_rate": 2.991556658741862e-05, "loss": 0.0016, "num_input_tokens_seen": 238460944, "step": 110390 }, { "epoch": 18.008972267536706, "grad_norm": 0.0020302990451455116, "learning_rate": 2.9891319757347047e-05, "loss": 0.0099, "num_input_tokens_seen": 238471568, "step": 110395 }, { "epoch": 18.00978792822186, "grad_norm": 0.0008874621125869453, "learning_rate": 2.986708245462405e-05, "loss": 0.0012, "num_input_tokens_seen": 238482128, "step": 110400 }, { "epoch": 18.010603588907014, "grad_norm": 0.0006465526530519128, "learning_rate": 2.984285467974124e-05, "loss": 0.0018, "num_input_tokens_seen": 238494288, "step": 110405 }, { "epoch": 18.01141924959217, "grad_norm": 0.014971431344747543, "learning_rate": 2.981863643318922e-05, "loss": 0.0009, "num_input_tokens_seen": 238505104, "step": 110410 }, { "epoch": 18.012234910277325, "grad_norm": 0.003481280989944935, "learning_rate": 2.979442771545915e-05, "loss": 0.0006, "num_input_tokens_seen": 238516144, "step": 110415 }, { "epoch": 18.01305057096248, "grad_norm": 0.007337215356528759, "learning_rate": 2.9770228527041364e-05, "loss": 0.0008, "num_input_tokens_seen": 238528496, "step": 110420 }, { "epoch": 18.013866231647636, "grad_norm": 0.01175409834831953, "learning_rate": 2.9746038868426584e-05, "loss": 0.0011, "num_input_tokens_seen": 238538448, "step": 110425 }, { "epoch": 18.01468189233279, "grad_norm": 0.0015376220690086484, "learning_rate": 2.9721858740104747e-05, "loss": 0.0004, "num_input_tokens_seen": 238548432, "step": 110430 }, { "epoch": 18.015497553017944, "grad_norm": 0.0022492543794214725, "learning_rate": 2.9697688142566127e-05, "loss": 0.0012, "num_input_tokens_seen": 238557456, "step": 110435 }, { "epoch": 18.0163132137031, "grad_norm": 0.012653055600821972, "learning_rate": 2.967352707630039e-05, "loss": 0.0024, "num_input_tokens_seen": 238568464, "step": 110440 }, { "epoch": 18.017128874388256, "grad_norm": 0.0003922838077414781, "learning_rate": 2.9649375541797418e-05, "loss": 0.0005, "num_input_tokens_seen": 238580464, "step": 110445 }, { "epoch": 18.017944535073408, "grad_norm": 0.001494093332439661, "learning_rate": 2.9625233539546326e-05, "loss": 0.0482, "num_input_tokens_seen": 238590864, "step": 110450 }, { "epoch": 18.018760195758563, "grad_norm": 0.005561790894716978, "learning_rate": 2.960110107003672e-05, "loss": 0.0007, "num_input_tokens_seen": 238603024, "step": 110455 }, { "epoch": 18.01957585644372, "grad_norm": 0.0008520457777194679, "learning_rate": 2.9576978133757536e-05, "loss": 0.0006, "num_input_tokens_seen": 238614320, "step": 110460 }, { "epoch": 18.020391517128875, "grad_norm": 0.7163333892822266, "learning_rate": 2.955286473119767e-05, "loss": 0.0741, "num_input_tokens_seen": 238624720, "step": 110465 }, { "epoch": 18.02120717781403, "grad_norm": 0.03220055624842644, "learning_rate": 2.9528760862845783e-05, "loss": 0.0014, "num_input_tokens_seen": 238636848, "step": 110470 }, { "epoch": 18.022022838499183, "grad_norm": 0.00888325646519661, "learning_rate": 2.9504666529190426e-05, "loss": 0.001, "num_input_tokens_seen": 238648336, "step": 110475 }, { "epoch": 18.02283849918434, "grad_norm": 0.007564366329461336, "learning_rate": 2.9480581730719825e-05, "loss": 0.0012, "num_input_tokens_seen": 238658224, "step": 110480 }, { "epoch": 18.023654159869494, "grad_norm": 0.0019379006698727608, "learning_rate": 2.945650646792214e-05, "loss": 0.0012, "num_input_tokens_seen": 238669744, "step": 110485 }, { "epoch": 18.02446982055465, "grad_norm": 0.0025463791098445654, "learning_rate": 2.9432440741285314e-05, "loss": 0.0005, "num_input_tokens_seen": 238680720, "step": 110490 }, { "epoch": 18.025285481239806, "grad_norm": 0.38154155015945435, "learning_rate": 2.940838455129696e-05, "loss": 0.0078, "num_input_tokens_seen": 238691504, "step": 110495 }, { "epoch": 18.026101141924958, "grad_norm": 0.044939037412405014, "learning_rate": 2.9384337898444747e-05, "loss": 0.0017, "num_input_tokens_seen": 238702160, "step": 110500 }, { "epoch": 18.026916802610113, "grad_norm": 0.0029059057123959064, "learning_rate": 2.9360300783215832e-05, "loss": 0.0011, "num_input_tokens_seen": 238712208, "step": 110505 }, { "epoch": 18.02773246329527, "grad_norm": 0.007650961168110371, "learning_rate": 2.9336273206097663e-05, "loss": 0.0041, "num_input_tokens_seen": 238721840, "step": 110510 }, { "epoch": 18.028548123980425, "grad_norm": 0.0004907246329821646, "learning_rate": 2.931225516757685e-05, "loss": 0.0018, "num_input_tokens_seen": 238732784, "step": 110515 }, { "epoch": 18.02936378466558, "grad_norm": 0.006268959492444992, "learning_rate": 2.9288246668140396e-05, "loss": 0.0015, "num_input_tokens_seen": 238743600, "step": 110520 }, { "epoch": 18.030179445350733, "grad_norm": 0.216169536113739, "learning_rate": 2.9264247708274628e-05, "loss": 0.004, "num_input_tokens_seen": 238754576, "step": 110525 }, { "epoch": 18.03099510603589, "grad_norm": 0.0017121587879955769, "learning_rate": 2.9240258288466215e-05, "loss": 0.0018, "num_input_tokens_seen": 238765680, "step": 110530 }, { "epoch": 18.031810766721044, "grad_norm": 0.0071411821991205215, "learning_rate": 2.921627840920099e-05, "loss": 0.0022, "num_input_tokens_seen": 238776464, "step": 110535 }, { "epoch": 18.0326264274062, "grad_norm": 0.03119852766394615, "learning_rate": 2.919230807096529e-05, "loss": 0.0019, "num_input_tokens_seen": 238787824, "step": 110540 }, { "epoch": 18.033442088091356, "grad_norm": 0.003736414248123765, "learning_rate": 2.916834727424461e-05, "loss": 0.0021, "num_input_tokens_seen": 238797872, "step": 110545 }, { "epoch": 18.034257748776508, "grad_norm": 0.002718925941735506, "learning_rate": 2.9144396019524788e-05, "loss": 0.0017, "num_input_tokens_seen": 238807248, "step": 110550 }, { "epoch": 18.035073409461663, "grad_norm": 0.008032168261706829, "learning_rate": 2.9120454307290933e-05, "loss": 0.0566, "num_input_tokens_seen": 238818256, "step": 110555 }, { "epoch": 18.03588907014682, "grad_norm": 0.027552763000130653, "learning_rate": 2.90965221380286e-05, "loss": 0.0014, "num_input_tokens_seen": 238828688, "step": 110560 }, { "epoch": 18.036704730831975, "grad_norm": 0.040653783828020096, "learning_rate": 2.9072599512222464e-05, "loss": 0.052, "num_input_tokens_seen": 238839696, "step": 110565 }, { "epoch": 18.03752039151713, "grad_norm": 0.008090752176940441, "learning_rate": 2.9048686430357685e-05, "loss": 0.0017, "num_input_tokens_seen": 238851440, "step": 110570 }, { "epoch": 18.038336052202283, "grad_norm": 0.0007834371645003557, "learning_rate": 2.9024782892918543e-05, "loss": 0.0021, "num_input_tokens_seen": 238862288, "step": 110575 }, { "epoch": 18.03915171288744, "grad_norm": 0.00212163757532835, "learning_rate": 2.9000888900389764e-05, "loss": 0.001, "num_input_tokens_seen": 238873776, "step": 110580 }, { "epoch": 18.039967373572594, "grad_norm": 0.0008154436945915222, "learning_rate": 2.8977004453255406e-05, "loss": 0.0022, "num_input_tokens_seen": 238884720, "step": 110585 }, { "epoch": 18.04078303425775, "grad_norm": 0.0016963942907750607, "learning_rate": 2.8953129551999634e-05, "loss": 0.0009, "num_input_tokens_seen": 238894320, "step": 110590 }, { "epoch": 18.041598694942905, "grad_norm": 0.011484961025416851, "learning_rate": 2.892926419710623e-05, "loss": 0.0019, "num_input_tokens_seen": 238903920, "step": 110595 }, { "epoch": 18.042414355628058, "grad_norm": 0.03104168362915516, "learning_rate": 2.8905408389058917e-05, "loss": 0.0031, "num_input_tokens_seen": 238914896, "step": 110600 }, { "epoch": 18.043230016313213, "grad_norm": 0.0025729406625032425, "learning_rate": 2.8881562128341088e-05, "loss": 0.0012, "num_input_tokens_seen": 238925552, "step": 110605 }, { "epoch": 18.04404567699837, "grad_norm": 0.002775913570076227, "learning_rate": 2.885772541543613e-05, "loss": 0.0008, "num_input_tokens_seen": 238936784, "step": 110610 }, { "epoch": 18.044861337683525, "grad_norm": 0.001294884947128594, "learning_rate": 2.8833898250826994e-05, "loss": 0.0014, "num_input_tokens_seen": 238948688, "step": 110615 }, { "epoch": 18.045676998368677, "grad_norm": 0.0018858517287299037, "learning_rate": 2.881008063499663e-05, "loss": 0.0007, "num_input_tokens_seen": 238959664, "step": 110620 }, { "epoch": 18.046492659053833, "grad_norm": 0.0010302024893462658, "learning_rate": 2.878627256842775e-05, "loss": 0.001, "num_input_tokens_seen": 238971440, "step": 110625 }, { "epoch": 18.04730831973899, "grad_norm": 0.002369890222325921, "learning_rate": 2.8762474051602816e-05, "loss": 0.0009, "num_input_tokens_seen": 238981552, "step": 110630 }, { "epoch": 18.048123980424144, "grad_norm": 0.0025272388011217117, "learning_rate": 2.8738685085004156e-05, "loss": 0.0014, "num_input_tokens_seen": 238993168, "step": 110635 }, { "epoch": 18.0489396411093, "grad_norm": 0.0013844823697581887, "learning_rate": 2.871490566911389e-05, "loss": 0.0011, "num_input_tokens_seen": 239004944, "step": 110640 }, { "epoch": 18.049755301794452, "grad_norm": 0.0002905686560552567, "learning_rate": 2.8691135804413905e-05, "loss": 0.0005, "num_input_tokens_seen": 239014320, "step": 110645 }, { "epoch": 18.050570962479608, "grad_norm": 0.012505102902650833, "learning_rate": 2.8667375491385928e-05, "loss": 0.0049, "num_input_tokens_seen": 239026064, "step": 110650 }, { "epoch": 18.051386623164763, "grad_norm": 0.0537576824426651, "learning_rate": 2.864362473051163e-05, "loss": 0.0023, "num_input_tokens_seen": 239037456, "step": 110655 }, { "epoch": 18.05220228384992, "grad_norm": 0.0007542030070908368, "learning_rate": 2.8619883522272072e-05, "loss": 0.0016, "num_input_tokens_seen": 239048976, "step": 110660 }, { "epoch": 18.053017944535075, "grad_norm": 0.0008729331311769783, "learning_rate": 2.85961518671487e-05, "loss": 0.0006, "num_input_tokens_seen": 239059216, "step": 110665 }, { "epoch": 18.053833605220227, "grad_norm": 0.007356339134275913, "learning_rate": 2.8572429765622243e-05, "loss": 0.0016, "num_input_tokens_seen": 239069648, "step": 110670 }, { "epoch": 18.054649265905383, "grad_norm": 0.006332451477646828, "learning_rate": 2.8548717218173647e-05, "loss": 0.0405, "num_input_tokens_seen": 239081744, "step": 110675 }, { "epoch": 18.05546492659054, "grad_norm": 0.17264924943447113, "learning_rate": 2.8525014225283195e-05, "loss": 0.0091, "num_input_tokens_seen": 239092144, "step": 110680 }, { "epoch": 18.056280587275694, "grad_norm": 0.006838109809905291, "learning_rate": 2.8501320787431673e-05, "loss": 0.0019, "num_input_tokens_seen": 239104016, "step": 110685 }, { "epoch": 18.05709624796085, "grad_norm": 0.008148097433149815, "learning_rate": 2.8477636905098802e-05, "loss": 0.0021, "num_input_tokens_seen": 239114576, "step": 110690 }, { "epoch": 18.057911908646002, "grad_norm": 0.0005130280624143779, "learning_rate": 2.845396257876487e-05, "loss": 0.0039, "num_input_tokens_seen": 239125264, "step": 110695 }, { "epoch": 18.058727569331158, "grad_norm": 0.17097468674182892, "learning_rate": 2.84302978089096e-05, "loss": 0.0107, "num_input_tokens_seen": 239136784, "step": 110700 }, { "epoch": 18.059543230016313, "grad_norm": 0.000584763940423727, "learning_rate": 2.840664259601261e-05, "loss": 0.0004, "num_input_tokens_seen": 239146480, "step": 110705 }, { "epoch": 18.06035889070147, "grad_norm": 0.036263592541217804, "learning_rate": 2.838299694055324e-05, "loss": 0.0317, "num_input_tokens_seen": 239157840, "step": 110710 }, { "epoch": 18.061174551386625, "grad_norm": 0.026984870433807373, "learning_rate": 2.835936084301072e-05, "loss": 0.0038, "num_input_tokens_seen": 239167152, "step": 110715 }, { "epoch": 18.061990212071777, "grad_norm": 0.06570431590080261, "learning_rate": 2.8335734303864047e-05, "loss": 0.0026, "num_input_tokens_seen": 239177648, "step": 110720 }, { "epoch": 18.062805872756933, "grad_norm": 0.004828798584640026, "learning_rate": 2.8312117323592125e-05, "loss": 0.0029, "num_input_tokens_seen": 239188464, "step": 110725 }, { "epoch": 18.063621533442088, "grad_norm": 0.000707502942532301, "learning_rate": 2.8288509902673454e-05, "loss": 0.0007, "num_input_tokens_seen": 239198896, "step": 110730 }, { "epoch": 18.064437194127244, "grad_norm": 0.0006841020658612251, "learning_rate": 2.8264912041586598e-05, "loss": 0.0017, "num_input_tokens_seen": 239210640, "step": 110735 }, { "epoch": 18.0652528548124, "grad_norm": 0.006858312990516424, "learning_rate": 2.8241323740809676e-05, "loss": 0.0015, "num_input_tokens_seen": 239221360, "step": 110740 }, { "epoch": 18.06606851549755, "grad_norm": 0.002047081710770726, "learning_rate": 2.821774500082086e-05, "loss": 0.0015, "num_input_tokens_seen": 239231856, "step": 110745 }, { "epoch": 18.066884176182707, "grad_norm": 0.001701177330687642, "learning_rate": 2.819417582209788e-05, "loss": 0.0015, "num_input_tokens_seen": 239242384, "step": 110750 }, { "epoch": 18.067699836867863, "grad_norm": 0.0004878344479948282, "learning_rate": 2.8170616205118516e-05, "loss": 0.0352, "num_input_tokens_seen": 239253072, "step": 110755 }, { "epoch": 18.06851549755302, "grad_norm": 0.0009230131399817765, "learning_rate": 2.8147066150360167e-05, "loss": 0.0023, "num_input_tokens_seen": 239263760, "step": 110760 }, { "epoch": 18.069331158238175, "grad_norm": 0.001374510582536459, "learning_rate": 2.8123525658300066e-05, "loss": 0.0307, "num_input_tokens_seen": 239274288, "step": 110765 }, { "epoch": 18.070146818923327, "grad_norm": 0.0009231427684426308, "learning_rate": 2.8099994729415377e-05, "loss": 0.108, "num_input_tokens_seen": 239285776, "step": 110770 }, { "epoch": 18.070962479608482, "grad_norm": 0.0004372471885289997, "learning_rate": 2.8076473364182897e-05, "loss": 0.0031, "num_input_tokens_seen": 239295920, "step": 110775 }, { "epoch": 18.071778140293638, "grad_norm": 0.0012657229090109468, "learning_rate": 2.8052961563079403e-05, "loss": 0.0009, "num_input_tokens_seen": 239306800, "step": 110780 }, { "epoch": 18.072593800978794, "grad_norm": 0.004266361240297556, "learning_rate": 2.8029459326581353e-05, "loss": 0.0014, "num_input_tokens_seen": 239317488, "step": 110785 }, { "epoch": 18.07340946166395, "grad_norm": 0.00956976879388094, "learning_rate": 2.8005966655165026e-05, "loss": 0.0014, "num_input_tokens_seen": 239327312, "step": 110790 }, { "epoch": 18.0742251223491, "grad_norm": 0.001034679007716477, "learning_rate": 2.7982483549306435e-05, "loss": 0.001, "num_input_tokens_seen": 239338320, "step": 110795 }, { "epoch": 18.075040783034257, "grad_norm": 0.0001628376339795068, "learning_rate": 2.795901000948181e-05, "loss": 0.0005, "num_input_tokens_seen": 239347952, "step": 110800 }, { "epoch": 18.075856443719413, "grad_norm": 0.0015392429195344448, "learning_rate": 2.7935546036166548e-05, "loss": 0.0013, "num_input_tokens_seen": 239358384, "step": 110805 }, { "epoch": 18.07667210440457, "grad_norm": 0.0050073969177901745, "learning_rate": 2.7912091629836324e-05, "loss": 0.0013, "num_input_tokens_seen": 239369296, "step": 110810 }, { "epoch": 18.07748776508972, "grad_norm": 0.0005766893737018108, "learning_rate": 2.7888646790966476e-05, "loss": 0.0057, "num_input_tokens_seen": 239379344, "step": 110815 }, { "epoch": 18.078303425774877, "grad_norm": 0.05712695047259331, "learning_rate": 2.786521152003213e-05, "loss": 0.0023, "num_input_tokens_seen": 239390096, "step": 110820 }, { "epoch": 18.079119086460032, "grad_norm": 0.0008328685071319342, "learning_rate": 2.784178581750818e-05, "loss": 0.0004, "num_input_tokens_seen": 239401328, "step": 110825 }, { "epoch": 18.079934747145188, "grad_norm": 0.00018631898274179548, "learning_rate": 2.781836968386947e-05, "loss": 0.0012, "num_input_tokens_seen": 239412688, "step": 110830 }, { "epoch": 18.080750407830344, "grad_norm": 0.0006286951247602701, "learning_rate": 2.7794963119590454e-05, "loss": 0.0008, "num_input_tokens_seen": 239422960, "step": 110835 }, { "epoch": 18.081566068515496, "grad_norm": 0.003879460971802473, "learning_rate": 2.7771566125145588e-05, "loss": 0.0028, "num_input_tokens_seen": 239434320, "step": 110840 }, { "epoch": 18.08238172920065, "grad_norm": 0.0002762196818366647, "learning_rate": 2.774817870100893e-05, "loss": 0.0009, "num_input_tokens_seen": 239445104, "step": 110845 }, { "epoch": 18.083197389885807, "grad_norm": 0.0018938088323920965, "learning_rate": 2.7724800847654608e-05, "loss": 0.0006, "num_input_tokens_seen": 239457584, "step": 110850 }, { "epoch": 18.084013050570963, "grad_norm": 0.0024898534175008535, "learning_rate": 2.7701432565556296e-05, "loss": 0.0011, "num_input_tokens_seen": 239469392, "step": 110855 }, { "epoch": 18.08482871125612, "grad_norm": 0.015188485383987427, "learning_rate": 2.767807385518756e-05, "loss": 0.0011, "num_input_tokens_seen": 239480176, "step": 110860 }, { "epoch": 18.08564437194127, "grad_norm": 0.00016453674470540136, "learning_rate": 2.765472471702185e-05, "loss": 0.0038, "num_input_tokens_seen": 239490928, "step": 110865 }, { "epoch": 18.086460032626427, "grad_norm": 0.004059888422489166, "learning_rate": 2.7631385151532405e-05, "loss": 0.0025, "num_input_tokens_seen": 239501200, "step": 110870 }, { "epoch": 18.087275693311582, "grad_norm": 0.3584325313568115, "learning_rate": 2.7608055159192125e-05, "loss": 0.0082, "num_input_tokens_seen": 239511856, "step": 110875 }, { "epoch": 18.088091353996738, "grad_norm": 0.009047990664839745, "learning_rate": 2.7584734740473905e-05, "loss": 0.0014, "num_input_tokens_seen": 239522736, "step": 110880 }, { "epoch": 18.088907014681894, "grad_norm": 0.0010313192615285516, "learning_rate": 2.756142389585037e-05, "loss": 0.0008, "num_input_tokens_seen": 239533744, "step": 110885 }, { "epoch": 18.089722675367046, "grad_norm": 0.01068951841443777, "learning_rate": 2.753812262579386e-05, "loss": 0.0104, "num_input_tokens_seen": 239543760, "step": 110890 }, { "epoch": 18.0905383360522, "grad_norm": 0.0006580300396308303, "learning_rate": 2.7514830930776667e-05, "loss": 0.0023, "num_input_tokens_seen": 239554544, "step": 110895 }, { "epoch": 18.091353996737357, "grad_norm": 0.00027455881354399025, "learning_rate": 2.749154881127086e-05, "loss": 0.0012, "num_input_tokens_seen": 239564880, "step": 110900 }, { "epoch": 18.092169657422513, "grad_norm": 0.004377785138785839, "learning_rate": 2.7468276267748172e-05, "loss": 0.0018, "num_input_tokens_seen": 239575696, "step": 110905 }, { "epoch": 18.09298531810767, "grad_norm": 0.004063542932271957, "learning_rate": 2.7445013300680333e-05, "loss": 0.0004, "num_input_tokens_seen": 239586384, "step": 110910 }, { "epoch": 18.09380097879282, "grad_norm": 0.002702921163290739, "learning_rate": 2.7421759910538745e-05, "loss": 0.0024, "num_input_tokens_seen": 239596688, "step": 110915 }, { "epoch": 18.094616639477977, "grad_norm": 0.001507585751824081, "learning_rate": 2.739851609779481e-05, "loss": 0.0009, "num_input_tokens_seen": 239607120, "step": 110920 }, { "epoch": 18.095432300163132, "grad_norm": 0.03425592556595802, "learning_rate": 2.737528186291932e-05, "loss": 0.0015, "num_input_tokens_seen": 239617840, "step": 110925 }, { "epoch": 18.096247960848288, "grad_norm": 0.011485468596220016, "learning_rate": 2.735205720638351e-05, "loss": 0.0007, "num_input_tokens_seen": 239628880, "step": 110930 }, { "epoch": 18.097063621533444, "grad_norm": 0.000180011527845636, "learning_rate": 2.732884212865766e-05, "loss": 0.0031, "num_input_tokens_seen": 239639280, "step": 110935 }, { "epoch": 18.097879282218596, "grad_norm": 0.0004578085499815643, "learning_rate": 2.730563663021257e-05, "loss": 0.0005, "num_input_tokens_seen": 239650704, "step": 110940 }, { "epoch": 18.09869494290375, "grad_norm": 0.02956857904791832, "learning_rate": 2.7282440711518363e-05, "loss": 0.0045, "num_input_tokens_seen": 239661776, "step": 110945 }, { "epoch": 18.099510603588907, "grad_norm": 0.003422102192416787, "learning_rate": 2.725925437304522e-05, "loss": 0.0006, "num_input_tokens_seen": 239674512, "step": 110950 }, { "epoch": 18.100326264274063, "grad_norm": 0.0030121582094579935, "learning_rate": 2.7236077615262976e-05, "loss": 0.0016, "num_input_tokens_seen": 239686352, "step": 110955 }, { "epoch": 18.10114192495922, "grad_norm": 0.00024480524007230997, "learning_rate": 2.721291043864138e-05, "loss": 0.0022, "num_input_tokens_seen": 239698000, "step": 110960 }, { "epoch": 18.10195758564437, "grad_norm": 0.0009636294562369585, "learning_rate": 2.7189752843649885e-05, "loss": 0.0035, "num_input_tokens_seen": 239708304, "step": 110965 }, { "epoch": 18.102773246329527, "grad_norm": 0.0029267354402691126, "learning_rate": 2.716660483075789e-05, "loss": 0.0039, "num_input_tokens_seen": 239719568, "step": 110970 }, { "epoch": 18.103588907014682, "grad_norm": 0.0013445314252749085, "learning_rate": 2.714346640043447e-05, "loss": 0.0054, "num_input_tokens_seen": 239731184, "step": 110975 }, { "epoch": 18.104404567699838, "grad_norm": 0.004225427284836769, "learning_rate": 2.7120337553148578e-05, "loss": 0.0093, "num_input_tokens_seen": 239741872, "step": 110980 }, { "epoch": 18.10522022838499, "grad_norm": 0.007663280237466097, "learning_rate": 2.7097218289368896e-05, "loss": 0.0012, "num_input_tokens_seen": 239754032, "step": 110985 }, { "epoch": 18.106035889070146, "grad_norm": 0.0012027625925838947, "learning_rate": 2.7074108609564053e-05, "loss": 0.0034, "num_input_tokens_seen": 239765488, "step": 110990 }, { "epoch": 18.1068515497553, "grad_norm": 0.0012016665423288941, "learning_rate": 2.7051008514202336e-05, "loss": 0.0049, "num_input_tokens_seen": 239775824, "step": 110995 }, { "epoch": 18.107667210440457, "grad_norm": 0.01750839501619339, "learning_rate": 2.7027918003751873e-05, "loss": 0.0166, "num_input_tokens_seen": 239786608, "step": 111000 }, { "epoch": 18.108482871125613, "grad_norm": 0.0005055178189650178, "learning_rate": 2.7004837078680678e-05, "loss": 0.0067, "num_input_tokens_seen": 239797200, "step": 111005 }, { "epoch": 18.109298531810765, "grad_norm": 0.0007495254976674914, "learning_rate": 2.698176573945654e-05, "loss": 0.0821, "num_input_tokens_seen": 239806928, "step": 111010 }, { "epoch": 18.11011419249592, "grad_norm": 0.0008099843980744481, "learning_rate": 2.695870398654693e-05, "loss": 0.0011, "num_input_tokens_seen": 239817232, "step": 111015 }, { "epoch": 18.110929853181077, "grad_norm": 0.0007534879259765148, "learning_rate": 2.693565182041924e-05, "loss": 0.0014, "num_input_tokens_seen": 239827952, "step": 111020 }, { "epoch": 18.111745513866232, "grad_norm": 0.017502669245004654, "learning_rate": 2.6912609241540818e-05, "loss": 0.0015, "num_input_tokens_seen": 239839248, "step": 111025 }, { "epoch": 18.112561174551388, "grad_norm": 0.003195826429873705, "learning_rate": 2.688957625037841e-05, "loss": 0.0006, "num_input_tokens_seen": 239849904, "step": 111030 }, { "epoch": 18.11337683523654, "grad_norm": 0.021487493067979813, "learning_rate": 2.6866552847399028e-05, "loss": 0.0011, "num_input_tokens_seen": 239860976, "step": 111035 }, { "epoch": 18.114192495921696, "grad_norm": 0.0024699419736862183, "learning_rate": 2.684353903306902e-05, "loss": 0.004, "num_input_tokens_seen": 239872016, "step": 111040 }, { "epoch": 18.11500815660685, "grad_norm": 0.0003210293361917138, "learning_rate": 2.6820534807855124e-05, "loss": 0.0013, "num_input_tokens_seen": 239882416, "step": 111045 }, { "epoch": 18.115823817292007, "grad_norm": 0.00045671319821849465, "learning_rate": 2.679754017222319e-05, "loss": 0.0025, "num_input_tokens_seen": 239893872, "step": 111050 }, { "epoch": 18.116639477977163, "grad_norm": 0.0006556420703418553, "learning_rate": 2.677455512663951e-05, "loss": 0.0008, "num_input_tokens_seen": 239904304, "step": 111055 }, { "epoch": 18.117455138662315, "grad_norm": 0.0008096517412923276, "learning_rate": 2.6751579671569715e-05, "loss": 0.001, "num_input_tokens_seen": 239914896, "step": 111060 }, { "epoch": 18.11827079934747, "grad_norm": 0.012913156300783157, "learning_rate": 2.6728613807479594e-05, "loss": 0.0055, "num_input_tokens_seen": 239927184, "step": 111065 }, { "epoch": 18.119086460032626, "grad_norm": 0.5544732213020325, "learning_rate": 2.6705657534834394e-05, "loss": 0.0975, "num_input_tokens_seen": 239937488, "step": 111070 }, { "epoch": 18.119902120717782, "grad_norm": 0.05267966538667679, "learning_rate": 2.6682710854099623e-05, "loss": 0.0438, "num_input_tokens_seen": 239948432, "step": 111075 }, { "epoch": 18.120717781402938, "grad_norm": 0.023334262892603874, "learning_rate": 2.6659773765740025e-05, "loss": 0.0025, "num_input_tokens_seen": 239958864, "step": 111080 }, { "epoch": 18.12153344208809, "grad_norm": 0.016666380688548088, "learning_rate": 2.6636846270220615e-05, "loss": 0.0018, "num_input_tokens_seen": 239968976, "step": 111085 }, { "epoch": 18.122349102773246, "grad_norm": 0.003353215055540204, "learning_rate": 2.661392836800608e-05, "loss": 0.0011, "num_input_tokens_seen": 239978960, "step": 111090 }, { "epoch": 18.1231647634584, "grad_norm": 0.006997089833021164, "learning_rate": 2.6591020059560766e-05, "loss": 0.0032, "num_input_tokens_seen": 239989008, "step": 111095 }, { "epoch": 18.123980424143557, "grad_norm": 0.005469950847327709, "learning_rate": 2.656812134534897e-05, "loss": 0.0005, "num_input_tokens_seen": 240000432, "step": 111100 }, { "epoch": 18.124796084828713, "grad_norm": 0.0023466164711862803, "learning_rate": 2.6545232225834825e-05, "loss": 0.0006, "num_input_tokens_seen": 240012272, "step": 111105 }, { "epoch": 18.125611745513865, "grad_norm": 0.0005659526796080172, "learning_rate": 2.6522352701482178e-05, "loss": 0.0041, "num_input_tokens_seen": 240022288, "step": 111110 }, { "epoch": 18.12642740619902, "grad_norm": 0.0014040175592526793, "learning_rate": 2.6499482772754714e-05, "loss": 0.0004, "num_input_tokens_seen": 240032144, "step": 111115 }, { "epoch": 18.127243066884176, "grad_norm": 0.06160569190979004, "learning_rate": 2.6476622440115894e-05, "loss": 0.0038, "num_input_tokens_seen": 240042192, "step": 111120 }, { "epoch": 18.128058727569332, "grad_norm": 0.0021232604049146175, "learning_rate": 2.6453771704029017e-05, "loss": 0.0003, "num_input_tokens_seen": 240052272, "step": 111125 }, { "epoch": 18.128874388254488, "grad_norm": 0.0018104122718796134, "learning_rate": 2.6430930564957213e-05, "loss": 0.0009, "num_input_tokens_seen": 240063632, "step": 111130 }, { "epoch": 18.12969004893964, "grad_norm": 0.0006452035158872604, "learning_rate": 2.6408099023363275e-05, "loss": 0.0013, "num_input_tokens_seen": 240073232, "step": 111135 }, { "epoch": 18.130505709624796, "grad_norm": 0.0019445134093984962, "learning_rate": 2.6385277079710113e-05, "loss": 0.0005, "num_input_tokens_seen": 240083344, "step": 111140 }, { "epoch": 18.13132137030995, "grad_norm": 0.0015937142306938767, "learning_rate": 2.6362464734460024e-05, "loss": 0.0012, "num_input_tokens_seen": 240093008, "step": 111145 }, { "epoch": 18.132137030995107, "grad_norm": 0.48695728182792664, "learning_rate": 2.633966198807558e-05, "loss": 0.0239, "num_input_tokens_seen": 240105200, "step": 111150 }, { "epoch": 18.13295269168026, "grad_norm": 0.0005318346084095538, "learning_rate": 2.631686884101864e-05, "loss": 0.0013, "num_input_tokens_seen": 240116720, "step": 111155 }, { "epoch": 18.133768352365415, "grad_norm": 0.01507630106061697, "learning_rate": 2.6294085293751435e-05, "loss": 0.0006, "num_input_tokens_seen": 240127984, "step": 111160 }, { "epoch": 18.13458401305057, "grad_norm": 0.002034904668107629, "learning_rate": 2.6271311346735326e-05, "loss": 0.0009, "num_input_tokens_seen": 240137872, "step": 111165 }, { "epoch": 18.135399673735726, "grad_norm": 0.0023250230588018894, "learning_rate": 2.624854700043222e-05, "loss": 0.0014, "num_input_tokens_seen": 240149456, "step": 111170 }, { "epoch": 18.136215334420882, "grad_norm": 0.0006091590621508658, "learning_rate": 2.6225792255303195e-05, "loss": 0.0004, "num_input_tokens_seen": 240159792, "step": 111175 }, { "epoch": 18.137030995106034, "grad_norm": 0.004074991215020418, "learning_rate": 2.6203047111809597e-05, "loss": 0.0004, "num_input_tokens_seen": 240169360, "step": 111180 }, { "epoch": 18.13784665579119, "grad_norm": 0.0071708871982991695, "learning_rate": 2.6180311570412174e-05, "loss": 0.0029, "num_input_tokens_seen": 240180528, "step": 111185 }, { "epoch": 18.138662316476346, "grad_norm": 0.0010574172483757138, "learning_rate": 2.6157585631572e-05, "loss": 0.0003, "num_input_tokens_seen": 240192016, "step": 111190 }, { "epoch": 18.1394779771615, "grad_norm": 0.001377054606564343, "learning_rate": 2.613486929574932e-05, "loss": 0.0016, "num_input_tokens_seen": 240202480, "step": 111195 }, { "epoch": 18.140293637846657, "grad_norm": 0.0010247458703815937, "learning_rate": 2.611216256340476e-05, "loss": 0.0007, "num_input_tokens_seen": 240212912, "step": 111200 }, { "epoch": 18.14110929853181, "grad_norm": 0.006079982966184616, "learning_rate": 2.6089465434998296e-05, "loss": 0.0018, "num_input_tokens_seen": 240224752, "step": 111205 }, { "epoch": 18.141924959216965, "grad_norm": 0.04028640687465668, "learning_rate": 2.6066777910990104e-05, "loss": 0.0015, "num_input_tokens_seen": 240235056, "step": 111210 }, { "epoch": 18.14274061990212, "grad_norm": 0.0025856448337435722, "learning_rate": 2.6044099991839766e-05, "loss": 0.0008, "num_input_tokens_seen": 240245680, "step": 111215 }, { "epoch": 18.143556280587276, "grad_norm": 0.0061494940891861916, "learning_rate": 2.602143167800719e-05, "loss": 0.0018, "num_input_tokens_seen": 240255728, "step": 111220 }, { "epoch": 18.144371941272432, "grad_norm": 0.002374051371589303, "learning_rate": 2.59987729699514e-05, "loss": 0.0021, "num_input_tokens_seen": 240265264, "step": 111225 }, { "epoch": 18.145187601957584, "grad_norm": 0.00037141350912861526, "learning_rate": 2.5976123868131864e-05, "loss": 0.0006, "num_input_tokens_seen": 240275856, "step": 111230 }, { "epoch": 18.14600326264274, "grad_norm": 0.00015754564083181322, "learning_rate": 2.5953484373007487e-05, "loss": 0.0011, "num_input_tokens_seen": 240285712, "step": 111235 }, { "epoch": 18.146818923327896, "grad_norm": 0.004322631284594536, "learning_rate": 2.5930854485037124e-05, "loss": 0.0011, "num_input_tokens_seen": 240296528, "step": 111240 }, { "epoch": 18.14763458401305, "grad_norm": 0.0036023175343871117, "learning_rate": 2.590823420467947e-05, "loss": 0.1025, "num_input_tokens_seen": 240307824, "step": 111245 }, { "epoch": 18.148450244698207, "grad_norm": 0.0007313843816518784, "learning_rate": 2.5885623532392823e-05, "loss": 0.0009, "num_input_tokens_seen": 240318672, "step": 111250 }, { "epoch": 18.14926590538336, "grad_norm": 0.017052991315722466, "learning_rate": 2.586302246863548e-05, "loss": 0.0015, "num_input_tokens_seen": 240329488, "step": 111255 }, { "epoch": 18.150081566068515, "grad_norm": 0.0006939188460819423, "learning_rate": 2.584043101386546e-05, "loss": 0.0005, "num_input_tokens_seen": 240341264, "step": 111260 }, { "epoch": 18.15089722675367, "grad_norm": 0.0010113732423633337, "learning_rate": 2.5817849168540576e-05, "loss": 0.0004, "num_input_tokens_seen": 240352464, "step": 111265 }, { "epoch": 18.151712887438826, "grad_norm": 0.003736126236617565, "learning_rate": 2.5795276933118618e-05, "loss": 0.0129, "num_input_tokens_seen": 240363440, "step": 111270 }, { "epoch": 18.152528548123982, "grad_norm": 0.02830549329519272, "learning_rate": 2.5772714308056887e-05, "loss": 0.002, "num_input_tokens_seen": 240373616, "step": 111275 }, { "epoch": 18.153344208809134, "grad_norm": 0.0023447242565453053, "learning_rate": 2.5750161293812635e-05, "loss": 0.0009, "num_input_tokens_seen": 240384560, "step": 111280 }, { "epoch": 18.15415986949429, "grad_norm": 0.005311821587383747, "learning_rate": 2.572761789084316e-05, "loss": 0.0012, "num_input_tokens_seen": 240394960, "step": 111285 }, { "epoch": 18.154975530179446, "grad_norm": 0.03360892832279205, "learning_rate": 2.570508409960498e-05, "loss": 0.0033, "num_input_tokens_seen": 240403984, "step": 111290 }, { "epoch": 18.1557911908646, "grad_norm": 0.03504948318004608, "learning_rate": 2.5682559920555127e-05, "loss": 0.0049, "num_input_tokens_seen": 240414352, "step": 111295 }, { "epoch": 18.156606851549757, "grad_norm": 0.0005736067541874945, "learning_rate": 2.5660045354149786e-05, "loss": 0.0007, "num_input_tokens_seen": 240425392, "step": 111300 }, { "epoch": 18.15742251223491, "grad_norm": 0.0002982286678161472, "learning_rate": 2.5637540400845483e-05, "loss": 0.0009, "num_input_tokens_seen": 240436944, "step": 111305 }, { "epoch": 18.158238172920065, "grad_norm": 0.042514994740486145, "learning_rate": 2.561504506109802e-05, "loss": 0.0025, "num_input_tokens_seen": 240448272, "step": 111310 }, { "epoch": 18.15905383360522, "grad_norm": 0.004912849515676498, "learning_rate": 2.5592559335363696e-05, "loss": 0.0009, "num_input_tokens_seen": 240460368, "step": 111315 }, { "epoch": 18.159869494290376, "grad_norm": 0.0012176425661891699, "learning_rate": 2.5570083224097763e-05, "loss": 0.0016, "num_input_tokens_seen": 240472368, "step": 111320 }, { "epoch": 18.160685154975532, "grad_norm": 0.03821130096912384, "learning_rate": 2.554761672775613e-05, "loss": 0.0015, "num_input_tokens_seen": 240483056, "step": 111325 }, { "epoch": 18.161500815660684, "grad_norm": 0.004230343271046877, "learning_rate": 2.5525159846793822e-05, "loss": 0.0006, "num_input_tokens_seen": 240494096, "step": 111330 }, { "epoch": 18.16231647634584, "grad_norm": 0.07421465963125229, "learning_rate": 2.550271258166609e-05, "loss": 0.0047, "num_input_tokens_seen": 240505168, "step": 111335 }, { "epoch": 18.163132137030995, "grad_norm": 0.004852895624935627, "learning_rate": 2.548027493282784e-05, "loss": 0.0006, "num_input_tokens_seen": 240515632, "step": 111340 }, { "epoch": 18.16394779771615, "grad_norm": 0.003312204033136368, "learning_rate": 2.5457846900733774e-05, "loss": 0.0006, "num_input_tokens_seen": 240526608, "step": 111345 }, { "epoch": 18.164763458401303, "grad_norm": 0.0339200459420681, "learning_rate": 2.5435428485838465e-05, "loss": 0.0017, "num_input_tokens_seen": 240538032, "step": 111350 }, { "epoch": 18.16557911908646, "grad_norm": 0.0012343511916697025, "learning_rate": 2.5413019688596218e-05, "loss": 0.0004, "num_input_tokens_seen": 240549104, "step": 111355 }, { "epoch": 18.166394779771615, "grad_norm": 0.002318829298019409, "learning_rate": 2.539062050946117e-05, "loss": 0.0017, "num_input_tokens_seen": 240559120, "step": 111360 }, { "epoch": 18.16721044045677, "grad_norm": 0.001546688610687852, "learning_rate": 2.5368230948887295e-05, "loss": 0.001, "num_input_tokens_seen": 240571408, "step": 111365 }, { "epoch": 18.168026101141926, "grad_norm": 0.003105068812146783, "learning_rate": 2.5345851007328336e-05, "loss": 0.0024, "num_input_tokens_seen": 240581264, "step": 111370 }, { "epoch": 18.16884176182708, "grad_norm": 0.0007292951340787113, "learning_rate": 2.532348068523782e-05, "loss": 0.0008, "num_input_tokens_seen": 240591664, "step": 111375 }, { "epoch": 18.169657422512234, "grad_norm": 0.027069859206676483, "learning_rate": 2.5301119983069165e-05, "loss": 0.0013, "num_input_tokens_seen": 240602224, "step": 111380 }, { "epoch": 18.17047308319739, "grad_norm": 0.0005704367067664862, "learning_rate": 2.5278768901275506e-05, "loss": 0.003, "num_input_tokens_seen": 240612880, "step": 111385 }, { "epoch": 18.171288743882545, "grad_norm": 0.009828636422753334, "learning_rate": 2.5256427440309815e-05, "loss": 0.0033, "num_input_tokens_seen": 240624752, "step": 111390 }, { "epoch": 18.1721044045677, "grad_norm": 0.0319136418402195, "learning_rate": 2.5234095600624896e-05, "loss": 0.0029, "num_input_tokens_seen": 240634672, "step": 111395 }, { "epoch": 18.172920065252853, "grad_norm": 0.0002865030546672642, "learning_rate": 2.5211773382673274e-05, "loss": 0.0015, "num_input_tokens_seen": 240645776, "step": 111400 }, { "epoch": 18.17373572593801, "grad_norm": 0.025485774502158165, "learning_rate": 2.5189460786907425e-05, "loss": 0.0028, "num_input_tokens_seen": 240655920, "step": 111405 }, { "epoch": 18.174551386623165, "grad_norm": 0.0005207830108702183, "learning_rate": 2.5167157813779485e-05, "loss": 0.0458, "num_input_tokens_seen": 240665392, "step": 111410 }, { "epoch": 18.17536704730832, "grad_norm": 0.005462713073939085, "learning_rate": 2.5144864463741423e-05, "loss": 0.0059, "num_input_tokens_seen": 240677040, "step": 111415 }, { "epoch": 18.176182707993476, "grad_norm": 0.0015583484200760722, "learning_rate": 2.5122580737245105e-05, "loss": 0.0014, "num_input_tokens_seen": 240688336, "step": 111420 }, { "epoch": 18.17699836867863, "grad_norm": 0.06314843893051147, "learning_rate": 2.5100306634742053e-05, "loss": 0.0027, "num_input_tokens_seen": 240699664, "step": 111425 }, { "epoch": 18.177814029363784, "grad_norm": 0.16858816146850586, "learning_rate": 2.5078042156683854e-05, "loss": 0.0043, "num_input_tokens_seen": 240708752, "step": 111430 }, { "epoch": 18.17862969004894, "grad_norm": 0.0017226624768227339, "learning_rate": 2.5055787303521483e-05, "loss": 0.0018, "num_input_tokens_seen": 240719248, "step": 111435 }, { "epoch": 18.179445350734095, "grad_norm": 0.007539310026913881, "learning_rate": 2.5033542075706184e-05, "loss": 0.0022, "num_input_tokens_seen": 240731536, "step": 111440 }, { "epoch": 18.18026101141925, "grad_norm": 0.005234704352915287, "learning_rate": 2.5011306473688656e-05, "loss": 0.0011, "num_input_tokens_seen": 240743088, "step": 111445 }, { "epoch": 18.181076672104403, "grad_norm": 0.0010076353792101145, "learning_rate": 2.4989080497919593e-05, "loss": 0.0003, "num_input_tokens_seen": 240755056, "step": 111450 }, { "epoch": 18.18189233278956, "grad_norm": 0.002447428647428751, "learning_rate": 2.496686414884941e-05, "loss": 0.0008, "num_input_tokens_seen": 240766576, "step": 111455 }, { "epoch": 18.182707993474715, "grad_norm": 0.07020247727632523, "learning_rate": 2.4944657426928306e-05, "loss": 0.0072, "num_input_tokens_seen": 240776720, "step": 111460 }, { "epoch": 18.18352365415987, "grad_norm": 0.0051605477929115295, "learning_rate": 2.492246033260642e-05, "loss": 0.1114, "num_input_tokens_seen": 240787760, "step": 111465 }, { "epoch": 18.184339314845026, "grad_norm": 0.0043184030801057816, "learning_rate": 2.490027286633356e-05, "loss": 0.0008, "num_input_tokens_seen": 240798640, "step": 111470 }, { "epoch": 18.18515497553018, "grad_norm": 0.002179432427510619, "learning_rate": 2.487809502855931e-05, "loss": 0.0082, "num_input_tokens_seen": 240810480, "step": 111475 }, { "epoch": 18.185970636215334, "grad_norm": 0.00016462391067761928, "learning_rate": 2.4855926819733253e-05, "loss": 0.0017, "num_input_tokens_seen": 240821680, "step": 111480 }, { "epoch": 18.18678629690049, "grad_norm": 0.16513219475746155, "learning_rate": 2.4833768240304587e-05, "loss": 0.0026, "num_input_tokens_seen": 240832592, "step": 111485 }, { "epoch": 18.187601957585645, "grad_norm": 0.00029663904570043087, "learning_rate": 2.48116192907224e-05, "loss": 0.0009, "num_input_tokens_seen": 240844208, "step": 111490 }, { "epoch": 18.1884176182708, "grad_norm": 0.014264887198805809, "learning_rate": 2.4789479971435602e-05, "loss": 0.0015, "num_input_tokens_seen": 240855472, "step": 111495 }, { "epoch": 18.189233278955953, "grad_norm": 0.02956242486834526, "learning_rate": 2.4767350282892788e-05, "loss": 0.0031, "num_input_tokens_seen": 240866896, "step": 111500 }, { "epoch": 18.19004893964111, "grad_norm": 0.08667551726102829, "learning_rate": 2.4745230225542536e-05, "loss": 0.0024, "num_input_tokens_seen": 240877488, "step": 111505 }, { "epoch": 18.190864600326265, "grad_norm": 0.017870064824819565, "learning_rate": 2.472311979983305e-05, "loss": 0.0008, "num_input_tokens_seen": 240887472, "step": 111510 }, { "epoch": 18.19168026101142, "grad_norm": 0.0027186137158423662, "learning_rate": 2.470101900621252e-05, "loss": 0.0122, "num_input_tokens_seen": 240899344, "step": 111515 }, { "epoch": 18.192495921696572, "grad_norm": 0.009860222227871418, "learning_rate": 2.4678927845128762e-05, "loss": 0.0008, "num_input_tokens_seen": 240910480, "step": 111520 }, { "epoch": 18.193311582381728, "grad_norm": 0.0051208180375397205, "learning_rate": 2.4656846317029524e-05, "loss": 0.0005, "num_input_tokens_seen": 240921168, "step": 111525 }, { "epoch": 18.194127243066884, "grad_norm": 0.12531216442584991, "learning_rate": 2.463477442236234e-05, "loss": 0.0018, "num_input_tokens_seen": 240932272, "step": 111530 }, { "epoch": 18.19494290375204, "grad_norm": 0.028876209631562233, "learning_rate": 2.4612712161574457e-05, "loss": 0.0123, "num_input_tokens_seen": 240941712, "step": 111535 }, { "epoch": 18.195758564437195, "grad_norm": 0.0025795248802751303, "learning_rate": 2.459065953511308e-05, "loss": 0.0013, "num_input_tokens_seen": 240952816, "step": 111540 }, { "epoch": 18.196574225122347, "grad_norm": 0.0012704171240329742, "learning_rate": 2.456861654342507e-05, "loss": 0.0006, "num_input_tokens_seen": 240962192, "step": 111545 }, { "epoch": 18.197389885807503, "grad_norm": 0.0005869403248652816, "learning_rate": 2.454658318695713e-05, "loss": 0.0017, "num_input_tokens_seen": 240972592, "step": 111550 }, { "epoch": 18.19820554649266, "grad_norm": 0.0015953588299453259, "learning_rate": 2.4524559466155838e-05, "loss": 0.0007, "num_input_tokens_seen": 240983536, "step": 111555 }, { "epoch": 18.199021207177815, "grad_norm": 0.0008225612109526992, "learning_rate": 2.450254538146762e-05, "loss": 0.0021, "num_input_tokens_seen": 240993968, "step": 111560 }, { "epoch": 18.19983686786297, "grad_norm": 0.002442733384668827, "learning_rate": 2.44805409333384e-05, "loss": 0.0017, "num_input_tokens_seen": 241004272, "step": 111565 }, { "epoch": 18.200652528548122, "grad_norm": 0.00041253273957408965, "learning_rate": 2.445854612221432e-05, "loss": 0.0006, "num_input_tokens_seen": 241014928, "step": 111570 }, { "epoch": 18.201468189233278, "grad_norm": 0.0012305235723033547, "learning_rate": 2.443656094854113e-05, "loss": 0.0006, "num_input_tokens_seen": 241025968, "step": 111575 }, { "epoch": 18.202283849918434, "grad_norm": 0.0023012920282781124, "learning_rate": 2.4414585412764255e-05, "loss": 0.0019, "num_input_tokens_seen": 241037040, "step": 111580 }, { "epoch": 18.20309951060359, "grad_norm": 0.001884901081211865, "learning_rate": 2.4392619515329173e-05, "loss": 0.0009, "num_input_tokens_seen": 241048080, "step": 111585 }, { "epoch": 18.203915171288745, "grad_norm": 0.002780719194561243, "learning_rate": 2.437066325668097e-05, "loss": 0.0006, "num_input_tokens_seen": 241059216, "step": 111590 }, { "epoch": 18.204730831973897, "grad_norm": 0.02738889679312706, "learning_rate": 2.434871663726468e-05, "loss": 0.0025, "num_input_tokens_seen": 241069968, "step": 111595 }, { "epoch": 18.205546492659053, "grad_norm": 0.013765150681138039, "learning_rate": 2.4326779657525055e-05, "loss": 0.0016, "num_input_tokens_seen": 241081008, "step": 111600 }, { "epoch": 18.20636215334421, "grad_norm": 0.00026065035490319133, "learning_rate": 2.430485231790669e-05, "loss": 0.003, "num_input_tokens_seen": 241091056, "step": 111605 }, { "epoch": 18.207177814029365, "grad_norm": 0.0007315054535865784, "learning_rate": 2.428293461885389e-05, "loss": 0.0005, "num_input_tokens_seen": 241102064, "step": 111610 }, { "epoch": 18.20799347471452, "grad_norm": 0.0005587812629528344, "learning_rate": 2.426102656081097e-05, "loss": 0.0003, "num_input_tokens_seen": 241113456, "step": 111615 }, { "epoch": 18.208809135399672, "grad_norm": 0.022680338472127914, "learning_rate": 2.4239128144221857e-05, "loss": 0.0008, "num_input_tokens_seen": 241123472, "step": 111620 }, { "epoch": 18.209624796084828, "grad_norm": 0.007227160967886448, "learning_rate": 2.4217239369530354e-05, "loss": 0.0142, "num_input_tokens_seen": 241133808, "step": 111625 }, { "epoch": 18.210440456769984, "grad_norm": 0.03996798023581505, "learning_rate": 2.4195360237180053e-05, "loss": 0.0017, "num_input_tokens_seen": 241144304, "step": 111630 }, { "epoch": 18.21125611745514, "grad_norm": 0.002206821460276842, "learning_rate": 2.417349074761438e-05, "loss": 0.0015, "num_input_tokens_seen": 241153264, "step": 111635 }, { "epoch": 18.212071778140295, "grad_norm": 0.07152996957302094, "learning_rate": 2.4151630901276534e-05, "loss": 0.0014, "num_input_tokens_seen": 241164816, "step": 111640 }, { "epoch": 18.212887438825447, "grad_norm": 0.0010918622137978673, "learning_rate": 2.4129780698609606e-05, "loss": 0.0005, "num_input_tokens_seen": 241175952, "step": 111645 }, { "epoch": 18.213703099510603, "grad_norm": 0.007130472920835018, "learning_rate": 2.4107940140056294e-05, "loss": 0.0004, "num_input_tokens_seen": 241186032, "step": 111650 }, { "epoch": 18.21451876019576, "grad_norm": 0.0018399967812001705, "learning_rate": 2.4086109226059305e-05, "loss": 0.0019, "num_input_tokens_seen": 241197584, "step": 111655 }, { "epoch": 18.215334420880914, "grad_norm": 0.001044351258315146, "learning_rate": 2.4064287957061003e-05, "loss": 0.0005, "num_input_tokens_seen": 241209168, "step": 111660 }, { "epoch": 18.21615008156607, "grad_norm": 0.00033728586276993155, "learning_rate": 2.404247633350376e-05, "loss": 0.0008, "num_input_tokens_seen": 241220304, "step": 111665 }, { "epoch": 18.216965742251222, "grad_norm": 0.007836922071874142, "learning_rate": 2.402067435582944e-05, "loss": 0.0019, "num_input_tokens_seen": 241231024, "step": 111670 }, { "epoch": 18.217781402936378, "grad_norm": 0.006131039932370186, "learning_rate": 2.3998882024480085e-05, "loss": 0.0017, "num_input_tokens_seen": 241242192, "step": 111675 }, { "epoch": 18.218597063621534, "grad_norm": 0.0007795770070515573, "learning_rate": 2.3977099339897112e-05, "loss": 0.0012, "num_input_tokens_seen": 241253104, "step": 111680 }, { "epoch": 18.21941272430669, "grad_norm": 0.0077186450362205505, "learning_rate": 2.395532630252223e-05, "loss": 0.0031, "num_input_tokens_seen": 241263920, "step": 111685 }, { "epoch": 18.22022838499184, "grad_norm": 0.010755318216979504, "learning_rate": 2.393356291279647e-05, "loss": 0.0015, "num_input_tokens_seen": 241274288, "step": 111690 }, { "epoch": 18.221044045676997, "grad_norm": 0.003711380995810032, "learning_rate": 2.391180917116109e-05, "loss": 0.0007, "num_input_tokens_seen": 241285904, "step": 111695 }, { "epoch": 18.221859706362153, "grad_norm": 0.0002724926162045449, "learning_rate": 2.389006507805669e-05, "loss": 0.0014, "num_input_tokens_seen": 241298000, "step": 111700 }, { "epoch": 18.22267536704731, "grad_norm": 0.002828385913744569, "learning_rate": 2.3868330633924295e-05, "loss": 0.0006, "num_input_tokens_seen": 241308496, "step": 111705 }, { "epoch": 18.223491027732464, "grad_norm": 0.000973310845438391, "learning_rate": 2.3846605839204062e-05, "loss": 0.0108, "num_input_tokens_seen": 241318960, "step": 111710 }, { "epoch": 18.224306688417617, "grad_norm": 0.005908642895519733, "learning_rate": 2.3824890694336467e-05, "loss": 0.0297, "num_input_tokens_seen": 241328656, "step": 111715 }, { "epoch": 18.225122349102772, "grad_norm": 0.47186583280563354, "learning_rate": 2.380318519976149e-05, "loss": 0.0148, "num_input_tokens_seen": 241340112, "step": 111720 }, { "epoch": 18.225938009787928, "grad_norm": 0.04481413587927818, "learning_rate": 2.3781489355919117e-05, "loss": 0.0016, "num_input_tokens_seen": 241351024, "step": 111725 }, { "epoch": 18.226753670473084, "grad_norm": 0.0014045239659026265, "learning_rate": 2.375980316324894e-05, "loss": 0.0006, "num_input_tokens_seen": 241361616, "step": 111730 }, { "epoch": 18.22756933115824, "grad_norm": 0.021581880748271942, "learning_rate": 2.373812662219055e-05, "loss": 0.0049, "num_input_tokens_seen": 241371696, "step": 111735 }, { "epoch": 18.22838499184339, "grad_norm": 0.001914651715196669, "learning_rate": 2.3716459733183205e-05, "loss": 0.0006, "num_input_tokens_seen": 241383120, "step": 111740 }, { "epoch": 18.229200652528547, "grad_norm": 0.0057961605489254, "learning_rate": 2.3694802496665945e-05, "loss": 0.0006, "num_input_tokens_seen": 241392752, "step": 111745 }, { "epoch": 18.230016313213703, "grad_norm": 0.004409853368997574, "learning_rate": 2.367315491307781e-05, "loss": 0.0004, "num_input_tokens_seen": 241403376, "step": 111750 }, { "epoch": 18.23083197389886, "grad_norm": 0.009331168606877327, "learning_rate": 2.3651516982857448e-05, "loss": 0.0009, "num_input_tokens_seen": 241413840, "step": 111755 }, { "epoch": 18.231647634584014, "grad_norm": 0.0019217518856748939, "learning_rate": 2.362988870644339e-05, "loss": 0.0005, "num_input_tokens_seen": 241424752, "step": 111760 }, { "epoch": 18.232463295269167, "grad_norm": 0.0011666314676404, "learning_rate": 2.3608270084273853e-05, "loss": 0.0008, "num_input_tokens_seen": 241435536, "step": 111765 }, { "epoch": 18.233278955954322, "grad_norm": 0.14562870562076569, "learning_rate": 2.3586661116787255e-05, "loss": 0.0049, "num_input_tokens_seen": 241446960, "step": 111770 }, { "epoch": 18.234094616639478, "grad_norm": 0.01970742829144001, "learning_rate": 2.3565061804421195e-05, "loss": 0.0008, "num_input_tokens_seen": 241457264, "step": 111775 }, { "epoch": 18.234910277324634, "grad_norm": 0.4335617125034332, "learning_rate": 2.3543472147613654e-05, "loss": 0.0163, "num_input_tokens_seen": 241469104, "step": 111780 }, { "epoch": 18.23572593800979, "grad_norm": 0.00016372000391129404, "learning_rate": 2.3521892146801947e-05, "loss": 0.0004, "num_input_tokens_seen": 241479472, "step": 111785 }, { "epoch": 18.23654159869494, "grad_norm": 0.0005032969056628644, "learning_rate": 2.350032180242373e-05, "loss": 0.0008, "num_input_tokens_seen": 241490256, "step": 111790 }, { "epoch": 18.237357259380097, "grad_norm": 0.03568139672279358, "learning_rate": 2.3478761114915814e-05, "loss": 0.0011, "num_input_tokens_seen": 241501616, "step": 111795 }, { "epoch": 18.238172920065253, "grad_norm": 0.0010190936736762524, "learning_rate": 2.3457210084715462e-05, "loss": 0.0003, "num_input_tokens_seen": 241514256, "step": 111800 }, { "epoch": 18.23898858075041, "grad_norm": 0.0015556697035208344, "learning_rate": 2.3435668712259105e-05, "loss": 0.005, "num_input_tokens_seen": 241525712, "step": 111805 }, { "epoch": 18.239804241435564, "grad_norm": 0.002752800937741995, "learning_rate": 2.341413699798367e-05, "loss": 0.0009, "num_input_tokens_seen": 241536656, "step": 111810 }, { "epoch": 18.240619902120716, "grad_norm": 0.0019639593083411455, "learning_rate": 2.3392614942325196e-05, "loss": 0.0016, "num_input_tokens_seen": 241547920, "step": 111815 }, { "epoch": 18.241435562805872, "grad_norm": 0.06264805048704147, "learning_rate": 2.3371102545720112e-05, "loss": 0.0693, "num_input_tokens_seen": 241559728, "step": 111820 }, { "epoch": 18.242251223491028, "grad_norm": 0.10108703374862671, "learning_rate": 2.3349599808604182e-05, "loss": 0.003, "num_input_tokens_seen": 241571216, "step": 111825 }, { "epoch": 18.243066884176184, "grad_norm": 0.0005204555345699191, "learning_rate": 2.332810673141339e-05, "loss": 0.0008, "num_input_tokens_seen": 241583024, "step": 111830 }, { "epoch": 18.24388254486134, "grad_norm": 0.003744245506823063, "learning_rate": 2.3306623314583108e-05, "loss": 0.0006, "num_input_tokens_seen": 241593712, "step": 111835 }, { "epoch": 18.24469820554649, "grad_norm": 0.0042017437517642975, "learning_rate": 2.3285149558548934e-05, "loss": 0.002, "num_input_tokens_seen": 241604240, "step": 111840 }, { "epoch": 18.245513866231647, "grad_norm": 0.007984976284205914, "learning_rate": 2.3263685463745854e-05, "loss": 0.0005, "num_input_tokens_seen": 241615408, "step": 111845 }, { "epoch": 18.246329526916803, "grad_norm": 0.004355450160801411, "learning_rate": 2.324223103060913e-05, "loss": 0.0035, "num_input_tokens_seen": 241628016, "step": 111850 }, { "epoch": 18.24714518760196, "grad_norm": 0.0002695178845897317, "learning_rate": 2.322078625957319e-05, "loss": 0.0006, "num_input_tokens_seen": 241639504, "step": 111855 }, { "epoch": 18.247960848287114, "grad_norm": 0.037556588649749756, "learning_rate": 2.319935115107302e-05, "loss": 0.0262, "num_input_tokens_seen": 241648880, "step": 111860 }, { "epoch": 18.248776508972266, "grad_norm": 0.0008369534043595195, "learning_rate": 2.317792570554278e-05, "loss": 0.0005, "num_input_tokens_seen": 241660464, "step": 111865 }, { "epoch": 18.249592169657422, "grad_norm": 0.0007577822543680668, "learning_rate": 2.3156509923416778e-05, "loss": 0.0009, "num_input_tokens_seen": 241670064, "step": 111870 }, { "epoch": 18.250407830342578, "grad_norm": 0.0008776098839007318, "learning_rate": 2.3135103805129065e-05, "loss": 0.0007, "num_input_tokens_seen": 241680432, "step": 111875 }, { "epoch": 18.251223491027734, "grad_norm": 0.02049904502928257, "learning_rate": 2.31137073511134e-05, "loss": 0.0632, "num_input_tokens_seen": 241690736, "step": 111880 }, { "epoch": 18.252039151712886, "grad_norm": 0.009858843870460987, "learning_rate": 2.3092320561803436e-05, "loss": 0.0007, "num_input_tokens_seen": 241701264, "step": 111885 }, { "epoch": 18.25285481239804, "grad_norm": 0.0036323664244264364, "learning_rate": 2.3070943437632553e-05, "loss": 0.0234, "num_input_tokens_seen": 241711152, "step": 111890 }, { "epoch": 18.253670473083197, "grad_norm": 0.00025795798865146935, "learning_rate": 2.3049575979034066e-05, "loss": 0.0013, "num_input_tokens_seen": 241722640, "step": 111895 }, { "epoch": 18.254486133768353, "grad_norm": 0.0005865280982106924, "learning_rate": 2.3028218186440964e-05, "loss": 0.0074, "num_input_tokens_seen": 241733680, "step": 111900 }, { "epoch": 18.25530179445351, "grad_norm": 1.113283634185791, "learning_rate": 2.3006870060286123e-05, "loss": 0.0298, "num_input_tokens_seen": 241744240, "step": 111905 }, { "epoch": 18.25611745513866, "grad_norm": 0.00035421474603936076, "learning_rate": 2.2985531601002084e-05, "loss": 0.0012, "num_input_tokens_seen": 241753872, "step": 111910 }, { "epoch": 18.256933115823816, "grad_norm": 0.0012218153569847345, "learning_rate": 2.2964202809021563e-05, "loss": 0.0004, "num_input_tokens_seen": 241764304, "step": 111915 }, { "epoch": 18.257748776508972, "grad_norm": 0.006138972472399473, "learning_rate": 2.2942883684776428e-05, "loss": 0.0009, "num_input_tokens_seen": 241774160, "step": 111920 }, { "epoch": 18.258564437194128, "grad_norm": 0.0003647230041678995, "learning_rate": 2.2921574228699116e-05, "loss": 0.0063, "num_input_tokens_seen": 241786128, "step": 111925 }, { "epoch": 18.259380097879284, "grad_norm": 0.0019345534965395927, "learning_rate": 2.290027444122117e-05, "loss": 0.0007, "num_input_tokens_seen": 241796368, "step": 111930 }, { "epoch": 18.260195758564436, "grad_norm": 0.005643834825605154, "learning_rate": 2.2878984322774578e-05, "loss": 0.0005, "num_input_tokens_seen": 241807216, "step": 111935 }, { "epoch": 18.26101141924959, "grad_norm": 0.029966186732053757, "learning_rate": 2.2857703873790435e-05, "loss": 0.0017, "num_input_tokens_seen": 241818608, "step": 111940 }, { "epoch": 18.261827079934747, "grad_norm": 0.00763977924361825, "learning_rate": 2.2836433094700405e-05, "loss": 0.0035, "num_input_tokens_seen": 241828656, "step": 111945 }, { "epoch": 18.262642740619903, "grad_norm": 0.0040611946024000645, "learning_rate": 2.2815171985935246e-05, "loss": 0.0005, "num_input_tokens_seen": 241840208, "step": 111950 }, { "epoch": 18.26345840130506, "grad_norm": 0.03236650675535202, "learning_rate": 2.279392054792612e-05, "loss": 0.0057, "num_input_tokens_seen": 241851696, "step": 111955 }, { "epoch": 18.26427406199021, "grad_norm": 0.0016628196462988853, "learning_rate": 2.277267878110345e-05, "loss": 0.0008, "num_input_tokens_seen": 241863056, "step": 111960 }, { "epoch": 18.265089722675366, "grad_norm": 0.0033213666174560785, "learning_rate": 2.275144668589796e-05, "loss": 0.0008, "num_input_tokens_seen": 241873040, "step": 111965 }, { "epoch": 18.265905383360522, "grad_norm": 0.008418967947363853, "learning_rate": 2.2730224262739687e-05, "loss": 0.0021, "num_input_tokens_seen": 241884720, "step": 111970 }, { "epoch": 18.266721044045678, "grad_norm": 0.004653456620872021, "learning_rate": 2.270901151205895e-05, "loss": 0.0005, "num_input_tokens_seen": 241894512, "step": 111975 }, { "epoch": 18.267536704730833, "grad_norm": 0.00020115444203838706, "learning_rate": 2.2687808434285585e-05, "loss": 0.0039, "num_input_tokens_seen": 241906544, "step": 111980 }, { "epoch": 18.268352365415986, "grad_norm": 0.005816313438117504, "learning_rate": 2.266661502984929e-05, "loss": 0.0008, "num_input_tokens_seen": 241917456, "step": 111985 }, { "epoch": 18.26916802610114, "grad_norm": 0.08827083557844162, "learning_rate": 2.264543129917962e-05, "loss": 0.004, "num_input_tokens_seen": 241929200, "step": 111990 }, { "epoch": 18.269983686786297, "grad_norm": 0.1319461166858673, "learning_rate": 2.2624257242705838e-05, "loss": 0.0034, "num_input_tokens_seen": 241940016, "step": 111995 }, { "epoch": 18.270799347471453, "grad_norm": 0.04293489083647728, "learning_rate": 2.2603092860857045e-05, "loss": 0.0015, "num_input_tokens_seen": 241950928, "step": 112000 }, { "epoch": 18.27161500815661, "grad_norm": 0.0004455571179278195, "learning_rate": 2.258193815406223e-05, "loss": 0.0078, "num_input_tokens_seen": 241961488, "step": 112005 }, { "epoch": 18.27243066884176, "grad_norm": 0.0009946267819032073, "learning_rate": 2.2560793122750056e-05, "loss": 0.0033, "num_input_tokens_seen": 241972208, "step": 112010 }, { "epoch": 18.273246329526916, "grad_norm": 0.00034329970367252827, "learning_rate": 2.253965776734912e-05, "loss": 0.0004, "num_input_tokens_seen": 241984176, "step": 112015 }, { "epoch": 18.274061990212072, "grad_norm": 0.002764312084764242, "learning_rate": 2.251853208828769e-05, "loss": 0.0008, "num_input_tokens_seen": 241994064, "step": 112020 }, { "epoch": 18.274877650897228, "grad_norm": 0.008333449251949787, "learning_rate": 2.2497416085993983e-05, "loss": 0.0012, "num_input_tokens_seen": 242003600, "step": 112025 }, { "epoch": 18.275693311582383, "grad_norm": 0.002981035504490137, "learning_rate": 2.247630976089582e-05, "loss": 0.001, "num_input_tokens_seen": 242015152, "step": 112030 }, { "epoch": 18.276508972267536, "grad_norm": 0.004562276415526867, "learning_rate": 2.245521311342108e-05, "loss": 0.0022, "num_input_tokens_seen": 242026704, "step": 112035 }, { "epoch": 18.27732463295269, "grad_norm": 0.007158250547945499, "learning_rate": 2.2434126143997258e-05, "loss": 0.0007, "num_input_tokens_seen": 242037552, "step": 112040 }, { "epoch": 18.278140293637847, "grad_norm": 0.004958420526236296, "learning_rate": 2.241304885305162e-05, "loss": 0.0011, "num_input_tokens_seen": 242048720, "step": 112045 }, { "epoch": 18.278955954323003, "grad_norm": 0.0006449085776694119, "learning_rate": 2.2391981241011495e-05, "loss": 0.0016, "num_input_tokens_seen": 242060656, "step": 112050 }, { "epoch": 18.27977161500816, "grad_norm": 0.009936443530023098, "learning_rate": 2.2370923308303702e-05, "loss": 0.0009, "num_input_tokens_seen": 242071184, "step": 112055 }, { "epoch": 18.28058727569331, "grad_norm": 0.00033961181179620326, "learning_rate": 2.234987505535513e-05, "loss": 0.0016, "num_input_tokens_seen": 242080560, "step": 112060 }, { "epoch": 18.281402936378466, "grad_norm": 0.00022909794643055648, "learning_rate": 2.2328836482592208e-05, "loss": 0.0011, "num_input_tokens_seen": 242091600, "step": 112065 }, { "epoch": 18.282218597063622, "grad_norm": 0.0009541076142340899, "learning_rate": 2.2307807590441486e-05, "loss": 0.0011, "num_input_tokens_seen": 242103088, "step": 112070 }, { "epoch": 18.283034257748778, "grad_norm": 0.019319789484143257, "learning_rate": 2.2286788379328905e-05, "loss": 0.0031, "num_input_tokens_seen": 242113392, "step": 112075 }, { "epoch": 18.28384991843393, "grad_norm": 0.012067809700965881, "learning_rate": 2.2265778849680673e-05, "loss": 0.0008, "num_input_tokens_seen": 242124208, "step": 112080 }, { "epoch": 18.284665579119086, "grad_norm": 0.0042475382797420025, "learning_rate": 2.2244779001922457e-05, "loss": 0.0021, "num_input_tokens_seen": 242134160, "step": 112085 }, { "epoch": 18.28548123980424, "grad_norm": 0.21117204427719116, "learning_rate": 2.222378883647985e-05, "loss": 0.009, "num_input_tokens_seen": 242145296, "step": 112090 }, { "epoch": 18.286296900489397, "grad_norm": 0.0018978551961481571, "learning_rate": 2.2202808353778302e-05, "loss": 0.0043, "num_input_tokens_seen": 242156848, "step": 112095 }, { "epoch": 18.287112561174553, "grad_norm": 0.20463545620441437, "learning_rate": 2.2181837554242968e-05, "loss": 0.0046, "num_input_tokens_seen": 242167632, "step": 112100 }, { "epoch": 18.287928221859705, "grad_norm": 0.0037592577282339334, "learning_rate": 2.216087643829884e-05, "loss": 0.001, "num_input_tokens_seen": 242178704, "step": 112105 }, { "epoch": 18.28874388254486, "grad_norm": 0.01856519654393196, "learning_rate": 2.213992500637074e-05, "loss": 0.0083, "num_input_tokens_seen": 242189712, "step": 112110 }, { "epoch": 18.289559543230016, "grad_norm": 0.004871395882219076, "learning_rate": 2.211898325888323e-05, "loss": 0.0006, "num_input_tokens_seen": 242201424, "step": 112115 }, { "epoch": 18.290375203915172, "grad_norm": 0.02684687077999115, "learning_rate": 2.2098051196260794e-05, "loss": 0.0013, "num_input_tokens_seen": 242212240, "step": 112120 }, { "epoch": 18.291190864600328, "grad_norm": 0.0012460710713639855, "learning_rate": 2.207712881892765e-05, "loss": 0.0033, "num_input_tokens_seen": 242221808, "step": 112125 }, { "epoch": 18.29200652528548, "grad_norm": 0.002025953261181712, "learning_rate": 2.205621612730774e-05, "loss": 0.1082, "num_input_tokens_seen": 242231760, "step": 112130 }, { "epoch": 18.292822185970635, "grad_norm": 0.013695158064365387, "learning_rate": 2.2035313121824884e-05, "loss": 0.0012, "num_input_tokens_seen": 242241104, "step": 112135 }, { "epoch": 18.29363784665579, "grad_norm": 0.003423569491133094, "learning_rate": 2.2014419802902808e-05, "loss": 0.02, "num_input_tokens_seen": 242250608, "step": 112140 }, { "epoch": 18.294453507340947, "grad_norm": 0.00023604616580996662, "learning_rate": 2.1993536170964832e-05, "loss": 0.0008, "num_input_tokens_seen": 242261584, "step": 112145 }, { "epoch": 18.295269168026103, "grad_norm": 0.00926015805453062, "learning_rate": 2.1972662226434292e-05, "loss": 0.0011, "num_input_tokens_seen": 242272144, "step": 112150 }, { "epoch": 18.296084828711255, "grad_norm": 0.002171823987737298, "learning_rate": 2.1951797969734178e-05, "loss": 0.013, "num_input_tokens_seen": 242283824, "step": 112155 }, { "epoch": 18.29690048939641, "grad_norm": 0.004147836938500404, "learning_rate": 2.193094340128726e-05, "loss": 0.0011, "num_input_tokens_seen": 242294128, "step": 112160 }, { "epoch": 18.297716150081566, "grad_norm": 0.0004257794935256243, "learning_rate": 2.191009852151632e-05, "loss": 0.0005, "num_input_tokens_seen": 242304080, "step": 112165 }, { "epoch": 18.298531810766722, "grad_norm": 0.0004469923733267933, "learning_rate": 2.188926333084368e-05, "loss": 0.0017, "num_input_tokens_seen": 242313328, "step": 112170 }, { "epoch": 18.299347471451878, "grad_norm": 0.04959937185049057, "learning_rate": 2.186843782969167e-05, "loss": 0.0015, "num_input_tokens_seen": 242323984, "step": 112175 }, { "epoch": 18.30016313213703, "grad_norm": 0.0006796122179366648, "learning_rate": 2.1847622018482283e-05, "loss": 0.0007, "num_input_tokens_seen": 242334800, "step": 112180 }, { "epoch": 18.300978792822185, "grad_norm": 0.01616012305021286, "learning_rate": 2.182681589763741e-05, "loss": 0.0009, "num_input_tokens_seen": 242345936, "step": 112185 }, { "epoch": 18.30179445350734, "grad_norm": 0.03500431403517723, "learning_rate": 2.1806019467578765e-05, "loss": 0.0138, "num_input_tokens_seen": 242355504, "step": 112190 }, { "epoch": 18.302610114192497, "grad_norm": 0.00026632804656401277, "learning_rate": 2.1785232728727734e-05, "loss": 0.0051, "num_input_tokens_seen": 242366512, "step": 112195 }, { "epoch": 18.303425774877653, "grad_norm": 0.00029793393332511187, "learning_rate": 2.1764455681505645e-05, "loss": 0.0064, "num_input_tokens_seen": 242376528, "step": 112200 }, { "epoch": 18.304241435562805, "grad_norm": 0.00025668280432000756, "learning_rate": 2.1743688326333555e-05, "loss": 0.0038, "num_input_tokens_seen": 242388528, "step": 112205 }, { "epoch": 18.30505709624796, "grad_norm": 0.0053967381827533245, "learning_rate": 2.1722930663632344e-05, "loss": 0.0015, "num_input_tokens_seen": 242399312, "step": 112210 }, { "epoch": 18.305872756933116, "grad_norm": 0.004073168616741896, "learning_rate": 2.1702182693822625e-05, "loss": 0.0009, "num_input_tokens_seen": 242409808, "step": 112215 }, { "epoch": 18.306688417618272, "grad_norm": 0.002889038994908333, "learning_rate": 2.1681444417325004e-05, "loss": 0.0014, "num_input_tokens_seen": 242419632, "step": 112220 }, { "epoch": 18.307504078303428, "grad_norm": 0.0007618418894708157, "learning_rate": 2.166071583455964e-05, "loss": 0.0004, "num_input_tokens_seen": 242430096, "step": 112225 }, { "epoch": 18.30831973898858, "grad_norm": 0.01946703903377056, "learning_rate": 2.1639996945946706e-05, "loss": 0.0025, "num_input_tokens_seen": 242440592, "step": 112230 }, { "epoch": 18.309135399673735, "grad_norm": 0.008552854880690575, "learning_rate": 2.1619287751906135e-05, "loss": 0.0008, "num_input_tokens_seen": 242450288, "step": 112235 }, { "epoch": 18.30995106035889, "grad_norm": 0.0020822572987526655, "learning_rate": 2.1598588252857486e-05, "loss": 0.0432, "num_input_tokens_seen": 242460336, "step": 112240 }, { "epoch": 18.310766721044047, "grad_norm": 0.0005850521847605705, "learning_rate": 2.157789844922037e-05, "loss": 0.0044, "num_input_tokens_seen": 242471632, "step": 112245 }, { "epoch": 18.3115823817292, "grad_norm": 0.00023612409131601453, "learning_rate": 2.1557218341414055e-05, "loss": 0.0002, "num_input_tokens_seen": 242482608, "step": 112250 }, { "epoch": 18.312398042414355, "grad_norm": 0.019624780863523483, "learning_rate": 2.1536547929857707e-05, "loss": 0.0014, "num_input_tokens_seen": 242493712, "step": 112255 }, { "epoch": 18.31321370309951, "grad_norm": 0.0011469227029010653, "learning_rate": 2.1515887214970165e-05, "loss": 0.025, "num_input_tokens_seen": 242505072, "step": 112260 }, { "epoch": 18.314029363784666, "grad_norm": 0.09801533818244934, "learning_rate": 2.1495236197170143e-05, "loss": 0.0022, "num_input_tokens_seen": 242516304, "step": 112265 }, { "epoch": 18.31484502446982, "grad_norm": 0.00623701885342598, "learning_rate": 2.1474594876876198e-05, "loss": 0.0092, "num_input_tokens_seen": 242526640, "step": 112270 }, { "epoch": 18.315660685154974, "grad_norm": 0.000984379556030035, "learning_rate": 2.1453963254506604e-05, "loss": 0.002, "num_input_tokens_seen": 242537648, "step": 112275 }, { "epoch": 18.31647634584013, "grad_norm": 0.00510720070451498, "learning_rate": 2.1433341330479583e-05, "loss": 0.0054, "num_input_tokens_seen": 242547792, "step": 112280 }, { "epoch": 18.317292006525285, "grad_norm": 0.001066899043507874, "learning_rate": 2.141272910521297e-05, "loss": 0.0035, "num_input_tokens_seen": 242558384, "step": 112285 }, { "epoch": 18.31810766721044, "grad_norm": 0.012570103630423546, "learning_rate": 2.1392126579124536e-05, "loss": 0.0016, "num_input_tokens_seen": 242569488, "step": 112290 }, { "epoch": 18.318923327895597, "grad_norm": 0.15933559834957123, "learning_rate": 2.1371533752631844e-05, "loss": 0.0045, "num_input_tokens_seen": 242580048, "step": 112295 }, { "epoch": 18.31973898858075, "grad_norm": 0.003937386907637119, "learning_rate": 2.135095062615211e-05, "loss": 0.0007, "num_input_tokens_seen": 242590480, "step": 112300 }, { "epoch": 18.320554649265905, "grad_norm": 0.06767729669809341, "learning_rate": 2.1330377200102723e-05, "loss": 0.0053, "num_input_tokens_seen": 242602032, "step": 112305 }, { "epoch": 18.32137030995106, "grad_norm": 0.0006834762170910835, "learning_rate": 2.130981347490035e-05, "loss": 0.0005, "num_input_tokens_seen": 242612624, "step": 112310 }, { "epoch": 18.322185970636216, "grad_norm": 1.1935534477233887, "learning_rate": 2.1289259450961995e-05, "loss": 0.1134, "num_input_tokens_seen": 242620752, "step": 112315 }, { "epoch": 18.32300163132137, "grad_norm": 0.003689026227220893, "learning_rate": 2.1268715128703932e-05, "loss": 0.0007, "num_input_tokens_seen": 242633200, "step": 112320 }, { "epoch": 18.323817292006524, "grad_norm": 0.010268572717905045, "learning_rate": 2.124818050854277e-05, "loss": 0.0009, "num_input_tokens_seen": 242643472, "step": 112325 }, { "epoch": 18.32463295269168, "grad_norm": 0.0009742376278154552, "learning_rate": 2.122765559089451e-05, "loss": 0.1357, "num_input_tokens_seen": 242654736, "step": 112330 }, { "epoch": 18.325448613376835, "grad_norm": 0.013293848372995853, "learning_rate": 2.1207140376175214e-05, "loss": 0.0036, "num_input_tokens_seen": 242665136, "step": 112335 }, { "epoch": 18.32626427406199, "grad_norm": 0.0005903943674638867, "learning_rate": 2.1186634864800603e-05, "loss": 0.0005, "num_input_tokens_seen": 242676272, "step": 112340 }, { "epoch": 18.327079934747147, "grad_norm": 0.012852600775659084, "learning_rate": 2.116613905718623e-05, "loss": 0.0012, "num_input_tokens_seen": 242688336, "step": 112345 }, { "epoch": 18.3278955954323, "grad_norm": 0.0014939934480935335, "learning_rate": 2.114565295374754e-05, "loss": 0.0539, "num_input_tokens_seen": 242698672, "step": 112350 }, { "epoch": 18.328711256117455, "grad_norm": 0.00032311692484654486, "learning_rate": 2.112517655489965e-05, "loss": 0.0012, "num_input_tokens_seen": 242709904, "step": 112355 }, { "epoch": 18.32952691680261, "grad_norm": 0.004247542470693588, "learning_rate": 2.110470986105756e-05, "loss": 0.0009, "num_input_tokens_seen": 242720720, "step": 112360 }, { "epoch": 18.330342577487766, "grad_norm": 0.006098807789385319, "learning_rate": 2.1084252872636046e-05, "loss": 0.0068, "num_input_tokens_seen": 242731056, "step": 112365 }, { "epoch": 18.33115823817292, "grad_norm": 0.005680213216692209, "learning_rate": 2.1063805590049667e-05, "loss": 0.0163, "num_input_tokens_seen": 242743312, "step": 112370 }, { "epoch": 18.331973898858074, "grad_norm": 0.00022195794736035168, "learning_rate": 2.1043368013712872e-05, "loss": 0.0026, "num_input_tokens_seen": 242753936, "step": 112375 }, { "epoch": 18.33278955954323, "grad_norm": 0.0006564015056937933, "learning_rate": 2.102294014403977e-05, "loss": 0.0002, "num_input_tokens_seen": 242764272, "step": 112380 }, { "epoch": 18.333605220228385, "grad_norm": 0.0004953066818416119, "learning_rate": 2.1002521981444477e-05, "loss": 0.001, "num_input_tokens_seen": 242776048, "step": 112385 }, { "epoch": 18.33442088091354, "grad_norm": 0.000778991321567446, "learning_rate": 2.0982113526340662e-05, "loss": 0.038, "num_input_tokens_seen": 242786864, "step": 112390 }, { "epoch": 18.335236541598697, "grad_norm": 0.001229040906764567, "learning_rate": 2.0961714779142048e-05, "loss": 0.0004, "num_input_tokens_seen": 242797552, "step": 112395 }, { "epoch": 18.33605220228385, "grad_norm": 0.0008231330430135131, "learning_rate": 2.0941325740261975e-05, "loss": 0.0033, "num_input_tokens_seen": 242809488, "step": 112400 }, { "epoch": 18.336867862969005, "grad_norm": 0.056143589317798615, "learning_rate": 2.0920946410113604e-05, "loss": 0.0025, "num_input_tokens_seen": 242820656, "step": 112405 }, { "epoch": 18.33768352365416, "grad_norm": 0.008879280649125576, "learning_rate": 2.0900576789110116e-05, "loss": 0.0007, "num_input_tokens_seen": 242831856, "step": 112410 }, { "epoch": 18.338499184339316, "grad_norm": 0.007489972282201052, "learning_rate": 2.0880216877664116e-05, "loss": 0.0667, "num_input_tokens_seen": 242842320, "step": 112415 }, { "epoch": 18.339314845024468, "grad_norm": 0.0021333445329219103, "learning_rate": 2.0859866676188445e-05, "loss": 0.0012, "num_input_tokens_seen": 242852784, "step": 112420 }, { "epoch": 18.340130505709624, "grad_norm": 0.018818873912096024, "learning_rate": 2.083952618509527e-05, "loss": 0.0031, "num_input_tokens_seen": 242862960, "step": 112425 }, { "epoch": 18.34094616639478, "grad_norm": 0.13519856333732605, "learning_rate": 2.0819195404797098e-05, "loss": 0.0036, "num_input_tokens_seen": 242872528, "step": 112430 }, { "epoch": 18.341761827079935, "grad_norm": 0.07205647975206375, "learning_rate": 2.0798874335705707e-05, "loss": 0.0026, "num_input_tokens_seen": 242882512, "step": 112435 }, { "epoch": 18.34257748776509, "grad_norm": 0.0011894232593476772, "learning_rate": 2.077856297823316e-05, "loss": 0.0008, "num_input_tokens_seen": 242893648, "step": 112440 }, { "epoch": 18.343393148450243, "grad_norm": 0.00026534864446148276, "learning_rate": 2.0758261332790796e-05, "loss": 0.0004, "num_input_tokens_seen": 242903792, "step": 112445 }, { "epoch": 18.3442088091354, "grad_norm": 0.0033330917358398438, "learning_rate": 2.0737969399790392e-05, "loss": 0.0012, "num_input_tokens_seen": 242914608, "step": 112450 }, { "epoch": 18.345024469820554, "grad_norm": 0.00043053895933553576, "learning_rate": 2.0717687179642896e-05, "loss": 0.0003, "num_input_tokens_seen": 242924176, "step": 112455 }, { "epoch": 18.34584013050571, "grad_norm": 0.021750640124082565, "learning_rate": 2.0697414672759596e-05, "loss": 0.0017, "num_input_tokens_seen": 242934992, "step": 112460 }, { "epoch": 18.346655791190866, "grad_norm": 0.001099413144402206, "learning_rate": 2.0677151879551103e-05, "loss": 0.0009, "num_input_tokens_seen": 242946832, "step": 112465 }, { "epoch": 18.347471451876018, "grad_norm": 0.0010571812745183706, "learning_rate": 2.0656898800428313e-05, "loss": 0.0014, "num_input_tokens_seen": 242955696, "step": 112470 }, { "epoch": 18.348287112561174, "grad_norm": 0.0031790726352483034, "learning_rate": 2.0636655435801455e-05, "loss": 0.0011, "num_input_tokens_seen": 242967632, "step": 112475 }, { "epoch": 18.34910277324633, "grad_norm": 0.0003818267723545432, "learning_rate": 2.061642178608092e-05, "loss": 0.0016, "num_input_tokens_seen": 242977584, "step": 112480 }, { "epoch": 18.349918433931485, "grad_norm": 0.0007671648636460304, "learning_rate": 2.0596197851676768e-05, "loss": 0.0018, "num_input_tokens_seen": 242987056, "step": 112485 }, { "epoch": 18.35073409461664, "grad_norm": 0.0005823525134474039, "learning_rate": 2.057598363299884e-05, "loss": 0.0025, "num_input_tokens_seen": 242998000, "step": 112490 }, { "epoch": 18.351549755301793, "grad_norm": 0.010062271729111671, "learning_rate": 2.055577913045675e-05, "loss": 0.0007, "num_input_tokens_seen": 243007600, "step": 112495 }, { "epoch": 18.35236541598695, "grad_norm": 0.02205917239189148, "learning_rate": 2.0535584344460066e-05, "loss": 0.0015, "num_input_tokens_seen": 243017840, "step": 112500 }, { "epoch": 18.353181076672104, "grad_norm": 0.0054580941796302795, "learning_rate": 2.0515399275417958e-05, "loss": 0.0005, "num_input_tokens_seen": 243029072, "step": 112505 }, { "epoch": 18.35399673735726, "grad_norm": 0.0038355544675141573, "learning_rate": 2.0495223923739593e-05, "loss": 0.001, "num_input_tokens_seen": 243039472, "step": 112510 }, { "epoch": 18.354812398042416, "grad_norm": 0.008391822688281536, "learning_rate": 2.0475058289833815e-05, "loss": 0.0011, "num_input_tokens_seen": 243050384, "step": 112515 }, { "epoch": 18.355628058727568, "grad_norm": 0.017565961927175522, "learning_rate": 2.045490237410924e-05, "loss": 0.0013, "num_input_tokens_seen": 243060848, "step": 112520 }, { "epoch": 18.356443719412724, "grad_norm": 0.01259287167340517, "learning_rate": 2.043475617697449e-05, "loss": 0.0034, "num_input_tokens_seen": 243071952, "step": 112525 }, { "epoch": 18.35725938009788, "grad_norm": 0.1289086937904358, "learning_rate": 2.0414619698837677e-05, "loss": 0.0029, "num_input_tokens_seen": 243082736, "step": 112530 }, { "epoch": 18.358075040783035, "grad_norm": 0.06562699377536774, "learning_rate": 2.0394492940107144e-05, "loss": 0.003, "num_input_tokens_seen": 243093968, "step": 112535 }, { "epoch": 18.35889070146819, "grad_norm": 0.0011165590258315206, "learning_rate": 2.0374375901190456e-05, "loss": 0.0004, "num_input_tokens_seen": 243104816, "step": 112540 }, { "epoch": 18.359706362153343, "grad_norm": 0.0002490470069460571, "learning_rate": 2.0354268582495673e-05, "loss": 0.0008, "num_input_tokens_seen": 243115792, "step": 112545 }, { "epoch": 18.3605220228385, "grad_norm": 0.0014397975755855441, "learning_rate": 2.0334170984429966e-05, "loss": 0.0083, "num_input_tokens_seen": 243127152, "step": 112550 }, { "epoch": 18.361337683523654, "grad_norm": 0.01572202518582344, "learning_rate": 2.0314083107400904e-05, "loss": 0.0017, "num_input_tokens_seen": 243138032, "step": 112555 }, { "epoch": 18.36215334420881, "grad_norm": 0.0020776886958628893, "learning_rate": 2.0294004951815324e-05, "loss": 0.0866, "num_input_tokens_seen": 243148432, "step": 112560 }, { "epoch": 18.362969004893966, "grad_norm": 0.00019504585361573845, "learning_rate": 2.027393651808046e-05, "loss": 0.0004, "num_input_tokens_seen": 243159664, "step": 112565 }, { "epoch": 18.363784665579118, "grad_norm": 0.0009072918328456581, "learning_rate": 2.0253877806602648e-05, "loss": 0.0002, "num_input_tokens_seen": 243170448, "step": 112570 }, { "epoch": 18.364600326264274, "grad_norm": 0.00788530521094799, "learning_rate": 2.0233828817788792e-05, "loss": 0.0069, "num_input_tokens_seen": 243180592, "step": 112575 }, { "epoch": 18.36541598694943, "grad_norm": 0.0006791690248064697, "learning_rate": 2.0213789552044893e-05, "loss": 0.0005, "num_input_tokens_seen": 243191408, "step": 112580 }, { "epoch": 18.366231647634585, "grad_norm": 0.0016448087990283966, "learning_rate": 2.0193760009777295e-05, "loss": 0.0091, "num_input_tokens_seen": 243202160, "step": 112585 }, { "epoch": 18.36704730831974, "grad_norm": 0.032738588750362396, "learning_rate": 2.0173740191391732e-05, "loss": 0.0014, "num_input_tokens_seen": 243212464, "step": 112590 }, { "epoch": 18.367862969004893, "grad_norm": 0.003810546128079295, "learning_rate": 2.0153730097294153e-05, "loss": 0.0009, "num_input_tokens_seen": 243222768, "step": 112595 }, { "epoch": 18.36867862969005, "grad_norm": 0.00023762752243783325, "learning_rate": 2.0133729727889794e-05, "loss": 0.0002, "num_input_tokens_seen": 243234064, "step": 112600 }, { "epoch": 18.369494290375204, "grad_norm": 0.0093738604336977, "learning_rate": 2.0113739083584327e-05, "loss": 0.0009, "num_input_tokens_seen": 243244208, "step": 112605 }, { "epoch": 18.37030995106036, "grad_norm": 0.003199538215994835, "learning_rate": 2.0093758164782595e-05, "loss": 0.1295, "num_input_tokens_seen": 243254512, "step": 112610 }, { "epoch": 18.371125611745512, "grad_norm": 0.0027874563820660114, "learning_rate": 2.0073786971889662e-05, "loss": 0.0023, "num_input_tokens_seen": 243265648, "step": 112615 }, { "epoch": 18.371941272430668, "grad_norm": 0.0008438194054178894, "learning_rate": 2.0053825505310318e-05, "loss": 0.0012, "num_input_tokens_seen": 243276560, "step": 112620 }, { "epoch": 18.372756933115824, "grad_norm": 0.0001784580817911774, "learning_rate": 2.0033873765449018e-05, "loss": 0.0003, "num_input_tokens_seen": 243286608, "step": 112625 }, { "epoch": 18.37357259380098, "grad_norm": 0.002208051038905978, "learning_rate": 2.0013931752710214e-05, "loss": 0.002, "num_input_tokens_seen": 243298128, "step": 112630 }, { "epoch": 18.374388254486135, "grad_norm": 0.001969917444512248, "learning_rate": 1.9993999467497913e-05, "loss": 0.0009, "num_input_tokens_seen": 243309680, "step": 112635 }, { "epoch": 18.375203915171287, "grad_norm": 0.0012371476041153073, "learning_rate": 1.9974076910216188e-05, "loss": 0.0012, "num_input_tokens_seen": 243318608, "step": 112640 }, { "epoch": 18.376019575856443, "grad_norm": 0.00016509677516296506, "learning_rate": 1.995416408126871e-05, "loss": 0.0004, "num_input_tokens_seen": 243328624, "step": 112645 }, { "epoch": 18.3768352365416, "grad_norm": 0.03946005553007126, "learning_rate": 1.9934260981059103e-05, "loss": 0.0094, "num_input_tokens_seen": 243339600, "step": 112650 }, { "epoch": 18.377650897226754, "grad_norm": 0.023350073024630547, "learning_rate": 1.9914367609990713e-05, "loss": 0.0011, "num_input_tokens_seen": 243349584, "step": 112655 }, { "epoch": 18.37846655791191, "grad_norm": 0.013439115136861801, "learning_rate": 1.9894483968466715e-05, "loss": 0.0017, "num_input_tokens_seen": 243361392, "step": 112660 }, { "epoch": 18.379282218597062, "grad_norm": 0.0004325744812376797, "learning_rate": 1.9874610056890007e-05, "loss": 0.0011, "num_input_tokens_seen": 243371408, "step": 112665 }, { "epoch": 18.380097879282218, "grad_norm": 0.0010596549836918712, "learning_rate": 1.9854745875663438e-05, "loss": 0.0024, "num_input_tokens_seen": 243382928, "step": 112670 }, { "epoch": 18.380913539967374, "grad_norm": 0.003541856538504362, "learning_rate": 1.983489142518946e-05, "loss": 0.0005, "num_input_tokens_seen": 243393328, "step": 112675 }, { "epoch": 18.38172920065253, "grad_norm": 0.018053626641631126, "learning_rate": 1.9815046705870697e-05, "loss": 0.0017, "num_input_tokens_seen": 243404816, "step": 112680 }, { "epoch": 18.382544861337685, "grad_norm": 0.010469280183315277, "learning_rate": 1.979521171810905e-05, "loss": 0.0007, "num_input_tokens_seen": 243414640, "step": 112685 }, { "epoch": 18.383360522022837, "grad_norm": 0.0004034289449919015, "learning_rate": 1.9775386462306756e-05, "loss": 0.0033, "num_input_tokens_seen": 243425104, "step": 112690 }, { "epoch": 18.384176182707993, "grad_norm": 0.01321091316640377, "learning_rate": 1.9755570938865263e-05, "loss": 0.0028, "num_input_tokens_seen": 243436176, "step": 112695 }, { "epoch": 18.38499184339315, "grad_norm": 0.0053224824368953705, "learning_rate": 1.9735765148186536e-05, "loss": 0.0538, "num_input_tokens_seen": 243445456, "step": 112700 }, { "epoch": 18.385807504078304, "grad_norm": 0.0077959164045751095, "learning_rate": 1.9715969090671693e-05, "loss": 0.0031, "num_input_tokens_seen": 243456784, "step": 112705 }, { "epoch": 18.38662316476346, "grad_norm": 0.018376147374510765, "learning_rate": 1.969618276672208e-05, "loss": 0.0082, "num_input_tokens_seen": 243468656, "step": 112710 }, { "epoch": 18.387438825448612, "grad_norm": 0.025106191635131836, "learning_rate": 1.9676406176738547e-05, "loss": 0.0013, "num_input_tokens_seen": 243479824, "step": 112715 }, { "epoch": 18.388254486133768, "grad_norm": 0.0026839233469218016, "learning_rate": 1.965663932112205e-05, "loss": 0.0025, "num_input_tokens_seen": 243490512, "step": 112720 }, { "epoch": 18.389070146818923, "grad_norm": 0.9623442888259888, "learning_rate": 1.96368822002731e-05, "loss": 0.0193, "num_input_tokens_seen": 243501456, "step": 112725 }, { "epoch": 18.38988580750408, "grad_norm": 0.031202662736177444, "learning_rate": 1.9617134814592096e-05, "loss": 0.0898, "num_input_tokens_seen": 243513424, "step": 112730 }, { "epoch": 18.390701468189235, "grad_norm": 0.0001352128601865843, "learning_rate": 1.9597397164479282e-05, "loss": 0.0021, "num_input_tokens_seen": 243524880, "step": 112735 }, { "epoch": 18.391517128874387, "grad_norm": 0.026948045939207077, "learning_rate": 1.957766925033466e-05, "loss": 0.0025, "num_input_tokens_seen": 243535120, "step": 112740 }, { "epoch": 18.392332789559543, "grad_norm": 0.0015484422910958529, "learning_rate": 1.9557951072557978e-05, "loss": 0.0004, "num_input_tokens_seen": 243545232, "step": 112745 }, { "epoch": 18.3931484502447, "grad_norm": 0.02422100119292736, "learning_rate": 1.9538242631548965e-05, "loss": 0.0024, "num_input_tokens_seen": 243555216, "step": 112750 }, { "epoch": 18.393964110929854, "grad_norm": 0.00910177081823349, "learning_rate": 1.9518543927706968e-05, "loss": 0.0008, "num_input_tokens_seen": 243564848, "step": 112755 }, { "epoch": 18.39477977161501, "grad_norm": 0.0012397068785503507, "learning_rate": 1.949885496143117e-05, "loss": 0.0003, "num_input_tokens_seen": 243575120, "step": 112760 }, { "epoch": 18.395595432300162, "grad_norm": 0.02492532506585121, "learning_rate": 1.947917573312069e-05, "loss": 0.0011, "num_input_tokens_seen": 243585168, "step": 112765 }, { "epoch": 18.396411092985318, "grad_norm": 0.000635263801086694, "learning_rate": 1.945950624317422e-05, "loss": 0.0005, "num_input_tokens_seen": 243595920, "step": 112770 }, { "epoch": 18.397226753670473, "grad_norm": 0.001339736278168857, "learning_rate": 1.943984649199054e-05, "loss": 0.0009, "num_input_tokens_seen": 243605424, "step": 112775 }, { "epoch": 18.39804241435563, "grad_norm": 0.005495937541127205, "learning_rate": 1.9420196479967957e-05, "loss": 0.0004, "num_input_tokens_seen": 243616720, "step": 112780 }, { "epoch": 18.39885807504078, "grad_norm": 0.0017825653776526451, "learning_rate": 1.9400556207504805e-05, "loss": 0.031, "num_input_tokens_seen": 243627440, "step": 112785 }, { "epoch": 18.399673735725937, "grad_norm": 0.0019627753645181656, "learning_rate": 1.9380925674998995e-05, "loss": 0.0012, "num_input_tokens_seen": 243638096, "step": 112790 }, { "epoch": 18.400489396411093, "grad_norm": 0.0011574667878448963, "learning_rate": 1.9361304882848487e-05, "loss": 0.0013, "num_input_tokens_seen": 243648848, "step": 112795 }, { "epoch": 18.40130505709625, "grad_norm": 0.0014631313970312476, "learning_rate": 1.9341693831450847e-05, "loss": 0.0012, "num_input_tokens_seen": 243658768, "step": 112800 }, { "epoch": 18.402120717781404, "grad_norm": 0.005520334001630545, "learning_rate": 1.9322092521203537e-05, "loss": 0.0008, "num_input_tokens_seen": 243669104, "step": 112805 }, { "epoch": 18.402936378466556, "grad_norm": 0.12357936054468155, "learning_rate": 1.93025009525038e-05, "loss": 0.0044, "num_input_tokens_seen": 243679760, "step": 112810 }, { "epoch": 18.403752039151712, "grad_norm": 0.0001971587771549821, "learning_rate": 1.92829191257487e-05, "loss": 0.0003, "num_input_tokens_seen": 243690928, "step": 112815 }, { "epoch": 18.404567699836868, "grad_norm": 0.013569245114922523, "learning_rate": 1.9263347041335033e-05, "loss": 0.0063, "num_input_tokens_seen": 243701776, "step": 112820 }, { "epoch": 18.405383360522023, "grad_norm": 0.0014590908540412784, "learning_rate": 1.9243784699659538e-05, "loss": 0.0022, "num_input_tokens_seen": 243712528, "step": 112825 }, { "epoch": 18.40619902120718, "grad_norm": 0.00046936338185332716, "learning_rate": 1.9224232101118623e-05, "loss": 0.0019, "num_input_tokens_seen": 243722576, "step": 112830 }, { "epoch": 18.40701468189233, "grad_norm": 0.0006246300181373954, "learning_rate": 1.9204689246108576e-05, "loss": 0.0016, "num_input_tokens_seen": 243732976, "step": 112835 }, { "epoch": 18.407830342577487, "grad_norm": 0.0062375376001000404, "learning_rate": 1.9185156135025417e-05, "loss": 0.0022, "num_input_tokens_seen": 243744464, "step": 112840 }, { "epoch": 18.408646003262643, "grad_norm": 0.1260538250207901, "learning_rate": 1.9165632768264994e-05, "loss": 0.0035, "num_input_tokens_seen": 243754544, "step": 112845 }, { "epoch": 18.4094616639478, "grad_norm": 0.06947627663612366, "learning_rate": 1.9146119146223052e-05, "loss": 0.001, "num_input_tokens_seen": 243765648, "step": 112850 }, { "epoch": 18.410277324632954, "grad_norm": 0.00032668912899680436, "learning_rate": 1.9126615269294988e-05, "loss": 0.0012, "num_input_tokens_seen": 243776816, "step": 112855 }, { "epoch": 18.411092985318106, "grad_norm": 0.002180765848606825, "learning_rate": 1.9107121137876106e-05, "loss": 0.0009, "num_input_tokens_seen": 243788400, "step": 112860 }, { "epoch": 18.411908646003262, "grad_norm": 0.0003920606686733663, "learning_rate": 1.908763675236147e-05, "loss": 0.0006, "num_input_tokens_seen": 243797904, "step": 112865 }, { "epoch": 18.412724306688418, "grad_norm": 0.0018217455362901092, "learning_rate": 1.906816211314599e-05, "loss": 0.0017, "num_input_tokens_seen": 243809392, "step": 112870 }, { "epoch": 18.413539967373573, "grad_norm": 0.0001498242054367438, "learning_rate": 1.9048697220624244e-05, "loss": 0.001, "num_input_tokens_seen": 243820464, "step": 112875 }, { "epoch": 18.41435562805873, "grad_norm": 0.007615982089191675, "learning_rate": 1.9029242075190856e-05, "loss": 0.0015, "num_input_tokens_seen": 243831408, "step": 112880 }, { "epoch": 18.41517128874388, "grad_norm": 0.004452873952686787, "learning_rate": 1.9009796677239953e-05, "loss": 0.0005, "num_input_tokens_seen": 243841808, "step": 112885 }, { "epoch": 18.415986949429037, "grad_norm": 0.0032919731456786394, "learning_rate": 1.8990361027165726e-05, "loss": 0.0005, "num_input_tokens_seen": 243853776, "step": 112890 }, { "epoch": 18.416802610114193, "grad_norm": 0.0024367780424654484, "learning_rate": 1.8970935125362076e-05, "loss": 0.0042, "num_input_tokens_seen": 243864432, "step": 112895 }, { "epoch": 18.41761827079935, "grad_norm": 0.1128418818116188, "learning_rate": 1.8951518972222637e-05, "loss": 0.0025, "num_input_tokens_seen": 243875504, "step": 112900 }, { "epoch": 18.418433931484504, "grad_norm": 0.04299400746822357, "learning_rate": 1.893211256814087e-05, "loss": 0.0011, "num_input_tokens_seen": 243885168, "step": 112905 }, { "epoch": 18.419249592169656, "grad_norm": 0.007050327956676483, "learning_rate": 1.891271591351018e-05, "loss": 0.0011, "num_input_tokens_seen": 243896208, "step": 112910 }, { "epoch": 18.420065252854812, "grad_norm": 0.00047753899707458913, "learning_rate": 1.8893329008723593e-05, "loss": 0.0008, "num_input_tokens_seen": 243907344, "step": 112915 }, { "epoch": 18.420880913539968, "grad_norm": 0.6455734372138977, "learning_rate": 1.8873951854173955e-05, "loss": 0.0304, "num_input_tokens_seen": 243918448, "step": 112920 }, { "epoch": 18.421696574225123, "grad_norm": 0.00031255558133125305, "learning_rate": 1.885458445025412e-05, "loss": 0.0023, "num_input_tokens_seen": 243927984, "step": 112925 }, { "epoch": 18.42251223491028, "grad_norm": 0.0006976706790737808, "learning_rate": 1.883522679735644e-05, "loss": 0.062, "num_input_tokens_seen": 243938160, "step": 112930 }, { "epoch": 18.42332789559543, "grad_norm": 0.0007012999849393964, "learning_rate": 1.8815878895873328e-05, "loss": 0.0033, "num_input_tokens_seen": 243949168, "step": 112935 }, { "epoch": 18.424143556280587, "grad_norm": 0.00946141593158245, "learning_rate": 1.87965407461968e-05, "loss": 0.0004, "num_input_tokens_seen": 243960880, "step": 112940 }, { "epoch": 18.424959216965743, "grad_norm": 0.002110017230734229, "learning_rate": 1.877721234871893e-05, "loss": 0.0026, "num_input_tokens_seen": 243971120, "step": 112945 }, { "epoch": 18.4257748776509, "grad_norm": 0.0002857319777831435, "learning_rate": 1.8757893703831243e-05, "loss": 0.0035, "num_input_tokens_seen": 243981104, "step": 112950 }, { "epoch": 18.42659053833605, "grad_norm": 0.009105951525270939, "learning_rate": 1.8738584811925417e-05, "loss": 0.0006, "num_input_tokens_seen": 243992816, "step": 112955 }, { "epoch": 18.427406199021206, "grad_norm": 0.0012741464888677, "learning_rate": 1.8719285673392594e-05, "loss": 0.0032, "num_input_tokens_seen": 244003920, "step": 112960 }, { "epoch": 18.428221859706362, "grad_norm": 0.021076209843158722, "learning_rate": 1.869999628862401e-05, "loss": 0.0018, "num_input_tokens_seen": 244016528, "step": 112965 }, { "epoch": 18.429037520391518, "grad_norm": 0.00040486734360456467, "learning_rate": 1.8680716658010633e-05, "loss": 0.0004, "num_input_tokens_seen": 244027824, "step": 112970 }, { "epoch": 18.429853181076673, "grad_norm": 0.00019396857533138245, "learning_rate": 1.8661446781943093e-05, "loss": 0.0002, "num_input_tokens_seen": 244039184, "step": 112975 }, { "epoch": 18.430668841761825, "grad_norm": 0.004015372600406408, "learning_rate": 1.8642186660811965e-05, "loss": 0.0016, "num_input_tokens_seen": 244051024, "step": 112980 }, { "epoch": 18.43148450244698, "grad_norm": 0.0005323364166542888, "learning_rate": 1.862293629500761e-05, "loss": 0.0005, "num_input_tokens_seen": 244063248, "step": 112985 }, { "epoch": 18.432300163132137, "grad_norm": 0.8979222774505615, "learning_rate": 1.8603695684920042e-05, "loss": 0.1016, "num_input_tokens_seen": 244074000, "step": 112990 }, { "epoch": 18.433115823817293, "grad_norm": 0.00034616264747455716, "learning_rate": 1.858446483093934e-05, "loss": 0.0037, "num_input_tokens_seen": 244084752, "step": 112995 }, { "epoch": 18.43393148450245, "grad_norm": 0.0007030692067928612, "learning_rate": 1.856524373345514e-05, "loss": 0.0016, "num_input_tokens_seen": 244095856, "step": 113000 }, { "epoch": 18.4347471451876, "grad_norm": 0.004537553526461124, "learning_rate": 1.8546032392857014e-05, "loss": 0.0179, "num_input_tokens_seen": 244107696, "step": 113005 }, { "epoch": 18.435562805872756, "grad_norm": 0.010070395655930042, "learning_rate": 1.8526830809534377e-05, "loss": 0.0023, "num_input_tokens_seen": 244120528, "step": 113010 }, { "epoch": 18.436378466557912, "grad_norm": 0.0016404170310124755, "learning_rate": 1.8507638983876252e-05, "loss": 0.0021, "num_input_tokens_seen": 244131664, "step": 113015 }, { "epoch": 18.437194127243067, "grad_norm": 0.002478382084518671, "learning_rate": 1.84884569162716e-05, "loss": 0.0022, "num_input_tokens_seen": 244142288, "step": 113020 }, { "epoch": 18.438009787928223, "grad_norm": 0.1491037756204605, "learning_rate": 1.8469284607109282e-05, "loss": 0.0029, "num_input_tokens_seen": 244154480, "step": 113025 }, { "epoch": 18.438825448613375, "grad_norm": 0.0012238170020282269, "learning_rate": 1.8450122056777762e-05, "loss": 0.0035, "num_input_tokens_seen": 244164944, "step": 113030 }, { "epoch": 18.43964110929853, "grad_norm": 0.00036461750278249383, "learning_rate": 1.8430969265665398e-05, "loss": 0.0005, "num_input_tokens_seen": 244175248, "step": 113035 }, { "epoch": 18.440456769983687, "grad_norm": 0.002877857070416212, "learning_rate": 1.8411826234160324e-05, "loss": 0.0008, "num_input_tokens_seen": 244185552, "step": 113040 }, { "epoch": 18.441272430668842, "grad_norm": 0.010942216031253338, "learning_rate": 1.8392692962650504e-05, "loss": 0.0017, "num_input_tokens_seen": 244195312, "step": 113045 }, { "epoch": 18.442088091353998, "grad_norm": 0.007464864756911993, "learning_rate": 1.8373569451523853e-05, "loss": 0.0006, "num_input_tokens_seen": 244205840, "step": 113050 }, { "epoch": 18.44290375203915, "grad_norm": 0.025183305144309998, "learning_rate": 1.8354455701167672e-05, "loss": 0.0017, "num_input_tokens_seen": 244215216, "step": 113055 }, { "epoch": 18.443719412724306, "grad_norm": 0.0012140703620389104, "learning_rate": 1.833535171196954e-05, "loss": 0.0019, "num_input_tokens_seen": 244225648, "step": 113060 }, { "epoch": 18.44453507340946, "grad_norm": 0.000981401652097702, "learning_rate": 1.831625748431648e-05, "loss": 0.0139, "num_input_tokens_seen": 244237872, "step": 113065 }, { "epoch": 18.445350734094617, "grad_norm": 0.00016673772188369185, "learning_rate": 1.829717301859557e-05, "loss": 0.0011, "num_input_tokens_seen": 244248560, "step": 113070 }, { "epoch": 18.446166394779773, "grad_norm": 0.014688815921545029, "learning_rate": 1.8278098315193504e-05, "loss": 0.0009, "num_input_tokens_seen": 244258960, "step": 113075 }, { "epoch": 18.446982055464925, "grad_norm": 0.0007847630186006427, "learning_rate": 1.8259033374496915e-05, "loss": 0.0007, "num_input_tokens_seen": 244269424, "step": 113080 }, { "epoch": 18.44779771615008, "grad_norm": 0.00025912452838383615, "learning_rate": 1.8239978196892105e-05, "loss": 0.002, "num_input_tokens_seen": 244281008, "step": 113085 }, { "epoch": 18.448613376835237, "grad_norm": 0.0014495945069938898, "learning_rate": 1.8220932782765377e-05, "loss": 0.0027, "num_input_tokens_seen": 244290544, "step": 113090 }, { "epoch": 18.449429037520392, "grad_norm": 0.013381626456975937, "learning_rate": 1.8201897132502476e-05, "loss": 0.0067, "num_input_tokens_seen": 244302000, "step": 113095 }, { "epoch": 18.450244698205548, "grad_norm": 0.014497867785394192, "learning_rate": 1.8182871246489487e-05, "loss": 0.0007, "num_input_tokens_seen": 244312336, "step": 113100 }, { "epoch": 18.4510603588907, "grad_norm": 0.024804679676890373, "learning_rate": 1.8163855125111707e-05, "loss": 0.0024, "num_input_tokens_seen": 244323600, "step": 113105 }, { "epoch": 18.451876019575856, "grad_norm": 0.0011982301948592067, "learning_rate": 1.8144848768754717e-05, "loss": 0.0005, "num_input_tokens_seen": 244334704, "step": 113110 }, { "epoch": 18.45269168026101, "grad_norm": 0.0005384967080317438, "learning_rate": 1.8125852177803658e-05, "loss": 0.0011, "num_input_tokens_seen": 244344816, "step": 113115 }, { "epoch": 18.453507340946167, "grad_norm": 0.003350053681060672, "learning_rate": 1.8106865352643498e-05, "loss": 0.0011, "num_input_tokens_seen": 244355984, "step": 113120 }, { "epoch": 18.454323001631323, "grad_norm": 0.01903350092470646, "learning_rate": 1.808788829365904e-05, "loss": 0.0014, "num_input_tokens_seen": 244366320, "step": 113125 }, { "epoch": 18.455138662316475, "grad_norm": 0.0018174276920035481, "learning_rate": 1.8068921001234862e-05, "loss": 0.0024, "num_input_tokens_seen": 244377296, "step": 113130 }, { "epoch": 18.45595432300163, "grad_norm": 0.0027823823038488626, "learning_rate": 1.804996347575538e-05, "loss": 0.0009, "num_input_tokens_seen": 244389392, "step": 113135 }, { "epoch": 18.456769983686787, "grad_norm": 0.0007917169132269919, "learning_rate": 1.8031015717604793e-05, "loss": 0.0004, "num_input_tokens_seen": 244399888, "step": 113140 }, { "epoch": 18.457585644371942, "grad_norm": 0.563396155834198, "learning_rate": 1.8012077727167065e-05, "loss": 0.031, "num_input_tokens_seen": 244410128, "step": 113145 }, { "epoch": 18.458401305057095, "grad_norm": 0.03793555125594139, "learning_rate": 1.7993149504826056e-05, "loss": 0.0178, "num_input_tokens_seen": 244421488, "step": 113150 }, { "epoch": 18.45921696574225, "grad_norm": 0.010772444307804108, "learning_rate": 1.7974231050965352e-05, "loss": 0.002, "num_input_tokens_seen": 244432464, "step": 113155 }, { "epoch": 18.460032626427406, "grad_norm": 0.00041844710358418524, "learning_rate": 1.7955322365968253e-05, "loss": 0.0005, "num_input_tokens_seen": 244443984, "step": 113160 }, { "epoch": 18.46084828711256, "grad_norm": 0.004533614031970501, "learning_rate": 1.793642345021823e-05, "loss": 0.0005, "num_input_tokens_seen": 244455056, "step": 113165 }, { "epoch": 18.461663947797717, "grad_norm": 0.0014635130064561963, "learning_rate": 1.7917534304097983e-05, "loss": 0.0006, "num_input_tokens_seen": 244465808, "step": 113170 }, { "epoch": 18.46247960848287, "grad_norm": 0.0006045596674084663, "learning_rate": 1.7898654927990587e-05, "loss": 0.0009, "num_input_tokens_seen": 244476080, "step": 113175 }, { "epoch": 18.463295269168025, "grad_norm": 0.006617935374379158, "learning_rate": 1.7879785322278408e-05, "loss": 0.0035, "num_input_tokens_seen": 244486608, "step": 113180 }, { "epoch": 18.46411092985318, "grad_norm": 0.005817278753966093, "learning_rate": 1.786092548734408e-05, "loss": 0.0044, "num_input_tokens_seen": 244497296, "step": 113185 }, { "epoch": 18.464926590538337, "grad_norm": 0.003113929880782962, "learning_rate": 1.7842075423569692e-05, "loss": 0.0005, "num_input_tokens_seen": 244506288, "step": 113190 }, { "epoch": 18.465742251223492, "grad_norm": 0.010674776509404182, "learning_rate": 1.782323513133738e-05, "loss": 0.0055, "num_input_tokens_seen": 244517712, "step": 113195 }, { "epoch": 18.466557911908644, "grad_norm": 0.0003821174323093146, "learning_rate": 1.7804404611028778e-05, "loss": 0.0044, "num_input_tokens_seen": 244528912, "step": 113200 }, { "epoch": 18.4673735725938, "grad_norm": 0.010828257538378239, "learning_rate": 1.7785583863025757e-05, "loss": 0.0022, "num_input_tokens_seen": 244539984, "step": 113205 }, { "epoch": 18.468189233278956, "grad_norm": 0.003220900660380721, "learning_rate": 1.776677288770945e-05, "loss": 0.0009, "num_input_tokens_seen": 244550864, "step": 113210 }, { "epoch": 18.46900489396411, "grad_norm": 0.015322371385991573, "learning_rate": 1.7747971685461383e-05, "loss": 0.0023, "num_input_tokens_seen": 244561392, "step": 113215 }, { "epoch": 18.469820554649267, "grad_norm": 0.013095523230731487, "learning_rate": 1.772918025666237e-05, "loss": 0.0031, "num_input_tokens_seen": 244571504, "step": 113220 }, { "epoch": 18.47063621533442, "grad_norm": 0.004092440940439701, "learning_rate": 1.7710398601693432e-05, "loss": 0.0015, "num_input_tokens_seen": 244581552, "step": 113225 }, { "epoch": 18.471451876019575, "grad_norm": 0.0057231769897043705, "learning_rate": 1.769162672093494e-05, "loss": 0.0024, "num_input_tokens_seen": 244592144, "step": 113230 }, { "epoch": 18.47226753670473, "grad_norm": 0.255003958940506, "learning_rate": 1.7672864614767636e-05, "loss": 0.0078, "num_input_tokens_seen": 244602448, "step": 113235 }, { "epoch": 18.473083197389887, "grad_norm": 0.0010036260355263948, "learning_rate": 1.7654112283571446e-05, "loss": 0.001, "num_input_tokens_seen": 244612368, "step": 113240 }, { "epoch": 18.473898858075042, "grad_norm": 0.0030960855074226856, "learning_rate": 1.7635369727726726e-05, "loss": 0.0005, "num_input_tokens_seen": 244623184, "step": 113245 }, { "epoch": 18.474714518760194, "grad_norm": 0.05858004838228226, "learning_rate": 1.7616636947613063e-05, "loss": 0.0031, "num_input_tokens_seen": 244634032, "step": 113250 }, { "epoch": 18.47553017944535, "grad_norm": 0.006544803269207478, "learning_rate": 1.759791394361021e-05, "loss": 0.0017, "num_input_tokens_seen": 244645296, "step": 113255 }, { "epoch": 18.476345840130506, "grad_norm": 0.000575044599827379, "learning_rate": 1.757920071609764e-05, "loss": 0.0013, "num_input_tokens_seen": 244655632, "step": 113260 }, { "epoch": 18.47716150081566, "grad_norm": 0.0021637838799506426, "learning_rate": 1.75604972654545e-05, "loss": 0.0099, "num_input_tokens_seen": 244665776, "step": 113265 }, { "epoch": 18.477977161500817, "grad_norm": 0.016059428453445435, "learning_rate": 1.754180359205998e-05, "loss": 0.0022, "num_input_tokens_seen": 244677168, "step": 113270 }, { "epoch": 18.47879282218597, "grad_norm": 0.025442641228437424, "learning_rate": 1.752311969629278e-05, "loss": 0.0018, "num_input_tokens_seen": 244687632, "step": 113275 }, { "epoch": 18.479608482871125, "grad_norm": 0.015501430258154869, "learning_rate": 1.7504445578531703e-05, "loss": 0.0033, "num_input_tokens_seen": 244697328, "step": 113280 }, { "epoch": 18.48042414355628, "grad_norm": 0.0016439296305179596, "learning_rate": 1.7485781239155063e-05, "loss": 0.0006, "num_input_tokens_seen": 244707952, "step": 113285 }, { "epoch": 18.481239804241437, "grad_norm": 0.003533845068886876, "learning_rate": 1.7467126678541223e-05, "loss": 0.0016, "num_input_tokens_seen": 244718896, "step": 113290 }, { "epoch": 18.482055464926592, "grad_norm": 0.01935855858027935, "learning_rate": 1.7448481897068158e-05, "loss": 0.0012, "num_input_tokens_seen": 244729104, "step": 113295 }, { "epoch": 18.482871125611744, "grad_norm": 0.00022205821005627513, "learning_rate": 1.742984689511379e-05, "loss": 0.0032, "num_input_tokens_seen": 244738320, "step": 113300 }, { "epoch": 18.4836867862969, "grad_norm": 0.0034483519848436117, "learning_rate": 1.7411221673055644e-05, "loss": 0.0011, "num_input_tokens_seen": 244749680, "step": 113305 }, { "epoch": 18.484502446982056, "grad_norm": 0.004889797419309616, "learning_rate": 1.739260623127148e-05, "loss": 0.0009, "num_input_tokens_seen": 244760816, "step": 113310 }, { "epoch": 18.48531810766721, "grad_norm": 0.008101309649646282, "learning_rate": 1.737400057013827e-05, "loss": 0.0005, "num_input_tokens_seen": 244771440, "step": 113315 }, { "epoch": 18.486133768352367, "grad_norm": 0.0009130858816206455, "learning_rate": 1.735540469003327e-05, "loss": 0.1336, "num_input_tokens_seen": 244782096, "step": 113320 }, { "epoch": 18.48694942903752, "grad_norm": 0.055807098746299744, "learning_rate": 1.733681859133318e-05, "loss": 0.1183, "num_input_tokens_seen": 244792144, "step": 113325 }, { "epoch": 18.487765089722675, "grad_norm": 0.00047300878213718534, "learning_rate": 1.7318242274414864e-05, "loss": 0.0023, "num_input_tokens_seen": 244801424, "step": 113330 }, { "epoch": 18.48858075040783, "grad_norm": 0.0005087702884338796, "learning_rate": 1.7299675739654575e-05, "loss": 0.0036, "num_input_tokens_seen": 244812560, "step": 113335 }, { "epoch": 18.489396411092986, "grad_norm": 0.15531721711158752, "learning_rate": 1.7281118987428847e-05, "loss": 0.0044, "num_input_tokens_seen": 244821808, "step": 113340 }, { "epoch": 18.49021207177814, "grad_norm": 0.00020166859030723572, "learning_rate": 1.7262572018113488e-05, "loss": 0.0009, "num_input_tokens_seen": 244833424, "step": 113345 }, { "epoch": 18.491027732463294, "grad_norm": 0.0009022870799526572, "learning_rate": 1.7244034832084587e-05, "loss": 0.0006, "num_input_tokens_seen": 244843504, "step": 113350 }, { "epoch": 18.49184339314845, "grad_norm": 0.0002938243851531297, "learning_rate": 1.722550742971768e-05, "loss": 0.0004, "num_input_tokens_seen": 244853360, "step": 113355 }, { "epoch": 18.492659053833606, "grad_norm": 0.04535282030701637, "learning_rate": 1.720698981138835e-05, "loss": 0.0035, "num_input_tokens_seen": 244864432, "step": 113360 }, { "epoch": 18.49347471451876, "grad_norm": 0.003897402435541153, "learning_rate": 1.7188481977471804e-05, "loss": 0.0034, "num_input_tokens_seen": 244874928, "step": 113365 }, { "epoch": 18.494290375203914, "grad_norm": 0.01214703917503357, "learning_rate": 1.716998392834318e-05, "loss": 0.0013, "num_input_tokens_seen": 244886640, "step": 113370 }, { "epoch": 18.49510603588907, "grad_norm": 0.0006659848149865866, "learning_rate": 1.715149566437735e-05, "loss": 0.0009, "num_input_tokens_seen": 244896912, "step": 113375 }, { "epoch": 18.495921696574225, "grad_norm": 0.008747376501560211, "learning_rate": 1.7133017185949007e-05, "loss": 0.0005, "num_input_tokens_seen": 244907728, "step": 113380 }, { "epoch": 18.49673735725938, "grad_norm": 0.0059668924659490585, "learning_rate": 1.711454849343258e-05, "loss": 0.0677, "num_input_tokens_seen": 244918352, "step": 113385 }, { "epoch": 18.497553017944536, "grad_norm": 0.0030300829093903303, "learning_rate": 1.709608958720249e-05, "loss": 0.0044, "num_input_tokens_seen": 244928336, "step": 113390 }, { "epoch": 18.49836867862969, "grad_norm": 0.00733586261048913, "learning_rate": 1.7077640467632714e-05, "loss": 0.0014, "num_input_tokens_seen": 244938512, "step": 113395 }, { "epoch": 18.499184339314844, "grad_norm": 0.002480535302311182, "learning_rate": 1.705920113509718e-05, "loss": 0.0009, "num_input_tokens_seen": 244950128, "step": 113400 }, { "epoch": 18.5, "grad_norm": 0.0012112815165892243, "learning_rate": 1.7040771589969583e-05, "loss": 0.0371, "num_input_tokens_seen": 244960368, "step": 113405 }, { "epoch": 18.500815660685156, "grad_norm": 0.002310445299372077, "learning_rate": 1.7022351832623407e-05, "loss": 0.0006, "num_input_tokens_seen": 244971440, "step": 113410 }, { "epoch": 18.50163132137031, "grad_norm": 0.004520154092460871, "learning_rate": 1.7003941863432014e-05, "loss": 0.0026, "num_input_tokens_seen": 244981456, "step": 113415 }, { "epoch": 18.502446982055464, "grad_norm": 0.0012209441047161818, "learning_rate": 1.6985541682768445e-05, "loss": 0.0006, "num_input_tokens_seen": 244991216, "step": 113420 }, { "epoch": 18.50326264274062, "grad_norm": 0.000785894924774766, "learning_rate": 1.696715129100562e-05, "loss": 0.0013, "num_input_tokens_seen": 245002352, "step": 113425 }, { "epoch": 18.504078303425775, "grad_norm": 0.008201303891837597, "learning_rate": 1.6948770688516248e-05, "loss": 0.0012, "num_input_tokens_seen": 245013904, "step": 113430 }, { "epoch": 18.50489396411093, "grad_norm": 0.0007624907302670181, "learning_rate": 1.6930399875672853e-05, "loss": 0.083, "num_input_tokens_seen": 245025456, "step": 113435 }, { "epoch": 18.505709624796086, "grad_norm": 0.11084222048521042, "learning_rate": 1.69120388528477e-05, "loss": 0.0091, "num_input_tokens_seen": 245036560, "step": 113440 }, { "epoch": 18.50652528548124, "grad_norm": 0.0011350169079378247, "learning_rate": 1.6893687620412933e-05, "loss": 0.0029, "num_input_tokens_seen": 245047248, "step": 113445 }, { "epoch": 18.507340946166394, "grad_norm": 0.0018546866485849023, "learning_rate": 1.687534617874037e-05, "loss": 0.0012, "num_input_tokens_seen": 245059536, "step": 113450 }, { "epoch": 18.50815660685155, "grad_norm": 0.0010910548735409975, "learning_rate": 1.685701452820193e-05, "loss": 0.0005, "num_input_tokens_seen": 245068592, "step": 113455 }, { "epoch": 18.508972267536706, "grad_norm": 0.0018128188094124198, "learning_rate": 1.6838692669168876e-05, "loss": 0.0014, "num_input_tokens_seen": 245080912, "step": 113460 }, { "epoch": 18.50978792822186, "grad_norm": 0.006234641652554274, "learning_rate": 1.682038060201274e-05, "loss": 0.0011, "num_input_tokens_seen": 245091824, "step": 113465 }, { "epoch": 18.510603588907014, "grad_norm": 0.00048175413394346833, "learning_rate": 1.680207832710451e-05, "loss": 0.0003, "num_input_tokens_seen": 245102928, "step": 113470 }, { "epoch": 18.51141924959217, "grad_norm": 0.002115569543093443, "learning_rate": 1.6783785844815157e-05, "loss": 0.002, "num_input_tokens_seen": 245113200, "step": 113475 }, { "epoch": 18.512234910277325, "grad_norm": 0.007819109596312046, "learning_rate": 1.6765503155515394e-05, "loss": 0.0019, "num_input_tokens_seen": 245125360, "step": 113480 }, { "epoch": 18.51305057096248, "grad_norm": 0.0004998321528546512, "learning_rate": 1.6747230259575696e-05, "loss": 0.0016, "num_input_tokens_seen": 245136016, "step": 113485 }, { "epoch": 18.513866231647633, "grad_norm": 0.005353355780243874, "learning_rate": 1.6728967157366492e-05, "loss": 0.0038, "num_input_tokens_seen": 245146896, "step": 113490 }, { "epoch": 18.51468189233279, "grad_norm": 0.0007995230262167752, "learning_rate": 1.671071384925782e-05, "loss": 0.0005, "num_input_tokens_seen": 245157104, "step": 113495 }, { "epoch": 18.515497553017944, "grad_norm": 0.07687735557556152, "learning_rate": 1.66924703356196e-05, "loss": 0.0028, "num_input_tokens_seen": 245167696, "step": 113500 }, { "epoch": 18.5163132137031, "grad_norm": 0.03595279902219772, "learning_rate": 1.6674236616821602e-05, "loss": 0.0018, "num_input_tokens_seen": 245178768, "step": 113505 }, { "epoch": 18.517128874388256, "grad_norm": 0.2188006043434143, "learning_rate": 1.6656012693233357e-05, "loss": 0.0039, "num_input_tokens_seen": 245189264, "step": 113510 }, { "epoch": 18.517944535073408, "grad_norm": 0.005239508114755154, "learning_rate": 1.6637798565224127e-05, "loss": 0.0045, "num_input_tokens_seen": 245199600, "step": 113515 }, { "epoch": 18.518760195758563, "grad_norm": 0.0013815397396683693, "learning_rate": 1.6619594233163172e-05, "loss": 0.0007, "num_input_tokens_seen": 245211856, "step": 113520 }, { "epoch": 18.51957585644372, "grad_norm": 0.0036403543781489134, "learning_rate": 1.6601399697419306e-05, "loss": 0.003, "num_input_tokens_seen": 245222288, "step": 113525 }, { "epoch": 18.520391517128875, "grad_norm": 0.0004640101979020983, "learning_rate": 1.658321495836135e-05, "loss": 0.0007, "num_input_tokens_seen": 245230896, "step": 113530 }, { "epoch": 18.52120717781403, "grad_norm": 0.00018253900634590536, "learning_rate": 1.6565040016357725e-05, "loss": 0.0023, "num_input_tokens_seen": 245242736, "step": 113535 }, { "epoch": 18.522022838499183, "grad_norm": 0.0010609666351228952, "learning_rate": 1.654687487177692e-05, "loss": 0.0004, "num_input_tokens_seen": 245253616, "step": 113540 }, { "epoch": 18.52283849918434, "grad_norm": 0.0076196757145226, "learning_rate": 1.6528719524986967e-05, "loss": 0.0029, "num_input_tokens_seen": 245263792, "step": 113545 }, { "epoch": 18.523654159869494, "grad_norm": 0.008771974593400955, "learning_rate": 1.6510573976355858e-05, "loss": 0.001, "num_input_tokens_seen": 245273104, "step": 113550 }, { "epoch": 18.52446982055465, "grad_norm": 0.0018892057705670595, "learning_rate": 1.6492438226251295e-05, "loss": 0.0007, "num_input_tokens_seen": 245283312, "step": 113555 }, { "epoch": 18.525285481239806, "grad_norm": 0.0011229922529309988, "learning_rate": 1.647431227504087e-05, "loss": 0.0059, "num_input_tokens_seen": 245293776, "step": 113560 }, { "epoch": 18.526101141924958, "grad_norm": 0.0634569600224495, "learning_rate": 1.645619612309185e-05, "loss": 0.0013, "num_input_tokens_seen": 245304816, "step": 113565 }, { "epoch": 18.526916802610113, "grad_norm": 0.2746676504611969, "learning_rate": 1.6438089770771435e-05, "loss": 0.0055, "num_input_tokens_seen": 245316112, "step": 113570 }, { "epoch": 18.52773246329527, "grad_norm": 0.016933711245656013, "learning_rate": 1.6419993218446673e-05, "loss": 0.0026, "num_input_tokens_seen": 245328016, "step": 113575 }, { "epoch": 18.528548123980425, "grad_norm": 0.007328105624765158, "learning_rate": 1.640190646648404e-05, "loss": 0.0011, "num_input_tokens_seen": 245339856, "step": 113580 }, { "epoch": 18.52936378466558, "grad_norm": 0.002034168690443039, "learning_rate": 1.638382951525047e-05, "loss": 0.0018, "num_input_tokens_seen": 245350160, "step": 113585 }, { "epoch": 18.530179445350733, "grad_norm": 0.0021341920364648104, "learning_rate": 1.6365762365111947e-05, "loss": 0.0017, "num_input_tokens_seen": 245361200, "step": 113590 }, { "epoch": 18.53099510603589, "grad_norm": 0.0005403195391409099, "learning_rate": 1.6347705016434844e-05, "loss": 0.0022, "num_input_tokens_seen": 245370832, "step": 113595 }, { "epoch": 18.531810766721044, "grad_norm": 0.00026967190206050873, "learning_rate": 1.6329657469585037e-05, "loss": 0.0004, "num_input_tokens_seen": 245380688, "step": 113600 }, { "epoch": 18.5326264274062, "grad_norm": 0.00889151357114315, "learning_rate": 1.6311619724928283e-05, "loss": 0.0054, "num_input_tokens_seen": 245391920, "step": 113605 }, { "epoch": 18.533442088091356, "grad_norm": 0.0009094868437387049, "learning_rate": 1.6293591782830186e-05, "loss": 0.0006, "num_input_tokens_seen": 245404144, "step": 113610 }, { "epoch": 18.534257748776508, "grad_norm": 0.0718315988779068, "learning_rate": 1.6275573643656115e-05, "loss": 0.0088, "num_input_tokens_seen": 245414064, "step": 113615 }, { "epoch": 18.535073409461663, "grad_norm": 0.04146379604935646, "learning_rate": 1.6257565307771115e-05, "loss": 0.0023, "num_input_tokens_seen": 245425424, "step": 113620 }, { "epoch": 18.53588907014682, "grad_norm": 0.015665479004383087, "learning_rate": 1.6239566775540283e-05, "loss": 0.0028, "num_input_tokens_seen": 245434992, "step": 113625 }, { "epoch": 18.536704730831975, "grad_norm": 0.0012484738836064935, "learning_rate": 1.6221578047328322e-05, "loss": 0.0006, "num_input_tokens_seen": 245446288, "step": 113630 }, { "epoch": 18.53752039151713, "grad_norm": 0.28304043412208557, "learning_rate": 1.6203599123499778e-05, "loss": 0.0022, "num_input_tokens_seen": 245456816, "step": 113635 }, { "epoch": 18.538336052202283, "grad_norm": 0.008190099149942398, "learning_rate": 1.6185630004419027e-05, "loss": 0.0044, "num_input_tokens_seen": 245467408, "step": 113640 }, { "epoch": 18.53915171288744, "grad_norm": 0.00023345145746134222, "learning_rate": 1.6167670690450276e-05, "loss": 0.0125, "num_input_tokens_seen": 245478160, "step": 113645 }, { "epoch": 18.539967373572594, "grad_norm": 0.012173679657280445, "learning_rate": 1.6149721181957456e-05, "loss": 0.0011, "num_input_tokens_seen": 245488688, "step": 113650 }, { "epoch": 18.54078303425775, "grad_norm": 0.00276687229052186, "learning_rate": 1.6131781479304332e-05, "loss": 0.0006, "num_input_tokens_seen": 245499600, "step": 113655 }, { "epoch": 18.541598694942905, "grad_norm": 0.004582415334880352, "learning_rate": 1.61138515828545e-05, "loss": 0.0013, "num_input_tokens_seen": 245510096, "step": 113660 }, { "epoch": 18.542414355628058, "grad_norm": 0.012344161979854107, "learning_rate": 1.6095931492971282e-05, "loss": 0.0022, "num_input_tokens_seen": 245519760, "step": 113665 }, { "epoch": 18.543230016313213, "grad_norm": 0.0012308456934988499, "learning_rate": 1.6078021210017945e-05, "loss": 0.0006, "num_input_tokens_seen": 245530768, "step": 113670 }, { "epoch": 18.54404567699837, "grad_norm": 0.004262410569936037, "learning_rate": 1.6060120734357366e-05, "loss": 0.0009, "num_input_tokens_seen": 245541232, "step": 113675 }, { "epoch": 18.544861337683525, "grad_norm": 0.006430953275412321, "learning_rate": 1.604223006635236e-05, "loss": 0.0006, "num_input_tokens_seen": 245552976, "step": 113680 }, { "epoch": 18.545676998368677, "grad_norm": 0.0008573816157877445, "learning_rate": 1.6024349206365475e-05, "loss": 0.0023, "num_input_tokens_seen": 245564272, "step": 113685 }, { "epoch": 18.546492659053833, "grad_norm": 0.0015064050676301122, "learning_rate": 1.6006478154759197e-05, "loss": 0.003, "num_input_tokens_seen": 245574896, "step": 113690 }, { "epoch": 18.54730831973899, "grad_norm": 0.015532300807535648, "learning_rate": 1.598861691189557e-05, "loss": 0.0016, "num_input_tokens_seen": 245586096, "step": 113695 }, { "epoch": 18.548123980424144, "grad_norm": 0.0036405641585588455, "learning_rate": 1.5970765478136696e-05, "loss": 0.0007, "num_input_tokens_seen": 245596656, "step": 113700 }, { "epoch": 18.5489396411093, "grad_norm": 0.14665931463241577, "learning_rate": 1.5952923853844224e-05, "loss": 0.0032, "num_input_tokens_seen": 245608112, "step": 113705 }, { "epoch": 18.549755301794452, "grad_norm": 0.0008804710232652724, "learning_rate": 1.5935092039379874e-05, "loss": 0.0004, "num_input_tokens_seen": 245619152, "step": 113710 }, { "epoch": 18.550570962479608, "grad_norm": 0.006566312164068222, "learning_rate": 1.5917270035104903e-05, "loss": 0.0018, "num_input_tokens_seen": 245629424, "step": 113715 }, { "epoch": 18.551386623164763, "grad_norm": 0.0007352828979492188, "learning_rate": 1.5899457841380637e-05, "loss": 0.0004, "num_input_tokens_seen": 245640560, "step": 113720 }, { "epoch": 18.55220228384992, "grad_norm": 0.00016575584595557302, "learning_rate": 1.5881655458567847e-05, "loss": 0.0012, "num_input_tokens_seen": 245651568, "step": 113725 }, { "epoch": 18.553017944535075, "grad_norm": 0.025155337527394295, "learning_rate": 1.5863862887027626e-05, "loss": 0.001, "num_input_tokens_seen": 245662768, "step": 113730 }, { "epoch": 18.553833605220227, "grad_norm": 0.04157517850399017, "learning_rate": 1.5846080127120244e-05, "loss": 0.004, "num_input_tokens_seen": 245674128, "step": 113735 }, { "epoch": 18.554649265905383, "grad_norm": 0.0004997915239073336, "learning_rate": 1.58283071792063e-05, "loss": 0.0424, "num_input_tokens_seen": 245684912, "step": 113740 }, { "epoch": 18.55546492659054, "grad_norm": 0.017312675714492798, "learning_rate": 1.581054404364596e-05, "loss": 0.0033, "num_input_tokens_seen": 245695760, "step": 113745 }, { "epoch": 18.556280587275694, "grad_norm": 0.12121226638555527, "learning_rate": 1.5792790720799144e-05, "loss": 0.0295, "num_input_tokens_seen": 245706416, "step": 113750 }, { "epoch": 18.55709624796085, "grad_norm": 0.0010711740469560027, "learning_rate": 1.5775047211025685e-05, "loss": 0.0007, "num_input_tokens_seen": 245717648, "step": 113755 }, { "epoch": 18.557911908646002, "grad_norm": 0.007575131021440029, "learning_rate": 1.575731351468518e-05, "loss": 0.0034, "num_input_tokens_seen": 245729008, "step": 113760 }, { "epoch": 18.558727569331158, "grad_norm": 0.0685509443283081, "learning_rate": 1.5739589632137006e-05, "loss": 0.0027, "num_input_tokens_seen": 245738416, "step": 113765 }, { "epoch": 18.559543230016313, "grad_norm": 0.013830579817295074, "learning_rate": 1.572187556374044e-05, "loss": 0.0065, "num_input_tokens_seen": 245747920, "step": 113770 }, { "epoch": 18.56035889070147, "grad_norm": 0.016269782558083534, "learning_rate": 1.5704171309854354e-05, "loss": 0.0003, "num_input_tokens_seen": 245759920, "step": 113775 }, { "epoch": 18.561174551386625, "grad_norm": 0.0017003034008666873, "learning_rate": 1.568647687083763e-05, "loss": 0.0006, "num_input_tokens_seen": 245770416, "step": 113780 }, { "epoch": 18.561990212071777, "grad_norm": 0.0017265173373743892, "learning_rate": 1.5668792247048868e-05, "loss": 0.0009, "num_input_tokens_seen": 245782384, "step": 113785 }, { "epoch": 18.562805872756933, "grad_norm": 0.009840810671448708, "learning_rate": 1.565111743884634e-05, "loss": 0.0007, "num_input_tokens_seen": 245793744, "step": 113790 }, { "epoch": 18.563621533442088, "grad_norm": 0.008591379038989544, "learning_rate": 1.5633452446588537e-05, "loss": 0.0009, "num_input_tokens_seen": 245803824, "step": 113795 }, { "epoch": 18.564437194127244, "grad_norm": 0.0034411675296723843, "learning_rate": 1.5615797270633114e-05, "loss": 0.001, "num_input_tokens_seen": 245814832, "step": 113800 }, { "epoch": 18.5652528548124, "grad_norm": 0.004118712618947029, "learning_rate": 1.5598151911338176e-05, "loss": 0.0335, "num_input_tokens_seen": 245825072, "step": 113805 }, { "epoch": 18.56606851549755, "grad_norm": 0.007064030971378088, "learning_rate": 1.5580516369061103e-05, "loss": 0.0008, "num_input_tokens_seen": 245835312, "step": 113810 }, { "epoch": 18.566884176182707, "grad_norm": 0.16438302397727966, "learning_rate": 1.55628906441595e-05, "loss": 0.003, "num_input_tokens_seen": 245847056, "step": 113815 }, { "epoch": 18.567699836867863, "grad_norm": 0.005964207462966442, "learning_rate": 1.5545274736990354e-05, "loss": 0.001, "num_input_tokens_seen": 245858896, "step": 113820 }, { "epoch": 18.56851549755302, "grad_norm": 0.0032159110996872187, "learning_rate": 1.5527668647910886e-05, "loss": 0.0004, "num_input_tokens_seen": 245867472, "step": 113825 }, { "epoch": 18.569331158238175, "grad_norm": 0.05260089412331581, "learning_rate": 1.5510072377277696e-05, "loss": 0.0038, "num_input_tokens_seen": 245877040, "step": 113830 }, { "epoch": 18.570146818923327, "grad_norm": 0.4256327748298645, "learning_rate": 1.5492485925447663e-05, "loss": 0.0134, "num_input_tokens_seen": 245887664, "step": 113835 }, { "epoch": 18.570962479608482, "grad_norm": 0.055682551115751266, "learning_rate": 1.5474909292776895e-05, "loss": 0.0013, "num_input_tokens_seen": 245899248, "step": 113840 }, { "epoch": 18.571778140293638, "grad_norm": 0.005342925898730755, "learning_rate": 1.5457342479621883e-05, "loss": 0.0008, "num_input_tokens_seen": 245909488, "step": 113845 }, { "epoch": 18.572593800978794, "grad_norm": 0.0508258119225502, "learning_rate": 1.5439785486338396e-05, "loss": 0.0012, "num_input_tokens_seen": 245921264, "step": 113850 }, { "epoch": 18.57340946166395, "grad_norm": 0.0017754074651747942, "learning_rate": 1.5422238313282434e-05, "loss": 0.0076, "num_input_tokens_seen": 245932880, "step": 113855 }, { "epoch": 18.5742251223491, "grad_norm": 0.003250130685046315, "learning_rate": 1.540470096080948e-05, "loss": 0.0005, "num_input_tokens_seen": 245943120, "step": 113860 }, { "epoch": 18.575040783034257, "grad_norm": 0.039365023374557495, "learning_rate": 1.538717342927509e-05, "loss": 0.0042, "num_input_tokens_seen": 245954256, "step": 113865 }, { "epoch": 18.575856443719413, "grad_norm": 0.0029328095261007547, "learning_rate": 1.536965571903437e-05, "loss": 0.0018, "num_input_tokens_seen": 245966000, "step": 113870 }, { "epoch": 18.57667210440457, "grad_norm": 0.053971800953149796, "learning_rate": 1.535214783044242e-05, "loss": 0.0025, "num_input_tokens_seen": 245976752, "step": 113875 }, { "epoch": 18.57748776508972, "grad_norm": 0.002047772752121091, "learning_rate": 1.5334649763853903e-05, "loss": 0.0007, "num_input_tokens_seen": 245987888, "step": 113880 }, { "epoch": 18.578303425774877, "grad_norm": 0.0011964102741330862, "learning_rate": 1.5317161519623647e-05, "loss": 0.005, "num_input_tokens_seen": 245999216, "step": 113885 }, { "epoch": 18.579119086460032, "grad_norm": 0.0003268739383202046, "learning_rate": 1.529968309810592e-05, "loss": 0.0057, "num_input_tokens_seen": 246010032, "step": 113890 }, { "epoch": 18.579934747145188, "grad_norm": 0.008182469755411148, "learning_rate": 1.5282214499655055e-05, "loss": 0.0068, "num_input_tokens_seen": 246019952, "step": 113895 }, { "epoch": 18.580750407830344, "grad_norm": 0.0009706748533062637, "learning_rate": 1.526475572462499e-05, "loss": 0.0004, "num_input_tokens_seen": 246030704, "step": 113900 }, { "epoch": 18.581566068515496, "grad_norm": 0.016164537519216537, "learning_rate": 1.5247306773369552e-05, "loss": 0.0421, "num_input_tokens_seen": 246040816, "step": 113905 }, { "epoch": 18.58238172920065, "grad_norm": 0.0016942867077887058, "learning_rate": 1.5229867646242457e-05, "loss": 0.0023, "num_input_tokens_seen": 246049808, "step": 113910 }, { "epoch": 18.583197389885807, "grad_norm": 0.004356156103312969, "learning_rate": 1.5212438343597036e-05, "loss": 0.0015, "num_input_tokens_seen": 246060816, "step": 113915 }, { "epoch": 18.584013050570963, "grad_norm": 0.010752552188932896, "learning_rate": 1.5195018865786559e-05, "loss": 0.0013, "num_input_tokens_seen": 246071408, "step": 113920 }, { "epoch": 18.58482871125612, "grad_norm": 0.003911882638931274, "learning_rate": 1.5177609213164023e-05, "loss": 0.0014, "num_input_tokens_seen": 246082800, "step": 113925 }, { "epoch": 18.58564437194127, "grad_norm": 0.011551330797374249, "learning_rate": 1.5160209386082314e-05, "loss": 0.0008, "num_input_tokens_seen": 246093872, "step": 113930 }, { "epoch": 18.586460032626427, "grad_norm": 0.0020503744017332792, "learning_rate": 1.5142819384893925e-05, "loss": 0.0014, "num_input_tokens_seen": 246103568, "step": 113935 }, { "epoch": 18.587275693311582, "grad_norm": 0.004918430466204882, "learning_rate": 1.512543920995152e-05, "loss": 0.0013, "num_input_tokens_seen": 246114256, "step": 113940 }, { "epoch": 18.588091353996738, "grad_norm": 0.0029170692432671785, "learning_rate": 1.5108068861607094e-05, "loss": 0.0011, "num_input_tokens_seen": 246125456, "step": 113945 }, { "epoch": 18.588907014681894, "grad_norm": 0.002532045356929302, "learning_rate": 1.5090708340212867e-05, "loss": 0.0022, "num_input_tokens_seen": 246135920, "step": 113950 }, { "epoch": 18.589722675367046, "grad_norm": 0.00437668664380908, "learning_rate": 1.5073357646120501e-05, "loss": 0.0007, "num_input_tokens_seen": 246146576, "step": 113955 }, { "epoch": 18.5905383360522, "grad_norm": 0.010784292593598366, "learning_rate": 1.5056016779681825e-05, "loss": 0.0008, "num_input_tokens_seen": 246157552, "step": 113960 }, { "epoch": 18.591353996737357, "grad_norm": 0.009024699218571186, "learning_rate": 1.5038685741248059e-05, "loss": 0.002, "num_input_tokens_seen": 246168880, "step": 113965 }, { "epoch": 18.592169657422513, "grad_norm": 0.0006613527657464147, "learning_rate": 1.502136453117059e-05, "loss": 0.0202, "num_input_tokens_seen": 246179024, "step": 113970 }, { "epoch": 18.59298531810767, "grad_norm": 0.0016143594402819872, "learning_rate": 1.5004053149800356e-05, "loss": 0.0006, "num_input_tokens_seen": 246190832, "step": 113975 }, { "epoch": 18.59380097879282, "grad_norm": 0.0007201501284725964, "learning_rate": 1.4986751597488357e-05, "loss": 0.0036, "num_input_tokens_seen": 246203216, "step": 113980 }, { "epoch": 18.594616639477977, "grad_norm": 0.0003400088753551245, "learning_rate": 1.4969459874585034e-05, "loss": 0.0658, "num_input_tokens_seen": 246213488, "step": 113985 }, { "epoch": 18.595432300163132, "grad_norm": 0.0024805832654237747, "learning_rate": 1.495217798144094e-05, "loss": 0.0011, "num_input_tokens_seen": 246224624, "step": 113990 }, { "epoch": 18.596247960848288, "grad_norm": 0.06852786242961884, "learning_rate": 1.4934905918406239e-05, "loss": 0.0019, "num_input_tokens_seen": 246234928, "step": 113995 }, { "epoch": 18.597063621533444, "grad_norm": 0.30148282647132874, "learning_rate": 1.491764368583104e-05, "loss": 0.0127, "num_input_tokens_seen": 246244784, "step": 114000 }, { "epoch": 18.597879282218596, "grad_norm": 0.0019158965442329645, "learning_rate": 1.4900391284065229e-05, "loss": 0.0076, "num_input_tokens_seen": 246254672, "step": 114005 }, { "epoch": 18.59869494290375, "grad_norm": 0.00015638173499610275, "learning_rate": 1.4883148713458306e-05, "loss": 0.0009, "num_input_tokens_seen": 246265904, "step": 114010 }, { "epoch": 18.599510603588907, "grad_norm": 0.010793359018862247, "learning_rate": 1.4865915974359823e-05, "loss": 0.0018, "num_input_tokens_seen": 246276240, "step": 114015 }, { "epoch": 18.600326264274063, "grad_norm": 0.0045156884007155895, "learning_rate": 1.4848693067119e-05, "loss": 0.0022, "num_input_tokens_seen": 246287472, "step": 114020 }, { "epoch": 18.601141924959215, "grad_norm": 0.003806586842983961, "learning_rate": 1.483147999208484e-05, "loss": 0.0018, "num_input_tokens_seen": 246297200, "step": 114025 }, { "epoch": 18.60195758564437, "grad_norm": 0.03463774919509888, "learning_rate": 1.4814276749606226e-05, "loss": 0.0029, "num_input_tokens_seen": 246307472, "step": 114030 }, { "epoch": 18.602773246329527, "grad_norm": 0.0018811143236234784, "learning_rate": 1.4797083340031769e-05, "loss": 0.002, "num_input_tokens_seen": 246319152, "step": 114035 }, { "epoch": 18.603588907014682, "grad_norm": 0.15123361349105835, "learning_rate": 1.477989976370997e-05, "loss": 0.0057, "num_input_tokens_seen": 246329648, "step": 114040 }, { "epoch": 18.604404567699838, "grad_norm": 0.009590948931872845, "learning_rate": 1.4762726020989047e-05, "loss": 0.0011, "num_input_tokens_seen": 246339472, "step": 114045 }, { "epoch": 18.605220228384994, "grad_norm": 0.5494592189788818, "learning_rate": 1.4745562112217059e-05, "loss": 0.0209, "num_input_tokens_seen": 246350224, "step": 114050 }, { "epoch": 18.606035889070146, "grad_norm": 0.002357152756303549, "learning_rate": 1.4728408037741836e-05, "loss": 0.0023, "num_input_tokens_seen": 246362224, "step": 114055 }, { "epoch": 18.6068515497553, "grad_norm": 0.041462235152721405, "learning_rate": 1.4711263797911045e-05, "loss": 0.0024, "num_input_tokens_seen": 246373584, "step": 114060 }, { "epoch": 18.607667210440457, "grad_norm": 0.10488732159137726, "learning_rate": 1.469412939307213e-05, "loss": 0.0022, "num_input_tokens_seen": 246383920, "step": 114065 }, { "epoch": 18.608482871125613, "grad_norm": 0.038092661648988724, "learning_rate": 1.4677004823572316e-05, "loss": 0.0028, "num_input_tokens_seen": 246394544, "step": 114070 }, { "epoch": 18.609298531810765, "grad_norm": 0.0004575471393764019, "learning_rate": 1.4659890089758654e-05, "loss": 0.0005, "num_input_tokens_seen": 246405200, "step": 114075 }, { "epoch": 18.61011419249592, "grad_norm": 0.0010731341317296028, "learning_rate": 1.4642785191978036e-05, "loss": 0.0494, "num_input_tokens_seen": 246414960, "step": 114080 }, { "epoch": 18.610929853181077, "grad_norm": 0.0021726840641349554, "learning_rate": 1.462569013057713e-05, "loss": 0.0007, "num_input_tokens_seen": 246426352, "step": 114085 }, { "epoch": 18.611745513866232, "grad_norm": 0.00024937037960626185, "learning_rate": 1.4608604905902268e-05, "loss": 0.0006, "num_input_tokens_seen": 246437872, "step": 114090 }, { "epoch": 18.612561174551388, "grad_norm": 0.26687636971473694, "learning_rate": 1.4591529518299896e-05, "loss": 0.0089, "num_input_tokens_seen": 246448816, "step": 114095 }, { "epoch": 18.61337683523654, "grad_norm": 0.003532203147187829, "learning_rate": 1.4574463968115903e-05, "loss": 0.0012, "num_input_tokens_seen": 246459696, "step": 114100 }, { "epoch": 18.614192495921696, "grad_norm": 0.0039835479110479355, "learning_rate": 1.4557408255696181e-05, "loss": 0.0003, "num_input_tokens_seen": 246470032, "step": 114105 }, { "epoch": 18.61500815660685, "grad_norm": 0.0008681021281518042, "learning_rate": 1.4540362381386452e-05, "loss": 0.0711, "num_input_tokens_seen": 246481776, "step": 114110 }, { "epoch": 18.615823817292007, "grad_norm": 0.0026605729945003986, "learning_rate": 1.4523326345532163e-05, "loss": 0.002, "num_input_tokens_seen": 246491504, "step": 114115 }, { "epoch": 18.616639477977163, "grad_norm": 0.00021350267343223095, "learning_rate": 1.450630014847848e-05, "loss": 0.0047, "num_input_tokens_seen": 246501968, "step": 114120 }, { "epoch": 18.617455138662315, "grad_norm": 0.008653911761939526, "learning_rate": 1.4489283790570518e-05, "loss": 0.0005, "num_input_tokens_seen": 246512720, "step": 114125 }, { "epoch": 18.61827079934747, "grad_norm": 0.001182371866889298, "learning_rate": 1.4472277272153167e-05, "loss": 0.0051, "num_input_tokens_seen": 246522512, "step": 114130 }, { "epoch": 18.619086460032626, "grad_norm": 0.0021306921262294054, "learning_rate": 1.445528059357104e-05, "loss": 0.1048, "num_input_tokens_seen": 246534064, "step": 114135 }, { "epoch": 18.619902120717782, "grad_norm": 0.001645290874876082, "learning_rate": 1.4438293755168585e-05, "loss": 0.0061, "num_input_tokens_seen": 246544848, "step": 114140 }, { "epoch": 18.620717781402938, "grad_norm": 0.0037784897722303867, "learning_rate": 1.4421316757290082e-05, "loss": 0.0196, "num_input_tokens_seen": 246556272, "step": 114145 }, { "epoch": 18.62153344208809, "grad_norm": 0.004677009768784046, "learning_rate": 1.4404349600279642e-05, "loss": 0.0012, "num_input_tokens_seen": 246566288, "step": 114150 }, { "epoch": 18.622349102773246, "grad_norm": 0.0036729995626956224, "learning_rate": 1.4387392284481049e-05, "loss": 0.0008, "num_input_tokens_seen": 246577264, "step": 114155 }, { "epoch": 18.6231647634584, "grad_norm": 0.0003251898742746562, "learning_rate": 1.437044481023797e-05, "loss": 0.0006, "num_input_tokens_seen": 246587312, "step": 114160 }, { "epoch": 18.623980424143557, "grad_norm": 0.10283267498016357, "learning_rate": 1.4353507177893964e-05, "loss": 0.0039, "num_input_tokens_seen": 246597776, "step": 114165 }, { "epoch": 18.624796084828713, "grad_norm": 0.039473868906497955, "learning_rate": 1.4336579387792148e-05, "loss": 0.0061, "num_input_tokens_seen": 246611408, "step": 114170 }, { "epoch": 18.625611745513865, "grad_norm": 1.0579966306686401, "learning_rate": 1.4319661440275689e-05, "loss": 0.1601, "num_input_tokens_seen": 246621424, "step": 114175 }, { "epoch": 18.62642740619902, "grad_norm": 0.006494295317679644, "learning_rate": 1.4302753335687423e-05, "loss": 0.0009, "num_input_tokens_seen": 246632208, "step": 114180 }, { "epoch": 18.627243066884176, "grad_norm": 0.06191675364971161, "learning_rate": 1.4285855074370025e-05, "loss": 0.0116, "num_input_tokens_seen": 246643120, "step": 114185 }, { "epoch": 18.628058727569332, "grad_norm": 0.0025623554829508066, "learning_rate": 1.4268966656665938e-05, "loss": 0.0007, "num_input_tokens_seen": 246652336, "step": 114190 }, { "epoch": 18.628874388254488, "grad_norm": 0.08002685755491257, "learning_rate": 1.4252088082917391e-05, "loss": 0.0024, "num_input_tokens_seen": 246664176, "step": 114195 }, { "epoch": 18.62969004893964, "grad_norm": 0.0005803200765512884, "learning_rate": 1.4235219353466555e-05, "loss": 0.0002, "num_input_tokens_seen": 246674864, "step": 114200 }, { "epoch": 18.630505709624796, "grad_norm": 0.0005485960282385349, "learning_rate": 1.4218360468655212e-05, "loss": 0.0035, "num_input_tokens_seen": 246686480, "step": 114205 }, { "epoch": 18.63132137030995, "grad_norm": 0.0003624989476520568, "learning_rate": 1.4201511428824976e-05, "loss": 0.0683, "num_input_tokens_seen": 246697648, "step": 114210 }, { "epoch": 18.632137030995107, "grad_norm": 0.0008376438054256141, "learning_rate": 1.4184672234317463e-05, "loss": 0.0009, "num_input_tokens_seen": 246707472, "step": 114215 }, { "epoch": 18.63295269168026, "grad_norm": 0.005695714149624109, "learning_rate": 1.4167842885473903e-05, "loss": 0.0033, "num_input_tokens_seen": 246717584, "step": 114220 }, { "epoch": 18.633768352365415, "grad_norm": 0.013842624612152576, "learning_rate": 1.4151023382635298e-05, "loss": 0.001, "num_input_tokens_seen": 246728272, "step": 114225 }, { "epoch": 18.63458401305057, "grad_norm": 0.0036778177600353956, "learning_rate": 1.4134213726142541e-05, "loss": 0.0004, "num_input_tokens_seen": 246738736, "step": 114230 }, { "epoch": 18.635399673735726, "grad_norm": 0.0011332188732922077, "learning_rate": 1.4117413916336307e-05, "loss": 0.004, "num_input_tokens_seen": 246749424, "step": 114235 }, { "epoch": 18.636215334420882, "grad_norm": 0.001157104386948049, "learning_rate": 1.4100623953557045e-05, "loss": 0.023, "num_input_tokens_seen": 246760208, "step": 114240 }, { "epoch": 18.637030995106034, "grad_norm": 0.009338432922959328, "learning_rate": 1.4083843838145095e-05, "loss": 0.0025, "num_input_tokens_seen": 246771408, "step": 114245 }, { "epoch": 18.63784665579119, "grad_norm": 0.00642793532460928, "learning_rate": 1.4067073570440458e-05, "loss": 0.0007, "num_input_tokens_seen": 246782736, "step": 114250 }, { "epoch": 18.638662316476346, "grad_norm": 0.032264117151498795, "learning_rate": 1.4050313150782978e-05, "loss": 0.0029, "num_input_tokens_seen": 246793968, "step": 114255 }, { "epoch": 18.6394779771615, "grad_norm": 0.011337845586240292, "learning_rate": 1.4033562579512438e-05, "loss": 0.0032, "num_input_tokens_seen": 246804144, "step": 114260 }, { "epoch": 18.640293637846657, "grad_norm": 0.008878304623067379, "learning_rate": 1.4016821856968232e-05, "loss": 0.0011, "num_input_tokens_seen": 246815280, "step": 114265 }, { "epoch": 18.64110929853181, "grad_norm": 0.0022187468130141497, "learning_rate": 1.4000090983489588e-05, "loss": 0.0007, "num_input_tokens_seen": 246826064, "step": 114270 }, { "epoch": 18.641924959216965, "grad_norm": 0.0002171692467527464, "learning_rate": 1.3983369959415682e-05, "loss": 0.0035, "num_input_tokens_seen": 246837328, "step": 114275 }, { "epoch": 18.64274061990212, "grad_norm": 0.00032952241599559784, "learning_rate": 1.3966658785085352e-05, "loss": 0.0007, "num_input_tokens_seen": 246847568, "step": 114280 }, { "epoch": 18.643556280587276, "grad_norm": 0.00012888593482784927, "learning_rate": 1.394995746083727e-05, "loss": 0.0007, "num_input_tokens_seen": 246858704, "step": 114285 }, { "epoch": 18.644371941272432, "grad_norm": 0.019832175225019455, "learning_rate": 1.3933265987009836e-05, "loss": 0.0022, "num_input_tokens_seen": 246868688, "step": 114290 }, { "epoch": 18.645187601957584, "grad_norm": 0.003522902261465788, "learning_rate": 1.3916584363941442e-05, "loss": 0.0013, "num_input_tokens_seen": 246879728, "step": 114295 }, { "epoch": 18.64600326264274, "grad_norm": 0.000486519857076928, "learning_rate": 1.3899912591970099e-05, "loss": 0.0032, "num_input_tokens_seen": 246890096, "step": 114300 }, { "epoch": 18.646818923327896, "grad_norm": 0.018939530476927757, "learning_rate": 1.3883250671433645e-05, "loss": 0.0102, "num_input_tokens_seen": 246900016, "step": 114305 }, { "epoch": 18.64763458401305, "grad_norm": 0.0028730384074151516, "learning_rate": 1.3866598602669866e-05, "loss": 0.0305, "num_input_tokens_seen": 246910896, "step": 114310 }, { "epoch": 18.648450244698207, "grad_norm": 0.00035425060195848346, "learning_rate": 1.3849956386016049e-05, "loss": 0.0041, "num_input_tokens_seen": 246922256, "step": 114315 }, { "epoch": 18.64926590538336, "grad_norm": 0.0004246874595992267, "learning_rate": 1.3833324021809756e-05, "loss": 0.0023, "num_input_tokens_seen": 246933200, "step": 114320 }, { "epoch": 18.650081566068515, "grad_norm": 0.0004184528661426157, "learning_rate": 1.3816701510387775e-05, "loss": 0.0007, "num_input_tokens_seen": 246944368, "step": 114325 }, { "epoch": 18.65089722675367, "grad_norm": 0.002552991034463048, "learning_rate": 1.3800088852087166e-05, "loss": 0.0013, "num_input_tokens_seen": 246955280, "step": 114330 }, { "epoch": 18.651712887438826, "grad_norm": 0.002675180323421955, "learning_rate": 1.3783486047244497e-05, "loss": 0.0245, "num_input_tokens_seen": 246967920, "step": 114335 }, { "epoch": 18.652528548123982, "grad_norm": 0.011710644699633121, "learning_rate": 1.3766893096196386e-05, "loss": 0.0034, "num_input_tokens_seen": 246978576, "step": 114340 }, { "epoch": 18.653344208809134, "grad_norm": 0.0022386705968528986, "learning_rate": 1.3750309999278899e-05, "loss": 0.0019, "num_input_tokens_seen": 246989968, "step": 114345 }, { "epoch": 18.65415986949429, "grad_norm": 0.017226964235305786, "learning_rate": 1.373373675682832e-05, "loss": 0.0046, "num_input_tokens_seen": 247001872, "step": 114350 }, { "epoch": 18.654975530179446, "grad_norm": 0.00021573618869297206, "learning_rate": 1.371717336918038e-05, "loss": 0.0025, "num_input_tokens_seen": 247012400, "step": 114355 }, { "epoch": 18.6557911908646, "grad_norm": 0.0006454604445025325, "learning_rate": 1.3700619836670813e-05, "loss": 0.0068, "num_input_tokens_seen": 247023696, "step": 114360 }, { "epoch": 18.656606851549757, "grad_norm": 0.00180201162584126, "learning_rate": 1.3684076159635129e-05, "loss": 0.001, "num_input_tokens_seen": 247034128, "step": 114365 }, { "epoch": 18.65742251223491, "grad_norm": 0.00018552408437244594, "learning_rate": 1.3667542338408611e-05, "loss": 0.0015, "num_input_tokens_seen": 247044112, "step": 114370 }, { "epoch": 18.658238172920065, "grad_norm": 0.0041071511805057526, "learning_rate": 1.3651018373326219e-05, "loss": 0.0005, "num_input_tokens_seen": 247055696, "step": 114375 }, { "epoch": 18.65905383360522, "grad_norm": 0.2897575795650482, "learning_rate": 1.3634504264723013e-05, "loss": 0.0035, "num_input_tokens_seen": 247067280, "step": 114380 }, { "epoch": 18.659869494290376, "grad_norm": 0.00036819299566559494, "learning_rate": 1.3618000012933506e-05, "loss": 0.0049, "num_input_tokens_seen": 247078096, "step": 114385 }, { "epoch": 18.660685154975532, "grad_norm": 0.002458331175148487, "learning_rate": 1.3601505618292264e-05, "loss": 0.0012, "num_input_tokens_seen": 247088048, "step": 114390 }, { "epoch": 18.661500815660684, "grad_norm": 0.0003740904794540256, "learning_rate": 1.3585021081133575e-05, "loss": 0.0009, "num_input_tokens_seen": 247099184, "step": 114395 }, { "epoch": 18.66231647634584, "grad_norm": 0.008161837235093117, "learning_rate": 1.3568546401791449e-05, "loss": 0.0007, "num_input_tokens_seen": 247109328, "step": 114400 }, { "epoch": 18.663132137030995, "grad_norm": 0.008084426634013653, "learning_rate": 1.355208158059984e-05, "loss": 0.0095, "num_input_tokens_seen": 247118064, "step": 114405 }, { "epoch": 18.66394779771615, "grad_norm": 0.00394302187487483, "learning_rate": 1.3535626617892426e-05, "loss": 0.0037, "num_input_tokens_seen": 247130224, "step": 114410 }, { "epoch": 18.664763458401303, "grad_norm": 0.0034730606712400913, "learning_rate": 1.3519181514002665e-05, "loss": 0.0495, "num_input_tokens_seen": 247140848, "step": 114415 }, { "epoch": 18.66557911908646, "grad_norm": 0.001881223637610674, "learning_rate": 1.3502746269263788e-05, "loss": 0.0034, "num_input_tokens_seen": 247151952, "step": 114420 }, { "epoch": 18.666394779771615, "grad_norm": 0.00045266575762070715, "learning_rate": 1.3486320884008918e-05, "loss": 0.0103, "num_input_tokens_seen": 247162352, "step": 114425 }, { "epoch": 18.66721044045677, "grad_norm": 0.015825331211090088, "learning_rate": 1.3469905358570956e-05, "loss": 0.0006, "num_input_tokens_seen": 247173904, "step": 114430 }, { "epoch": 18.668026101141926, "grad_norm": 0.003665680531412363, "learning_rate": 1.3453499693282633e-05, "loss": 0.0008, "num_input_tokens_seen": 247184976, "step": 114435 }, { "epoch": 18.66884176182708, "grad_norm": 0.0014401959488168359, "learning_rate": 1.3437103888476244e-05, "loss": 0.0003, "num_input_tokens_seen": 247195824, "step": 114440 }, { "epoch": 18.669657422512234, "grad_norm": 0.0011207032948732376, "learning_rate": 1.342071794448435e-05, "loss": 0.0005, "num_input_tokens_seen": 247206064, "step": 114445 }, { "epoch": 18.67047308319739, "grad_norm": 0.00031443015905097127, "learning_rate": 1.340434186163869e-05, "loss": 0.0008, "num_input_tokens_seen": 247217360, "step": 114450 }, { "epoch": 18.671288743882545, "grad_norm": 0.026929231360554695, "learning_rate": 1.33879756402715e-05, "loss": 0.0033, "num_input_tokens_seen": 247228944, "step": 114455 }, { "epoch": 18.6721044045677, "grad_norm": 0.00107129430398345, "learning_rate": 1.3371619280714175e-05, "loss": 0.0021, "num_input_tokens_seen": 247238192, "step": 114460 }, { "epoch": 18.672920065252853, "grad_norm": 0.001654400723055005, "learning_rate": 1.3355272783298455e-05, "loss": 0.0025, "num_input_tokens_seen": 247250192, "step": 114465 }, { "epoch": 18.67373572593801, "grad_norm": 0.00030322172096930444, "learning_rate": 1.3338936148355351e-05, "loss": 0.0004, "num_input_tokens_seen": 247261168, "step": 114470 }, { "epoch": 18.674551386623165, "grad_norm": 0.002237173030152917, "learning_rate": 1.3322609376216155e-05, "loss": 0.0014, "num_input_tokens_seen": 247273104, "step": 114475 }, { "epoch": 18.67536704730832, "grad_norm": 0.004785965196788311, "learning_rate": 1.33062924672116e-05, "loss": 0.0011, "num_input_tokens_seen": 247284336, "step": 114480 }, { "epoch": 18.676182707993476, "grad_norm": 1.1634474992752075, "learning_rate": 1.3289985421672534e-05, "loss": 0.0547, "num_input_tokens_seen": 247295312, "step": 114485 }, { "epoch": 18.67699836867863, "grad_norm": 0.0024988525547087193, "learning_rate": 1.3273688239929248e-05, "loss": 0.0005, "num_input_tokens_seen": 247306864, "step": 114490 }, { "epoch": 18.677814029363784, "grad_norm": 0.0052915457636117935, "learning_rate": 1.3257400922312258e-05, "loss": 0.0056, "num_input_tokens_seen": 247317520, "step": 114495 }, { "epoch": 18.67862969004894, "grad_norm": 0.0011443269904702902, "learning_rate": 1.3241123469151406e-05, "loss": 0.0009, "num_input_tokens_seen": 247328240, "step": 114500 }, { "epoch": 18.679445350734095, "grad_norm": 0.0029852113220840693, "learning_rate": 1.322485588077671e-05, "loss": 0.0007, "num_input_tokens_seen": 247339952, "step": 114505 }, { "epoch": 18.68026101141925, "grad_norm": 0.0009250577422790229, "learning_rate": 1.3208598157517849e-05, "loss": 0.0013, "num_input_tokens_seen": 247350896, "step": 114510 }, { "epoch": 18.681076672104403, "grad_norm": 0.0006689508445560932, "learning_rate": 1.3192350299704225e-05, "loss": 0.0004, "num_input_tokens_seen": 247361168, "step": 114515 }, { "epoch": 18.68189233278956, "grad_norm": 0.00025370350340381265, "learning_rate": 1.3176112307665245e-05, "loss": 0.0007, "num_input_tokens_seen": 247373232, "step": 114520 }, { "epoch": 18.682707993474715, "grad_norm": 0.00041831081034615636, "learning_rate": 1.315988418172992e-05, "loss": 0.0073, "num_input_tokens_seen": 247384624, "step": 114525 }, { "epoch": 18.68352365415987, "grad_norm": 0.0011938015231862664, "learning_rate": 1.3143665922227155e-05, "loss": 0.0049, "num_input_tokens_seen": 247395248, "step": 114530 }, { "epoch": 18.684339314845026, "grad_norm": 0.25502070784568787, "learning_rate": 1.3127457529485576e-05, "loss": 0.0059, "num_input_tokens_seen": 247406096, "step": 114535 }, { "epoch": 18.68515497553018, "grad_norm": 0.06968465447425842, "learning_rate": 1.3111259003833753e-05, "loss": 0.0026, "num_input_tokens_seen": 247415568, "step": 114540 }, { "epoch": 18.685970636215334, "grad_norm": 0.0003617781330831349, "learning_rate": 1.3095070345599924e-05, "loss": 0.0016, "num_input_tokens_seen": 247426160, "step": 114545 }, { "epoch": 18.68678629690049, "grad_norm": 0.0005903160781599581, "learning_rate": 1.3078891555112161e-05, "loss": 0.0014, "num_input_tokens_seen": 247437712, "step": 114550 }, { "epoch": 18.687601957585645, "grad_norm": 0.0013629597378894687, "learning_rate": 1.306272263269831e-05, "loss": 0.0052, "num_input_tokens_seen": 247447536, "step": 114555 }, { "epoch": 18.6884176182708, "grad_norm": 0.07200295478105545, "learning_rate": 1.3046563578686222e-05, "loss": 0.0036, "num_input_tokens_seen": 247458768, "step": 114560 }, { "epoch": 18.689233278955953, "grad_norm": 0.05303337797522545, "learning_rate": 1.303041439340319e-05, "loss": 0.004, "num_input_tokens_seen": 247470032, "step": 114565 }, { "epoch": 18.69004893964111, "grad_norm": 0.0004202370473649353, "learning_rate": 1.3014275077176618e-05, "loss": 0.0003, "num_input_tokens_seen": 247481200, "step": 114570 }, { "epoch": 18.690864600326265, "grad_norm": 0.018010199069976807, "learning_rate": 1.2998145630333469e-05, "loss": 0.0019, "num_input_tokens_seen": 247491824, "step": 114575 }, { "epoch": 18.69168026101142, "grad_norm": 0.002485614735633135, "learning_rate": 1.2982026053200813e-05, "loss": 0.0024, "num_input_tokens_seen": 247501904, "step": 114580 }, { "epoch": 18.692495921696576, "grad_norm": 0.002339205238968134, "learning_rate": 1.2965916346105166e-05, "loss": 0.0043, "num_input_tokens_seen": 247511600, "step": 114585 }, { "epoch": 18.693311582381728, "grad_norm": 0.02498902939260006, "learning_rate": 1.2949816509373102e-05, "loss": 0.0008, "num_input_tokens_seen": 247522832, "step": 114590 }, { "epoch": 18.694127243066884, "grad_norm": 0.004603876266628504, "learning_rate": 1.2933726543330804e-05, "loss": 0.0007, "num_input_tokens_seen": 247533328, "step": 114595 }, { "epoch": 18.69494290375204, "grad_norm": 0.041026998311281204, "learning_rate": 1.2917646448304509e-05, "loss": 0.0009, "num_input_tokens_seen": 247544272, "step": 114600 }, { "epoch": 18.695758564437195, "grad_norm": 0.04702477157115936, "learning_rate": 1.2901576224619959e-05, "loss": 0.0041, "num_input_tokens_seen": 247554064, "step": 114605 }, { "epoch": 18.696574225122347, "grad_norm": 0.02038668841123581, "learning_rate": 1.2885515872602949e-05, "loss": 0.0025, "num_input_tokens_seen": 247565328, "step": 114610 }, { "epoch": 18.697389885807503, "grad_norm": 0.0002236200380139053, "learning_rate": 1.2869465392578828e-05, "loss": 0.0012, "num_input_tokens_seen": 247577136, "step": 114615 }, { "epoch": 18.69820554649266, "grad_norm": 0.004946097731590271, "learning_rate": 1.2853424784873059e-05, "loss": 0.0033, "num_input_tokens_seen": 247587184, "step": 114620 }, { "epoch": 18.699021207177815, "grad_norm": 0.010659070685505867, "learning_rate": 1.2837394049810547e-05, "loss": 0.0009, "num_input_tokens_seen": 247597040, "step": 114625 }, { "epoch": 18.69983686786297, "grad_norm": 0.0010920735076069832, "learning_rate": 1.2821373187716311e-05, "loss": 0.001, "num_input_tokens_seen": 247608944, "step": 114630 }, { "epoch": 18.700652528548122, "grad_norm": 0.001096490304917097, "learning_rate": 1.2805362198914872e-05, "loss": 0.0015, "num_input_tokens_seen": 247620528, "step": 114635 }, { "epoch": 18.701468189233278, "grad_norm": 0.0010115448385477066, "learning_rate": 1.2789361083730911e-05, "loss": 0.0003, "num_input_tokens_seen": 247630800, "step": 114640 }, { "epoch": 18.702283849918434, "grad_norm": 0.03550710901618004, "learning_rate": 1.2773369842488614e-05, "loss": 0.0024, "num_input_tokens_seen": 247642256, "step": 114645 }, { "epoch": 18.70309951060359, "grad_norm": 0.0003345920122228563, "learning_rate": 1.2757388475512055e-05, "loss": 0.0032, "num_input_tokens_seen": 247653808, "step": 114650 }, { "epoch": 18.703915171288745, "grad_norm": 0.014875334687530994, "learning_rate": 1.2741416983125143e-05, "loss": 0.0013, "num_input_tokens_seen": 247664720, "step": 114655 }, { "epoch": 18.704730831973897, "grad_norm": 0.0014985312009230256, "learning_rate": 1.2725455365651507e-05, "loss": 0.0008, "num_input_tokens_seen": 247676112, "step": 114660 }, { "epoch": 18.705546492659053, "grad_norm": 0.006818423047661781, "learning_rate": 1.270950362341472e-05, "loss": 0.0489, "num_input_tokens_seen": 247687248, "step": 114665 }, { "epoch": 18.70636215334421, "grad_norm": 0.0007490873686037958, "learning_rate": 1.269356175673797e-05, "loss": 0.0068, "num_input_tokens_seen": 247697968, "step": 114670 }, { "epoch": 18.707177814029365, "grad_norm": 0.0012531366664916277, "learning_rate": 1.2677629765944387e-05, "loss": 0.0026, "num_input_tokens_seen": 247707920, "step": 114675 }, { "epoch": 18.70799347471452, "grad_norm": 0.001863017096184194, "learning_rate": 1.266170765135688e-05, "loss": 0.0004, "num_input_tokens_seen": 247718992, "step": 114680 }, { "epoch": 18.708809135399672, "grad_norm": 0.04067402333021164, "learning_rate": 1.2645795413298078e-05, "loss": 0.0209, "num_input_tokens_seen": 247730800, "step": 114685 }, { "epoch": 18.709624796084828, "grad_norm": 0.0016221472760662436, "learning_rate": 1.2629893052090502e-05, "loss": 0.0048, "num_input_tokens_seen": 247740272, "step": 114690 }, { "epoch": 18.710440456769984, "grad_norm": 0.07899585366249084, "learning_rate": 1.2614000568056395e-05, "loss": 0.003, "num_input_tokens_seen": 247751504, "step": 114695 }, { "epoch": 18.71125611745514, "grad_norm": 0.000546223483979702, "learning_rate": 1.259811796151783e-05, "loss": 0.0002, "num_input_tokens_seen": 247762480, "step": 114700 }, { "epoch": 18.712071778140295, "grad_norm": 0.02150532603263855, "learning_rate": 1.258224523279683e-05, "loss": 0.0019, "num_input_tokens_seen": 247773552, "step": 114705 }, { "epoch": 18.712887438825447, "grad_norm": 0.008169742301106453, "learning_rate": 1.2566382382214859e-05, "loss": 0.0017, "num_input_tokens_seen": 247783216, "step": 114710 }, { "epoch": 18.713703099510603, "grad_norm": 0.09331442415714264, "learning_rate": 1.2550529410093548e-05, "loss": 0.0086, "num_input_tokens_seen": 247793936, "step": 114715 }, { "epoch": 18.71451876019576, "grad_norm": 0.04075554758310318, "learning_rate": 1.2534686316754085e-05, "loss": 0.0014, "num_input_tokens_seen": 247805392, "step": 114720 }, { "epoch": 18.715334420880914, "grad_norm": 0.0007709045894443989, "learning_rate": 1.2518853102517657e-05, "loss": 0.0003, "num_input_tokens_seen": 247817072, "step": 114725 }, { "epoch": 18.71615008156607, "grad_norm": 0.050977423787117004, "learning_rate": 1.250302976770501e-05, "loss": 0.0013, "num_input_tokens_seen": 247827888, "step": 114730 }, { "epoch": 18.716965742251222, "grad_norm": 0.038305021822452545, "learning_rate": 1.248721631263705e-05, "loss": 0.0019, "num_input_tokens_seen": 247838800, "step": 114735 }, { "epoch": 18.717781402936378, "grad_norm": 0.008927847258746624, "learning_rate": 1.2471412737633914e-05, "loss": 0.0019, "num_input_tokens_seen": 247849872, "step": 114740 }, { "epoch": 18.718597063621534, "grad_norm": 0.02261229418218136, "learning_rate": 1.2455619043016175e-05, "loss": 0.0007, "num_input_tokens_seen": 247862064, "step": 114745 }, { "epoch": 18.71941272430669, "grad_norm": 0.00020072948245797306, "learning_rate": 1.2439835229103803e-05, "loss": 0.0005, "num_input_tokens_seen": 247871920, "step": 114750 }, { "epoch": 18.72022838499184, "grad_norm": 0.001048129634000361, "learning_rate": 1.242406129621665e-05, "loss": 0.0007, "num_input_tokens_seen": 247882800, "step": 114755 }, { "epoch": 18.721044045676997, "grad_norm": 0.003789189737290144, "learning_rate": 1.240829724467446e-05, "loss": 0.0014, "num_input_tokens_seen": 247894288, "step": 114760 }, { "epoch": 18.721859706362153, "grad_norm": 0.2328716516494751, "learning_rate": 1.2392543074796702e-05, "loss": 0.0041, "num_input_tokens_seen": 247906672, "step": 114765 }, { "epoch": 18.72267536704731, "grad_norm": 0.0024382395204156637, "learning_rate": 1.2376798786902621e-05, "loss": 0.001, "num_input_tokens_seen": 247917936, "step": 114770 }, { "epoch": 18.723491027732464, "grad_norm": 0.002633201191201806, "learning_rate": 1.2361064381311293e-05, "loss": 0.0011, "num_input_tokens_seen": 247929680, "step": 114775 }, { "epoch": 18.724306688417617, "grad_norm": 0.0014084518188610673, "learning_rate": 1.2345339858341576e-05, "loss": 0.0008, "num_input_tokens_seen": 247940368, "step": 114780 }, { "epoch": 18.725122349102772, "grad_norm": 0.00445615453645587, "learning_rate": 1.2329625218312213e-05, "loss": 0.0005, "num_input_tokens_seen": 247951984, "step": 114785 }, { "epoch": 18.725938009787928, "grad_norm": 0.0038615395314991474, "learning_rate": 1.2313920461541672e-05, "loss": 0.0016, "num_input_tokens_seen": 247961456, "step": 114790 }, { "epoch": 18.726753670473084, "grad_norm": 0.00038773167761974037, "learning_rate": 1.22982255883482e-05, "loss": 0.0015, "num_input_tokens_seen": 247971440, "step": 114795 }, { "epoch": 18.72756933115824, "grad_norm": 0.00029015023028478026, "learning_rate": 1.2282540599049873e-05, "loss": 0.0009, "num_input_tokens_seen": 247982192, "step": 114800 }, { "epoch": 18.72838499184339, "grad_norm": 0.004771945532411337, "learning_rate": 1.2266865493964551e-05, "loss": 0.0015, "num_input_tokens_seen": 247993008, "step": 114805 }, { "epoch": 18.729200652528547, "grad_norm": 0.003790721297264099, "learning_rate": 1.2251200273409923e-05, "loss": 0.0197, "num_input_tokens_seen": 248004720, "step": 114810 }, { "epoch": 18.730016313213703, "grad_norm": 0.0013847892405465245, "learning_rate": 1.2235544937703513e-05, "loss": 0.0005, "num_input_tokens_seen": 248015024, "step": 114815 }, { "epoch": 18.73083197389886, "grad_norm": 0.004622996784746647, "learning_rate": 1.2219899487162567e-05, "loss": 0.0014, "num_input_tokens_seen": 248025232, "step": 114820 }, { "epoch": 18.731647634584014, "grad_norm": 0.031056227162480354, "learning_rate": 1.2204263922104108e-05, "loss": 0.0017, "num_input_tokens_seen": 248036016, "step": 114825 }, { "epoch": 18.732463295269167, "grad_norm": 0.0014217033749446273, "learning_rate": 1.2188638242845108e-05, "loss": 0.0003, "num_input_tokens_seen": 248046160, "step": 114830 }, { "epoch": 18.733278955954322, "grad_norm": 0.00779396528378129, "learning_rate": 1.2173022449702142e-05, "loss": 0.0009, "num_input_tokens_seen": 248055984, "step": 114835 }, { "epoch": 18.734094616639478, "grad_norm": 0.1975318193435669, "learning_rate": 1.215741654299174e-05, "loss": 0.0079, "num_input_tokens_seen": 248066512, "step": 114840 }, { "epoch": 18.734910277324634, "grad_norm": 0.017688684165477753, "learning_rate": 1.214182052303009e-05, "loss": 0.0032, "num_input_tokens_seen": 248075344, "step": 114845 }, { "epoch": 18.73572593800979, "grad_norm": 0.003837002906948328, "learning_rate": 1.2126234390133439e-05, "loss": 0.0007, "num_input_tokens_seen": 248086832, "step": 114850 }, { "epoch": 18.73654159869494, "grad_norm": 0.0023042631801217794, "learning_rate": 1.2110658144617538e-05, "loss": 0.0062, "num_input_tokens_seen": 248097328, "step": 114855 }, { "epoch": 18.737357259380097, "grad_norm": 0.0019547000993043184, "learning_rate": 1.2095091786798074e-05, "loss": 0.0005, "num_input_tokens_seen": 248107792, "step": 114860 }, { "epoch": 18.738172920065253, "grad_norm": 0.7636070251464844, "learning_rate": 1.207953531699052e-05, "loss": 0.1813, "num_input_tokens_seen": 248118608, "step": 114865 }, { "epoch": 18.73898858075041, "grad_norm": 0.0049219997599720955, "learning_rate": 1.206398873551018e-05, "loss": 0.0008, "num_input_tokens_seen": 248130192, "step": 114870 }, { "epoch": 18.739804241435564, "grad_norm": 0.020860247313976288, "learning_rate": 1.2048452042672075e-05, "loss": 0.0008, "num_input_tokens_seen": 248141232, "step": 114875 }, { "epoch": 18.740619902120716, "grad_norm": 0.0031487110536545515, "learning_rate": 1.2032925238791071e-05, "loss": 0.0005, "num_input_tokens_seen": 248151856, "step": 114880 }, { "epoch": 18.741435562805872, "grad_norm": 0.06586775928735733, "learning_rate": 1.2017408324181911e-05, "loss": 0.002, "num_input_tokens_seen": 248162416, "step": 114885 }, { "epoch": 18.742251223491028, "grad_norm": 0.0008164440514519811, "learning_rate": 1.2001901299159013e-05, "loss": 0.001, "num_input_tokens_seen": 248174064, "step": 114890 }, { "epoch": 18.743066884176184, "grad_norm": 0.0023708927910774946, "learning_rate": 1.1986404164036679e-05, "loss": 0.1441, "num_input_tokens_seen": 248184432, "step": 114895 }, { "epoch": 18.74388254486134, "grad_norm": 0.0004083328531123698, "learning_rate": 1.1970916919128937e-05, "loss": 0.001, "num_input_tokens_seen": 248195792, "step": 114900 }, { "epoch": 18.74469820554649, "grad_norm": 0.002401529112830758, "learning_rate": 1.1955439564749649e-05, "loss": 0.0026, "num_input_tokens_seen": 248206768, "step": 114905 }, { "epoch": 18.745513866231647, "grad_norm": 0.0043855938129127026, "learning_rate": 1.1939972101212503e-05, "loss": 0.001, "num_input_tokens_seen": 248218672, "step": 114910 }, { "epoch": 18.746329526916803, "grad_norm": 0.022415775805711746, "learning_rate": 1.1924514528831032e-05, "loss": 0.0008, "num_input_tokens_seen": 248229488, "step": 114915 }, { "epoch": 18.74714518760196, "grad_norm": 0.007502218242734671, "learning_rate": 1.190906684791837e-05, "loss": 0.0013, "num_input_tokens_seen": 248240176, "step": 114920 }, { "epoch": 18.747960848287114, "grad_norm": 0.0006908404175192118, "learning_rate": 1.1893629058787714e-05, "loss": 0.0012, "num_input_tokens_seen": 248249296, "step": 114925 }, { "epoch": 18.748776508972266, "grad_norm": 0.0905354842543602, "learning_rate": 1.187820116175181e-05, "loss": 0.0041, "num_input_tokens_seen": 248259696, "step": 114930 }, { "epoch": 18.749592169657422, "grad_norm": 0.001892009051516652, "learning_rate": 1.1862783157123413e-05, "loss": 0.0003, "num_input_tokens_seen": 248270416, "step": 114935 }, { "epoch": 18.750407830342578, "grad_norm": 0.010540666058659554, "learning_rate": 1.1847375045214992e-05, "loss": 0.0005, "num_input_tokens_seen": 248282000, "step": 114940 }, { "epoch": 18.751223491027734, "grad_norm": 0.0007760238368064165, "learning_rate": 1.1831976826338742e-05, "loss": 0.0028, "num_input_tokens_seen": 248294096, "step": 114945 }, { "epoch": 18.752039151712886, "grad_norm": 0.004532194696366787, "learning_rate": 1.1816588500806802e-05, "loss": 0.0014, "num_input_tokens_seen": 248305136, "step": 114950 }, { "epoch": 18.75285481239804, "grad_norm": 0.0071411640383303165, "learning_rate": 1.1801210068930923e-05, "loss": 0.0023, "num_input_tokens_seen": 248315632, "step": 114955 }, { "epoch": 18.753670473083197, "grad_norm": 0.011389593593776226, "learning_rate": 1.1785841531022968e-05, "loss": 0.0007, "num_input_tokens_seen": 248326576, "step": 114960 }, { "epoch": 18.754486133768353, "grad_norm": 0.05140427127480507, "learning_rate": 1.177048288739413e-05, "loss": 0.0049, "num_input_tokens_seen": 248337488, "step": 114965 }, { "epoch": 18.75530179445351, "grad_norm": 0.00020385747484397143, "learning_rate": 1.1755134138355995e-05, "loss": 0.0002, "num_input_tokens_seen": 248347728, "step": 114970 }, { "epoch": 18.75611745513866, "grad_norm": 0.010403123684227467, "learning_rate": 1.1739795284219256e-05, "loss": 0.0012, "num_input_tokens_seen": 248358448, "step": 114975 }, { "epoch": 18.756933115823816, "grad_norm": 0.024493994191288948, "learning_rate": 1.172446632529517e-05, "loss": 0.0011, "num_input_tokens_seen": 248369712, "step": 114980 }, { "epoch": 18.757748776508972, "grad_norm": 0.0017098193056881428, "learning_rate": 1.1709147261894037e-05, "loss": 0.0024, "num_input_tokens_seen": 248381232, "step": 114985 }, { "epoch": 18.758564437194128, "grad_norm": 0.004953702911734581, "learning_rate": 1.1693838094326502e-05, "loss": 0.0035, "num_input_tokens_seen": 248391536, "step": 114990 }, { "epoch": 18.759380097879284, "grad_norm": 0.0007283523445948958, "learning_rate": 1.1678538822902817e-05, "loss": 0.0006, "num_input_tokens_seen": 248402640, "step": 114995 }, { "epoch": 18.760195758564436, "grad_norm": 0.00265447492711246, "learning_rate": 1.1663249447933067e-05, "loss": 0.0007, "num_input_tokens_seen": 248413040, "step": 115000 }, { "epoch": 18.76101141924959, "grad_norm": 0.0027859921101480722, "learning_rate": 1.1647969969727e-05, "loss": 0.002, "num_input_tokens_seen": 248425008, "step": 115005 }, { "epoch": 18.761827079934747, "grad_norm": 0.0012829096522182226, "learning_rate": 1.1632700388594375e-05, "loss": 0.0017, "num_input_tokens_seen": 248436368, "step": 115010 }, { "epoch": 18.762642740619903, "grad_norm": 0.0012848442420363426, "learning_rate": 1.1617440704844661e-05, "loss": 0.0008, "num_input_tokens_seen": 248447920, "step": 115015 }, { "epoch": 18.76345840130506, "grad_norm": 0.009974795393645763, "learning_rate": 1.1602190918787004e-05, "loss": 0.0023, "num_input_tokens_seen": 248458768, "step": 115020 }, { "epoch": 18.76427406199021, "grad_norm": 0.3886551856994629, "learning_rate": 1.1586951030730542e-05, "loss": 0.0298, "num_input_tokens_seen": 248470384, "step": 115025 }, { "epoch": 18.765089722675366, "grad_norm": 0.0006359002436511219, "learning_rate": 1.1571721040984084e-05, "loss": 0.0011, "num_input_tokens_seen": 248481840, "step": 115030 }, { "epoch": 18.765905383360522, "grad_norm": 0.0023894875776022673, "learning_rate": 1.1556500949856386e-05, "loss": 0.003, "num_input_tokens_seen": 248492240, "step": 115035 }, { "epoch": 18.766721044045678, "grad_norm": 0.028124431148171425, "learning_rate": 1.1541290757655754e-05, "loss": 0.0018, "num_input_tokens_seen": 248503472, "step": 115040 }, { "epoch": 18.767536704730833, "grad_norm": 0.00027735813637264073, "learning_rate": 1.1526090464690553e-05, "loss": 0.0035, "num_input_tokens_seen": 248514288, "step": 115045 }, { "epoch": 18.768352365415986, "grad_norm": 0.0014982965076342225, "learning_rate": 1.1510900071268815e-05, "loss": 0.001, "num_input_tokens_seen": 248525456, "step": 115050 }, { "epoch": 18.76916802610114, "grad_norm": 0.0015739202499389648, "learning_rate": 1.149571957769835e-05, "loss": 0.0006, "num_input_tokens_seen": 248536976, "step": 115055 }, { "epoch": 18.769983686786297, "grad_norm": 0.057564280927181244, "learning_rate": 1.1480548984286853e-05, "loss": 0.0028, "num_input_tokens_seen": 248545904, "step": 115060 }, { "epoch": 18.770799347471453, "grad_norm": 0.0035541686229407787, "learning_rate": 1.1465388291341804e-05, "loss": 0.0013, "num_input_tokens_seen": 248556368, "step": 115065 }, { "epoch": 18.77161500815661, "grad_norm": 0.0001766427740221843, "learning_rate": 1.145023749917029e-05, "loss": 0.0007, "num_input_tokens_seen": 248566640, "step": 115070 }, { "epoch": 18.77243066884176, "grad_norm": 0.019232071936130524, "learning_rate": 1.143509660807962e-05, "loss": 0.0029, "num_input_tokens_seen": 248579440, "step": 115075 }, { "epoch": 18.773246329526916, "grad_norm": 0.0011684228666126728, "learning_rate": 1.1419965618376383e-05, "loss": 0.0013, "num_input_tokens_seen": 248589008, "step": 115080 }, { "epoch": 18.774061990212072, "grad_norm": 0.0053740390576422215, "learning_rate": 1.1404844530367498e-05, "loss": 0.0039, "num_input_tokens_seen": 248598640, "step": 115085 }, { "epoch": 18.774877650897228, "grad_norm": 0.0055008502677083015, "learning_rate": 1.138973334435911e-05, "loss": 0.0016, "num_input_tokens_seen": 248610064, "step": 115090 }, { "epoch": 18.775693311582383, "grad_norm": 0.0013196800136938691, "learning_rate": 1.1374632060657753e-05, "loss": 0.0007, "num_input_tokens_seen": 248620848, "step": 115095 }, { "epoch": 18.776508972267536, "grad_norm": 0.0019128243438899517, "learning_rate": 1.1359540679569236e-05, "loss": 0.001, "num_input_tokens_seen": 248631792, "step": 115100 }, { "epoch": 18.77732463295269, "grad_norm": 0.0034028757363557816, "learning_rate": 1.1344459201399592e-05, "loss": 0.0016, "num_input_tokens_seen": 248641264, "step": 115105 }, { "epoch": 18.778140293637847, "grad_norm": 0.009112970903515816, "learning_rate": 1.1329387626454358e-05, "loss": 0.0007, "num_input_tokens_seen": 248652432, "step": 115110 }, { "epoch": 18.778955954323003, "grad_norm": 0.003698774380609393, "learning_rate": 1.1314325955039007e-05, "loss": 0.0041, "num_input_tokens_seen": 248663056, "step": 115115 }, { "epoch": 18.77977161500816, "grad_norm": 0.002369646681472659, "learning_rate": 1.1299274187458741e-05, "loss": 0.0069, "num_input_tokens_seen": 248673936, "step": 115120 }, { "epoch": 18.78058727569331, "grad_norm": 0.0011856432538479567, "learning_rate": 1.1284232324018761e-05, "loss": 0.0007, "num_input_tokens_seen": 248683600, "step": 115125 }, { "epoch": 18.781402936378466, "grad_norm": 0.0010567542631179094, "learning_rate": 1.1269200365023657e-05, "loss": 0.0011, "num_input_tokens_seen": 248694256, "step": 115130 }, { "epoch": 18.782218597063622, "grad_norm": 0.4526723623275757, "learning_rate": 1.125417831077824e-05, "loss": 0.021, "num_input_tokens_seen": 248704560, "step": 115135 }, { "epoch": 18.783034257748778, "grad_norm": 0.00026864337269216776, "learning_rate": 1.1239166161586933e-05, "loss": 0.0008, "num_input_tokens_seen": 248717232, "step": 115140 }, { "epoch": 18.78384991843393, "grad_norm": 0.002488876460120082, "learning_rate": 1.1224163917753993e-05, "loss": 0.0085, "num_input_tokens_seen": 248727728, "step": 115145 }, { "epoch": 18.784665579119086, "grad_norm": 0.00027354180929251015, "learning_rate": 1.1209171579583399e-05, "loss": 0.0007, "num_input_tokens_seen": 248738544, "step": 115150 }, { "epoch": 18.78548123980424, "grad_norm": 0.0005132790538482368, "learning_rate": 1.1194189147379018e-05, "loss": 0.0013, "num_input_tokens_seen": 248749456, "step": 115155 }, { "epoch": 18.786296900489397, "grad_norm": 0.0002152713859686628, "learning_rate": 1.1179216621444499e-05, "loss": 0.001, "num_input_tokens_seen": 248760208, "step": 115160 }, { "epoch": 18.787112561174553, "grad_norm": 0.007923472672700882, "learning_rate": 1.1164254002083262e-05, "loss": 0.001, "num_input_tokens_seen": 248771600, "step": 115165 }, { "epoch": 18.787928221859705, "grad_norm": 0.00116739550139755, "learning_rate": 1.1149301289598569e-05, "loss": 0.0011, "num_input_tokens_seen": 248782416, "step": 115170 }, { "epoch": 18.78874388254486, "grad_norm": 0.0004970782902091742, "learning_rate": 1.1134358484293395e-05, "loss": 0.0028, "num_input_tokens_seen": 248793296, "step": 115175 }, { "epoch": 18.789559543230016, "grad_norm": 0.0009810201590880752, "learning_rate": 1.1119425586470667e-05, "loss": 0.0249, "num_input_tokens_seen": 248804528, "step": 115180 }, { "epoch": 18.790375203915172, "grad_norm": 0.0015762445982545614, "learning_rate": 1.1104502596432863e-05, "loss": 0.0008, "num_input_tokens_seen": 248814608, "step": 115185 }, { "epoch": 18.791190864600328, "grad_norm": 0.0027050410863012075, "learning_rate": 1.1089589514482635e-05, "loss": 0.0003, "num_input_tokens_seen": 248825680, "step": 115190 }, { "epoch": 18.79200652528548, "grad_norm": 0.004898820538073778, "learning_rate": 1.1074686340922068e-05, "loss": 0.0011, "num_input_tokens_seen": 248837744, "step": 115195 }, { "epoch": 18.792822185970635, "grad_norm": 0.0005425384151749313, "learning_rate": 1.105979307605326e-05, "loss": 0.002, "num_input_tokens_seen": 248845904, "step": 115200 }, { "epoch": 18.79363784665579, "grad_norm": 0.0005339878844097257, "learning_rate": 1.104490972017791e-05, "loss": 0.0003, "num_input_tokens_seen": 248856720, "step": 115205 }, { "epoch": 18.794453507340947, "grad_norm": 0.003522041952237487, "learning_rate": 1.1030036273597888e-05, "loss": 0.0012, "num_input_tokens_seen": 248867536, "step": 115210 }, { "epoch": 18.795269168026103, "grad_norm": 0.0015702954260632396, "learning_rate": 1.1015172736614343e-05, "loss": 0.0032, "num_input_tokens_seen": 248878096, "step": 115215 }, { "epoch": 18.796084828711255, "grad_norm": 0.0037163293454796076, "learning_rate": 1.1000319109528755e-05, "loss": 0.0005, "num_input_tokens_seen": 248889936, "step": 115220 }, { "epoch": 18.79690048939641, "grad_norm": 0.0001467862311983481, "learning_rate": 1.0985475392641941e-05, "loss": 0.0008, "num_input_tokens_seen": 248900464, "step": 115225 }, { "epoch": 18.797716150081566, "grad_norm": 0.0035111182369291782, "learning_rate": 1.0970641586254937e-05, "loss": 0.001, "num_input_tokens_seen": 248911280, "step": 115230 }, { "epoch": 18.798531810766722, "grad_norm": 0.0016056197928264737, "learning_rate": 1.0955817690668169e-05, "loss": 0.0009, "num_input_tokens_seen": 248922864, "step": 115235 }, { "epoch": 18.799347471451878, "grad_norm": 0.0003613026347011328, "learning_rate": 1.094100370618223e-05, "loss": 0.0019, "num_input_tokens_seen": 248932560, "step": 115240 }, { "epoch": 18.80016313213703, "grad_norm": 0.0015075618866831064, "learning_rate": 1.0926199633097156e-05, "loss": 0.0004, "num_input_tokens_seen": 248943440, "step": 115245 }, { "epoch": 18.800978792822185, "grad_norm": 0.003285182174295187, "learning_rate": 1.091140547171321e-05, "loss": 0.0007, "num_input_tokens_seen": 248953712, "step": 115250 }, { "epoch": 18.80179445350734, "grad_norm": 0.006845667026937008, "learning_rate": 1.0896621222329983e-05, "loss": 0.0007, "num_input_tokens_seen": 248965392, "step": 115255 }, { "epoch": 18.802610114192497, "grad_norm": 0.003190365619957447, "learning_rate": 1.0881846885247293e-05, "loss": 0.0005, "num_input_tokens_seen": 248975952, "step": 115260 }, { "epoch": 18.803425774877653, "grad_norm": 0.00023938875528983772, "learning_rate": 1.0867082460764343e-05, "loss": 0.0032, "num_input_tokens_seen": 248987184, "step": 115265 }, { "epoch": 18.804241435562805, "grad_norm": 0.0011155434185639024, "learning_rate": 1.0852327949180618e-05, "loss": 0.0003, "num_input_tokens_seen": 248998448, "step": 115270 }, { "epoch": 18.80505709624796, "grad_norm": 0.001338596804998815, "learning_rate": 1.0837583350794878e-05, "loss": 0.0027, "num_input_tokens_seen": 249008592, "step": 115275 }, { "epoch": 18.805872756933116, "grad_norm": 0.01313408650457859, "learning_rate": 1.0822848665906104e-05, "loss": 0.0024, "num_input_tokens_seen": 249019216, "step": 115280 }, { "epoch": 18.806688417618272, "grad_norm": 0.008214665576815605, "learning_rate": 1.0808123894812838e-05, "loss": 0.0006, "num_input_tokens_seen": 249031056, "step": 115285 }, { "epoch": 18.807504078303424, "grad_norm": 0.0005401197704486549, "learning_rate": 1.0793409037813562e-05, "loss": 0.0405, "num_input_tokens_seen": 249041712, "step": 115290 }, { "epoch": 18.80831973898858, "grad_norm": 0.0016540754586458206, "learning_rate": 1.0778704095206427e-05, "loss": 0.002, "num_input_tokens_seen": 249051504, "step": 115295 }, { "epoch": 18.809135399673735, "grad_norm": 0.004030495882034302, "learning_rate": 1.0764009067289526e-05, "loss": 0.0005, "num_input_tokens_seen": 249062960, "step": 115300 }, { "epoch": 18.80995106035889, "grad_norm": 0.001202249201014638, "learning_rate": 1.0749323954360568e-05, "loss": 0.0006, "num_input_tokens_seen": 249072560, "step": 115305 }, { "epoch": 18.810766721044047, "grad_norm": 0.04477335512638092, "learning_rate": 1.0734648756717258e-05, "loss": 0.001, "num_input_tokens_seen": 249083056, "step": 115310 }, { "epoch": 18.8115823817292, "grad_norm": 0.0677478015422821, "learning_rate": 1.0719983474656914e-05, "loss": 0.0036, "num_input_tokens_seen": 249092688, "step": 115315 }, { "epoch": 18.812398042414355, "grad_norm": 0.00028590558213181794, "learning_rate": 1.0705328108476852e-05, "loss": 0.0005, "num_input_tokens_seen": 249103696, "step": 115320 }, { "epoch": 18.81321370309951, "grad_norm": 0.0034891394898295403, "learning_rate": 1.0690682658474004e-05, "loss": 0.0011, "num_input_tokens_seen": 249115120, "step": 115325 }, { "epoch": 18.814029363784666, "grad_norm": 0.004498482681810856, "learning_rate": 1.0676047124945187e-05, "loss": 0.0013, "num_input_tokens_seen": 249126128, "step": 115330 }, { "epoch": 18.81484502446982, "grad_norm": 0.11555361747741699, "learning_rate": 1.0661421508187109e-05, "loss": 0.0038, "num_input_tokens_seen": 249137360, "step": 115335 }, { "epoch": 18.815660685154974, "grad_norm": 0.0011468741577118635, "learning_rate": 1.0646805808495974e-05, "loss": 0.0084, "num_input_tokens_seen": 249148400, "step": 115340 }, { "epoch": 18.81647634584013, "grad_norm": 0.0004126753192394972, "learning_rate": 1.0632200026168215e-05, "loss": 0.0002, "num_input_tokens_seen": 249160656, "step": 115345 }, { "epoch": 18.817292006525285, "grad_norm": 0.011663687415421009, "learning_rate": 1.061760416149965e-05, "loss": 0.0147, "num_input_tokens_seen": 249172304, "step": 115350 }, { "epoch": 18.81810766721044, "grad_norm": 0.07764124870300293, "learning_rate": 1.0603018214786264e-05, "loss": 0.0031, "num_input_tokens_seen": 249182896, "step": 115355 }, { "epoch": 18.818923327895597, "grad_norm": 0.0025914544239640236, "learning_rate": 1.0588442186323433e-05, "loss": 0.0028, "num_input_tokens_seen": 249193712, "step": 115360 }, { "epoch": 18.81973898858075, "grad_norm": 0.0012623146176338196, "learning_rate": 1.0573876076406807e-05, "loss": 0.0006, "num_input_tokens_seen": 249203696, "step": 115365 }, { "epoch": 18.820554649265905, "grad_norm": 0.007144573610275984, "learning_rate": 1.055931988533132e-05, "loss": 0.0012, "num_input_tokens_seen": 249213936, "step": 115370 }, { "epoch": 18.82137030995106, "grad_norm": 0.010374244302511215, "learning_rate": 1.0544773613392289e-05, "loss": 0.0061, "num_input_tokens_seen": 249225520, "step": 115375 }, { "epoch": 18.822185970636216, "grad_norm": 0.0038773410487920046, "learning_rate": 1.0530237260884146e-05, "loss": 0.0084, "num_input_tokens_seen": 249236368, "step": 115380 }, { "epoch": 18.82300163132137, "grad_norm": 0.002093594754114747, "learning_rate": 1.051571082810182e-05, "loss": 0.0022, "num_input_tokens_seen": 249247984, "step": 115385 }, { "epoch": 18.823817292006524, "grad_norm": 0.0006376361125148833, "learning_rate": 1.0501194315339523e-05, "loss": 0.0006, "num_input_tokens_seen": 249258832, "step": 115390 }, { "epoch": 18.82463295269168, "grad_norm": 0.5937981605529785, "learning_rate": 1.048668772289152e-05, "loss": 0.0298, "num_input_tokens_seen": 249269776, "step": 115395 }, { "epoch": 18.825448613376835, "grad_norm": 0.0035166044253855944, "learning_rate": 1.0472191051051738e-05, "loss": 0.0012, "num_input_tokens_seen": 249280432, "step": 115400 }, { "epoch": 18.82626427406199, "grad_norm": 0.003904164768755436, "learning_rate": 1.0457704300114057e-05, "loss": 0.0017, "num_input_tokens_seen": 249291472, "step": 115405 }, { "epoch": 18.827079934747147, "grad_norm": 0.00040897587314248085, "learning_rate": 1.0443227470372018e-05, "loss": 0.0004, "num_input_tokens_seen": 249302768, "step": 115410 }, { "epoch": 18.8278955954323, "grad_norm": 0.0001675260136835277, "learning_rate": 1.0428760562119e-05, "loss": 0.0006, "num_input_tokens_seen": 249315216, "step": 115415 }, { "epoch": 18.828711256117455, "grad_norm": 0.10226722061634064, "learning_rate": 1.041430357564821e-05, "loss": 0.0026, "num_input_tokens_seen": 249326128, "step": 115420 }, { "epoch": 18.82952691680261, "grad_norm": 0.00045980140566825867, "learning_rate": 1.0399856511252692e-05, "loss": 0.0003, "num_input_tokens_seen": 249338608, "step": 115425 }, { "epoch": 18.830342577487766, "grad_norm": 0.0012188085820525885, "learning_rate": 1.0385419369225157e-05, "loss": 0.0011, "num_input_tokens_seen": 249348496, "step": 115430 }, { "epoch": 18.83115823817292, "grad_norm": 0.0006280313245952129, "learning_rate": 1.0370992149858205e-05, "loss": 0.0004, "num_input_tokens_seen": 249358832, "step": 115435 }, { "epoch": 18.831973898858074, "grad_norm": 0.0012271900195628405, "learning_rate": 1.0356574853444211e-05, "loss": 0.0007, "num_input_tokens_seen": 249367760, "step": 115440 }, { "epoch": 18.83278955954323, "grad_norm": 0.004578801337629557, "learning_rate": 1.0342167480275444e-05, "loss": 0.002, "num_input_tokens_seen": 249378320, "step": 115445 }, { "epoch": 18.833605220228385, "grad_norm": 0.0065257553942501545, "learning_rate": 1.032777003064378e-05, "loss": 0.0015, "num_input_tokens_seen": 249389328, "step": 115450 }, { "epoch": 18.83442088091354, "grad_norm": 0.549997866153717, "learning_rate": 1.0313382504841096e-05, "loss": 0.0268, "num_input_tokens_seen": 249402192, "step": 115455 }, { "epoch": 18.835236541598697, "grad_norm": 0.00017082234262488782, "learning_rate": 1.0299004903158882e-05, "loss": 0.0004, "num_input_tokens_seen": 249414096, "step": 115460 }, { "epoch": 18.83605220228385, "grad_norm": 0.0017220403533428907, "learning_rate": 1.0284637225888626e-05, "loss": 0.0006, "num_input_tokens_seen": 249425520, "step": 115465 }, { "epoch": 18.836867862969005, "grad_norm": 0.0026273017283529043, "learning_rate": 1.0270279473321375e-05, "loss": 0.0015, "num_input_tokens_seen": 249437872, "step": 115470 }, { "epoch": 18.83768352365416, "grad_norm": 0.0008895907667465508, "learning_rate": 1.0255931645748174e-05, "loss": 0.0013, "num_input_tokens_seen": 249448848, "step": 115475 }, { "epoch": 18.838499184339316, "grad_norm": 0.0026277219876646996, "learning_rate": 1.0241593743459898e-05, "loss": 0.0013, "num_input_tokens_seen": 249461008, "step": 115480 }, { "epoch": 18.839314845024468, "grad_norm": 0.0016103885136544704, "learning_rate": 1.0227265766746874e-05, "loss": 0.0232, "num_input_tokens_seen": 249471216, "step": 115485 }, { "epoch": 18.840130505709624, "grad_norm": 0.0002219690359197557, "learning_rate": 1.0212947715899757e-05, "loss": 0.0048, "num_input_tokens_seen": 249482256, "step": 115490 }, { "epoch": 18.84094616639478, "grad_norm": 0.0006362979183904827, "learning_rate": 1.0198639591208535e-05, "loss": 0.0005, "num_input_tokens_seen": 249493328, "step": 115495 }, { "epoch": 18.841761827079935, "grad_norm": 0.004850686062127352, "learning_rate": 1.0184341392963259e-05, "loss": 0.002, "num_input_tokens_seen": 249505360, "step": 115500 }, { "epoch": 18.84257748776509, "grad_norm": 0.006848242599517107, "learning_rate": 1.0170053121453694e-05, "loss": 0.0006, "num_input_tokens_seen": 249515632, "step": 115505 }, { "epoch": 18.843393148450243, "grad_norm": 0.0005007157451473176, "learning_rate": 1.0155774776969385e-05, "loss": 0.0014, "num_input_tokens_seen": 249525680, "step": 115510 }, { "epoch": 18.8442088091354, "grad_norm": 0.002938035409897566, "learning_rate": 1.0141506359799712e-05, "loss": 0.0019, "num_input_tokens_seen": 249535824, "step": 115515 }, { "epoch": 18.845024469820554, "grad_norm": 0.0021078032441437244, "learning_rate": 1.0127247870233836e-05, "loss": 0.0006, "num_input_tokens_seen": 249547376, "step": 115520 }, { "epoch": 18.84584013050571, "grad_norm": 0.032018523663282394, "learning_rate": 1.011299930856069e-05, "loss": 0.0034, "num_input_tokens_seen": 249557808, "step": 115525 }, { "epoch": 18.846655791190866, "grad_norm": 0.0021978262811899185, "learning_rate": 1.0098760675069151e-05, "loss": 0.0195, "num_input_tokens_seen": 249566864, "step": 115530 }, { "epoch": 18.847471451876018, "grad_norm": 0.003689467441290617, "learning_rate": 1.0084531970047662e-05, "loss": 0.0014, "num_input_tokens_seen": 249577328, "step": 115535 }, { "epoch": 18.848287112561174, "grad_norm": 0.004149348940700293, "learning_rate": 1.0070313193784653e-05, "loss": 0.0048, "num_input_tokens_seen": 249588784, "step": 115540 }, { "epoch": 18.84910277324633, "grad_norm": 0.007302007172256708, "learning_rate": 1.0056104346568285e-05, "loss": 0.0013, "num_input_tokens_seen": 249598928, "step": 115545 }, { "epoch": 18.849918433931485, "grad_norm": 0.0032237458508461714, "learning_rate": 1.0041905428686493e-05, "loss": 0.0015, "num_input_tokens_seen": 249610160, "step": 115550 }, { "epoch": 18.85073409461664, "grad_norm": 0.06821348518133163, "learning_rate": 1.0027716440427049e-05, "loss": 0.0021, "num_input_tokens_seen": 249621776, "step": 115555 }, { "epoch": 18.851549755301793, "grad_norm": 0.01791023463010788, "learning_rate": 1.0013537382077443e-05, "loss": 0.004, "num_input_tokens_seen": 249633072, "step": 115560 }, { "epoch": 18.85236541598695, "grad_norm": 0.00621650880202651, "learning_rate": 9.999368253925167e-06, "loss": 0.0031, "num_input_tokens_seen": 249643568, "step": 115565 }, { "epoch": 18.853181076672104, "grad_norm": 0.012429935857653618, "learning_rate": 9.985209056257272e-06, "loss": 0.0153, "num_input_tokens_seen": 249654832, "step": 115570 }, { "epoch": 18.85399673735726, "grad_norm": 0.0016874076100066304, "learning_rate": 9.971059789360749e-06, "loss": 0.0004, "num_input_tokens_seen": 249666416, "step": 115575 }, { "epoch": 18.854812398042416, "grad_norm": 0.05518745630979538, "learning_rate": 9.956920453522366e-06, "loss": 0.0016, "num_input_tokens_seen": 249677808, "step": 115580 }, { "epoch": 18.855628058727568, "grad_norm": 0.015488969162106514, "learning_rate": 9.942791049028621e-06, "loss": 0.001, "num_input_tokens_seen": 249688176, "step": 115585 }, { "epoch": 18.856443719412724, "grad_norm": 0.01876658760011196, "learning_rate": 9.928671576165893e-06, "loss": 0.0032, "num_input_tokens_seen": 249699920, "step": 115590 }, { "epoch": 18.85725938009788, "grad_norm": 0.001732186763547361, "learning_rate": 9.914562035220287e-06, "loss": 0.0026, "num_input_tokens_seen": 249710800, "step": 115595 }, { "epoch": 18.858075040783035, "grad_norm": 0.010737020522356033, "learning_rate": 9.900462426477908e-06, "loss": 0.0021, "num_input_tokens_seen": 249721712, "step": 115600 }, { "epoch": 18.85889070146819, "grad_norm": 0.0032939244993031025, "learning_rate": 9.886372750224304e-06, "loss": 0.003, "num_input_tokens_seen": 249733040, "step": 115605 }, { "epoch": 18.859706362153343, "grad_norm": 0.000969278160482645, "learning_rate": 9.872293006745192e-06, "loss": 0.0481, "num_input_tokens_seen": 249744848, "step": 115610 }, { "epoch": 18.8605220228385, "grad_norm": 0.0010464123915880919, "learning_rate": 9.858223196325789e-06, "loss": 0.0004, "num_input_tokens_seen": 249755248, "step": 115615 }, { "epoch": 18.861337683523654, "grad_norm": 0.0052170101553201675, "learning_rate": 9.844163319251253e-06, "loss": 0.0023, "num_input_tokens_seen": 249766352, "step": 115620 }, { "epoch": 18.86215334420881, "grad_norm": 0.0025711439084261656, "learning_rate": 9.830113375806582e-06, "loss": 0.0005, "num_input_tokens_seen": 249776912, "step": 115625 }, { "epoch": 18.862969004893966, "grad_norm": 0.0010705140884965658, "learning_rate": 9.816073366276545e-06, "loss": 0.0006, "num_input_tokens_seen": 249788272, "step": 115630 }, { "epoch": 18.863784665579118, "grad_norm": 0.11389190703630447, "learning_rate": 9.802043290945529e-06, "loss": 0.0059, "num_input_tokens_seen": 249800368, "step": 115635 }, { "epoch": 18.864600326264274, "grad_norm": 0.02490387298166752, "learning_rate": 9.788023150098024e-06, "loss": 0.0035, "num_input_tokens_seen": 249811408, "step": 115640 }, { "epoch": 18.86541598694943, "grad_norm": 0.017327111214399338, "learning_rate": 9.774012944018085e-06, "loss": 0.0006, "num_input_tokens_seen": 249821808, "step": 115645 }, { "epoch": 18.866231647634585, "grad_norm": 0.059728048741817474, "learning_rate": 9.760012672989704e-06, "loss": 0.0044, "num_input_tokens_seen": 249832528, "step": 115650 }, { "epoch": 18.86704730831974, "grad_norm": 0.02098773419857025, "learning_rate": 9.746022337296546e-06, "loss": 0.0035, "num_input_tokens_seen": 249842640, "step": 115655 }, { "epoch": 18.867862969004893, "grad_norm": 0.00045805005356669426, "learning_rate": 9.732041937222157e-06, "loss": 0.0007, "num_input_tokens_seen": 249852240, "step": 115660 }, { "epoch": 18.86867862969005, "grad_norm": 0.002120513003319502, "learning_rate": 9.718071473049927e-06, "loss": 0.001, "num_input_tokens_seen": 249863056, "step": 115665 }, { "epoch": 18.869494290375204, "grad_norm": 0.016152381896972656, "learning_rate": 9.70411094506296e-06, "loss": 0.0011, "num_input_tokens_seen": 249874224, "step": 115670 }, { "epoch": 18.87030995106036, "grad_norm": 0.025782400742173195, "learning_rate": 9.690160353544142e-06, "loss": 0.0004, "num_input_tokens_seen": 249882704, "step": 115675 }, { "epoch": 18.871125611745512, "grad_norm": 0.009104576893150806, "learning_rate": 9.67621969877619e-06, "loss": 0.0009, "num_input_tokens_seen": 249893424, "step": 115680 }, { "epoch": 18.871941272430668, "grad_norm": 0.008896476589143276, "learning_rate": 9.66228898104171e-06, "loss": 0.0005, "num_input_tokens_seen": 249902992, "step": 115685 }, { "epoch": 18.872756933115824, "grad_norm": 0.002238509012386203, "learning_rate": 9.64836820062298e-06, "loss": 0.0012, "num_input_tokens_seen": 249913072, "step": 115690 }, { "epoch": 18.87357259380098, "grad_norm": 0.03264370560646057, "learning_rate": 9.634457357802107e-06, "loss": 0.001, "num_input_tokens_seen": 249923312, "step": 115695 }, { "epoch": 18.874388254486135, "grad_norm": 0.06711148470640182, "learning_rate": 9.62055645286103e-06, "loss": 0.0342, "num_input_tokens_seen": 249933712, "step": 115700 }, { "epoch": 18.875203915171287, "grad_norm": 0.0005814563482999802, "learning_rate": 9.606665486081522e-06, "loss": 0.0003, "num_input_tokens_seen": 249943024, "step": 115705 }, { "epoch": 18.876019575856443, "grad_norm": 0.0026038573123514652, "learning_rate": 9.592784457744918e-06, "loss": 0.0016, "num_input_tokens_seen": 249953648, "step": 115710 }, { "epoch": 18.8768352365416, "grad_norm": 0.0004891370190307498, "learning_rate": 9.578913368132824e-06, "loss": 0.0003, "num_input_tokens_seen": 249963536, "step": 115715 }, { "epoch": 18.877650897226754, "grad_norm": 0.00019704003352671862, "learning_rate": 9.565052217526072e-06, "loss": 0.0004, "num_input_tokens_seen": 249974064, "step": 115720 }, { "epoch": 18.87846655791191, "grad_norm": 0.19600987434387207, "learning_rate": 9.551201006205767e-06, "loss": 0.0057, "num_input_tokens_seen": 249984240, "step": 115725 }, { "epoch": 18.879282218597062, "grad_norm": 0.025361159816384315, "learning_rate": 9.537359734452466e-06, "loss": 0.0008, "num_input_tokens_seen": 249996016, "step": 115730 }, { "epoch": 18.880097879282218, "grad_norm": 0.0005024754791520536, "learning_rate": 9.523528402546888e-06, "loss": 0.0064, "num_input_tokens_seen": 250007408, "step": 115735 }, { "epoch": 18.880913539967374, "grad_norm": 0.0042595332488417625, "learning_rate": 9.509707010769086e-06, "loss": 0.0015, "num_input_tokens_seen": 250017456, "step": 115740 }, { "epoch": 18.88172920065253, "grad_norm": 0.019248517230153084, "learning_rate": 9.495895559399449e-06, "loss": 0.0541, "num_input_tokens_seen": 250029296, "step": 115745 }, { "epoch": 18.882544861337685, "grad_norm": 0.0002776541223283857, "learning_rate": 9.482094048717637e-06, "loss": 0.0017, "num_input_tokens_seen": 250040336, "step": 115750 }, { "epoch": 18.883360522022837, "grad_norm": 0.009291916154325008, "learning_rate": 9.468302479003487e-06, "loss": 0.0004, "num_input_tokens_seen": 250050896, "step": 115755 }, { "epoch": 18.884176182707993, "grad_norm": 0.002779381349682808, "learning_rate": 9.45452085053644e-06, "loss": 0.004, "num_input_tokens_seen": 250061168, "step": 115760 }, { "epoch": 18.88499184339315, "grad_norm": 0.004881167318671942, "learning_rate": 9.44074916359583e-06, "loss": 0.0006, "num_input_tokens_seen": 250071632, "step": 115765 }, { "epoch": 18.885807504078304, "grad_norm": 0.012340943329036236, "learning_rate": 9.42698741846082e-06, "loss": 0.0006, "num_input_tokens_seen": 250082768, "step": 115770 }, { "epoch": 18.88662316476346, "grad_norm": 0.07426692545413971, "learning_rate": 9.413235615410188e-06, "loss": 0.0325, "num_input_tokens_seen": 250093936, "step": 115775 }, { "epoch": 18.887438825448612, "grad_norm": 0.0016281426651403308, "learning_rate": 9.39949375472271e-06, "loss": 0.0517, "num_input_tokens_seen": 250105040, "step": 115780 }, { "epoch": 18.888254486133768, "grad_norm": 0.009493221528828144, "learning_rate": 9.385761836676832e-06, "loss": 0.0015, "num_input_tokens_seen": 250114960, "step": 115785 }, { "epoch": 18.889070146818923, "grad_norm": 0.0010956472251564264, "learning_rate": 9.37203986155094e-06, "loss": 0.0004, "num_input_tokens_seen": 250126224, "step": 115790 }, { "epoch": 18.88988580750408, "grad_norm": 0.0018236135365441442, "learning_rate": 9.358327829623038e-06, "loss": 0.0007, "num_input_tokens_seen": 250135536, "step": 115795 }, { "epoch": 18.890701468189235, "grad_norm": 0.17699794471263885, "learning_rate": 9.344625741171009e-06, "loss": 0.0035, "num_input_tokens_seen": 250146128, "step": 115800 }, { "epoch": 18.891517128874387, "grad_norm": 0.028981979936361313, "learning_rate": 9.330933596472635e-06, "loss": 0.0049, "num_input_tokens_seen": 250157232, "step": 115805 }, { "epoch": 18.892332789559543, "grad_norm": 0.0012324347626417875, "learning_rate": 9.317251395805304e-06, "loss": 0.0008, "num_input_tokens_seen": 250167888, "step": 115810 }, { "epoch": 18.8931484502447, "grad_norm": 0.004059400409460068, "learning_rate": 9.303579139446349e-06, "loss": 0.0005, "num_input_tokens_seen": 250179184, "step": 115815 }, { "epoch": 18.893964110929854, "grad_norm": 0.006122584920376539, "learning_rate": 9.28991682767294e-06, "loss": 0.1101, "num_input_tokens_seen": 250188624, "step": 115820 }, { "epoch": 18.894779771615006, "grad_norm": 0.0018198771867901087, "learning_rate": 9.27626446076174e-06, "loss": 0.0006, "num_input_tokens_seen": 250197712, "step": 115825 }, { "epoch": 18.895595432300162, "grad_norm": 0.04111974686384201, "learning_rate": 9.2626220389897e-06, "loss": 0.0015, "num_input_tokens_seen": 250208272, "step": 115830 }, { "epoch": 18.896411092985318, "grad_norm": 0.00017160769493784755, "learning_rate": 9.248989562633037e-06, "loss": 0.0004, "num_input_tokens_seen": 250218448, "step": 115835 }, { "epoch": 18.897226753670473, "grad_norm": 0.0034603665117174387, "learning_rate": 9.235367031968312e-06, "loss": 0.0744, "num_input_tokens_seen": 250229552, "step": 115840 }, { "epoch": 18.89804241435563, "grad_norm": 0.0018716700142249465, "learning_rate": 9.221754447271302e-06, "loss": 0.002, "num_input_tokens_seen": 250240080, "step": 115845 }, { "epoch": 18.898858075040785, "grad_norm": 0.003498975420370698, "learning_rate": 9.208151808818177e-06, "loss": 0.001, "num_input_tokens_seen": 250251152, "step": 115850 }, { "epoch": 18.899673735725937, "grad_norm": 0.007458406500518322, "learning_rate": 9.194559116884327e-06, "loss": 0.0056, "num_input_tokens_seen": 250262480, "step": 115855 }, { "epoch": 18.900489396411093, "grad_norm": 0.0024467897601425648, "learning_rate": 9.18097637174553e-06, "loss": 0.1571, "num_input_tokens_seen": 250273584, "step": 115860 }, { "epoch": 18.90130505709625, "grad_norm": 0.0011468434240669012, "learning_rate": 9.167403573676736e-06, "loss": 0.054, "num_input_tokens_seen": 250285488, "step": 115865 }, { "epoch": 18.902120717781404, "grad_norm": 0.0010236542439088225, "learning_rate": 9.153840722953278e-06, "loss": 0.0011, "num_input_tokens_seen": 250295536, "step": 115870 }, { "epoch": 18.902936378466556, "grad_norm": 0.0005618699942715466, "learning_rate": 9.14028781984988e-06, "loss": 0.0083, "num_input_tokens_seen": 250306288, "step": 115875 }, { "epoch": 18.903752039151712, "grad_norm": 0.00021621494670398533, "learning_rate": 9.126744864641267e-06, "loss": 0.0003, "num_input_tokens_seen": 250318352, "step": 115880 }, { "epoch": 18.904567699836868, "grad_norm": 0.0009274820913560688, "learning_rate": 9.113211857601833e-06, "loss": 0.0004, "num_input_tokens_seen": 250328528, "step": 115885 }, { "epoch": 18.905383360522023, "grad_norm": 0.007432404439896345, "learning_rate": 9.099688799005967e-06, "loss": 0.0009, "num_input_tokens_seen": 250339536, "step": 115890 }, { "epoch": 18.90619902120718, "grad_norm": 0.0005458049126900733, "learning_rate": 9.086175689127618e-06, "loss": 0.0013, "num_input_tokens_seen": 250350640, "step": 115895 }, { "epoch": 18.90701468189233, "grad_norm": 0.0005109109915792942, "learning_rate": 9.072672528240733e-06, "loss": 0.0193, "num_input_tokens_seen": 250360624, "step": 115900 }, { "epoch": 18.907830342577487, "grad_norm": 0.006997792515903711, "learning_rate": 9.059179316618871e-06, "loss": 0.104, "num_input_tokens_seen": 250371088, "step": 115905 }, { "epoch": 18.908646003262643, "grad_norm": 0.04977473244071007, "learning_rate": 9.045696054535535e-06, "loss": 0.0017, "num_input_tokens_seen": 250381872, "step": 115910 }, { "epoch": 18.9094616639478, "grad_norm": 0.0013368047075346112, "learning_rate": 9.032222742264008e-06, "loss": 0.0003, "num_input_tokens_seen": 250391632, "step": 115915 }, { "epoch": 18.910277324632954, "grad_norm": 0.009490997530519962, "learning_rate": 9.018759380077346e-06, "loss": 0.0006, "num_input_tokens_seen": 250403632, "step": 115920 }, { "epoch": 18.911092985318106, "grad_norm": 0.0015856948448345065, "learning_rate": 9.005305968248334e-06, "loss": 0.0011, "num_input_tokens_seen": 250414704, "step": 115925 }, { "epoch": 18.911908646003262, "grad_norm": 0.0006589622935280204, "learning_rate": 8.991862507049698e-06, "loss": 0.0002, "num_input_tokens_seen": 250425872, "step": 115930 }, { "epoch": 18.912724306688418, "grad_norm": 0.0006037901039235294, "learning_rate": 8.978428996753885e-06, "loss": 0.0007, "num_input_tokens_seen": 250436752, "step": 115935 }, { "epoch": 18.913539967373573, "grad_norm": 0.0019094824092462659, "learning_rate": 8.965005437633067e-06, "loss": 0.0004, "num_input_tokens_seen": 250447952, "step": 115940 }, { "epoch": 18.91435562805873, "grad_norm": 0.018927903845906258, "learning_rate": 8.95159182995936e-06, "loss": 0.0012, "num_input_tokens_seen": 250459888, "step": 115945 }, { "epoch": 18.91517128874388, "grad_norm": 0.00041185764712281525, "learning_rate": 8.938188174004602e-06, "loss": 0.0022, "num_input_tokens_seen": 250470704, "step": 115950 }, { "epoch": 18.915986949429037, "grad_norm": 0.009737544693052769, "learning_rate": 8.924794470040354e-06, "loss": 0.0008, "num_input_tokens_seen": 250480880, "step": 115955 }, { "epoch": 18.916802610114193, "grad_norm": 0.0019982964731752872, "learning_rate": 8.91141071833812e-06, "loss": 0.0028, "num_input_tokens_seen": 250491568, "step": 115960 }, { "epoch": 18.91761827079935, "grad_norm": 0.01937304250895977, "learning_rate": 8.89803691916924e-06, "loss": 0.0052, "num_input_tokens_seen": 250501840, "step": 115965 }, { "epoch": 18.918433931484504, "grad_norm": 0.00334971328265965, "learning_rate": 8.88467307280455e-06, "loss": 0.0018, "num_input_tokens_seen": 250513776, "step": 115970 }, { "epoch": 18.919249592169656, "grad_norm": 0.013554212637245655, "learning_rate": 8.871319179515058e-06, "loss": 0.0008, "num_input_tokens_seen": 250524400, "step": 115975 }, { "epoch": 18.920065252854812, "grad_norm": 0.06993885338306427, "learning_rate": 8.857975239571215e-06, "loss": 0.0027, "num_input_tokens_seen": 250535088, "step": 115980 }, { "epoch": 18.920880913539968, "grad_norm": 0.005087228491902351, "learning_rate": 8.84464125324369e-06, "loss": 0.0029, "num_input_tokens_seen": 250547056, "step": 115985 }, { "epoch": 18.921696574225123, "grad_norm": 0.00482860766351223, "learning_rate": 8.831317220802493e-06, "loss": 0.0029, "num_input_tokens_seen": 250558256, "step": 115990 }, { "epoch": 18.92251223491028, "grad_norm": 0.0009906106861308217, "learning_rate": 8.818003142517794e-06, "loss": 0.0015, "num_input_tokens_seen": 250569360, "step": 115995 }, { "epoch": 18.92332789559543, "grad_norm": 0.004632228519767523, "learning_rate": 8.804699018659324e-06, "loss": 0.0006, "num_input_tokens_seen": 250581008, "step": 116000 }, { "epoch": 18.924143556280587, "grad_norm": 0.0004046796530019492, "learning_rate": 8.79140484949681e-06, "loss": 0.0005, "num_input_tokens_seen": 250592272, "step": 116005 }, { "epoch": 18.924959216965743, "grad_norm": 0.006622139364480972, "learning_rate": 8.778120635299537e-06, "loss": 0.0061, "num_input_tokens_seen": 250603376, "step": 116010 }, { "epoch": 18.9257748776509, "grad_norm": 0.0018909795908257365, "learning_rate": 8.7648463763369e-06, "loss": 0.0006, "num_input_tokens_seen": 250614224, "step": 116015 }, { "epoch": 18.92659053833605, "grad_norm": 0.0008149382774718106, "learning_rate": 8.751582072877739e-06, "loss": 0.0008, "num_input_tokens_seen": 250624752, "step": 116020 }, { "epoch": 18.927406199021206, "grad_norm": 0.0009487331844866276, "learning_rate": 8.738327725191064e-06, "loss": 0.0011, "num_input_tokens_seen": 250635504, "step": 116025 }, { "epoch": 18.928221859706362, "grad_norm": 0.0005238814628683031, "learning_rate": 8.725083333545326e-06, "loss": 0.0007, "num_input_tokens_seen": 250646096, "step": 116030 }, { "epoch": 18.929037520391518, "grad_norm": 0.00510450080037117, "learning_rate": 8.711848898208974e-06, "loss": 0.0004, "num_input_tokens_seen": 250657424, "step": 116035 }, { "epoch": 18.929853181076673, "grad_norm": 0.009839179925620556, "learning_rate": 8.698624419450296e-06, "loss": 0.002, "num_input_tokens_seen": 250665936, "step": 116040 }, { "epoch": 18.930668841761825, "grad_norm": 0.10089553147554398, "learning_rate": 8.685409897537244e-06, "loss": 0.0028, "num_input_tokens_seen": 250676912, "step": 116045 }, { "epoch": 18.93148450244698, "grad_norm": 0.0005019927630200982, "learning_rate": 8.672205332737603e-06, "loss": 0.0089, "num_input_tokens_seen": 250688080, "step": 116050 }, { "epoch": 18.932300163132137, "grad_norm": 0.00039481374551542103, "learning_rate": 8.65901072531905e-06, "loss": 0.0008, "num_input_tokens_seen": 250698224, "step": 116055 }, { "epoch": 18.933115823817293, "grad_norm": 0.0027844554278999567, "learning_rate": 8.64582607554898e-06, "loss": 0.0006, "num_input_tokens_seen": 250709776, "step": 116060 }, { "epoch": 18.93393148450245, "grad_norm": 0.0012518571456894279, "learning_rate": 8.632651383694513e-06, "loss": 0.0011, "num_input_tokens_seen": 250721360, "step": 116065 }, { "epoch": 18.9347471451876, "grad_norm": 0.05422195792198181, "learning_rate": 8.619486650022768e-06, "loss": 0.0024, "num_input_tokens_seen": 250732112, "step": 116070 }, { "epoch": 18.935562805872756, "grad_norm": 0.0015667621046304703, "learning_rate": 8.606331874800421e-06, "loss": 0.0022, "num_input_tokens_seen": 250742384, "step": 116075 }, { "epoch": 18.936378466557912, "grad_norm": 0.0554991140961647, "learning_rate": 8.593187058294205e-06, "loss": 0.0018, "num_input_tokens_seen": 250752048, "step": 116080 }, { "epoch": 18.937194127243067, "grad_norm": 0.005774365738034248, "learning_rate": 8.580052200770405e-06, "loss": 0.0054, "num_input_tokens_seen": 250762704, "step": 116085 }, { "epoch": 18.938009787928223, "grad_norm": 0.00019944304949603975, "learning_rate": 8.566927302495254e-06, "loss": 0.001, "num_input_tokens_seen": 250772336, "step": 116090 }, { "epoch": 18.938825448613375, "grad_norm": 0.005691157653927803, "learning_rate": 8.553812363734759e-06, "loss": 0.0025, "num_input_tokens_seen": 250783184, "step": 116095 }, { "epoch": 18.93964110929853, "grad_norm": 0.0055681378580629826, "learning_rate": 8.54070738475471e-06, "loss": 0.001, "num_input_tokens_seen": 250793360, "step": 116100 }, { "epoch": 18.940456769983687, "grad_norm": 0.0003302092372905463, "learning_rate": 8.527612365820613e-06, "loss": 0.0007, "num_input_tokens_seen": 250804880, "step": 116105 }, { "epoch": 18.941272430668842, "grad_norm": 0.002624097280204296, "learning_rate": 8.514527307198038e-06, "loss": 0.0005, "num_input_tokens_seen": 250814416, "step": 116110 }, { "epoch": 18.942088091353998, "grad_norm": 0.007586228661239147, "learning_rate": 8.501452209151995e-06, "loss": 0.0032, "num_input_tokens_seen": 250825552, "step": 116115 }, { "epoch": 18.94290375203915, "grad_norm": 0.0007885852828621864, "learning_rate": 8.488387071947601e-06, "loss": 0.0006, "num_input_tokens_seen": 250836336, "step": 116120 }, { "epoch": 18.943719412724306, "grad_norm": 0.04808273911476135, "learning_rate": 8.47533189584948e-06, "loss": 0.0021, "num_input_tokens_seen": 250847984, "step": 116125 }, { "epoch": 18.94453507340946, "grad_norm": 0.00023655618133489043, "learning_rate": 8.46228668112231e-06, "loss": 0.001, "num_input_tokens_seen": 250858512, "step": 116130 }, { "epoch": 18.945350734094617, "grad_norm": 0.0001312753011006862, "learning_rate": 8.449251428030492e-06, "loss": 0.0012, "num_input_tokens_seen": 250869776, "step": 116135 }, { "epoch": 18.946166394779773, "grad_norm": 0.00039740095962770283, "learning_rate": 8.436226136838198e-06, "loss": 0.0004, "num_input_tokens_seen": 250880656, "step": 116140 }, { "epoch": 18.946982055464925, "grad_norm": 0.003538029734045267, "learning_rate": 8.423210807809333e-06, "loss": 0.0029, "num_input_tokens_seen": 250891376, "step": 116145 }, { "epoch": 18.94779771615008, "grad_norm": 0.00038812385173514485, "learning_rate": 8.410205441207741e-06, "loss": 0.0501, "num_input_tokens_seen": 250903600, "step": 116150 }, { "epoch": 18.948613376835237, "grad_norm": 0.09095561504364014, "learning_rate": 8.397210037296931e-06, "loss": 0.0016, "num_input_tokens_seen": 250914896, "step": 116155 }, { "epoch": 18.949429037520392, "grad_norm": 0.9006114602088928, "learning_rate": 8.384224596340306e-06, "loss": 0.138, "num_input_tokens_seen": 250924848, "step": 116160 }, { "epoch": 18.950244698205548, "grad_norm": 0.1281556338071823, "learning_rate": 8.371249118601043e-06, "loss": 0.0046, "num_input_tokens_seen": 250935472, "step": 116165 }, { "epoch": 18.9510603588907, "grad_norm": 0.011393179185688496, "learning_rate": 8.358283604342098e-06, "loss": 0.0012, "num_input_tokens_seen": 250946960, "step": 116170 }, { "epoch": 18.951876019575856, "grad_norm": 0.0008362371590919793, "learning_rate": 8.345328053826207e-06, "loss": 0.0011, "num_input_tokens_seen": 250957040, "step": 116175 }, { "epoch": 18.95269168026101, "grad_norm": 0.00016408613009843975, "learning_rate": 8.33238246731599e-06, "loss": 0.0033, "num_input_tokens_seen": 250968496, "step": 116180 }, { "epoch": 18.953507340946167, "grad_norm": 0.0007871238049119711, "learning_rate": 8.319446845073741e-06, "loss": 0.0002, "num_input_tokens_seen": 250979600, "step": 116185 }, { "epoch": 18.954323001631323, "grad_norm": 0.007136243861168623, "learning_rate": 8.306521187361638e-06, "loss": 0.0005, "num_input_tokens_seen": 250989200, "step": 116190 }, { "epoch": 18.955138662316475, "grad_norm": 0.20850905776023865, "learning_rate": 8.293605494441636e-06, "loss": 0.0036, "num_input_tokens_seen": 250999824, "step": 116195 }, { "epoch": 18.95595432300163, "grad_norm": 0.0009187610703520477, "learning_rate": 8.280699766575528e-06, "loss": 0.0006, "num_input_tokens_seen": 251010576, "step": 116200 }, { "epoch": 18.956769983686787, "grad_norm": 0.08381669223308563, "learning_rate": 8.26780400402477e-06, "loss": 0.0037, "num_input_tokens_seen": 251021808, "step": 116205 }, { "epoch": 18.957585644371942, "grad_norm": 0.027391066774725914, "learning_rate": 8.254918207050821e-06, "loss": 0.0015, "num_input_tokens_seen": 251033168, "step": 116210 }, { "epoch": 18.958401305057095, "grad_norm": 0.002228178782388568, "learning_rate": 8.242042375914748e-06, "loss": 0.0003, "num_input_tokens_seen": 251044976, "step": 116215 }, { "epoch": 18.95921696574225, "grad_norm": 0.00165777956135571, "learning_rate": 8.229176510877512e-06, "loss": 0.0018, "num_input_tokens_seen": 251056816, "step": 116220 }, { "epoch": 18.960032626427406, "grad_norm": 0.008388200774788857, "learning_rate": 8.216320612199901e-06, "loss": 0.0007, "num_input_tokens_seen": 251067568, "step": 116225 }, { "epoch": 18.96084828711256, "grad_norm": 0.008720295503735542, "learning_rate": 8.203474680142431e-06, "loss": 0.0083, "num_input_tokens_seen": 251077456, "step": 116230 }, { "epoch": 18.961663947797717, "grad_norm": 0.007424628362059593, "learning_rate": 8.190638714965393e-06, "loss": 0.0012, "num_input_tokens_seen": 251089232, "step": 116235 }, { "epoch": 18.96247960848287, "grad_norm": 0.005576164927333593, "learning_rate": 8.177812716928967e-06, "loss": 0.0004, "num_input_tokens_seen": 251099536, "step": 116240 }, { "epoch": 18.963295269168025, "grad_norm": 0.00031054625287652016, "learning_rate": 8.164996686293114e-06, "loss": 0.0009, "num_input_tokens_seen": 251110128, "step": 116245 }, { "epoch": 18.96411092985318, "grad_norm": 0.016679290682077408, "learning_rate": 8.152190623317569e-06, "loss": 0.0014, "num_input_tokens_seen": 251120368, "step": 116250 }, { "epoch": 18.964926590538337, "grad_norm": 0.07447069138288498, "learning_rate": 8.13939452826179e-06, "loss": 0.0028, "num_input_tokens_seen": 251130672, "step": 116255 }, { "epoch": 18.965742251223492, "grad_norm": 0.00019847380463033915, "learning_rate": 8.126608401385183e-06, "loss": 0.0708, "num_input_tokens_seen": 251141264, "step": 116260 }, { "epoch": 18.966557911908644, "grad_norm": 0.002280977787449956, "learning_rate": 8.113832242946818e-06, "loss": 0.005, "num_input_tokens_seen": 251153040, "step": 116265 }, { "epoch": 18.9673735725938, "grad_norm": 0.0031850580126047134, "learning_rate": 8.101066053205653e-06, "loss": 0.0013, "num_input_tokens_seen": 251163504, "step": 116270 }, { "epoch": 18.968189233278956, "grad_norm": 0.06683320552110672, "learning_rate": 8.08830983242037e-06, "loss": 0.0042, "num_input_tokens_seen": 251174480, "step": 116275 }, { "epoch": 18.96900489396411, "grad_norm": 0.0011318209581077099, "learning_rate": 8.0755635808496e-06, "loss": 0.001, "num_input_tokens_seen": 251185200, "step": 116280 }, { "epoch": 18.969820554649267, "grad_norm": 0.005109846591949463, "learning_rate": 8.062827298751518e-06, "loss": 0.0011, "num_input_tokens_seen": 251197904, "step": 116285 }, { "epoch": 18.97063621533442, "grad_norm": 0.00022063420328777283, "learning_rate": 8.050100986384312e-06, "loss": 0.0029, "num_input_tokens_seen": 251207984, "step": 116290 }, { "epoch": 18.971451876019575, "grad_norm": 0.006765791680663824, "learning_rate": 8.037384644005941e-06, "loss": 0.0023, "num_input_tokens_seen": 251218704, "step": 116295 }, { "epoch": 18.97226753670473, "grad_norm": 0.0005463669076561928, "learning_rate": 8.024678271874031e-06, "loss": 0.0189, "num_input_tokens_seen": 251229136, "step": 116300 }, { "epoch": 18.973083197389887, "grad_norm": 0.006524787284433842, "learning_rate": 8.011981870246099e-06, "loss": 0.0029, "num_input_tokens_seen": 251240272, "step": 116305 }, { "epoch": 18.973898858075042, "grad_norm": 0.026674775406718254, "learning_rate": 7.99929543937955e-06, "loss": 0.0042, "num_input_tokens_seen": 251250800, "step": 116310 }, { "epoch": 18.974714518760194, "grad_norm": 0.004923704545944929, "learning_rate": 7.9866189795314e-06, "loss": 0.0007, "num_input_tokens_seen": 251260720, "step": 116315 }, { "epoch": 18.97553017944535, "grad_norm": 0.0006612880388274789, "learning_rate": 7.973952490958559e-06, "loss": 0.0016, "num_input_tokens_seen": 251272080, "step": 116320 }, { "epoch": 18.976345840130506, "grad_norm": 0.40225768089294434, "learning_rate": 7.961295973917759e-06, "loss": 0.0155, "num_input_tokens_seen": 251282800, "step": 116325 }, { "epoch": 18.97716150081566, "grad_norm": 0.023114413022994995, "learning_rate": 7.948649428665522e-06, "loss": 0.001, "num_input_tokens_seen": 251293648, "step": 116330 }, { "epoch": 18.977977161500817, "grad_norm": 0.003399110399186611, "learning_rate": 7.936012855458085e-06, "loss": 0.0008, "num_input_tokens_seen": 251304368, "step": 116335 }, { "epoch": 18.97879282218597, "grad_norm": 0.01887008547782898, "learning_rate": 7.923386254551523e-06, "loss": 0.0142, "num_input_tokens_seen": 251314992, "step": 116340 }, { "epoch": 18.979608482871125, "grad_norm": 0.01707925647497177, "learning_rate": 7.910769626201908e-06, "loss": 0.0153, "num_input_tokens_seen": 251325232, "step": 116345 }, { "epoch": 18.98042414355628, "grad_norm": 0.00015691196313127875, "learning_rate": 7.898162970664702e-06, "loss": 0.0007, "num_input_tokens_seen": 251335792, "step": 116350 }, { "epoch": 18.981239804241437, "grad_norm": 0.004649725742638111, "learning_rate": 7.88556628819559e-06, "loss": 0.0005, "num_input_tokens_seen": 251347120, "step": 116355 }, { "epoch": 18.982055464926592, "grad_norm": 0.002170866122469306, "learning_rate": 7.872979579049644e-06, "loss": 0.0102, "num_input_tokens_seen": 251357872, "step": 116360 }, { "epoch": 18.982871125611744, "grad_norm": 0.0008006882853806019, "learning_rate": 7.860402843482218e-06, "loss": 0.0018, "num_input_tokens_seen": 251368592, "step": 116365 }, { "epoch": 18.9836867862969, "grad_norm": 0.023480042815208435, "learning_rate": 7.847836081747939e-06, "loss": 0.0038, "num_input_tokens_seen": 251380112, "step": 116370 }, { "epoch": 18.984502446982056, "grad_norm": 0.009893891401588917, "learning_rate": 7.83527929410166e-06, "loss": 0.0019, "num_input_tokens_seen": 251392016, "step": 116375 }, { "epoch": 18.98531810766721, "grad_norm": 0.004652200732380152, "learning_rate": 7.822732480797734e-06, "loss": 0.0044, "num_input_tokens_seen": 251402384, "step": 116380 }, { "epoch": 18.986133768352367, "grad_norm": 0.0024909900967031717, "learning_rate": 7.810195642090568e-06, "loss": 0.0038, "num_input_tokens_seen": 251413136, "step": 116385 }, { "epoch": 18.98694942903752, "grad_norm": 0.0004570172750391066, "learning_rate": 7.797668778234179e-06, "loss": 0.0031, "num_input_tokens_seen": 251424240, "step": 116390 }, { "epoch": 18.987765089722675, "grad_norm": 0.0004380632599350065, "learning_rate": 7.785151889482422e-06, "loss": 0.0006, "num_input_tokens_seen": 251435472, "step": 116395 }, { "epoch": 18.98858075040783, "grad_norm": 0.0006351694464683533, "learning_rate": 7.772644976088982e-06, "loss": 0.0012, "num_input_tokens_seen": 251446768, "step": 116400 }, { "epoch": 18.989396411092986, "grad_norm": 0.014859266579151154, "learning_rate": 7.760148038307324e-06, "loss": 0.0065, "num_input_tokens_seen": 251457808, "step": 116405 }, { "epoch": 18.99021207177814, "grad_norm": 0.004753198474645615, "learning_rate": 7.747661076390688e-06, "loss": 0.0008, "num_input_tokens_seen": 251469936, "step": 116410 }, { "epoch": 18.991027732463294, "grad_norm": 0.011917391791939735, "learning_rate": 7.735184090592206e-06, "loss": 0.0025, "num_input_tokens_seen": 251480336, "step": 116415 }, { "epoch": 18.99184339314845, "grad_norm": 0.0011199692962691188, "learning_rate": 7.722717081164677e-06, "loss": 0.0007, "num_input_tokens_seen": 251490448, "step": 116420 }, { "epoch": 18.992659053833606, "grad_norm": 0.0011024402920156717, "learning_rate": 7.710260048360784e-06, "loss": 0.0038, "num_input_tokens_seen": 251501072, "step": 116425 }, { "epoch": 18.99347471451876, "grad_norm": 0.006140429060906172, "learning_rate": 7.697812992432996e-06, "loss": 0.001, "num_input_tokens_seen": 251513648, "step": 116430 }, { "epoch": 18.994290375203914, "grad_norm": 0.06212307885289192, "learning_rate": 7.685375913633607e-06, "loss": 0.001, "num_input_tokens_seen": 251523248, "step": 116435 }, { "epoch": 18.99510603588907, "grad_norm": 0.02086738497018814, "learning_rate": 7.67294881221453e-06, "loss": 0.0007, "num_input_tokens_seen": 251534256, "step": 116440 }, { "epoch": 18.995921696574225, "grad_norm": 0.004113317932933569, "learning_rate": 7.660531688427729e-06, "loss": 0.0007, "num_input_tokens_seen": 251543632, "step": 116445 }, { "epoch": 18.99673735725938, "grad_norm": 0.0009120566537603736, "learning_rate": 7.648124542524892e-06, "loss": 0.0005, "num_input_tokens_seen": 251554128, "step": 116450 }, { "epoch": 18.997553017944536, "grad_norm": 0.06423819810152054, "learning_rate": 7.635727374757318e-06, "loss": 0.0035, "num_input_tokens_seen": 251563728, "step": 116455 }, { "epoch": 18.99836867862969, "grad_norm": 0.00021028223272878677, "learning_rate": 7.623340185376415e-06, "loss": 0.0014, "num_input_tokens_seen": 251575088, "step": 116460 }, { "epoch": 18.999184339314844, "grad_norm": 0.021248627454042435, "learning_rate": 7.6109629746330955e-06, "loss": 0.0011, "num_input_tokens_seen": 251585328, "step": 116465 }, { "epoch": 19.0, "grad_norm": 0.0254563819617033, "learning_rate": 7.5985957427782695e-06, "loss": 0.0019, "num_input_tokens_seen": 251594480, "step": 116470 }, { "epoch": 19.0, "eval_loss": 0.3440645635128021, "eval_runtime": 103.9658, "eval_samples_per_second": 26.211, "eval_steps_per_second": 6.56, "num_input_tokens_seen": 251594480, "step": 116470 }, { "epoch": 19.000815660685156, "grad_norm": 0.001239095930941403, "learning_rate": 7.5862384900625135e-06, "loss": 0.0035, "num_input_tokens_seen": 251605456, "step": 116475 }, { "epoch": 19.00163132137031, "grad_norm": 0.0003935934801120311, "learning_rate": 7.573891216736406e-06, "loss": 0.0265, "num_input_tokens_seen": 251616656, "step": 116480 }, { "epoch": 19.002446982055464, "grad_norm": 0.0005455015343613923, "learning_rate": 7.561553923049969e-06, "loss": 0.0006, "num_input_tokens_seen": 251627472, "step": 116485 }, { "epoch": 19.00326264274062, "grad_norm": 0.00020809544366784394, "learning_rate": 7.549226609253446e-06, "loss": 0.0016, "num_input_tokens_seen": 251638288, "step": 116490 }, { "epoch": 19.004078303425775, "grad_norm": 0.12179408222436905, "learning_rate": 7.536909275596471e-06, "loss": 0.0041, "num_input_tokens_seen": 251649104, "step": 116495 }, { "epoch": 19.00489396411093, "grad_norm": 0.0023868621792644262, "learning_rate": 7.524601922328844e-06, "loss": 0.0003, "num_input_tokens_seen": 251659568, "step": 116500 }, { "epoch": 19.005709624796086, "grad_norm": 0.001443555229343474, "learning_rate": 7.512304549699811e-06, "loss": 0.0003, "num_input_tokens_seen": 251670416, "step": 116505 }, { "epoch": 19.00652528548124, "grad_norm": 0.05254826694726944, "learning_rate": 7.500017157958838e-06, "loss": 0.0148, "num_input_tokens_seen": 251682224, "step": 116510 }, { "epoch": 19.007340946166394, "grad_norm": 0.008038941770792007, "learning_rate": 7.487739747354672e-06, "loss": 0.0013, "num_input_tokens_seen": 251693072, "step": 116515 }, { "epoch": 19.00815660685155, "grad_norm": 0.0005029493477195501, "learning_rate": 7.475472318136334e-06, "loss": 0.0017, "num_input_tokens_seen": 251703728, "step": 116520 }, { "epoch": 19.008972267536706, "grad_norm": 0.025066372007131577, "learning_rate": 7.4632148705522374e-06, "loss": 0.0015, "num_input_tokens_seen": 251715472, "step": 116525 }, { "epoch": 19.00978792822186, "grad_norm": 0.012049240060150623, "learning_rate": 7.450967404851017e-06, "loss": 0.0009, "num_input_tokens_seen": 251726032, "step": 116530 }, { "epoch": 19.010603588907014, "grad_norm": 0.020044559612870216, "learning_rate": 7.438729921280752e-06, "loss": 0.0008, "num_input_tokens_seen": 251736656, "step": 116535 }, { "epoch": 19.01141924959217, "grad_norm": 0.007238124031573534, "learning_rate": 7.42650242008952e-06, "loss": 0.0008, "num_input_tokens_seen": 251747216, "step": 116540 }, { "epoch": 19.012234910277325, "grad_norm": 0.003442551242187619, "learning_rate": 7.41428490152507e-06, "loss": 0.0007, "num_input_tokens_seen": 251757424, "step": 116545 }, { "epoch": 19.01305057096248, "grad_norm": 0.0030905790627002716, "learning_rate": 7.402077365835036e-06, "loss": 0.0016, "num_input_tokens_seen": 251769072, "step": 116550 }, { "epoch": 19.013866231647636, "grad_norm": 0.006709754001349211, "learning_rate": 7.389879813266831e-06, "loss": 0.0004, "num_input_tokens_seen": 251780240, "step": 116555 }, { "epoch": 19.01468189233279, "grad_norm": 0.00024487529299221933, "learning_rate": 7.377692244067591e-06, "loss": 0.0009, "num_input_tokens_seen": 251791344, "step": 116560 }, { "epoch": 19.015497553017944, "grad_norm": 0.002104366896674037, "learning_rate": 7.36551465848434e-06, "loss": 0.0005, "num_input_tokens_seen": 251803216, "step": 116565 }, { "epoch": 19.0163132137031, "grad_norm": 0.00022835972777102143, "learning_rate": 7.353347056763937e-06, "loss": 0.0009, "num_input_tokens_seen": 251814864, "step": 116570 }, { "epoch": 19.017128874388256, "grad_norm": 0.0004103815299458802, "learning_rate": 7.341189439152907e-06, "loss": 0.0003, "num_input_tokens_seen": 251825680, "step": 116575 }, { "epoch": 19.017944535073408, "grad_norm": 0.00012124201748520136, "learning_rate": 7.329041805897551e-06, "loss": 0.0027, "num_input_tokens_seen": 251836016, "step": 116580 }, { "epoch": 19.018760195758563, "grad_norm": 0.03281310573220253, "learning_rate": 7.316904157244342e-06, "loss": 0.002, "num_input_tokens_seen": 251847536, "step": 116585 }, { "epoch": 19.01957585644372, "grad_norm": 0.0035343714989721775, "learning_rate": 7.304776493438914e-06, "loss": 0.0004, "num_input_tokens_seen": 251859440, "step": 116590 }, { "epoch": 19.020391517128875, "grad_norm": 0.0038738809525966644, "learning_rate": 7.2926588147273484e-06, "loss": 0.002, "num_input_tokens_seen": 251870352, "step": 116595 }, { "epoch": 19.02120717781403, "grad_norm": 0.0016290287021547556, "learning_rate": 7.280551121355005e-06, "loss": 0.0005, "num_input_tokens_seen": 251881584, "step": 116600 }, { "epoch": 19.022022838499183, "grad_norm": 0.07534030079841614, "learning_rate": 7.268453413567467e-06, "loss": 0.0019, "num_input_tokens_seen": 251891568, "step": 116605 }, { "epoch": 19.02283849918434, "grad_norm": 0.5721881985664368, "learning_rate": 7.256365691609645e-06, "loss": 0.0873, "num_input_tokens_seen": 251900272, "step": 116610 }, { "epoch": 19.023654159869494, "grad_norm": 0.005474581383168697, "learning_rate": 7.244287955726791e-06, "loss": 0.1363, "num_input_tokens_seen": 251911728, "step": 116615 }, { "epoch": 19.02446982055465, "grad_norm": 0.0002741733333095908, "learning_rate": 7.232220206163431e-06, "loss": 0.0019, "num_input_tokens_seen": 251922128, "step": 116620 }, { "epoch": 19.025285481239806, "grad_norm": 0.008602812886238098, "learning_rate": 7.220162443164369e-06, "loss": 0.001, "num_input_tokens_seen": 251932400, "step": 116625 }, { "epoch": 19.026101141924958, "grad_norm": 0.0025156764313578606, "learning_rate": 7.2081146669737416e-06, "loss": 0.0005, "num_input_tokens_seen": 251942416, "step": 116630 }, { "epoch": 19.026916802610113, "grad_norm": 0.0007300799479708076, "learning_rate": 7.196076877835911e-06, "loss": 0.0058, "num_input_tokens_seen": 251954064, "step": 116635 }, { "epoch": 19.02773246329527, "grad_norm": 0.0036786433774977922, "learning_rate": 7.1840490759946805e-06, "loss": 0.0021, "num_input_tokens_seen": 251964848, "step": 116640 }, { "epoch": 19.028548123980425, "grad_norm": 0.0011658791918307543, "learning_rate": 7.172031261693967e-06, "loss": 0.0033, "num_input_tokens_seen": 251974736, "step": 116645 }, { "epoch": 19.02936378466558, "grad_norm": 0.001465832581743598, "learning_rate": 7.160023435177132e-06, "loss": 0.0008, "num_input_tokens_seen": 251987024, "step": 116650 }, { "epoch": 19.030179445350733, "grad_norm": 0.00024204261717386544, "learning_rate": 7.148025596687702e-06, "loss": 0.0222, "num_input_tokens_seen": 251997648, "step": 116655 }, { "epoch": 19.03099510603589, "grad_norm": 0.0005633268738165498, "learning_rate": 7.136037746468704e-06, "loss": 0.0016, "num_input_tokens_seen": 252008784, "step": 116660 }, { "epoch": 19.031810766721044, "grad_norm": 0.001010783831588924, "learning_rate": 7.124059884763168e-06, "loss": 0.0007, "num_input_tokens_seen": 252018992, "step": 116665 }, { "epoch": 19.0326264274062, "grad_norm": 0.008563019335269928, "learning_rate": 7.112092011813842e-06, "loss": 0.0015, "num_input_tokens_seen": 252029328, "step": 116670 }, { "epoch": 19.033442088091356, "grad_norm": 0.0015962064499035478, "learning_rate": 7.1001341278632e-06, "loss": 0.0013, "num_input_tokens_seen": 252040144, "step": 116675 }, { "epoch": 19.034257748776508, "grad_norm": 0.00032261203159578145, "learning_rate": 7.08818623315366e-06, "loss": 0.0025, "num_input_tokens_seen": 252051472, "step": 116680 }, { "epoch": 19.035073409461663, "grad_norm": 0.003594001056626439, "learning_rate": 7.076248327927359e-06, "loss": 0.0007, "num_input_tokens_seen": 252062352, "step": 116685 }, { "epoch": 19.03588907014682, "grad_norm": 0.005797537509351969, "learning_rate": 7.064320412426162e-06, "loss": 0.0077, "num_input_tokens_seen": 252073904, "step": 116690 }, { "epoch": 19.036704730831975, "grad_norm": 0.0007113271858543158, "learning_rate": 7.052402486891818e-06, "loss": 0.0006, "num_input_tokens_seen": 252085264, "step": 116695 }, { "epoch": 19.03752039151713, "grad_norm": 0.0007374466513283551, "learning_rate": 7.040494551565912e-06, "loss": 0.0025, "num_input_tokens_seen": 252096240, "step": 116700 }, { "epoch": 19.038336052202283, "grad_norm": 0.009614666923880577, "learning_rate": 7.028596606689808e-06, "loss": 0.0029, "num_input_tokens_seen": 252106320, "step": 116705 }, { "epoch": 19.03915171288744, "grad_norm": 0.005013478919863701, "learning_rate": 7.016708652504477e-06, "loss": 0.0006, "num_input_tokens_seen": 252117136, "step": 116710 }, { "epoch": 19.039967373572594, "grad_norm": 0.00930885411798954, "learning_rate": 7.004830689251007e-06, "loss": 0.0008, "num_input_tokens_seen": 252128464, "step": 116715 }, { "epoch": 19.04078303425775, "grad_norm": 0.0016969816060736775, "learning_rate": 6.992962717170038e-06, "loss": 0.1444, "num_input_tokens_seen": 252138576, "step": 116720 }, { "epoch": 19.041598694942905, "grad_norm": 0.0009569272515363991, "learning_rate": 6.981104736502042e-06, "loss": 0.001, "num_input_tokens_seen": 252150448, "step": 116725 }, { "epoch": 19.042414355628058, "grad_norm": 0.00043876888230443, "learning_rate": 6.969256747487496e-06, "loss": 0.0009, "num_input_tokens_seen": 252161424, "step": 116730 }, { "epoch": 19.043230016313213, "grad_norm": 0.5943044424057007, "learning_rate": 6.957418750366318e-06, "loss": 0.0351, "num_input_tokens_seen": 252173424, "step": 116735 }, { "epoch": 19.04404567699837, "grad_norm": 0.024856556206941605, "learning_rate": 6.945590745378594e-06, "loss": 0.0016, "num_input_tokens_seen": 252184656, "step": 116740 }, { "epoch": 19.044861337683525, "grad_norm": 0.03356698527932167, "learning_rate": 6.9337727327639096e-06, "loss": 0.0014, "num_input_tokens_seen": 252196880, "step": 116745 }, { "epoch": 19.045676998368677, "grad_norm": 0.31922638416290283, "learning_rate": 6.921964712761853e-06, "loss": 0.0058, "num_input_tokens_seen": 252206704, "step": 116750 }, { "epoch": 19.046492659053833, "grad_norm": 0.004116969183087349, "learning_rate": 6.910166685611674e-06, "loss": 0.0012, "num_input_tokens_seen": 252216912, "step": 116755 }, { "epoch": 19.04730831973899, "grad_norm": 0.001806379295885563, "learning_rate": 6.898378651552517e-06, "loss": 0.0013, "num_input_tokens_seen": 252227984, "step": 116760 }, { "epoch": 19.048123980424144, "grad_norm": 0.02706415392458439, "learning_rate": 6.88660061082319e-06, "loss": 0.0032, "num_input_tokens_seen": 252238128, "step": 116765 }, { "epoch": 19.0489396411093, "grad_norm": 0.0019197918009012938, "learning_rate": 6.874832563662559e-06, "loss": 0.0013, "num_input_tokens_seen": 252247024, "step": 116770 }, { "epoch": 19.049755301794452, "grad_norm": 0.01021169126033783, "learning_rate": 6.863074510308931e-06, "loss": 0.0028, "num_input_tokens_seen": 252258608, "step": 116775 }, { "epoch": 19.050570962479608, "grad_norm": 0.06623980402946472, "learning_rate": 6.851326451000783e-06, "loss": 0.0032, "num_input_tokens_seen": 252269136, "step": 116780 }, { "epoch": 19.051386623164763, "grad_norm": 0.005052113905549049, "learning_rate": 6.839588385976036e-06, "loss": 0.0018, "num_input_tokens_seen": 252278928, "step": 116785 }, { "epoch": 19.05220228384992, "grad_norm": 0.02421603351831436, "learning_rate": 6.827860315472667e-06, "loss": 0.0016, "num_input_tokens_seen": 252289040, "step": 116790 }, { "epoch": 19.053017944535075, "grad_norm": 0.0010228599421679974, "learning_rate": 6.816142239728373e-06, "loss": 0.0004, "num_input_tokens_seen": 252297680, "step": 116795 }, { "epoch": 19.053833605220227, "grad_norm": 0.013187268748879433, "learning_rate": 6.804434158980577e-06, "loss": 0.0033, "num_input_tokens_seen": 252308176, "step": 116800 }, { "epoch": 19.054649265905383, "grad_norm": 0.0018919931026175618, "learning_rate": 6.792736073466587e-06, "loss": 0.0012, "num_input_tokens_seen": 252319024, "step": 116805 }, { "epoch": 19.05546492659054, "grad_norm": 0.004553182981908321, "learning_rate": 6.781047983423439e-06, "loss": 0.0004, "num_input_tokens_seen": 252329360, "step": 116810 }, { "epoch": 19.056280587275694, "grad_norm": 0.005015052855014801, "learning_rate": 6.769369889088106e-06, "loss": 0.0007, "num_input_tokens_seen": 252338800, "step": 116815 }, { "epoch": 19.05709624796085, "grad_norm": 0.002011285861954093, "learning_rate": 6.75770179069718e-06, "loss": 0.003, "num_input_tokens_seen": 252348848, "step": 116820 }, { "epoch": 19.057911908646002, "grad_norm": 0.0005088383913971484, "learning_rate": 6.746043688487136e-06, "loss": 0.0004, "num_input_tokens_seen": 252360048, "step": 116825 }, { "epoch": 19.058727569331158, "grad_norm": 0.027116188779473305, "learning_rate": 6.734395582694286e-06, "loss": 0.0039, "num_input_tokens_seen": 252371472, "step": 116830 }, { "epoch": 19.059543230016313, "grad_norm": 0.030280839651823044, "learning_rate": 6.722757473554608e-06, "loss": 0.0017, "num_input_tokens_seen": 252381520, "step": 116835 }, { "epoch": 19.06035889070147, "grad_norm": 0.004732145462185144, "learning_rate": 6.71112936130408e-06, "loss": 0.0027, "num_input_tokens_seen": 252391536, "step": 116840 }, { "epoch": 19.061174551386625, "grad_norm": 0.025895684957504272, "learning_rate": 6.6995112461782355e-06, "loss": 0.0015, "num_input_tokens_seen": 252402416, "step": 116845 }, { "epoch": 19.061990212071777, "grad_norm": 0.00530358636751771, "learning_rate": 6.6879031284126646e-06, "loss": 0.0046, "num_input_tokens_seen": 252413008, "step": 116850 }, { "epoch": 19.062805872756933, "grad_norm": 0.0013073545414954424, "learning_rate": 6.676305008242512e-06, "loss": 0.0017, "num_input_tokens_seen": 252424848, "step": 116855 }, { "epoch": 19.063621533442088, "grad_norm": 0.00042659181053750217, "learning_rate": 6.664716885902811e-06, "loss": 0.0022, "num_input_tokens_seen": 252435120, "step": 116860 }, { "epoch": 19.064437194127244, "grad_norm": 0.001548528904095292, "learning_rate": 6.653138761628541e-06, "loss": 0.0036, "num_input_tokens_seen": 252446320, "step": 116865 }, { "epoch": 19.0652528548124, "grad_norm": 0.04929887875914574, "learning_rate": 6.641570635654182e-06, "loss": 0.0008, "num_input_tokens_seen": 252456944, "step": 116870 }, { "epoch": 19.06606851549755, "grad_norm": 0.0002772485022433102, "learning_rate": 6.630012508214322e-06, "loss": 0.005, "num_input_tokens_seen": 252466736, "step": 116875 }, { "epoch": 19.066884176182707, "grad_norm": 0.00854497030377388, "learning_rate": 6.618464379543166e-06, "loss": 0.0027, "num_input_tokens_seen": 252477328, "step": 116880 }, { "epoch": 19.067699836867863, "grad_norm": 0.0005079619586467743, "learning_rate": 6.6069262498746895e-06, "loss": 0.0028, "num_input_tokens_seen": 252487664, "step": 116885 }, { "epoch": 19.06851549755302, "grad_norm": 0.003630138235166669, "learning_rate": 6.595398119442764e-06, "loss": 0.0008, "num_input_tokens_seen": 252499248, "step": 116890 }, { "epoch": 19.069331158238175, "grad_norm": 0.004067980218678713, "learning_rate": 6.583879988481034e-06, "loss": 0.0004, "num_input_tokens_seen": 252509584, "step": 116895 }, { "epoch": 19.070146818923327, "grad_norm": 0.0006684943800792098, "learning_rate": 6.572371857222925e-06, "loss": 0.0007, "num_input_tokens_seen": 252520816, "step": 116900 }, { "epoch": 19.070962479608482, "grad_norm": 0.00023432802117895335, "learning_rate": 6.560873725901695e-06, "loss": 0.0021, "num_input_tokens_seen": 252532336, "step": 116905 }, { "epoch": 19.071778140293638, "grad_norm": 0.002191467909142375, "learning_rate": 6.5493855947502674e-06, "loss": 0.0008, "num_input_tokens_seen": 252543344, "step": 116910 }, { "epoch": 19.072593800978794, "grad_norm": 0.014515231363475323, "learning_rate": 6.537907464001569e-06, "loss": 0.0007, "num_input_tokens_seen": 252553968, "step": 116915 }, { "epoch": 19.07340946166395, "grad_norm": 0.004022237379103899, "learning_rate": 6.5264393338881345e-06, "loss": 0.0005, "num_input_tokens_seen": 252564368, "step": 116920 }, { "epoch": 19.0742251223491, "grad_norm": 0.002793958643451333, "learning_rate": 6.514981204642445e-06, "loss": 0.0009, "num_input_tokens_seen": 252576304, "step": 116925 }, { "epoch": 19.075040783034257, "grad_norm": 0.08884984999895096, "learning_rate": 6.503533076496704e-06, "loss": 0.0028, "num_input_tokens_seen": 252586864, "step": 116930 }, { "epoch": 19.075856443719413, "grad_norm": 0.0009011936490423977, "learning_rate": 6.492094949682892e-06, "loss": 0.0003, "num_input_tokens_seen": 252597360, "step": 116935 }, { "epoch": 19.07667210440457, "grad_norm": 0.0028816265985369682, "learning_rate": 6.480666824432879e-06, "loss": 0.0006, "num_input_tokens_seen": 252607888, "step": 116940 }, { "epoch": 19.07748776508972, "grad_norm": 0.0019338749116286635, "learning_rate": 6.469248700978148e-06, "loss": 0.0009, "num_input_tokens_seen": 252619056, "step": 116945 }, { "epoch": 19.078303425774877, "grad_norm": 0.001564970356412232, "learning_rate": 6.457840579550234e-06, "loss": 0.0005, "num_input_tokens_seen": 252629776, "step": 116950 }, { "epoch": 19.079119086460032, "grad_norm": 0.0020377058535814285, "learning_rate": 6.4464424603802865e-06, "loss": 0.0003, "num_input_tokens_seen": 252641008, "step": 116955 }, { "epoch": 19.079934747145188, "grad_norm": 0.04820120707154274, "learning_rate": 6.435054343699287e-06, "loss": 0.0025, "num_input_tokens_seen": 252651408, "step": 116960 }, { "epoch": 19.080750407830344, "grad_norm": 0.01028536818921566, "learning_rate": 6.423676229738051e-06, "loss": 0.001, "num_input_tokens_seen": 252662160, "step": 116965 }, { "epoch": 19.081566068515496, "grad_norm": 0.00021261714573483914, "learning_rate": 6.412308118727117e-06, "loss": 0.0004, "num_input_tokens_seen": 252673456, "step": 116970 }, { "epoch": 19.08238172920065, "grad_norm": 0.005654108710587025, "learning_rate": 6.400950010896966e-06, "loss": 0.0016, "num_input_tokens_seen": 252686000, "step": 116975 }, { "epoch": 19.083197389885807, "grad_norm": 0.001454255892895162, "learning_rate": 6.389601906477693e-06, "loss": 0.0014, "num_input_tokens_seen": 252697776, "step": 116980 }, { "epoch": 19.084013050570963, "grad_norm": 0.0011134854285046458, "learning_rate": 6.378263805699391e-06, "loss": 0.0005, "num_input_tokens_seen": 252708272, "step": 116985 }, { "epoch": 19.08482871125612, "grad_norm": 0.015325321815907955, "learning_rate": 6.36693570879171e-06, "loss": 0.0019, "num_input_tokens_seen": 252718416, "step": 116990 }, { "epoch": 19.08564437194127, "grad_norm": 0.0018553230911493301, "learning_rate": 6.355617615984355e-06, "loss": 0.0018, "num_input_tokens_seen": 252729968, "step": 116995 }, { "epoch": 19.086460032626427, "grad_norm": 0.001695129438303411, "learning_rate": 6.344309527506587e-06, "loss": 0.0021, "num_input_tokens_seen": 252740400, "step": 117000 }, { "epoch": 19.087275693311582, "grad_norm": 0.01279063243418932, "learning_rate": 6.333011443587722e-06, "loss": 0.0022, "num_input_tokens_seen": 252750992, "step": 117005 }, { "epoch": 19.088091353996738, "grad_norm": 0.0003366192686371505, "learning_rate": 6.3217233644565216e-06, "loss": 0.0003, "num_input_tokens_seen": 252762224, "step": 117010 }, { "epoch": 19.088907014681894, "grad_norm": 0.04421667009592056, "learning_rate": 6.3104452903419704e-06, "loss": 0.0045, "num_input_tokens_seen": 252773872, "step": 117015 }, { "epoch": 19.089722675367046, "grad_norm": 0.0018370678881183267, "learning_rate": 6.299177221472496e-06, "loss": 0.0012, "num_input_tokens_seen": 252783792, "step": 117020 }, { "epoch": 19.0905383360522, "grad_norm": 0.07757820188999176, "learning_rate": 6.287919158076472e-06, "loss": 0.0023, "num_input_tokens_seen": 252794320, "step": 117025 }, { "epoch": 19.091353996737357, "grad_norm": 0.00017441553063690662, "learning_rate": 6.2766711003821035e-06, "loss": 0.0049, "num_input_tokens_seen": 252805552, "step": 117030 }, { "epoch": 19.092169657422513, "grad_norm": 0.0007293216185644269, "learning_rate": 6.265433048617375e-06, "loss": 0.0707, "num_input_tokens_seen": 252817648, "step": 117035 }, { "epoch": 19.09298531810767, "grad_norm": 0.0004739946161862463, "learning_rate": 6.254205003009938e-06, "loss": 0.0035, "num_input_tokens_seen": 252827408, "step": 117040 }, { "epoch": 19.09380097879282, "grad_norm": 0.013173624873161316, "learning_rate": 6.242986963787445e-06, "loss": 0.0009, "num_input_tokens_seen": 252838224, "step": 117045 }, { "epoch": 19.094616639477977, "grad_norm": 0.0021853481885045767, "learning_rate": 6.231778931177157e-06, "loss": 0.0003, "num_input_tokens_seen": 252849648, "step": 117050 }, { "epoch": 19.095432300163132, "grad_norm": 0.0002483553544152528, "learning_rate": 6.220580905406226e-06, "loss": 0.0011, "num_input_tokens_seen": 252860816, "step": 117055 }, { "epoch": 19.096247960848288, "grad_norm": 0.006167882587760687, "learning_rate": 6.209392886701692e-06, "loss": 0.0066, "num_input_tokens_seen": 252871088, "step": 117060 }, { "epoch": 19.097063621533444, "grad_norm": 0.013794321566820145, "learning_rate": 6.198214875290209e-06, "loss": 0.001, "num_input_tokens_seen": 252880816, "step": 117065 }, { "epoch": 19.097879282218596, "grad_norm": 0.0014956948580220342, "learning_rate": 6.187046871398316e-06, "loss": 0.0008, "num_input_tokens_seen": 252891472, "step": 117070 }, { "epoch": 19.09869494290375, "grad_norm": 0.044382814317941666, "learning_rate": 6.175888875252389e-06, "loss": 0.0022, "num_input_tokens_seen": 252902064, "step": 117075 }, { "epoch": 19.099510603588907, "grad_norm": 0.003115827450528741, "learning_rate": 6.1647408870785236e-06, "loss": 0.0006, "num_input_tokens_seen": 252912272, "step": 117080 }, { "epoch": 19.100326264274063, "grad_norm": 0.02964218147099018, "learning_rate": 6.1536029071025955e-06, "loss": 0.0044, "num_input_tokens_seen": 252923664, "step": 117085 }, { "epoch": 19.10114192495922, "grad_norm": 0.5666136741638184, "learning_rate": 6.142474935550535e-06, "loss": 0.0195, "num_input_tokens_seen": 252934128, "step": 117090 }, { "epoch": 19.10195758564437, "grad_norm": 0.011689902283251286, "learning_rate": 6.131356972647606e-06, "loss": 0.0006, "num_input_tokens_seen": 252944752, "step": 117095 }, { "epoch": 19.102773246329527, "grad_norm": 0.0005122054717503488, "learning_rate": 6.120249018619295e-06, "loss": 0.0049, "num_input_tokens_seen": 252955152, "step": 117100 }, { "epoch": 19.103588907014682, "grad_norm": 0.0004246353928465396, "learning_rate": 6.109151073690644e-06, "loss": 0.0035, "num_input_tokens_seen": 252966928, "step": 117105 }, { "epoch": 19.104404567699838, "grad_norm": 0.000601739389821887, "learning_rate": 6.0980631380866405e-06, "loss": 0.0015, "num_input_tokens_seen": 252977040, "step": 117110 }, { "epoch": 19.10522022838499, "grad_norm": 0.007016733754426241, "learning_rate": 6.086985212031881e-06, "loss": 0.0008, "num_input_tokens_seen": 252988688, "step": 117115 }, { "epoch": 19.106035889070146, "grad_norm": 0.0003089867241214961, "learning_rate": 6.075917295750965e-06, "loss": 0.0004, "num_input_tokens_seen": 252999824, "step": 117120 }, { "epoch": 19.1068515497553, "grad_norm": 0.0007841411279514432, "learning_rate": 6.064859389468158e-06, "loss": 0.002, "num_input_tokens_seen": 253010544, "step": 117125 }, { "epoch": 19.107667210440457, "grad_norm": 0.006227858830243349, "learning_rate": 6.053811493407613e-06, "loss": 0.0294, "num_input_tokens_seen": 253021456, "step": 117130 }, { "epoch": 19.108482871125613, "grad_norm": 0.0027969330549240112, "learning_rate": 6.04277360779315e-06, "loss": 0.0867, "num_input_tokens_seen": 253031632, "step": 117135 }, { "epoch": 19.109298531810765, "grad_norm": 0.012334640137851238, "learning_rate": 6.031745732848593e-06, "loss": 0.0036, "num_input_tokens_seen": 253043088, "step": 117140 }, { "epoch": 19.11011419249592, "grad_norm": 0.0017199370777234435, "learning_rate": 6.02072786879726e-06, "loss": 0.0014, "num_input_tokens_seen": 253055152, "step": 117145 }, { "epoch": 19.110929853181077, "grad_norm": 0.002734170528128743, "learning_rate": 6.009720015862585e-06, "loss": 0.0003, "num_input_tokens_seen": 253065968, "step": 117150 }, { "epoch": 19.111745513866232, "grad_norm": 0.01975974440574646, "learning_rate": 5.9987221742675566e-06, "loss": 0.0032, "num_input_tokens_seen": 253078416, "step": 117155 }, { "epoch": 19.112561174551388, "grad_norm": 0.003430173732340336, "learning_rate": 5.987734344235107e-06, "loss": 0.0006, "num_input_tokens_seen": 253089456, "step": 117160 }, { "epoch": 19.11337683523654, "grad_norm": 0.04491540789604187, "learning_rate": 5.976756525987948e-06, "loss": 0.003, "num_input_tokens_seen": 253100880, "step": 117165 }, { "epoch": 19.114192495921696, "grad_norm": 0.006507876794785261, "learning_rate": 5.965788719748566e-06, "loss": 0.0048, "num_input_tokens_seen": 253111280, "step": 117170 }, { "epoch": 19.11500815660685, "grad_norm": 0.5028917789459229, "learning_rate": 5.954830925739174e-06, "loss": 0.0135, "num_input_tokens_seen": 253121424, "step": 117175 }, { "epoch": 19.115823817292007, "grad_norm": 0.004586064722388983, "learning_rate": 5.943883144181872e-06, "loss": 0.0013, "num_input_tokens_seen": 253131920, "step": 117180 }, { "epoch": 19.116639477977163, "grad_norm": 0.006693511735647917, "learning_rate": 5.932945375298537e-06, "loss": 0.0064, "num_input_tokens_seen": 253142416, "step": 117185 }, { "epoch": 19.117455138662315, "grad_norm": 0.0018724793335422873, "learning_rate": 5.922017619310826e-06, "loss": 0.0028, "num_input_tokens_seen": 253152048, "step": 117190 }, { "epoch": 19.11827079934747, "grad_norm": 0.02574349008500576, "learning_rate": 5.911099876440173e-06, "loss": 0.0024, "num_input_tokens_seen": 253162480, "step": 117195 }, { "epoch": 19.119086460032626, "grad_norm": 0.024611355736851692, "learning_rate": 5.900192146907957e-06, "loss": 0.0021, "num_input_tokens_seen": 253172304, "step": 117200 }, { "epoch": 19.119902120717782, "grad_norm": 0.0015057043638080359, "learning_rate": 5.889294430935111e-06, "loss": 0.021, "num_input_tokens_seen": 253182832, "step": 117205 }, { "epoch": 19.120717781402938, "grad_norm": 0.01581161841750145, "learning_rate": 5.8784067287424584e-06, "loss": 0.0009, "num_input_tokens_seen": 253193776, "step": 117210 }, { "epoch": 19.12153344208809, "grad_norm": 0.0052831522189080715, "learning_rate": 5.8675290405508785e-06, "loss": 0.0006, "num_input_tokens_seen": 253203792, "step": 117215 }, { "epoch": 19.122349102773246, "grad_norm": 0.0007881993660703301, "learning_rate": 5.856661366580584e-06, "loss": 0.001, "num_input_tokens_seen": 253213776, "step": 117220 }, { "epoch": 19.1231647634584, "grad_norm": 0.03156473860144615, "learning_rate": 5.845803707051955e-06, "loss": 0.0077, "num_input_tokens_seen": 253223984, "step": 117225 }, { "epoch": 19.123980424143557, "grad_norm": 0.0015464631142094731, "learning_rate": 5.834956062184926e-06, "loss": 0.0004, "num_input_tokens_seen": 253233744, "step": 117230 }, { "epoch": 19.124796084828713, "grad_norm": 0.0007315798429772258, "learning_rate": 5.824118432199488e-06, "loss": 0.0012, "num_input_tokens_seen": 253243984, "step": 117235 }, { "epoch": 19.125611745513865, "grad_norm": 0.000989689608104527, "learning_rate": 5.813290817315131e-06, "loss": 0.0033, "num_input_tokens_seen": 253256176, "step": 117240 }, { "epoch": 19.12642740619902, "grad_norm": 0.8543770909309387, "learning_rate": 5.8024732177514585e-06, "loss": 0.0732, "num_input_tokens_seen": 253265616, "step": 117245 }, { "epoch": 19.127243066884176, "grad_norm": 0.6602007150650024, "learning_rate": 5.791665633727461e-06, "loss": 0.0358, "num_input_tokens_seen": 253276688, "step": 117250 }, { "epoch": 19.128058727569332, "grad_norm": 0.6810281872749329, "learning_rate": 5.780868065462408e-06, "loss": 0.0384, "num_input_tokens_seen": 253287792, "step": 117255 }, { "epoch": 19.128874388254488, "grad_norm": 0.0007883647922426462, "learning_rate": 5.770080513174958e-06, "loss": 0.0013, "num_input_tokens_seen": 253298512, "step": 117260 }, { "epoch": 19.12969004893964, "grad_norm": 0.002425673883408308, "learning_rate": 5.75930297708388e-06, "loss": 0.0028, "num_input_tokens_seen": 253309424, "step": 117265 }, { "epoch": 19.130505709624796, "grad_norm": 0.0005828720168210566, "learning_rate": 5.748535457407444e-06, "loss": 0.0017, "num_input_tokens_seen": 253319696, "step": 117270 }, { "epoch": 19.13132137030995, "grad_norm": 0.001128783798776567, "learning_rate": 5.737777954364032e-06, "loss": 0.0004, "num_input_tokens_seen": 253330192, "step": 117275 }, { "epoch": 19.132137030995107, "grad_norm": 0.0017047654837369919, "learning_rate": 5.727030468171468e-06, "loss": 0.0003, "num_input_tokens_seen": 253341264, "step": 117280 }, { "epoch": 19.13295269168026, "grad_norm": 0.049430977553129196, "learning_rate": 5.71629299904769e-06, "loss": 0.0017, "num_input_tokens_seen": 253352368, "step": 117285 }, { "epoch": 19.133768352365415, "grad_norm": 0.40919792652130127, "learning_rate": 5.705565547210301e-06, "loss": 0.0129, "num_input_tokens_seen": 253364208, "step": 117290 }, { "epoch": 19.13458401305057, "grad_norm": 0.001255987910553813, "learning_rate": 5.694848112876683e-06, "loss": 0.0006, "num_input_tokens_seen": 253375440, "step": 117295 }, { "epoch": 19.135399673735726, "grad_norm": 0.0008938516257330775, "learning_rate": 5.684140696263995e-06, "loss": 0.0008, "num_input_tokens_seen": 253385360, "step": 117300 }, { "epoch": 19.136215334420882, "grad_norm": 0.001965533709153533, "learning_rate": 5.673443297589287e-06, "loss": 0.0011, "num_input_tokens_seen": 253396016, "step": 117305 }, { "epoch": 19.137030995106034, "grad_norm": 0.03859832137823105, "learning_rate": 5.662755917069384e-06, "loss": 0.0035, "num_input_tokens_seen": 253407152, "step": 117310 }, { "epoch": 19.13784665579119, "grad_norm": 0.006483915261924267, "learning_rate": 5.652078554920836e-06, "loss": 0.0009, "num_input_tokens_seen": 253417136, "step": 117315 }, { "epoch": 19.138662316476346, "grad_norm": 0.0037106431555002928, "learning_rate": 5.6414112113600254e-06, "loss": 0.0051, "num_input_tokens_seen": 253427376, "step": 117320 }, { "epoch": 19.1394779771615, "grad_norm": 0.00029239041032269597, "learning_rate": 5.630753886603168e-06, "loss": 0.0006, "num_input_tokens_seen": 253437232, "step": 117325 }, { "epoch": 19.140293637846657, "grad_norm": 0.007669747807085514, "learning_rate": 5.6201065808662025e-06, "loss": 0.0008, "num_input_tokens_seen": 253448688, "step": 117330 }, { "epoch": 19.14110929853181, "grad_norm": 0.004348631016910076, "learning_rate": 5.609469294364955e-06, "loss": 0.0004, "num_input_tokens_seen": 253459248, "step": 117335 }, { "epoch": 19.141924959216965, "grad_norm": 0.0015068600187078118, "learning_rate": 5.598842027315032e-06, "loss": 0.0032, "num_input_tokens_seen": 253470960, "step": 117340 }, { "epoch": 19.14274061990212, "grad_norm": 0.0020891670137643814, "learning_rate": 5.588224779931761e-06, "loss": 0.0015, "num_input_tokens_seen": 253480528, "step": 117345 }, { "epoch": 19.143556280587276, "grad_norm": 0.004071689676493406, "learning_rate": 5.577617552430303e-06, "loss": 0.0003, "num_input_tokens_seen": 253491632, "step": 117350 }, { "epoch": 19.144371941272432, "grad_norm": 0.0002320687344763428, "learning_rate": 5.567020345025597e-06, "loss": 0.0009, "num_input_tokens_seen": 253502128, "step": 117355 }, { "epoch": 19.145187601957584, "grad_norm": 0.5973049402236938, "learning_rate": 5.556433157932528e-06, "loss": 0.0255, "num_input_tokens_seen": 253512880, "step": 117360 }, { "epoch": 19.14600326264274, "grad_norm": 0.013899151235818863, "learning_rate": 5.5458559913655335e-06, "loss": 0.0009, "num_input_tokens_seen": 253524656, "step": 117365 }, { "epoch": 19.146818923327896, "grad_norm": 0.0036639608442783356, "learning_rate": 5.5352888455390546e-06, "loss": 0.0005, "num_input_tokens_seen": 253535088, "step": 117370 }, { "epoch": 19.14763458401305, "grad_norm": 0.00020154824596829712, "learning_rate": 5.524731720667197e-06, "loss": 0.0011, "num_input_tokens_seen": 253545552, "step": 117375 }, { "epoch": 19.148450244698207, "grad_norm": 0.0019946058746427298, "learning_rate": 5.514184616964013e-06, "loss": 0.0009, "num_input_tokens_seen": 253556144, "step": 117380 }, { "epoch": 19.14926590538336, "grad_norm": 0.0249673742800951, "learning_rate": 5.503647534643108e-06, "loss": 0.0017, "num_input_tokens_seen": 253567920, "step": 117385 }, { "epoch": 19.150081566068515, "grad_norm": 0.008728111162781715, "learning_rate": 5.493120473918145e-06, "loss": 0.0319, "num_input_tokens_seen": 253579600, "step": 117390 }, { "epoch": 19.15089722675367, "grad_norm": 0.0003968800010625273, "learning_rate": 5.4826034350023426e-06, "loss": 0.001, "num_input_tokens_seen": 253589008, "step": 117395 }, { "epoch": 19.151712887438826, "grad_norm": 0.00017136444512289017, "learning_rate": 5.472096418108974e-06, "loss": 0.0057, "num_input_tokens_seen": 253598960, "step": 117400 }, { "epoch": 19.152528548123982, "grad_norm": 0.004832593258470297, "learning_rate": 5.461599423450924e-06, "loss": 0.0006, "num_input_tokens_seen": 253609360, "step": 117405 }, { "epoch": 19.153344208809134, "grad_norm": 0.0034371260553598404, "learning_rate": 5.451112451240914e-06, "loss": 0.001, "num_input_tokens_seen": 253621616, "step": 117410 }, { "epoch": 19.15415986949429, "grad_norm": 0.007875418290495872, "learning_rate": 5.440635501691493e-06, "loss": 0.0019, "num_input_tokens_seen": 253632848, "step": 117415 }, { "epoch": 19.154975530179446, "grad_norm": 0.036121610552072525, "learning_rate": 5.4301685750149935e-06, "loss": 0.0076, "num_input_tokens_seen": 253642800, "step": 117420 }, { "epoch": 19.1557911908646, "grad_norm": 0.0009760346729308367, "learning_rate": 5.419711671423577e-06, "loss": 0.0019, "num_input_tokens_seen": 253654544, "step": 117425 }, { "epoch": 19.156606851549757, "grad_norm": 0.002728499239310622, "learning_rate": 5.409264791129076e-06, "loss": 0.0021, "num_input_tokens_seen": 253666000, "step": 117430 }, { "epoch": 19.15742251223491, "grad_norm": 0.002243755152449012, "learning_rate": 5.398827934343264e-06, "loss": 0.0018, "num_input_tokens_seen": 253675504, "step": 117435 }, { "epoch": 19.158238172920065, "grad_norm": 0.07680145651102066, "learning_rate": 5.38840110127764e-06, "loss": 0.0041, "num_input_tokens_seen": 253686768, "step": 117440 }, { "epoch": 19.15905383360522, "grad_norm": 0.0037976547610014677, "learning_rate": 5.377984292143534e-06, "loss": 0.0013, "num_input_tokens_seen": 253697520, "step": 117445 }, { "epoch": 19.159869494290376, "grad_norm": 0.0003936160064768046, "learning_rate": 5.367577507152055e-06, "loss": 0.0015, "num_input_tokens_seen": 253705776, "step": 117450 }, { "epoch": 19.160685154975532, "grad_norm": 0.03844565153121948, "learning_rate": 5.35718074651409e-06, "loss": 0.002, "num_input_tokens_seen": 253716752, "step": 117455 }, { "epoch": 19.161500815660684, "grad_norm": 0.03414055332541466, "learning_rate": 5.346794010440359e-06, "loss": 0.0018, "num_input_tokens_seen": 253727760, "step": 117460 }, { "epoch": 19.16231647634584, "grad_norm": 0.001296155620366335, "learning_rate": 5.336417299141361e-06, "loss": 0.0038, "num_input_tokens_seen": 253738992, "step": 117465 }, { "epoch": 19.163132137030995, "grad_norm": 0.0953516960144043, "learning_rate": 5.326050612827426e-06, "loss": 0.0021, "num_input_tokens_seen": 253749040, "step": 117470 }, { "epoch": 19.16394779771615, "grad_norm": 0.00608966825529933, "learning_rate": 5.315693951708555e-06, "loss": 0.0012, "num_input_tokens_seen": 253759856, "step": 117475 }, { "epoch": 19.164763458401303, "grad_norm": 0.020399967208504677, "learning_rate": 5.305347315994747e-06, "loss": 0.0021, "num_input_tokens_seen": 253769648, "step": 117480 }, { "epoch": 19.16557911908646, "grad_norm": 0.004619190003722906, "learning_rate": 5.295010705895609e-06, "loss": 0.005, "num_input_tokens_seen": 253780624, "step": 117485 }, { "epoch": 19.166394779771615, "grad_norm": 0.0010359683074057102, "learning_rate": 5.284684121620697e-06, "loss": 0.0019, "num_input_tokens_seen": 253791600, "step": 117490 }, { "epoch": 19.16721044045677, "grad_norm": 0.0013771315570920706, "learning_rate": 5.2743675633792345e-06, "loss": 0.0038, "num_input_tokens_seen": 253801584, "step": 117495 }, { "epoch": 19.168026101141926, "grad_norm": 0.0016533228335902095, "learning_rate": 5.264061031380274e-06, "loss": 0.0003, "num_input_tokens_seen": 253813456, "step": 117500 }, { "epoch": 19.16884176182708, "grad_norm": 0.004072641488164663, "learning_rate": 5.253764525832761e-06, "loss": 0.0019, "num_input_tokens_seen": 253824464, "step": 117505 }, { "epoch": 19.169657422512234, "grad_norm": 0.028276223689317703, "learning_rate": 5.243478046945305e-06, "loss": 0.0015, "num_input_tokens_seen": 253835344, "step": 117510 }, { "epoch": 19.17047308319739, "grad_norm": 0.002490977058187127, "learning_rate": 5.233201594926462e-06, "loss": 0.0008, "num_input_tokens_seen": 253845360, "step": 117515 }, { "epoch": 19.171288743882545, "grad_norm": 0.018174799159169197, "learning_rate": 5.222935169984455e-06, "loss": 0.0058, "num_input_tokens_seen": 253856880, "step": 117520 }, { "epoch": 19.1721044045677, "grad_norm": 0.00024710557772777975, "learning_rate": 5.212678772327284e-06, "loss": 0.0008, "num_input_tokens_seen": 253867088, "step": 117525 }, { "epoch": 19.172920065252853, "grad_norm": 0.0025143155362457037, "learning_rate": 5.202432402162893e-06, "loss": 0.0562, "num_input_tokens_seen": 253878608, "step": 117530 }, { "epoch": 19.17373572593801, "grad_norm": 0.08865071088075638, "learning_rate": 5.192196059698895e-06, "loss": 0.0031, "num_input_tokens_seen": 253886896, "step": 117535 }, { "epoch": 19.174551386623165, "grad_norm": 0.0015286827692762017, "learning_rate": 5.18196974514279e-06, "loss": 0.0027, "num_input_tokens_seen": 253898352, "step": 117540 }, { "epoch": 19.17536704730832, "grad_norm": 0.0010672721546143293, "learning_rate": 5.1717534587017445e-06, "loss": 0.002, "num_input_tokens_seen": 253910448, "step": 117545 }, { "epoch": 19.176182707993476, "grad_norm": 0.12418833374977112, "learning_rate": 5.161547200582872e-06, "loss": 0.0031, "num_input_tokens_seen": 253921008, "step": 117550 }, { "epoch": 19.17699836867863, "grad_norm": 0.00027891527861356735, "learning_rate": 5.151350970993007e-06, "loss": 0.0004, "num_input_tokens_seen": 253931600, "step": 117555 }, { "epoch": 19.177814029363784, "grad_norm": 0.00248112459667027, "learning_rate": 5.141164770138707e-06, "loss": 0.0006, "num_input_tokens_seen": 253942512, "step": 117560 }, { "epoch": 19.17862969004894, "grad_norm": 0.0024953444954007864, "learning_rate": 5.130988598226527e-06, "loss": 0.0013, "num_input_tokens_seen": 253954032, "step": 117565 }, { "epoch": 19.179445350734095, "grad_norm": 0.021138276904821396, "learning_rate": 5.120822455462637e-06, "loss": 0.0049, "num_input_tokens_seen": 253965136, "step": 117570 }, { "epoch": 19.18026101141925, "grad_norm": 0.001367824850603938, "learning_rate": 5.110666342053094e-06, "loss": 0.0119, "num_input_tokens_seen": 253975408, "step": 117575 }, { "epoch": 19.181076672104403, "grad_norm": 0.013493673875927925, "learning_rate": 5.100520258203734e-06, "loss": 0.0017, "num_input_tokens_seen": 253984912, "step": 117580 }, { "epoch": 19.18189233278956, "grad_norm": 0.02722967229783535, "learning_rate": 5.090384204120113e-06, "loss": 0.0046, "num_input_tokens_seen": 253996656, "step": 117585 }, { "epoch": 19.182707993474715, "grad_norm": 0.001604323973879218, "learning_rate": 5.08025818000768e-06, "loss": 0.0018, "num_input_tokens_seen": 254007312, "step": 117590 }, { "epoch": 19.18352365415987, "grad_norm": 0.00195878348313272, "learning_rate": 5.0701421860717135e-06, "loss": 0.0145, "num_input_tokens_seen": 254018928, "step": 117595 }, { "epoch": 19.184339314845026, "grad_norm": 0.0005671089165844023, "learning_rate": 5.060036222517161e-06, "loss": 0.001, "num_input_tokens_seen": 254030160, "step": 117600 }, { "epoch": 19.18515497553018, "grad_norm": 0.2616872191429138, "learning_rate": 5.049940289548804e-06, "loss": 0.0708, "num_input_tokens_seen": 254040400, "step": 117605 }, { "epoch": 19.185970636215334, "grad_norm": 0.00945583451539278, "learning_rate": 5.039854387371368e-06, "loss": 0.001, "num_input_tokens_seen": 254051472, "step": 117610 }, { "epoch": 19.18678629690049, "grad_norm": 0.003492174670100212, "learning_rate": 5.0297785161891315e-06, "loss": 0.1164, "num_input_tokens_seen": 254062192, "step": 117615 }, { "epoch": 19.187601957585645, "grad_norm": 0.0006185670499689877, "learning_rate": 5.019712676206323e-06, "loss": 0.0007, "num_input_tokens_seen": 254073360, "step": 117620 }, { "epoch": 19.1884176182708, "grad_norm": 0.0019063102081418037, "learning_rate": 5.009656867627055e-06, "loss": 0.0005, "num_input_tokens_seen": 254084112, "step": 117625 }, { "epoch": 19.189233278955953, "grad_norm": 0.0005886392900720239, "learning_rate": 4.999611090654943e-06, "loss": 0.0005, "num_input_tokens_seen": 254094672, "step": 117630 }, { "epoch": 19.19004893964111, "grad_norm": 0.0002696272567845881, "learning_rate": 4.989575345493713e-06, "loss": 0.0006, "num_input_tokens_seen": 254105616, "step": 117635 }, { "epoch": 19.190864600326265, "grad_norm": 0.005746932700276375, "learning_rate": 4.979549632346702e-06, "loss": 0.0005, "num_input_tokens_seen": 254116240, "step": 117640 }, { "epoch": 19.19168026101142, "grad_norm": 0.00037611470906995237, "learning_rate": 4.969533951417082e-06, "loss": 0.0004, "num_input_tokens_seen": 254127088, "step": 117645 }, { "epoch": 19.192495921696572, "grad_norm": 0.04209225997328758, "learning_rate": 4.959528302907857e-06, "loss": 0.0075, "num_input_tokens_seen": 254138096, "step": 117650 }, { "epoch": 19.193311582381728, "grad_norm": 0.00023832359875086695, "learning_rate": 4.949532687021751e-06, "loss": 0.0016, "num_input_tokens_seen": 254149552, "step": 117655 }, { "epoch": 19.194127243066884, "grad_norm": 0.008501997217535973, "learning_rate": 4.939547103961439e-06, "loss": 0.0012, "num_input_tokens_seen": 254159184, "step": 117660 }, { "epoch": 19.19494290375204, "grad_norm": 0.009387916885316372, "learning_rate": 4.929571553929202e-06, "loss": 0.0012, "num_input_tokens_seen": 254170800, "step": 117665 }, { "epoch": 19.195758564437195, "grad_norm": 0.003329535946249962, "learning_rate": 4.919606037127267e-06, "loss": 0.0065, "num_input_tokens_seen": 254181584, "step": 117670 }, { "epoch": 19.196574225122347, "grad_norm": 0.042027123272418976, "learning_rate": 4.909650553757583e-06, "loss": 0.0012, "num_input_tokens_seen": 254191952, "step": 117675 }, { "epoch": 19.197389885807503, "grad_norm": 0.0032699257135391235, "learning_rate": 4.8997051040218235e-06, "loss": 0.0013, "num_input_tokens_seen": 254202864, "step": 117680 }, { "epoch": 19.19820554649266, "grad_norm": 0.0007212890195660293, "learning_rate": 4.889769688121715e-06, "loss": 0.0002, "num_input_tokens_seen": 254212848, "step": 117685 }, { "epoch": 19.199021207177815, "grad_norm": 0.00019408235675655305, "learning_rate": 4.87984430625843e-06, "loss": 0.0015, "num_input_tokens_seen": 254223408, "step": 117690 }, { "epoch": 19.19983686786297, "grad_norm": 0.0005199372535571456, "learning_rate": 4.869928958633252e-06, "loss": 0.0038, "num_input_tokens_seen": 254234288, "step": 117695 }, { "epoch": 19.200652528548122, "grad_norm": 0.003805541666224599, "learning_rate": 4.860023645447076e-06, "loss": 0.0003, "num_input_tokens_seen": 254245392, "step": 117700 }, { "epoch": 19.201468189233278, "grad_norm": 0.03584432229399681, "learning_rate": 4.85012836690063e-06, "loss": 0.0896, "num_input_tokens_seen": 254255728, "step": 117705 }, { "epoch": 19.202283849918434, "grad_norm": 0.48327863216400146, "learning_rate": 4.840243123194477e-06, "loss": 0.1086, "num_input_tokens_seen": 254265904, "step": 117710 }, { "epoch": 19.20309951060359, "grad_norm": 0.011278538964688778, "learning_rate": 4.83036791452901e-06, "loss": 0.0007, "num_input_tokens_seen": 254276944, "step": 117715 }, { "epoch": 19.203915171288745, "grad_norm": 0.0011459417873993516, "learning_rate": 4.820502741104238e-06, "loss": 0.0035, "num_input_tokens_seen": 254287760, "step": 117720 }, { "epoch": 19.204730831973897, "grad_norm": 0.0020964513532817364, "learning_rate": 4.810647603120166e-06, "loss": 0.0005, "num_input_tokens_seen": 254299216, "step": 117725 }, { "epoch": 19.205546492659053, "grad_norm": 0.00022387487115338445, "learning_rate": 4.800802500776524e-06, "loss": 0.0006, "num_input_tokens_seen": 254309328, "step": 117730 }, { "epoch": 19.20636215334421, "grad_norm": 0.009423289448022842, "learning_rate": 4.790967434272819e-06, "loss": 0.0006, "num_input_tokens_seen": 254319280, "step": 117735 }, { "epoch": 19.207177814029365, "grad_norm": 0.004757652059197426, "learning_rate": 4.781142403808392e-06, "loss": 0.0041, "num_input_tokens_seen": 254329584, "step": 117740 }, { "epoch": 19.20799347471452, "grad_norm": 0.0001649027253733948, "learning_rate": 4.771327409582305e-06, "loss": 0.0013, "num_input_tokens_seen": 254339888, "step": 117745 }, { "epoch": 19.208809135399672, "grad_norm": 0.0006828425102867186, "learning_rate": 4.761522451793565e-06, "loss": 0.0015, "num_input_tokens_seen": 254351280, "step": 117750 }, { "epoch": 19.209624796084828, "grad_norm": 0.18796727061271667, "learning_rate": 4.751727530640793e-06, "loss": 0.0026, "num_input_tokens_seen": 254362640, "step": 117755 }, { "epoch": 19.210440456769984, "grad_norm": 0.00041193258948624134, "learning_rate": 4.74194264632255e-06, "loss": 0.0378, "num_input_tokens_seen": 254373136, "step": 117760 }, { "epoch": 19.21125611745514, "grad_norm": 0.0002577627310529351, "learning_rate": 4.732167799037068e-06, "loss": 0.0002, "num_input_tokens_seen": 254383792, "step": 117765 }, { "epoch": 19.212071778140295, "grad_norm": 0.010403187945485115, "learning_rate": 4.722402988982577e-06, "loss": 0.1138, "num_input_tokens_seen": 254393648, "step": 117770 }, { "epoch": 19.212887438825447, "grad_norm": 0.005965354852378368, "learning_rate": 4.7126482163568075e-06, "loss": 0.0006, "num_input_tokens_seen": 254403728, "step": 117775 }, { "epoch": 19.213703099510603, "grad_norm": 0.0038343167398124933, "learning_rate": 4.702903481357601e-06, "loss": 0.0496, "num_input_tokens_seen": 254414544, "step": 117780 }, { "epoch": 19.21451876019576, "grad_norm": 0.002071639057248831, "learning_rate": 4.693168784182356e-06, "loss": 0.0015, "num_input_tokens_seen": 254425872, "step": 117785 }, { "epoch": 19.215334420880914, "grad_norm": 0.0016836397117003798, "learning_rate": 4.6834441250284135e-06, "loss": 0.0005, "num_input_tokens_seen": 254436464, "step": 117790 }, { "epoch": 19.21615008156607, "grad_norm": 1.1679688692092896, "learning_rate": 4.673729504092783e-06, "loss": 0.0623, "num_input_tokens_seen": 254446512, "step": 117795 }, { "epoch": 19.216965742251222, "grad_norm": 0.00035125756403431296, "learning_rate": 4.664024921572419e-06, "loss": 0.0024, "num_input_tokens_seen": 254458320, "step": 117800 }, { "epoch": 19.217781402936378, "grad_norm": 0.002369961701333523, "learning_rate": 4.654330377663996e-06, "loss": 0.0014, "num_input_tokens_seen": 254468624, "step": 117805 }, { "epoch": 19.218597063621534, "grad_norm": 0.008754052221775055, "learning_rate": 4.644645872563913e-06, "loss": 0.003, "num_input_tokens_seen": 254478480, "step": 117810 }, { "epoch": 19.21941272430669, "grad_norm": 0.007495723199099302, "learning_rate": 4.634971406468514e-06, "loss": 0.0021, "num_input_tokens_seen": 254490416, "step": 117815 }, { "epoch": 19.22022838499184, "grad_norm": 0.05519472435116768, "learning_rate": 4.625306979573807e-06, "loss": 0.0013, "num_input_tokens_seen": 254500720, "step": 117820 }, { "epoch": 19.221044045676997, "grad_norm": 0.020412957295775414, "learning_rate": 4.615652592075747e-06, "loss": 0.0018, "num_input_tokens_seen": 254512304, "step": 117825 }, { "epoch": 19.221859706362153, "grad_norm": 0.0004900472704321146, "learning_rate": 4.606008244169846e-06, "loss": 0.0017, "num_input_tokens_seen": 254522608, "step": 117830 }, { "epoch": 19.22267536704731, "grad_norm": 0.36650630831718445, "learning_rate": 4.596373936051667e-06, "loss": 0.0111, "num_input_tokens_seen": 254533040, "step": 117835 }, { "epoch": 19.223491027732464, "grad_norm": 0.01161511242389679, "learning_rate": 4.586749667916446e-06, "loss": 0.0018, "num_input_tokens_seen": 254543728, "step": 117840 }, { "epoch": 19.224306688417617, "grad_norm": 0.000983836012892425, "learning_rate": 4.57713543995919e-06, "loss": 0.0027, "num_input_tokens_seen": 254553360, "step": 117845 }, { "epoch": 19.225122349102772, "grad_norm": 0.05470266193151474, "learning_rate": 4.567531252374801e-06, "loss": 0.0025, "num_input_tokens_seen": 254565328, "step": 117850 }, { "epoch": 19.225938009787928, "grad_norm": 0.010444153100252151, "learning_rate": 4.557937105357901e-06, "loss": 0.0104, "num_input_tokens_seen": 254577552, "step": 117855 }, { "epoch": 19.226753670473084, "grad_norm": 0.0179997980594635, "learning_rate": 4.54835299910289e-06, "loss": 0.0028, "num_input_tokens_seen": 254589712, "step": 117860 }, { "epoch": 19.22756933115824, "grad_norm": 0.0017869179137051105, "learning_rate": 4.5387789338040555e-06, "loss": 0.0004, "num_input_tokens_seen": 254601744, "step": 117865 }, { "epoch": 19.22838499184339, "grad_norm": 0.00039110815851017833, "learning_rate": 4.529214909655355e-06, "loss": 0.0013, "num_input_tokens_seen": 254613904, "step": 117870 }, { "epoch": 19.229200652528547, "grad_norm": 0.04959358274936676, "learning_rate": 4.519660926850744e-06, "loss": 0.0027, "num_input_tokens_seen": 254625200, "step": 117875 }, { "epoch": 19.230016313213703, "grad_norm": 0.011024784296751022, "learning_rate": 4.510116985583679e-06, "loss": 0.001, "num_input_tokens_seen": 254636560, "step": 117880 }, { "epoch": 19.23083197389886, "grad_norm": 0.2488701045513153, "learning_rate": 4.500583086047782e-06, "loss": 0.0047, "num_input_tokens_seen": 254647440, "step": 117885 }, { "epoch": 19.231647634584014, "grad_norm": 0.02225778065621853, "learning_rate": 4.491059228436012e-06, "loss": 0.0024, "num_input_tokens_seen": 254657296, "step": 117890 }, { "epoch": 19.232463295269167, "grad_norm": 0.931378960609436, "learning_rate": 4.481545412941657e-06, "loss": 0.126, "num_input_tokens_seen": 254668432, "step": 117895 }, { "epoch": 19.233278955954322, "grad_norm": 0.0007507778936997056, "learning_rate": 4.472041639757285e-06, "loss": 0.0009, "num_input_tokens_seen": 254680208, "step": 117900 }, { "epoch": 19.234094616639478, "grad_norm": 0.003547506872564554, "learning_rate": 4.462547909075687e-06, "loss": 0.0032, "num_input_tokens_seen": 254690800, "step": 117905 }, { "epoch": 19.234910277324634, "grad_norm": 0.00012341790716163814, "learning_rate": 4.453064221089154e-06, "loss": 0.0005, "num_input_tokens_seen": 254702576, "step": 117910 }, { "epoch": 19.23572593800979, "grad_norm": 0.010750551708042622, "learning_rate": 4.44359057598992e-06, "loss": 0.0007, "num_input_tokens_seen": 254714480, "step": 117915 }, { "epoch": 19.23654159869494, "grad_norm": 0.012489181011915207, "learning_rate": 4.434126973969998e-06, "loss": 0.0216, "num_input_tokens_seen": 254725456, "step": 117920 }, { "epoch": 19.237357259380097, "grad_norm": 0.0013013400603085756, "learning_rate": 4.424673415221181e-06, "loss": 0.0005, "num_input_tokens_seen": 254736560, "step": 117925 }, { "epoch": 19.238172920065253, "grad_norm": 0.015076026320457458, "learning_rate": 4.41522989993498e-06, "loss": 0.0011, "num_input_tokens_seen": 254748624, "step": 117930 }, { "epoch": 19.23898858075041, "grad_norm": 0.001261144527234137, "learning_rate": 4.405796428302855e-06, "loss": 0.0003, "num_input_tokens_seen": 254759664, "step": 117935 }, { "epoch": 19.239804241435564, "grad_norm": 0.001300856121815741, "learning_rate": 4.396373000515986e-06, "loss": 0.0004, "num_input_tokens_seen": 254770352, "step": 117940 }, { "epoch": 19.240619902120716, "grad_norm": 0.0004666333843488246, "learning_rate": 4.3869596167653296e-06, "loss": 0.0016, "num_input_tokens_seen": 254780880, "step": 117945 }, { "epoch": 19.241435562805872, "grad_norm": 0.018931837752461433, "learning_rate": 4.377556277241679e-06, "loss": 0.005, "num_input_tokens_seen": 254793008, "step": 117950 }, { "epoch": 19.242251223491028, "grad_norm": 0.0013247689930722117, "learning_rate": 4.368162982135604e-06, "loss": 0.0015, "num_input_tokens_seen": 254803312, "step": 117955 }, { "epoch": 19.243066884176184, "grad_norm": 0.06686493009328842, "learning_rate": 4.3587797316373965e-06, "loss": 0.0038, "num_input_tokens_seen": 254814032, "step": 117960 }, { "epoch": 19.24388254486134, "grad_norm": 0.0002263520291307941, "learning_rate": 4.34940652593735e-06, "loss": 0.0012, "num_input_tokens_seen": 254824816, "step": 117965 }, { "epoch": 19.24469820554649, "grad_norm": 0.007901106961071491, "learning_rate": 4.34004336522531e-06, "loss": 0.0033, "num_input_tokens_seen": 254835952, "step": 117970 }, { "epoch": 19.245513866231647, "grad_norm": 0.04995737969875336, "learning_rate": 4.330690249691127e-06, "loss": 0.0035, "num_input_tokens_seen": 254845584, "step": 117975 }, { "epoch": 19.246329526916803, "grad_norm": 0.0003960870089940727, "learning_rate": 4.321347179524316e-06, "loss": 0.0007, "num_input_tokens_seen": 254856656, "step": 117980 }, { "epoch": 19.24714518760196, "grad_norm": 0.0004914596793241799, "learning_rate": 4.312014154914113e-06, "loss": 0.0018, "num_input_tokens_seen": 254866256, "step": 117985 }, { "epoch": 19.247960848287114, "grad_norm": 0.003282026154920459, "learning_rate": 4.302691176049922e-06, "loss": 0.0042, "num_input_tokens_seen": 254875888, "step": 117990 }, { "epoch": 19.248776508972266, "grad_norm": 0.00018431748321745545, "learning_rate": 4.293378243120371e-06, "loss": 0.0003, "num_input_tokens_seen": 254886672, "step": 117995 }, { "epoch": 19.249592169657422, "grad_norm": 0.0007251430070027709, "learning_rate": 4.284075356314476e-06, "loss": 0.0004, "num_input_tokens_seen": 254898096, "step": 118000 }, { "epoch": 19.250407830342578, "grad_norm": 0.011223108507692814, "learning_rate": 4.2747825158205855e-06, "loss": 0.0019, "num_input_tokens_seen": 254908080, "step": 118005 }, { "epoch": 19.251223491027734, "grad_norm": 0.0019064913503825665, "learning_rate": 4.265499721827159e-06, "loss": 0.001, "num_input_tokens_seen": 254918256, "step": 118010 }, { "epoch": 19.252039151712886, "grad_norm": 0.00043139405897818506, "learning_rate": 4.256226974522215e-06, "loss": 0.0009, "num_input_tokens_seen": 254928272, "step": 118015 }, { "epoch": 19.25285481239804, "grad_norm": 0.0022782967425882816, "learning_rate": 4.246964274093767e-06, "loss": 0.0005, "num_input_tokens_seen": 254940016, "step": 118020 }, { "epoch": 19.253670473083197, "grad_norm": 0.014259060844779015, "learning_rate": 4.237711620729501e-06, "loss": 0.001, "num_input_tokens_seen": 254950800, "step": 118025 }, { "epoch": 19.254486133768353, "grad_norm": 0.0016648249002173543, "learning_rate": 4.228469014616931e-06, "loss": 0.0036, "num_input_tokens_seen": 254962992, "step": 118030 }, { "epoch": 19.25530179445351, "grad_norm": 0.010028230026364326, "learning_rate": 4.219236455943298e-06, "loss": 0.0023, "num_input_tokens_seen": 254973392, "step": 118035 }, { "epoch": 19.25611745513866, "grad_norm": 0.0038817732129245996, "learning_rate": 4.210013944895841e-06, "loss": 0.0058, "num_input_tokens_seen": 254984368, "step": 118040 }, { "epoch": 19.256933115823816, "grad_norm": 0.0060980357229709625, "learning_rate": 4.2008014816613534e-06, "loss": 0.0013, "num_input_tokens_seen": 254995280, "step": 118045 }, { "epoch": 19.257748776508972, "grad_norm": 0.0006417598924599588, "learning_rate": 4.191599066426632e-06, "loss": 0.0018, "num_input_tokens_seen": 255006832, "step": 118050 }, { "epoch": 19.258564437194128, "grad_norm": 0.00042296203901059926, "learning_rate": 4.182406699378138e-06, "loss": 0.0022, "num_input_tokens_seen": 255017424, "step": 118055 }, { "epoch": 19.259380097879284, "grad_norm": 0.00021238908811938018, "learning_rate": 4.173224380702112e-06, "loss": 0.0005, "num_input_tokens_seen": 255027184, "step": 118060 }, { "epoch": 19.260195758564436, "grad_norm": 0.0052239480428397655, "learning_rate": 4.164052110584737e-06, "loss": 0.0014, "num_input_tokens_seen": 255036816, "step": 118065 }, { "epoch": 19.26101141924959, "grad_norm": 0.00019243801943957806, "learning_rate": 4.154889889211866e-06, "loss": 0.0014, "num_input_tokens_seen": 255047408, "step": 118070 }, { "epoch": 19.261827079934747, "grad_norm": 0.001658376189880073, "learning_rate": 4.145737716769182e-06, "loss": 0.0012, "num_input_tokens_seen": 255057168, "step": 118075 }, { "epoch": 19.262642740619903, "grad_norm": 0.0030029506888240576, "learning_rate": 4.136595593442149e-06, "loss": 0.0078, "num_input_tokens_seen": 255068752, "step": 118080 }, { "epoch": 19.26345840130506, "grad_norm": 0.00022957536566536874, "learning_rate": 4.1274635194160086e-06, "loss": 0.0006, "num_input_tokens_seen": 255079760, "step": 118085 }, { "epoch": 19.26427406199021, "grad_norm": 0.0007117181667126715, "learning_rate": 4.118341494875944e-06, "loss": 0.0008, "num_input_tokens_seen": 255091088, "step": 118090 }, { "epoch": 19.265089722675366, "grad_norm": 0.027176441624760628, "learning_rate": 4.1092295200066966e-06, "loss": 0.0159, "num_input_tokens_seen": 255101968, "step": 118095 }, { "epoch": 19.265905383360522, "grad_norm": 0.07800480723381042, "learning_rate": 4.100127594993064e-06, "loss": 0.0029, "num_input_tokens_seen": 255112976, "step": 118100 }, { "epoch": 19.266721044045678, "grad_norm": 0.009774110279977322, "learning_rate": 4.091035720019398e-06, "loss": 0.0008, "num_input_tokens_seen": 255122288, "step": 118105 }, { "epoch": 19.267536704730833, "grad_norm": 0.0008097323589026928, "learning_rate": 4.081953895269996e-06, "loss": 0.0011, "num_input_tokens_seen": 255132048, "step": 118110 }, { "epoch": 19.268352365415986, "grad_norm": 0.0004488844715524465, "learning_rate": 4.072882120928933e-06, "loss": 0.0004, "num_input_tokens_seen": 255143152, "step": 118115 }, { "epoch": 19.26916802610114, "grad_norm": 0.0007533471216447651, "learning_rate": 4.063820397180007e-06, "loss": 0.0054, "num_input_tokens_seen": 255155664, "step": 118120 }, { "epoch": 19.269983686786297, "grad_norm": 0.04722285270690918, "learning_rate": 4.054768724206958e-06, "loss": 0.0015, "num_input_tokens_seen": 255165872, "step": 118125 }, { "epoch": 19.270799347471453, "grad_norm": 0.005354705266654491, "learning_rate": 4.045727102193087e-06, "loss": 0.0021, "num_input_tokens_seen": 255176560, "step": 118130 }, { "epoch": 19.27161500815661, "grad_norm": 0.0037820693105459213, "learning_rate": 4.036695531321799e-06, "loss": 0.0012, "num_input_tokens_seen": 255188176, "step": 118135 }, { "epoch": 19.27243066884176, "grad_norm": 0.0006078414153307676, "learning_rate": 4.027674011776006e-06, "loss": 0.0008, "num_input_tokens_seen": 255198736, "step": 118140 }, { "epoch": 19.273246329526916, "grad_norm": 0.0028573654126375914, "learning_rate": 4.018662543738616e-06, "loss": 0.0004, "num_input_tokens_seen": 255210384, "step": 118145 }, { "epoch": 19.274061990212072, "grad_norm": 0.001126722665503621, "learning_rate": 4.009661127392206e-06, "loss": 0.0005, "num_input_tokens_seen": 255220688, "step": 118150 }, { "epoch": 19.274877650897228, "grad_norm": 0.010454751551151276, "learning_rate": 4.00066976291924e-06, "loss": 0.0034, "num_input_tokens_seen": 255230608, "step": 118155 }, { "epoch": 19.275693311582383, "grad_norm": 0.023937121033668518, "learning_rate": 3.9916884505019065e-06, "loss": 0.0018, "num_input_tokens_seen": 255240848, "step": 118160 }, { "epoch": 19.276508972267536, "grad_norm": 0.0342971608042717, "learning_rate": 3.982717190322227e-06, "loss": 0.0008, "num_input_tokens_seen": 255251344, "step": 118165 }, { "epoch": 19.27732463295269, "grad_norm": 0.0011292777489870787, "learning_rate": 3.973755982562055e-06, "loss": 0.0005, "num_input_tokens_seen": 255261904, "step": 118170 }, { "epoch": 19.278140293637847, "grad_norm": 0.0016227407613769174, "learning_rate": 3.964804827402913e-06, "loss": 0.0003, "num_input_tokens_seen": 255273008, "step": 118175 }, { "epoch": 19.278955954323003, "grad_norm": 0.007771195378154516, "learning_rate": 3.955863725026321e-06, "loss": 0.0006, "num_input_tokens_seen": 255283664, "step": 118180 }, { "epoch": 19.27977161500816, "grad_norm": 0.0009318848024122417, "learning_rate": 3.946932675613413e-06, "loss": 0.0022, "num_input_tokens_seen": 255293456, "step": 118185 }, { "epoch": 19.28058727569331, "grad_norm": 0.0019882982596755028, "learning_rate": 3.93801167934521e-06, "loss": 0.0005, "num_input_tokens_seen": 255304528, "step": 118190 }, { "epoch": 19.281402936378466, "grad_norm": 0.018294580280780792, "learning_rate": 3.929100736402513e-06, "loss": 0.0006, "num_input_tokens_seen": 255316368, "step": 118195 }, { "epoch": 19.282218597063622, "grad_norm": 0.0001411344128428027, "learning_rate": 3.920199846965844e-06, "loss": 0.0002, "num_input_tokens_seen": 255328432, "step": 118200 }, { "epoch": 19.283034257748778, "grad_norm": 0.00018316751811653376, "learning_rate": 3.911309011215725e-06, "loss": 0.0009, "num_input_tokens_seen": 255340080, "step": 118205 }, { "epoch": 19.28384991843393, "grad_norm": 0.0008493968634866178, "learning_rate": 3.902428229332233e-06, "loss": 0.0008, "num_input_tokens_seen": 255350512, "step": 118210 }, { "epoch": 19.284665579119086, "grad_norm": 0.0007020276971161366, "learning_rate": 3.8935575014953374e-06, "loss": 0.0024, "num_input_tokens_seen": 255362128, "step": 118215 }, { "epoch": 19.28548123980424, "grad_norm": 0.002223310759291053, "learning_rate": 3.884696827884893e-06, "loss": 0.0007, "num_input_tokens_seen": 255373200, "step": 118220 }, { "epoch": 19.286296900489397, "grad_norm": 0.006080263294279575, "learning_rate": 3.8758462086804225e-06, "loss": 0.0006, "num_input_tokens_seen": 255383696, "step": 118225 }, { "epoch": 19.287112561174553, "grad_norm": 0.001246536965481937, "learning_rate": 3.867005644061283e-06, "loss": 0.0024, "num_input_tokens_seen": 255394448, "step": 118230 }, { "epoch": 19.287928221859705, "grad_norm": 0.012224650010466576, "learning_rate": 3.8581751342067205e-06, "loss": 0.0013, "num_input_tokens_seen": 255404592, "step": 118235 }, { "epoch": 19.28874388254486, "grad_norm": 0.09165532886981964, "learning_rate": 3.849354679295591e-06, "loss": 0.0036, "num_input_tokens_seen": 255414928, "step": 118240 }, { "epoch": 19.289559543230016, "grad_norm": 0.011029092594981194, "learning_rate": 3.840544279506753e-06, "loss": 0.0052, "num_input_tokens_seen": 255426992, "step": 118245 }, { "epoch": 19.290375203915172, "grad_norm": 0.00046244170516729355, "learning_rate": 3.831743935018672e-06, "loss": 0.0005, "num_input_tokens_seen": 255436880, "step": 118250 }, { "epoch": 19.291190864600328, "grad_norm": 0.8931393623352051, "learning_rate": 3.822953646009708e-06, "loss": 0.0574, "num_input_tokens_seen": 255447024, "step": 118255 }, { "epoch": 19.29200652528548, "grad_norm": 0.012058882042765617, "learning_rate": 3.8141734126580505e-06, "loss": 0.001, "num_input_tokens_seen": 255457264, "step": 118260 }, { "epoch": 19.292822185970635, "grad_norm": 0.0006178324692882597, "learning_rate": 3.805403235141669e-06, "loss": 0.0007, "num_input_tokens_seen": 255467408, "step": 118265 }, { "epoch": 19.29363784665579, "grad_norm": 0.0005874041235074401, "learning_rate": 3.7966431136381985e-06, "loss": 0.0009, "num_input_tokens_seen": 255478032, "step": 118270 }, { "epoch": 19.294453507340947, "grad_norm": 0.0011451609898358583, "learning_rate": 3.7878930483252195e-06, "loss": 0.0008, "num_input_tokens_seen": 255489328, "step": 118275 }, { "epoch": 19.295269168026103, "grad_norm": 0.007449743337929249, "learning_rate": 3.7791530393801456e-06, "loss": 0.0015, "num_input_tokens_seen": 255500272, "step": 118280 }, { "epoch": 19.296084828711255, "grad_norm": 0.00993234384804964, "learning_rate": 3.7704230869800015e-06, "loss": 0.0007, "num_input_tokens_seen": 255511920, "step": 118285 }, { "epoch": 19.29690048939641, "grad_norm": 0.0009379129041917622, "learning_rate": 3.7617031913017573e-06, "loss": 0.0014, "num_input_tokens_seen": 255521936, "step": 118290 }, { "epoch": 19.297716150081566, "grad_norm": 0.0034401898737996817, "learning_rate": 3.752993352522105e-06, "loss": 0.0009, "num_input_tokens_seen": 255532432, "step": 118295 }, { "epoch": 19.298531810766722, "grad_norm": 0.025949660688638687, "learning_rate": 3.7442935708176253e-06, "loss": 0.0654, "num_input_tokens_seen": 255542960, "step": 118300 }, { "epoch": 19.299347471451878, "grad_norm": 0.004528548568487167, "learning_rate": 3.7356038463645105e-06, "loss": 0.0044, "num_input_tokens_seen": 255553296, "step": 118305 }, { "epoch": 19.30016313213703, "grad_norm": 0.012715993449091911, "learning_rate": 3.7269241793390084e-06, "loss": 0.002, "num_input_tokens_seen": 255562832, "step": 118310 }, { "epoch": 19.300978792822185, "grad_norm": 0.0012263595126569271, "learning_rate": 3.7182545699169236e-06, "loss": 0.0004, "num_input_tokens_seen": 255571888, "step": 118315 }, { "epoch": 19.30179445350734, "grad_norm": 0.011231029406189919, "learning_rate": 3.7095950182739478e-06, "loss": 0.0044, "num_input_tokens_seen": 255583408, "step": 118320 }, { "epoch": 19.302610114192497, "grad_norm": 0.001041249604895711, "learning_rate": 3.700945524585664e-06, "loss": 0.0005, "num_input_tokens_seen": 255594608, "step": 118325 }, { "epoch": 19.303425774877653, "grad_norm": 0.00013112963642925024, "learning_rate": 3.6923060890273195e-06, "loss": 0.0187, "num_input_tokens_seen": 255605296, "step": 118330 }, { "epoch": 19.304241435562805, "grad_norm": 0.0004950486472807825, "learning_rate": 3.683676711773998e-06, "loss": 0.0111, "num_input_tokens_seen": 255615120, "step": 118335 }, { "epoch": 19.30505709624796, "grad_norm": 0.018098052591085434, "learning_rate": 3.6750573930005583e-06, "loss": 0.0017, "num_input_tokens_seen": 255626160, "step": 118340 }, { "epoch": 19.305872756933116, "grad_norm": 0.0003636969195213169, "learning_rate": 3.66644813288175e-06, "loss": 0.0007, "num_input_tokens_seen": 255637488, "step": 118345 }, { "epoch": 19.306688417618272, "grad_norm": 0.00021313635807018727, "learning_rate": 3.6578489315919893e-06, "loss": 0.0011, "num_input_tokens_seen": 255648272, "step": 118350 }, { "epoch": 19.307504078303428, "grad_norm": 0.07795961946249008, "learning_rate": 3.6492597893056367e-06, "loss": 0.0048, "num_input_tokens_seen": 255659632, "step": 118355 }, { "epoch": 19.30831973898858, "grad_norm": 0.001691961195319891, "learning_rate": 3.6406807061966085e-06, "loss": 0.0004, "num_input_tokens_seen": 255671504, "step": 118360 }, { "epoch": 19.309135399673735, "grad_norm": 0.021926045417785645, "learning_rate": 3.6321116824388767e-06, "loss": 0.0017, "num_input_tokens_seen": 255683152, "step": 118365 }, { "epoch": 19.30995106035889, "grad_norm": 0.0014312977436929941, "learning_rate": 3.6235527182061912e-06, "loss": 0.0007, "num_input_tokens_seen": 255695280, "step": 118370 }, { "epoch": 19.310766721044047, "grad_norm": 0.0011881274404004216, "learning_rate": 3.615003813671802e-06, "loss": 0.0004, "num_input_tokens_seen": 255706320, "step": 118375 }, { "epoch": 19.3115823817292, "grad_norm": 0.002549724653363228, "learning_rate": 3.6064649690091268e-06, "loss": 0.0006, "num_input_tokens_seen": 255717744, "step": 118380 }, { "epoch": 19.312398042414355, "grad_norm": 0.027324769645929337, "learning_rate": 3.5979361843910817e-06, "loss": 0.0012, "num_input_tokens_seen": 255727344, "step": 118385 }, { "epoch": 19.31321370309951, "grad_norm": 0.24201883375644684, "learning_rate": 3.589417459990696e-06, "loss": 0.0021, "num_input_tokens_seen": 255737520, "step": 118390 }, { "epoch": 19.314029363784666, "grad_norm": 0.04044476896524429, "learning_rate": 3.580908795980442e-06, "loss": 0.0016, "num_input_tokens_seen": 255747920, "step": 118395 }, { "epoch": 19.31484502446982, "grad_norm": 0.05046987533569336, "learning_rate": 3.572410192532849e-06, "loss": 0.0019, "num_input_tokens_seen": 255758896, "step": 118400 }, { "epoch": 19.315660685154974, "grad_norm": 0.0012489468790590763, "learning_rate": 3.563921649820112e-06, "loss": 0.0004, "num_input_tokens_seen": 255769392, "step": 118405 }, { "epoch": 19.31647634584013, "grad_norm": 0.041108161211013794, "learning_rate": 3.555443168014261e-06, "loss": 0.0023, "num_input_tokens_seen": 255779440, "step": 118410 }, { "epoch": 19.317292006525285, "grad_norm": 0.3899226784706116, "learning_rate": 3.5469747472871574e-06, "loss": 0.0115, "num_input_tokens_seen": 255790800, "step": 118415 }, { "epoch": 19.31810766721044, "grad_norm": 0.0016491117421537638, "learning_rate": 3.5385163878103864e-06, "loss": 0.0041, "num_input_tokens_seen": 255801296, "step": 118420 }, { "epoch": 19.318923327895597, "grad_norm": 0.001680854824371636, "learning_rate": 3.5300680897554226e-06, "loss": 0.0021, "num_input_tokens_seen": 255812400, "step": 118425 }, { "epoch": 19.31973898858075, "grad_norm": 0.08602673560380936, "learning_rate": 3.5216298532934068e-06, "loss": 0.0047, "num_input_tokens_seen": 255822992, "step": 118430 }, { "epoch": 19.320554649265905, "grad_norm": 0.0004549971781671047, "learning_rate": 3.5132016785954235e-06, "loss": 0.0014, "num_input_tokens_seen": 255833616, "step": 118435 }, { "epoch": 19.32137030995106, "grad_norm": 0.00023607736511621624, "learning_rate": 3.504783565832226e-06, "loss": 0.0028, "num_input_tokens_seen": 255845008, "step": 118440 }, { "epoch": 19.322185970636216, "grad_norm": 0.009514588862657547, "learning_rate": 3.496375515174455e-06, "loss": 0.0012, "num_input_tokens_seen": 255855440, "step": 118445 }, { "epoch": 19.32300163132137, "grad_norm": 0.009080817922949791, "learning_rate": 3.4879775267925297e-06, "loss": 0.0031, "num_input_tokens_seen": 255866608, "step": 118450 }, { "epoch": 19.323817292006524, "grad_norm": 0.04134466499090195, "learning_rate": 3.4795896008565363e-06, "loss": 0.0035, "num_input_tokens_seen": 255878416, "step": 118455 }, { "epoch": 19.32463295269168, "grad_norm": 0.019892286509275436, "learning_rate": 3.4712117375365615e-06, "loss": 0.0011, "num_input_tokens_seen": 255888848, "step": 118460 }, { "epoch": 19.325448613376835, "grad_norm": 0.0010795299895107746, "learning_rate": 3.4628439370024133e-06, "loss": 0.0858, "num_input_tokens_seen": 255899312, "step": 118465 }, { "epoch": 19.32626427406199, "grad_norm": 0.0002479618415236473, "learning_rate": 3.454486199423568e-06, "loss": 0.0012, "num_input_tokens_seen": 255910640, "step": 118470 }, { "epoch": 19.327079934747147, "grad_norm": 0.001029806793667376, "learning_rate": 3.4461385249695e-06, "loss": 0.0092, "num_input_tokens_seen": 255921840, "step": 118475 }, { "epoch": 19.3278955954323, "grad_norm": 0.005694256164133549, "learning_rate": 3.4378009138093524e-06, "loss": 0.0031, "num_input_tokens_seen": 255931472, "step": 118480 }, { "epoch": 19.328711256117455, "grad_norm": 0.031680673360824585, "learning_rate": 3.429473366112157e-06, "loss": 0.0023, "num_input_tokens_seen": 255941360, "step": 118485 }, { "epoch": 19.32952691680261, "grad_norm": 0.0053010135889053345, "learning_rate": 3.421155882046556e-06, "loss": 0.0004, "num_input_tokens_seen": 255951632, "step": 118490 }, { "epoch": 19.330342577487766, "grad_norm": 0.002608070382848382, "learning_rate": 3.4128484617812482e-06, "loss": 0.0012, "num_input_tokens_seen": 255961584, "step": 118495 }, { "epoch": 19.33115823817292, "grad_norm": 0.0017077375669032335, "learning_rate": 3.404551105484488e-06, "loss": 0.0004, "num_input_tokens_seen": 255971248, "step": 118500 }, { "epoch": 19.331973898858074, "grad_norm": 0.03870732709765434, "learning_rate": 3.3962638133245296e-06, "loss": 0.0016, "num_input_tokens_seen": 255982384, "step": 118505 }, { "epoch": 19.33278955954323, "grad_norm": 0.00011174618703080341, "learning_rate": 3.3879865854691825e-06, "loss": 0.0002, "num_input_tokens_seen": 255992976, "step": 118510 }, { "epoch": 19.333605220228385, "grad_norm": 0.012585737742483616, "learning_rate": 3.3797194220863694e-06, "loss": 0.0023, "num_input_tokens_seen": 256004080, "step": 118515 }, { "epoch": 19.33442088091354, "grad_norm": 0.002018422121182084, "learning_rate": 3.371462323343455e-06, "loss": 0.0009, "num_input_tokens_seen": 256015184, "step": 118520 }, { "epoch": 19.335236541598697, "grad_norm": 0.07069176435470581, "learning_rate": 3.3632152894079727e-06, "loss": 0.0013, "num_input_tokens_seen": 256026192, "step": 118525 }, { "epoch": 19.33605220228385, "grad_norm": 0.006027981173247099, "learning_rate": 3.3549783204469e-06, "loss": 0.0018, "num_input_tokens_seen": 256036496, "step": 118530 }, { "epoch": 19.336867862969005, "grad_norm": 0.0005176325212232769, "learning_rate": 3.3467514166272696e-06, "loss": 0.0015, "num_input_tokens_seen": 256046992, "step": 118535 }, { "epoch": 19.33768352365416, "grad_norm": 0.05079817399382591, "learning_rate": 3.338534578115726e-06, "loss": 0.0023, "num_input_tokens_seen": 256056176, "step": 118540 }, { "epoch": 19.338499184339316, "grad_norm": 0.015587205067276955, "learning_rate": 3.3303278050789143e-06, "loss": 0.0006, "num_input_tokens_seen": 256066768, "step": 118545 }, { "epoch": 19.339314845024468, "grad_norm": 0.07898391783237457, "learning_rate": 3.3221310976829787e-06, "loss": 0.0059, "num_input_tokens_seen": 256076880, "step": 118550 }, { "epoch": 19.340130505709624, "grad_norm": 0.003357226261869073, "learning_rate": 3.313944456094231e-06, "loss": 0.0061, "num_input_tokens_seen": 256088528, "step": 118555 }, { "epoch": 19.34094616639478, "grad_norm": 0.0023918889928609133, "learning_rate": 3.3057678804784276e-06, "loss": 0.0015, "num_input_tokens_seen": 256098800, "step": 118560 }, { "epoch": 19.341761827079935, "grad_norm": 0.015937641263008118, "learning_rate": 3.29760137100138e-06, "loss": 0.0009, "num_input_tokens_seen": 256109488, "step": 118565 }, { "epoch": 19.34257748776509, "grad_norm": 0.08707918971776962, "learning_rate": 3.289444927828511e-06, "loss": 0.0018, "num_input_tokens_seen": 256119600, "step": 118570 }, { "epoch": 19.343393148450243, "grad_norm": 0.0558028407394886, "learning_rate": 3.281298551125189e-06, "loss": 0.003, "num_input_tokens_seen": 256130096, "step": 118575 }, { "epoch": 19.3442088091354, "grad_norm": 0.0014487484004348516, "learning_rate": 3.2731622410565043e-06, "loss": 0.0006, "num_input_tokens_seen": 256141264, "step": 118580 }, { "epoch": 19.345024469820554, "grad_norm": 0.0015977158909663558, "learning_rate": 3.265035997787269e-06, "loss": 0.0006, "num_input_tokens_seen": 256151120, "step": 118585 }, { "epoch": 19.34584013050571, "grad_norm": 0.0008332631550729275, "learning_rate": 3.256919821482296e-06, "loss": 0.0005, "num_input_tokens_seen": 256162448, "step": 118590 }, { "epoch": 19.346655791190866, "grad_norm": 0.002260663080960512, "learning_rate": 3.2488137123059537e-06, "loss": 0.0004, "num_input_tokens_seen": 256173296, "step": 118595 }, { "epoch": 19.347471451876018, "grad_norm": 0.02533086948096752, "learning_rate": 3.2407176704226102e-06, "loss": 0.0007, "num_input_tokens_seen": 256184080, "step": 118600 }, { "epoch": 19.348287112561174, "grad_norm": 0.005144801922142506, "learning_rate": 3.2326316959962463e-06, "loss": 0.0014, "num_input_tokens_seen": 256195184, "step": 118605 }, { "epoch": 19.34910277324633, "grad_norm": 0.012791904620826244, "learning_rate": 3.224555789190897e-06, "loss": 0.0014, "num_input_tokens_seen": 256206640, "step": 118610 }, { "epoch": 19.349918433931485, "grad_norm": 0.00018290229490958154, "learning_rate": 3.216489950170043e-06, "loss": 0.0013, "num_input_tokens_seen": 256218288, "step": 118615 }, { "epoch": 19.35073409461664, "grad_norm": 0.00029546156292781234, "learning_rate": 3.208434179097275e-06, "loss": 0.0003, "num_input_tokens_seen": 256229904, "step": 118620 }, { "epoch": 19.351549755301793, "grad_norm": 0.0011031778994947672, "learning_rate": 3.200388476135796e-06, "loss": 0.0096, "num_input_tokens_seen": 256239152, "step": 118625 }, { "epoch": 19.35236541598695, "grad_norm": 0.013649040833115578, "learning_rate": 3.1923528414487535e-06, "loss": 0.0011, "num_input_tokens_seen": 256250288, "step": 118630 }, { "epoch": 19.353181076672104, "grad_norm": 0.0035270475782454014, "learning_rate": 3.184327275198795e-06, "loss": 0.0005, "num_input_tokens_seen": 256260880, "step": 118635 }, { "epoch": 19.35399673735726, "grad_norm": 0.2129955291748047, "learning_rate": 3.1763117775487903e-06, "loss": 0.0048, "num_input_tokens_seen": 256272240, "step": 118640 }, { "epoch": 19.354812398042416, "grad_norm": 0.0009809770854189992, "learning_rate": 3.168306348661054e-06, "loss": 0.0006, "num_input_tokens_seen": 256282704, "step": 118645 }, { "epoch": 19.355628058727568, "grad_norm": 0.005014079622924328, "learning_rate": 3.160310988697901e-06, "loss": 0.0015, "num_input_tokens_seen": 256292784, "step": 118650 }, { "epoch": 19.356443719412724, "grad_norm": 0.0024872953072190285, "learning_rate": 3.152325697821312e-06, "loss": 0.0031, "num_input_tokens_seen": 256302576, "step": 118655 }, { "epoch": 19.35725938009788, "grad_norm": 0.0008488795720040798, "learning_rate": 3.1443504761931585e-06, "loss": 0.0009, "num_input_tokens_seen": 256313456, "step": 118660 }, { "epoch": 19.358075040783035, "grad_norm": 0.005055803805589676, "learning_rate": 3.1363853239750327e-06, "loss": 0.0019, "num_input_tokens_seen": 256323216, "step": 118665 }, { "epoch": 19.35889070146819, "grad_norm": 0.005085945129394531, "learning_rate": 3.1284302413283615e-06, "loss": 0.0042, "num_input_tokens_seen": 256333616, "step": 118670 }, { "epoch": 19.359706362153343, "grad_norm": 0.03696412593126297, "learning_rate": 3.1204852284143493e-06, "loss": 0.0023, "num_input_tokens_seen": 256343760, "step": 118675 }, { "epoch": 19.3605220228385, "grad_norm": 0.0008415200281888247, "learning_rate": 3.1125502853941444e-06, "loss": 0.001, "num_input_tokens_seen": 256355344, "step": 118680 }, { "epoch": 19.361337683523654, "grad_norm": 0.005204454530030489, "learning_rate": 3.1046254124283413e-06, "loss": 0.0016, "num_input_tokens_seen": 256366032, "step": 118685 }, { "epoch": 19.36215334420881, "grad_norm": 0.02970193885266781, "learning_rate": 3.0967106096777e-06, "loss": 0.0017, "num_input_tokens_seen": 256376208, "step": 118690 }, { "epoch": 19.362969004893966, "grad_norm": 0.0011499657994136214, "learning_rate": 3.088805877302592e-06, "loss": 0.0005, "num_input_tokens_seen": 256387280, "step": 118695 }, { "epoch": 19.363784665579118, "grad_norm": 0.0023238202556967735, "learning_rate": 3.0809112154632226e-06, "loss": 0.0195, "num_input_tokens_seen": 256397648, "step": 118700 }, { "epoch": 19.364600326264274, "grad_norm": 0.00012586277443915606, "learning_rate": 3.073026624319575e-06, "loss": 0.002, "num_input_tokens_seen": 256407472, "step": 118705 }, { "epoch": 19.36541598694943, "grad_norm": 0.5267165899276733, "learning_rate": 3.06515210403141e-06, "loss": 0.0105, "num_input_tokens_seen": 256418096, "step": 118710 }, { "epoch": 19.366231647634585, "grad_norm": 0.00432355422526598, "learning_rate": 3.0572876547583785e-06, "loss": 0.0009, "num_input_tokens_seen": 256427568, "step": 118715 }, { "epoch": 19.36704730831974, "grad_norm": 0.02034470997750759, "learning_rate": 3.0494332766597967e-06, "loss": 0.004, "num_input_tokens_seen": 256438736, "step": 118720 }, { "epoch": 19.367862969004893, "grad_norm": 0.00162877154070884, "learning_rate": 3.0415889698949262e-06, "loss": 0.0012, "num_input_tokens_seen": 256448592, "step": 118725 }, { "epoch": 19.36867862969005, "grad_norm": 0.007950716651976109, "learning_rate": 3.0337547346226404e-06, "loss": 0.0053, "num_input_tokens_seen": 256459472, "step": 118730 }, { "epoch": 19.369494290375204, "grad_norm": 0.05686101317405701, "learning_rate": 3.025930571001756e-06, "loss": 0.0053, "num_input_tokens_seen": 256469296, "step": 118735 }, { "epoch": 19.37030995106036, "grad_norm": 0.028149202466011047, "learning_rate": 3.018116479190869e-06, "loss": 0.0017, "num_input_tokens_seen": 256479248, "step": 118740 }, { "epoch": 19.371125611745512, "grad_norm": 0.0007396186701953411, "learning_rate": 3.0103124593483522e-06, "loss": 0.0009, "num_input_tokens_seen": 256491344, "step": 118745 }, { "epoch": 19.371941272430668, "grad_norm": 0.00017671390378382057, "learning_rate": 3.002518511632246e-06, "loss": 0.0005, "num_input_tokens_seen": 256501776, "step": 118750 }, { "epoch": 19.372756933115824, "grad_norm": 0.024652238935232162, "learning_rate": 2.9947346362006466e-06, "loss": 0.0038, "num_input_tokens_seen": 256512944, "step": 118755 }, { "epoch": 19.37357259380098, "grad_norm": 0.0003830741043202579, "learning_rate": 2.986960833211205e-06, "loss": 0.0009, "num_input_tokens_seen": 256522064, "step": 118760 }, { "epoch": 19.374388254486135, "grad_norm": 0.0011993915541097522, "learning_rate": 2.9791971028215737e-06, "loss": 0.0037, "num_input_tokens_seen": 256533456, "step": 118765 }, { "epoch": 19.375203915171287, "grad_norm": 0.04528145119547844, "learning_rate": 2.9714434451889595e-06, "loss": 0.0014, "num_input_tokens_seen": 256542896, "step": 118770 }, { "epoch": 19.376019575856443, "grad_norm": 0.008095352910459042, "learning_rate": 2.9636998604706255e-06, "loss": 0.0038, "num_input_tokens_seen": 256553264, "step": 118775 }, { "epoch": 19.3768352365416, "grad_norm": 0.003776898607611656, "learning_rate": 2.955966348823391e-06, "loss": 0.0008, "num_input_tokens_seen": 256563536, "step": 118780 }, { "epoch": 19.377650897226754, "grad_norm": 0.06061271205544472, "learning_rate": 2.948242910404131e-06, "loss": 0.0021, "num_input_tokens_seen": 256573808, "step": 118785 }, { "epoch": 19.37846655791191, "grad_norm": 0.0016545297112315893, "learning_rate": 2.9405295453692195e-06, "loss": 0.0031, "num_input_tokens_seen": 256584304, "step": 118790 }, { "epoch": 19.379282218597062, "grad_norm": 0.002400145400315523, "learning_rate": 2.9328262538750316e-06, "loss": 0.0031, "num_input_tokens_seen": 256595024, "step": 118795 }, { "epoch": 19.380097879282218, "grad_norm": 0.061130374670028687, "learning_rate": 2.9251330360777205e-06, "loss": 0.0021, "num_input_tokens_seen": 256605776, "step": 118800 }, { "epoch": 19.380913539967374, "grad_norm": 0.0015816733939573169, "learning_rate": 2.9174498921331616e-06, "loss": 0.0096, "num_input_tokens_seen": 256615920, "step": 118805 }, { "epoch": 19.38172920065253, "grad_norm": 0.07032934576272964, "learning_rate": 2.909776822197063e-06, "loss": 0.0015, "num_input_tokens_seen": 256627504, "step": 118810 }, { "epoch": 19.382544861337685, "grad_norm": 0.0010459988843649626, "learning_rate": 2.902113826424968e-06, "loss": 0.0007, "num_input_tokens_seen": 256639856, "step": 118815 }, { "epoch": 19.383360522022837, "grad_norm": 0.0010384476045146585, "learning_rate": 2.8944609049721406e-06, "loss": 0.0003, "num_input_tokens_seen": 256650096, "step": 118820 }, { "epoch": 19.384176182707993, "grad_norm": 0.0006806451710872352, "learning_rate": 2.8868180579936787e-06, "loss": 0.0007, "num_input_tokens_seen": 256659760, "step": 118825 }, { "epoch": 19.38499184339315, "grad_norm": 0.012895084917545319, "learning_rate": 2.8791852856445143e-06, "loss": 0.0008, "num_input_tokens_seen": 256670512, "step": 118830 }, { "epoch": 19.385807504078304, "grad_norm": 0.005823103711009026, "learning_rate": 2.8715625880792463e-06, "loss": 0.0005, "num_input_tokens_seen": 256681744, "step": 118835 }, { "epoch": 19.38662316476346, "grad_norm": 0.0037850309163331985, "learning_rate": 2.8639499654524724e-06, "loss": 0.0003, "num_input_tokens_seen": 256692912, "step": 118840 }, { "epoch": 19.387438825448612, "grad_norm": 0.02443789690732956, "learning_rate": 2.856347417918348e-06, "loss": 0.0018, "num_input_tokens_seen": 256704400, "step": 118845 }, { "epoch": 19.388254486133768, "grad_norm": 0.008325217291712761, "learning_rate": 2.8487549456310824e-06, "loss": 0.0025, "num_input_tokens_seen": 256714704, "step": 118850 }, { "epoch": 19.389070146818923, "grad_norm": 0.09903115034103394, "learning_rate": 2.841172548744442e-06, "loss": 0.0031, "num_input_tokens_seen": 256725488, "step": 118855 }, { "epoch": 19.38988580750408, "grad_norm": 0.0006401181453838944, "learning_rate": 2.8336002274121365e-06, "loss": 0.0005, "num_input_tokens_seen": 256735184, "step": 118860 }, { "epoch": 19.390701468189235, "grad_norm": 0.0005234939744696021, "learning_rate": 2.8260379817875993e-06, "loss": 0.0006, "num_input_tokens_seen": 256746160, "step": 118865 }, { "epoch": 19.391517128874387, "grad_norm": 0.007503115572035313, "learning_rate": 2.818485812024152e-06, "loss": 0.0008, "num_input_tokens_seen": 256757808, "step": 118870 }, { "epoch": 19.392332789559543, "grad_norm": 0.00616914639249444, "learning_rate": 2.810943718274783e-06, "loss": 0.0004, "num_input_tokens_seen": 256769296, "step": 118875 }, { "epoch": 19.3931484502447, "grad_norm": 0.0006514721899293363, "learning_rate": 2.8034117006924264e-06, "loss": 0.0012, "num_input_tokens_seen": 256780144, "step": 118880 }, { "epoch": 19.393964110929854, "grad_norm": 0.030497943982481956, "learning_rate": 2.795889759429626e-06, "loss": 0.0025, "num_input_tokens_seen": 256790832, "step": 118885 }, { "epoch": 19.39477977161501, "grad_norm": 0.14161071181297302, "learning_rate": 2.788377894638816e-06, "loss": 0.0029, "num_input_tokens_seen": 256801232, "step": 118890 }, { "epoch": 19.395595432300162, "grad_norm": 0.008544592186808586, "learning_rate": 2.7808761064723186e-06, "loss": 0.0014, "num_input_tokens_seen": 256812880, "step": 118895 }, { "epoch": 19.396411092985318, "grad_norm": 0.13467442989349365, "learning_rate": 2.773384395082179e-06, "loss": 0.0047, "num_input_tokens_seen": 256823888, "step": 118900 }, { "epoch": 19.397226753670473, "grad_norm": 0.006914534140378237, "learning_rate": 2.765902760620165e-06, "loss": 0.0017, "num_input_tokens_seen": 256835600, "step": 118905 }, { "epoch": 19.39804241435563, "grad_norm": 0.008217011578381062, "learning_rate": 2.758431203237877e-06, "loss": 0.0013, "num_input_tokens_seen": 256845424, "step": 118910 }, { "epoch": 19.39885807504078, "grad_norm": 0.005663623567670584, "learning_rate": 2.7509697230868048e-06, "loss": 0.0009, "num_input_tokens_seen": 256857168, "step": 118915 }, { "epoch": 19.399673735725937, "grad_norm": 0.0005932076601311564, "learning_rate": 2.7435183203181613e-06, "loss": 0.0043, "num_input_tokens_seen": 256868496, "step": 118920 }, { "epoch": 19.400489396411093, "grad_norm": 0.0005741248605772853, "learning_rate": 2.7360769950828814e-06, "loss": 0.0006, "num_input_tokens_seen": 256878544, "step": 118925 }, { "epoch": 19.40130505709625, "grad_norm": 0.001585560035891831, "learning_rate": 2.728645747531844e-06, "loss": 0.0164, "num_input_tokens_seen": 256889264, "step": 118930 }, { "epoch": 19.402120717781404, "grad_norm": 0.011332179419696331, "learning_rate": 2.721224577815651e-06, "loss": 0.0007, "num_input_tokens_seen": 256901296, "step": 118935 }, { "epoch": 19.402936378466556, "grad_norm": 0.010597261600196362, "learning_rate": 2.713813486084682e-06, "loss": 0.0023, "num_input_tokens_seen": 256912048, "step": 118940 }, { "epoch": 19.403752039151712, "grad_norm": 0.1503845602273941, "learning_rate": 2.7064124724891505e-06, "loss": 0.0027, "num_input_tokens_seen": 256922960, "step": 118945 }, { "epoch": 19.404567699836868, "grad_norm": 0.0053455098532140255, "learning_rate": 2.6990215371789916e-06, "loss": 0.002, "num_input_tokens_seen": 256932656, "step": 118950 }, { "epoch": 19.405383360522023, "grad_norm": 0.03206378594040871, "learning_rate": 2.691640680304086e-06, "loss": 0.0036, "num_input_tokens_seen": 256941712, "step": 118955 }, { "epoch": 19.40619902120718, "grad_norm": 0.0004055328026879579, "learning_rate": 2.684269902013925e-06, "loss": 0.0017, "num_input_tokens_seen": 256952592, "step": 118960 }, { "epoch": 19.40701468189233, "grad_norm": 0.0035495799966156483, "learning_rate": 2.676909202457889e-06, "loss": 0.0009, "num_input_tokens_seen": 256963920, "step": 118965 }, { "epoch": 19.407830342577487, "grad_norm": 0.0012541578616946936, "learning_rate": 2.6695585817852476e-06, "loss": 0.0029, "num_input_tokens_seen": 256975312, "step": 118970 }, { "epoch": 19.408646003262643, "grad_norm": 0.020598217844963074, "learning_rate": 2.6622180401448815e-06, "loss": 0.016, "num_input_tokens_seen": 256984304, "step": 118975 }, { "epoch": 19.4094616639478, "grad_norm": 0.0014313864521682262, "learning_rate": 2.6548875776856163e-06, "loss": 0.0012, "num_input_tokens_seen": 256996624, "step": 118980 }, { "epoch": 19.410277324632954, "grad_norm": 0.009284721687436104, "learning_rate": 2.6475671945559442e-06, "loss": 0.0017, "num_input_tokens_seen": 257008144, "step": 118985 }, { "epoch": 19.411092985318106, "grad_norm": 0.02313319407403469, "learning_rate": 2.6402568909042467e-06, "loss": 0.001, "num_input_tokens_seen": 257020048, "step": 118990 }, { "epoch": 19.411908646003262, "grad_norm": 0.002016447950154543, "learning_rate": 2.6329566668787384e-06, "loss": 0.0003, "num_input_tokens_seen": 257031952, "step": 118995 }, { "epoch": 19.412724306688418, "grad_norm": 0.001090245321393013, "learning_rate": 2.625666522627301e-06, "loss": 0.0005, "num_input_tokens_seen": 257042928, "step": 119000 }, { "epoch": 19.413539967373573, "grad_norm": 0.026525719091296196, "learning_rate": 2.6183864582976503e-06, "loss": 0.0012, "num_input_tokens_seen": 257053872, "step": 119005 }, { "epoch": 19.41435562805873, "grad_norm": 0.0006216730689629912, "learning_rate": 2.611116474037445e-06, "loss": 0.001, "num_input_tokens_seen": 257064528, "step": 119010 }, { "epoch": 19.41517128874388, "grad_norm": 0.07049921900033951, "learning_rate": 2.603856569993901e-06, "loss": 0.0021, "num_input_tokens_seen": 257076304, "step": 119015 }, { "epoch": 19.415986949429037, "grad_norm": 0.006914778146892786, "learning_rate": 2.596606746314234e-06, "loss": 0.0008, "num_input_tokens_seen": 257088400, "step": 119020 }, { "epoch": 19.416802610114193, "grad_norm": 0.02052348665893078, "learning_rate": 2.589367003145271e-06, "loss": 0.0007, "num_input_tokens_seen": 257099728, "step": 119025 }, { "epoch": 19.41761827079935, "grad_norm": 0.0031544077210128307, "learning_rate": 2.5821373406338387e-06, "loss": 0.0021, "num_input_tokens_seen": 257110800, "step": 119030 }, { "epoch": 19.418433931484504, "grad_norm": 0.010480822063982487, "learning_rate": 2.574917758926376e-06, "loss": 0.0011, "num_input_tokens_seen": 257121968, "step": 119035 }, { "epoch": 19.419249592169656, "grad_norm": 0.003024548990651965, "learning_rate": 2.5677082581692657e-06, "loss": 0.0043, "num_input_tokens_seen": 257131952, "step": 119040 }, { "epoch": 19.420065252854812, "grad_norm": 0.03305144980549812, "learning_rate": 2.5605088385085573e-06, "loss": 0.0015, "num_input_tokens_seen": 257142864, "step": 119045 }, { "epoch": 19.420880913539968, "grad_norm": 0.0013253577053546906, "learning_rate": 2.553319500090245e-06, "loss": 0.0019, "num_input_tokens_seen": 257153904, "step": 119050 }, { "epoch": 19.421696574225123, "grad_norm": 0.009747570380568504, "learning_rate": 2.5461402430599357e-06, "loss": 0.0013, "num_input_tokens_seen": 257166032, "step": 119055 }, { "epoch": 19.42251223491028, "grad_norm": 0.00021776130597572774, "learning_rate": 2.5389710675631227e-06, "loss": 0.0076, "num_input_tokens_seen": 257177296, "step": 119060 }, { "epoch": 19.42332789559543, "grad_norm": 0.0012730876915156841, "learning_rate": 2.5318119737451905e-06, "loss": 0.0008, "num_input_tokens_seen": 257187824, "step": 119065 }, { "epoch": 19.424143556280587, "grad_norm": 0.0005625274498015642, "learning_rate": 2.524662961751134e-06, "loss": 0.0003, "num_input_tokens_seen": 257197616, "step": 119070 }, { "epoch": 19.424959216965743, "grad_norm": 0.0018120675813406706, "learning_rate": 2.517524031725893e-06, "loss": 0.0003, "num_input_tokens_seen": 257207216, "step": 119075 }, { "epoch": 19.4257748776509, "grad_norm": 0.001958174630999565, "learning_rate": 2.5103951838141292e-06, "loss": 0.0079, "num_input_tokens_seen": 257217840, "step": 119080 }, { "epoch": 19.42659053833605, "grad_norm": 0.0011971911881119013, "learning_rate": 2.503276418160283e-06, "loss": 0.0018, "num_input_tokens_seen": 257229136, "step": 119085 }, { "epoch": 19.427406199021206, "grad_norm": 0.013330371119081974, "learning_rate": 2.496167734908683e-06, "loss": 0.1763, "num_input_tokens_seen": 257239664, "step": 119090 }, { "epoch": 19.428221859706362, "grad_norm": 0.00020406786643434316, "learning_rate": 2.489069134203381e-06, "loss": 0.0005, "num_input_tokens_seen": 257250544, "step": 119095 }, { "epoch": 19.429037520391518, "grad_norm": 0.025051139295101166, "learning_rate": 2.481980616188262e-06, "loss": 0.0021, "num_input_tokens_seen": 257261168, "step": 119100 }, { "epoch": 19.429853181076673, "grad_norm": 0.011714156717061996, "learning_rate": 2.474902181006877e-06, "loss": 0.0024, "num_input_tokens_seen": 257272624, "step": 119105 }, { "epoch": 19.430668841761825, "grad_norm": 0.00022403965704143047, "learning_rate": 2.467833828802779e-06, "loss": 0.0023, "num_input_tokens_seen": 257283344, "step": 119110 }, { "epoch": 19.43148450244698, "grad_norm": 0.0003524889179971069, "learning_rate": 2.4607755597192417e-06, "loss": 0.0023, "num_input_tokens_seen": 257293840, "step": 119115 }, { "epoch": 19.432300163132137, "grad_norm": 0.001508756191469729, "learning_rate": 2.453727373899206e-06, "loss": 0.0013, "num_input_tokens_seen": 257305168, "step": 119120 }, { "epoch": 19.433115823817293, "grad_norm": 0.011893289163708687, "learning_rate": 2.4466892714856137e-06, "loss": 0.0044, "num_input_tokens_seen": 257316752, "step": 119125 }, { "epoch": 19.43393148450245, "grad_norm": 0.23823878169059753, "learning_rate": 2.439661252621017e-06, "loss": 0.0065, "num_input_tokens_seen": 257328880, "step": 119130 }, { "epoch": 19.4347471451876, "grad_norm": 0.0054446193389594555, "learning_rate": 2.4326433174479133e-06, "loss": 0.0005, "num_input_tokens_seen": 257340208, "step": 119135 }, { "epoch": 19.435562805872756, "grad_norm": 0.004071327392011881, "learning_rate": 2.4256354661084666e-06, "loss": 0.0039, "num_input_tokens_seen": 257349904, "step": 119140 }, { "epoch": 19.436378466557912, "grad_norm": 0.0009328233427368104, "learning_rate": 2.4186376987447857e-06, "loss": 0.0017, "num_input_tokens_seen": 257361552, "step": 119145 }, { "epoch": 19.437194127243067, "grad_norm": 0.0005683921044692397, "learning_rate": 2.41165001549859e-06, "loss": 0.0015, "num_input_tokens_seen": 257373072, "step": 119150 }, { "epoch": 19.438009787928223, "grad_norm": 0.005114169325679541, "learning_rate": 2.4046724165115998e-06, "loss": 0.0005, "num_input_tokens_seen": 257383984, "step": 119155 }, { "epoch": 19.438825448613375, "grad_norm": 0.08982323110103607, "learning_rate": 2.3977049019250907e-06, "loss": 0.0094, "num_input_tokens_seen": 257395152, "step": 119160 }, { "epoch": 19.43964110929853, "grad_norm": 0.0284635778516531, "learning_rate": 2.3907474718803944e-06, "loss": 0.0021, "num_input_tokens_seen": 257404240, "step": 119165 }, { "epoch": 19.440456769983687, "grad_norm": 0.039190005511045456, "learning_rate": 2.383800126518454e-06, "loss": 0.0013, "num_input_tokens_seen": 257413904, "step": 119170 }, { "epoch": 19.441272430668842, "grad_norm": 0.1472269892692566, "learning_rate": 2.3768628659801005e-06, "loss": 0.0035, "num_input_tokens_seen": 257425008, "step": 119175 }, { "epoch": 19.442088091353998, "grad_norm": 0.00017574279627297074, "learning_rate": 2.3699356904058334e-06, "loss": 0.0015, "num_input_tokens_seen": 257436112, "step": 119180 }, { "epoch": 19.44290375203915, "grad_norm": 0.001107974792830646, "learning_rate": 2.363018599936151e-06, "loss": 0.0004, "num_input_tokens_seen": 257448144, "step": 119185 }, { "epoch": 19.443719412724306, "grad_norm": 1.321487545967102, "learning_rate": 2.3561115947111635e-06, "loss": 0.0023, "num_input_tokens_seen": 257458608, "step": 119190 }, { "epoch": 19.44453507340946, "grad_norm": 0.0002932049101218581, "learning_rate": 2.349214674870925e-06, "loss": 0.0038, "num_input_tokens_seen": 257469936, "step": 119195 }, { "epoch": 19.445350734094617, "grad_norm": 0.023268211632966995, "learning_rate": 2.3423278405551583e-06, "loss": 0.0033, "num_input_tokens_seen": 257478736, "step": 119200 }, { "epoch": 19.446166394779773, "grad_norm": 0.004889782518148422, "learning_rate": 2.335451091903418e-06, "loss": 0.0024, "num_input_tokens_seen": 257489424, "step": 119205 }, { "epoch": 19.446982055464925, "grad_norm": 0.020940110087394714, "learning_rate": 2.3285844290550916e-06, "loss": 0.0017, "num_input_tokens_seen": 257499888, "step": 119210 }, { "epoch": 19.44779771615008, "grad_norm": 0.0025529158301651478, "learning_rate": 2.321727852149402e-06, "loss": 0.0003, "num_input_tokens_seen": 257509840, "step": 119215 }, { "epoch": 19.448613376835237, "grad_norm": 0.0006794088985770941, "learning_rate": 2.314881361325183e-06, "loss": 0.0005, "num_input_tokens_seen": 257521680, "step": 119220 }, { "epoch": 19.449429037520392, "grad_norm": 0.0012869905913248658, "learning_rate": 2.308044956721267e-06, "loss": 0.0009, "num_input_tokens_seen": 257530672, "step": 119225 }, { "epoch": 19.450244698205548, "grad_norm": 0.03023373894393444, "learning_rate": 2.30121863847621e-06, "loss": 0.0013, "num_input_tokens_seen": 257540720, "step": 119230 }, { "epoch": 19.4510603588907, "grad_norm": 0.00452503701671958, "learning_rate": 2.294402406728291e-06, "loss": 0.0002, "num_input_tokens_seen": 257552048, "step": 119235 }, { "epoch": 19.451876019575856, "grad_norm": 0.0002490385086275637, "learning_rate": 2.2875962616157318e-06, "loss": 0.0003, "num_input_tokens_seen": 257562416, "step": 119240 }, { "epoch": 19.45269168026101, "grad_norm": 0.02928837016224861, "learning_rate": 2.2808002032763676e-06, "loss": 0.0011, "num_input_tokens_seen": 257572592, "step": 119245 }, { "epoch": 19.453507340946167, "grad_norm": 0.004705357365310192, "learning_rate": 2.2740142318480873e-06, "loss": 0.0064, "num_input_tokens_seen": 257583856, "step": 119250 }, { "epoch": 19.454323001631323, "grad_norm": 0.0028875924181193113, "learning_rate": 2.267238347468226e-06, "loss": 0.0007, "num_input_tokens_seen": 257594640, "step": 119255 }, { "epoch": 19.455138662316475, "grad_norm": 0.0010396402794867754, "learning_rate": 2.2604725502742286e-06, "loss": 0.0006, "num_input_tokens_seen": 257605712, "step": 119260 }, { "epoch": 19.45595432300163, "grad_norm": 0.00031200837111100554, "learning_rate": 2.2537168404032082e-06, "loss": 0.0004, "num_input_tokens_seen": 257617232, "step": 119265 }, { "epoch": 19.456769983686787, "grad_norm": 0.0012279873481020331, "learning_rate": 2.2469712179920555e-06, "loss": 0.0022, "num_input_tokens_seen": 257628976, "step": 119270 }, { "epoch": 19.457585644371942, "grad_norm": 0.037878625094890594, "learning_rate": 2.2402356831774383e-06, "loss": 0.0011, "num_input_tokens_seen": 257639696, "step": 119275 }, { "epoch": 19.458401305057095, "grad_norm": 0.0029649846255779266, "learning_rate": 2.2335102360959148e-06, "loss": 0.0025, "num_input_tokens_seen": 257651280, "step": 119280 }, { "epoch": 19.45921696574225, "grad_norm": 0.019405025988817215, "learning_rate": 2.226794876883764e-06, "loss": 0.0167, "num_input_tokens_seen": 257661808, "step": 119285 }, { "epoch": 19.460032626427406, "grad_norm": 0.05081169307231903, "learning_rate": 2.2200896056771004e-06, "loss": 0.002, "num_input_tokens_seen": 257672912, "step": 119290 }, { "epoch": 19.46084828711256, "grad_norm": 0.007771750912070274, "learning_rate": 2.2133944226117587e-06, "loss": 0.0014, "num_input_tokens_seen": 257685008, "step": 119295 }, { "epoch": 19.461663947797717, "grad_norm": 0.0009954735869541764, "learning_rate": 2.2067093278235194e-06, "loss": 0.0003, "num_input_tokens_seen": 257695760, "step": 119300 }, { "epoch": 19.46247960848287, "grad_norm": 0.0010892596328631043, "learning_rate": 2.2000343214477746e-06, "loss": 0.0019, "num_input_tokens_seen": 257707280, "step": 119305 }, { "epoch": 19.463295269168025, "grad_norm": 0.0006688821013085544, "learning_rate": 2.1933694036198605e-06, "loss": 0.0009, "num_input_tokens_seen": 257717328, "step": 119310 }, { "epoch": 19.46411092985318, "grad_norm": 0.12306705862283707, "learning_rate": 2.1867145744747796e-06, "loss": 0.0038, "num_input_tokens_seen": 257727760, "step": 119315 }, { "epoch": 19.464926590538337, "grad_norm": 0.00043416867265477777, "learning_rate": 2.1800698341475355e-06, "loss": 0.0007, "num_input_tokens_seen": 257737040, "step": 119320 }, { "epoch": 19.465742251223492, "grad_norm": 0.00032600079430267215, "learning_rate": 2.173435182772632e-06, "loss": 0.0006, "num_input_tokens_seen": 257747888, "step": 119325 }, { "epoch": 19.466557911908644, "grad_norm": 0.007647597696632147, "learning_rate": 2.166810620484627e-06, "loss": 0.0387, "num_input_tokens_seen": 257758672, "step": 119330 }, { "epoch": 19.4673735725938, "grad_norm": 0.031011521816253662, "learning_rate": 2.160196147417748e-06, "loss": 0.002, "num_input_tokens_seen": 257768720, "step": 119335 }, { "epoch": 19.468189233278956, "grad_norm": 0.008647006005048752, "learning_rate": 2.153591763706053e-06, "loss": 0.0024, "num_input_tokens_seen": 257779376, "step": 119340 }, { "epoch": 19.46900489396411, "grad_norm": 0.0007197380182333291, "learning_rate": 2.1469974694833805e-06, "loss": 0.0222, "num_input_tokens_seen": 257790672, "step": 119345 }, { "epoch": 19.469820554649267, "grad_norm": 0.007564597297459841, "learning_rate": 2.140413264883401e-06, "loss": 0.0036, "num_input_tokens_seen": 257801296, "step": 119350 }, { "epoch": 19.47063621533442, "grad_norm": 0.002344276290386915, "learning_rate": 2.1338391500394516e-06, "loss": 0.0356, "num_input_tokens_seen": 257813072, "step": 119355 }, { "epoch": 19.471451876019575, "grad_norm": 0.00835796445608139, "learning_rate": 2.1272751250849263e-06, "loss": 0.0009, "num_input_tokens_seen": 257824720, "step": 119360 }, { "epoch": 19.47226753670473, "grad_norm": 0.003108770353719592, "learning_rate": 2.120721190152719e-06, "loss": 0.0018, "num_input_tokens_seen": 257835312, "step": 119365 }, { "epoch": 19.473083197389887, "grad_norm": 0.0004182121774647385, "learning_rate": 2.114177345375723e-06, "loss": 0.0023, "num_input_tokens_seen": 257846544, "step": 119370 }, { "epoch": 19.473898858075042, "grad_norm": 0.03978124260902405, "learning_rate": 2.1076435908864986e-06, "loss": 0.0021, "num_input_tokens_seen": 257857776, "step": 119375 }, { "epoch": 19.474714518760194, "grad_norm": 0.0012924002949148417, "learning_rate": 2.1011199268175517e-06, "loss": 0.017, "num_input_tokens_seen": 257868592, "step": 119380 }, { "epoch": 19.47553017944535, "grad_norm": 0.002158062532544136, "learning_rate": 2.0946063533009986e-06, "loss": 0.0006, "num_input_tokens_seen": 257879504, "step": 119385 }, { "epoch": 19.476345840130506, "grad_norm": 0.005779027007520199, "learning_rate": 2.0881028704688997e-06, "loss": 0.0005, "num_input_tokens_seen": 257891152, "step": 119390 }, { "epoch": 19.47716150081566, "grad_norm": 0.019146548584103584, "learning_rate": 2.0816094784530394e-06, "loss": 0.0013, "num_input_tokens_seen": 257901456, "step": 119395 }, { "epoch": 19.477977161500817, "grad_norm": 0.010578310117125511, "learning_rate": 2.075126177385034e-06, "loss": 0.0021, "num_input_tokens_seen": 257912144, "step": 119400 }, { "epoch": 19.47879282218597, "grad_norm": 0.0001685271126916632, "learning_rate": 2.0686529673962784e-06, "loss": 0.0015, "num_input_tokens_seen": 257921584, "step": 119405 }, { "epoch": 19.479608482871125, "grad_norm": 0.0007612688350491226, "learning_rate": 2.06218984861789e-06, "loss": 0.0069, "num_input_tokens_seen": 257930800, "step": 119410 }, { "epoch": 19.48042414355628, "grad_norm": 0.0007769337389618158, "learning_rate": 2.0557368211809314e-06, "loss": 0.0075, "num_input_tokens_seen": 257940848, "step": 119415 }, { "epoch": 19.481239804241437, "grad_norm": 0.001466889400035143, "learning_rate": 2.0492938852161304e-06, "loss": 0.0004, "num_input_tokens_seen": 257952304, "step": 119420 }, { "epoch": 19.482055464926592, "grad_norm": 0.016221707686781883, "learning_rate": 2.042861040854105e-06, "loss": 0.0008, "num_input_tokens_seen": 257963056, "step": 119425 }, { "epoch": 19.482871125611744, "grad_norm": 0.03494711592793465, "learning_rate": 2.0364382882251952e-06, "loss": 0.0013, "num_input_tokens_seen": 257973744, "step": 119430 }, { "epoch": 19.4836867862969, "grad_norm": 0.0013683143770322204, "learning_rate": 2.030025627459575e-06, "loss": 0.0018, "num_input_tokens_seen": 257984912, "step": 119435 }, { "epoch": 19.484502446982056, "grad_norm": 0.001632693805731833, "learning_rate": 2.023623058687196e-06, "loss": 0.0003, "num_input_tokens_seen": 257996048, "step": 119440 }, { "epoch": 19.48531810766721, "grad_norm": 0.0575888529419899, "learning_rate": 2.0172305820378434e-06, "loss": 0.0027, "num_input_tokens_seen": 258005936, "step": 119445 }, { "epoch": 19.486133768352367, "grad_norm": 0.0022574923932552338, "learning_rate": 2.010848197641024e-06, "loss": 0.0011, "num_input_tokens_seen": 258016336, "step": 119450 }, { "epoch": 19.48694942903752, "grad_norm": 0.00851681362837553, "learning_rate": 2.0044759056261354e-06, "loss": 0.0007, "num_input_tokens_seen": 258026384, "step": 119455 }, { "epoch": 19.487765089722675, "grad_norm": 0.0007707860204391181, "learning_rate": 1.9981137061222954e-06, "loss": 0.008, "num_input_tokens_seen": 258036784, "step": 119460 }, { "epoch": 19.48858075040783, "grad_norm": 0.00015356663789134473, "learning_rate": 1.9917615992584017e-06, "loss": 0.1028, "num_input_tokens_seen": 258048688, "step": 119465 }, { "epoch": 19.489396411092986, "grad_norm": 0.6992786526679993, "learning_rate": 1.985419585163295e-06, "loss": 0.0374, "num_input_tokens_seen": 258059376, "step": 119470 }, { "epoch": 19.49021207177814, "grad_norm": 0.0010557199129834771, "learning_rate": 1.9790876639653733e-06, "loss": 0.0299, "num_input_tokens_seen": 258071472, "step": 119475 }, { "epoch": 19.491027732463294, "grad_norm": 0.014713788405060768, "learning_rate": 1.972765835793089e-06, "loss": 0.0016, "num_input_tokens_seen": 258082448, "step": 119480 }, { "epoch": 19.49184339314845, "grad_norm": 0.0009430281934328377, "learning_rate": 1.9664541007744508e-06, "loss": 0.0006, "num_input_tokens_seen": 258093232, "step": 119485 }, { "epoch": 19.492659053833606, "grad_norm": 0.0018127475632354617, "learning_rate": 1.960152459037412e-06, "loss": 0.0006, "num_input_tokens_seen": 258104880, "step": 119490 }, { "epoch": 19.49347471451876, "grad_norm": 0.008685811422765255, "learning_rate": 1.953860910709704e-06, "loss": 0.0016, "num_input_tokens_seen": 258115984, "step": 119495 }, { "epoch": 19.494290375203914, "grad_norm": 0.006860397756099701, "learning_rate": 1.9475794559188354e-06, "loss": 0.0013, "num_input_tokens_seen": 258125520, "step": 119500 }, { "epoch": 19.49510603588907, "grad_norm": 0.0003657161723822355, "learning_rate": 1.9413080947920934e-06, "loss": 0.0028, "num_input_tokens_seen": 258137712, "step": 119505 }, { "epoch": 19.495921696574225, "grad_norm": 0.0007896169554442167, "learning_rate": 1.9350468274565434e-06, "loss": 0.0004, "num_input_tokens_seen": 258148272, "step": 119510 }, { "epoch": 19.49673735725938, "grad_norm": 0.0059143430553376675, "learning_rate": 1.9287956540391395e-06, "loss": 0.0202, "num_input_tokens_seen": 258159120, "step": 119515 }, { "epoch": 19.497553017944536, "grad_norm": 0.03275555744767189, "learning_rate": 1.9225545746665575e-06, "loss": 0.0007, "num_input_tokens_seen": 258170384, "step": 119520 }, { "epoch": 19.49836867862969, "grad_norm": 0.0034429405350238085, "learning_rate": 1.9163235894651965e-06, "loss": 0.0006, "num_input_tokens_seen": 258181808, "step": 119525 }, { "epoch": 19.499184339314844, "grad_norm": 0.0001575792266521603, "learning_rate": 1.9101026985614558e-06, "loss": 0.0013, "num_input_tokens_seen": 258191984, "step": 119530 }, { "epoch": 19.5, "grad_norm": 0.00021977766300551593, "learning_rate": 1.903891902081345e-06, "loss": 0.0008, "num_input_tokens_seen": 258202736, "step": 119535 }, { "epoch": 19.500815660685156, "grad_norm": 0.0011759491171687841, "learning_rate": 1.8976912001507084e-06, "loss": 0.0006, "num_input_tokens_seen": 258214576, "step": 119540 }, { "epoch": 19.50163132137031, "grad_norm": 0.0012104709167033434, "learning_rate": 1.8915005928953344e-06, "loss": 0.001, "num_input_tokens_seen": 258224368, "step": 119545 }, { "epoch": 19.502446982055464, "grad_norm": 0.0025493651628494263, "learning_rate": 1.8853200804405113e-06, "loss": 0.0011, "num_input_tokens_seen": 258235056, "step": 119550 }, { "epoch": 19.50326264274062, "grad_norm": 0.008421454578638077, "learning_rate": 1.879149662911639e-06, "loss": 0.0007, "num_input_tokens_seen": 258246544, "step": 119555 }, { "epoch": 19.504078303425775, "grad_norm": 0.00022099376656115055, "learning_rate": 1.8729893404336728e-06, "loss": 0.0008, "num_input_tokens_seen": 258258352, "step": 119560 }, { "epoch": 19.50489396411093, "grad_norm": 0.007480515167117119, "learning_rate": 1.8668391131315133e-06, "loss": 0.0008, "num_input_tokens_seen": 258269520, "step": 119565 }, { "epoch": 19.505709624796086, "grad_norm": 0.0013238782994449139, "learning_rate": 1.8606989811297824e-06, "loss": 0.0017, "num_input_tokens_seen": 258279600, "step": 119570 }, { "epoch": 19.50652528548124, "grad_norm": 0.01068634632974863, "learning_rate": 1.8545689445528813e-06, "loss": 0.0007, "num_input_tokens_seen": 258291664, "step": 119575 }, { "epoch": 19.507340946166394, "grad_norm": 0.053680844604969025, "learning_rate": 1.8484490035251544e-06, "loss": 0.0034, "num_input_tokens_seen": 258303152, "step": 119580 }, { "epoch": 19.50815660685155, "grad_norm": 0.0013580488739535213, "learning_rate": 1.842339158170503e-06, "loss": 0.0082, "num_input_tokens_seen": 258312528, "step": 119585 }, { "epoch": 19.508972267536706, "grad_norm": 0.00019802911265287548, "learning_rate": 1.8362394086128276e-06, "loss": 0.0011, "num_input_tokens_seen": 258323792, "step": 119590 }, { "epoch": 19.50978792822186, "grad_norm": 0.07218465209007263, "learning_rate": 1.8301497549757518e-06, "loss": 0.0024, "num_input_tokens_seen": 258334064, "step": 119595 }, { "epoch": 19.510603588907014, "grad_norm": 0.0058241235092282295, "learning_rate": 1.8240701973826213e-06, "loss": 0.0006, "num_input_tokens_seen": 258342704, "step": 119600 }, { "epoch": 19.51141924959217, "grad_norm": 0.2714917063713074, "learning_rate": 1.8180007359567263e-06, "loss": 0.0048, "num_input_tokens_seen": 258353296, "step": 119605 }, { "epoch": 19.512234910277325, "grad_norm": 0.0011957393726333976, "learning_rate": 1.8119413708210243e-06, "loss": 0.002, "num_input_tokens_seen": 258364944, "step": 119610 }, { "epoch": 19.51305057096248, "grad_norm": 0.0009538216982036829, "learning_rate": 1.8058921020983055e-06, "loss": 0.0008, "num_input_tokens_seen": 258376592, "step": 119615 }, { "epoch": 19.513866231647633, "grad_norm": 0.0010454310104250908, "learning_rate": 1.7998529299111944e-06, "loss": 0.0062, "num_input_tokens_seen": 258387280, "step": 119620 }, { "epoch": 19.51468189233279, "grad_norm": 0.006765805184841156, "learning_rate": 1.7938238543820928e-06, "loss": 0.0009, "num_input_tokens_seen": 258397104, "step": 119625 }, { "epoch": 19.515497553017944, "grad_norm": 0.011556816287338734, "learning_rate": 1.7878048756331256e-06, "loss": 0.0594, "num_input_tokens_seen": 258407344, "step": 119630 }, { "epoch": 19.5163132137031, "grad_norm": 0.048716992139816284, "learning_rate": 1.7817959937863615e-06, "loss": 0.0569, "num_input_tokens_seen": 258418416, "step": 119635 }, { "epoch": 19.517128874388256, "grad_norm": 0.01016552746295929, "learning_rate": 1.7757972089635367e-06, "loss": 0.0012, "num_input_tokens_seen": 258428176, "step": 119640 }, { "epoch": 19.517944535073408, "grad_norm": 0.005958016030490398, "learning_rate": 1.7698085212862203e-06, "loss": 0.002, "num_input_tokens_seen": 258439952, "step": 119645 }, { "epoch": 19.518760195758563, "grad_norm": 0.0012907384661957622, "learning_rate": 1.76382993087576e-06, "loss": 0.0026, "num_input_tokens_seen": 258449424, "step": 119650 }, { "epoch": 19.51957585644372, "grad_norm": 0.0009790941840037704, "learning_rate": 1.7578614378533365e-06, "loss": 0.0015, "num_input_tokens_seen": 258461264, "step": 119655 }, { "epoch": 19.520391517128875, "grad_norm": 0.03771071508526802, "learning_rate": 1.751903042339964e-06, "loss": 0.0021, "num_input_tokens_seen": 258472688, "step": 119660 }, { "epoch": 19.52120717781403, "grad_norm": 0.0106875104829669, "learning_rate": 1.745954744456324e-06, "loss": 0.0124, "num_input_tokens_seen": 258484080, "step": 119665 }, { "epoch": 19.522022838499183, "grad_norm": 0.0014177068369463086, "learning_rate": 1.7400165443229865e-06, "loss": 0.12, "num_input_tokens_seen": 258494512, "step": 119670 }, { "epoch": 19.52283849918434, "grad_norm": 0.010459963232278824, "learning_rate": 1.7340884420603e-06, "loss": 0.0485, "num_input_tokens_seen": 258505200, "step": 119675 }, { "epoch": 19.523654159869494, "grad_norm": 0.006741983816027641, "learning_rate": 1.7281704377884454e-06, "loss": 0.0012, "num_input_tokens_seen": 258516656, "step": 119680 }, { "epoch": 19.52446982055465, "grad_norm": 0.0004489065904635936, "learning_rate": 1.7222625316272723e-06, "loss": 0.0018, "num_input_tokens_seen": 258527408, "step": 119685 }, { "epoch": 19.525285481239806, "grad_norm": 0.0006436831317842007, "learning_rate": 1.7163647236965728e-06, "loss": 0.0022, "num_input_tokens_seen": 258538064, "step": 119690 }, { "epoch": 19.526101141924958, "grad_norm": 0.000623969070147723, "learning_rate": 1.7104770141158631e-06, "loss": 0.0021, "num_input_tokens_seen": 258550448, "step": 119695 }, { "epoch": 19.526916802610113, "grad_norm": 0.00015011659706942737, "learning_rate": 1.704599403004492e-06, "loss": 0.0002, "num_input_tokens_seen": 258561488, "step": 119700 }, { "epoch": 19.52773246329527, "grad_norm": 0.026546292006969452, "learning_rate": 1.6987318904814753e-06, "loss": 0.0012, "num_input_tokens_seen": 258571792, "step": 119705 }, { "epoch": 19.528548123980425, "grad_norm": 0.0041951765306293964, "learning_rate": 1.6928744766658844e-06, "loss": 0.0025, "num_input_tokens_seen": 258582288, "step": 119710 }, { "epoch": 19.52936378466558, "grad_norm": 0.00497661717236042, "learning_rate": 1.687027161676291e-06, "loss": 0.0029, "num_input_tokens_seen": 258593104, "step": 119715 }, { "epoch": 19.530179445350733, "grad_norm": 0.021917784586548805, "learning_rate": 1.6811899456312119e-06, "loss": 0.0011, "num_input_tokens_seen": 258603728, "step": 119720 }, { "epoch": 19.53099510603589, "grad_norm": 0.0025708836037665606, "learning_rate": 1.6753628286490518e-06, "loss": 0.0002, "num_input_tokens_seen": 258613968, "step": 119725 }, { "epoch": 19.531810766721044, "grad_norm": 0.00024284885148517787, "learning_rate": 1.6695458108477724e-06, "loss": 0.0016, "num_input_tokens_seen": 258624304, "step": 119730 }, { "epoch": 19.5326264274062, "grad_norm": 0.009805253706872463, "learning_rate": 1.66373889234539e-06, "loss": 0.0016, "num_input_tokens_seen": 258636432, "step": 119735 }, { "epoch": 19.533442088091356, "grad_norm": 0.0007772601675242186, "learning_rate": 1.6579420732594774e-06, "loss": 0.0018, "num_input_tokens_seen": 258647248, "step": 119740 }, { "epoch": 19.534257748776508, "grad_norm": 0.016048630699515343, "learning_rate": 1.6521553537075518e-06, "loss": 0.0014, "num_input_tokens_seen": 258658768, "step": 119745 }, { "epoch": 19.535073409461663, "grad_norm": 0.07577808946371078, "learning_rate": 1.646378733806908e-06, "loss": 0.0022, "num_input_tokens_seen": 258669648, "step": 119750 }, { "epoch": 19.53588907014682, "grad_norm": 0.00451402785256505, "learning_rate": 1.6406122136746193e-06, "loss": 0.0009, "num_input_tokens_seen": 258679376, "step": 119755 }, { "epoch": 19.536704730831975, "grad_norm": 0.0001716844126349315, "learning_rate": 1.634855793427481e-06, "loss": 0.0022, "num_input_tokens_seen": 258690288, "step": 119760 }, { "epoch": 19.53752039151713, "grad_norm": 0.0004944111569784582, "learning_rate": 1.6291094731822886e-06, "loss": 0.0014, "num_input_tokens_seen": 258701584, "step": 119765 }, { "epoch": 19.538336052202283, "grad_norm": 0.0008311573183164, "learning_rate": 1.6233732530553935e-06, "loss": 0.0018, "num_input_tokens_seen": 258711920, "step": 119770 }, { "epoch": 19.53915171288744, "grad_norm": 0.005382438190281391, "learning_rate": 1.6176471331630915e-06, "loss": 0.0007, "num_input_tokens_seen": 258722224, "step": 119775 }, { "epoch": 19.539967373572594, "grad_norm": 0.007278508972376585, "learning_rate": 1.6119311136213455e-06, "loss": 0.0005, "num_input_tokens_seen": 258733520, "step": 119780 }, { "epoch": 19.54078303425775, "grad_norm": 0.0013596662320196629, "learning_rate": 1.6062251945461737e-06, "loss": 0.0008, "num_input_tokens_seen": 258744368, "step": 119785 }, { "epoch": 19.541598694942905, "grad_norm": 0.04996887966990471, "learning_rate": 1.6005293760530393e-06, "loss": 0.0204, "num_input_tokens_seen": 258755088, "step": 119790 }, { "epoch": 19.542414355628058, "grad_norm": 0.030599098652601242, "learning_rate": 1.594843658257461e-06, "loss": 0.0014, "num_input_tokens_seen": 258764976, "step": 119795 }, { "epoch": 19.543230016313213, "grad_norm": 0.006039235275238752, "learning_rate": 1.5891680412746246e-06, "loss": 0.0008, "num_input_tokens_seen": 258776528, "step": 119800 }, { "epoch": 19.54404567699837, "grad_norm": 0.0015127577353268862, "learning_rate": 1.5835025252196044e-06, "loss": 0.0029, "num_input_tokens_seen": 258787248, "step": 119805 }, { "epoch": 19.544861337683525, "grad_norm": 0.005725604481995106, "learning_rate": 1.5778471102071423e-06, "loss": 0.0005, "num_input_tokens_seen": 258798416, "step": 119810 }, { "epoch": 19.545676998368677, "grad_norm": 0.015134379267692566, "learning_rate": 1.572201796351924e-06, "loss": 0.0028, "num_input_tokens_seen": 258809680, "step": 119815 }, { "epoch": 19.546492659053833, "grad_norm": 0.010965199209749699, "learning_rate": 1.5665665837683584e-06, "loss": 0.003, "num_input_tokens_seen": 258820528, "step": 119820 }, { "epoch": 19.54730831973899, "grad_norm": 0.000573936675209552, "learning_rate": 1.5609414725706317e-06, "loss": 0.0026, "num_input_tokens_seen": 258831056, "step": 119825 }, { "epoch": 19.548123980424144, "grad_norm": 0.011310549452900887, "learning_rate": 1.5553264628727082e-06, "loss": 0.0009, "num_input_tokens_seen": 258842736, "step": 119830 }, { "epoch": 19.5489396411093, "grad_norm": 0.013286259956657887, "learning_rate": 1.5497215547884414e-06, "loss": 0.0804, "num_input_tokens_seen": 258853872, "step": 119835 }, { "epoch": 19.549755301794452, "grad_norm": 0.02167029306292534, "learning_rate": 1.544126748431407e-06, "loss": 0.0234, "num_input_tokens_seen": 258864144, "step": 119840 }, { "epoch": 19.550570962479608, "grad_norm": 0.0003503776097204536, "learning_rate": 1.538542043914959e-06, "loss": 0.0055, "num_input_tokens_seen": 258874992, "step": 119845 }, { "epoch": 19.551386623164763, "grad_norm": 0.0004293158417567611, "learning_rate": 1.5329674413522843e-06, "loss": 0.0004, "num_input_tokens_seen": 258884880, "step": 119850 }, { "epoch": 19.55220228384992, "grad_norm": 0.007084133103489876, "learning_rate": 1.527402940856404e-06, "loss": 0.0009, "num_input_tokens_seen": 258896016, "step": 119855 }, { "epoch": 19.553017944535075, "grad_norm": 0.13451212644577026, "learning_rate": 1.5218485425400607e-06, "loss": 0.0141, "num_input_tokens_seen": 258906096, "step": 119860 }, { "epoch": 19.553833605220227, "grad_norm": 0.0023989281617105007, "learning_rate": 1.516304246515776e-06, "loss": 0.0003, "num_input_tokens_seen": 258916432, "step": 119865 }, { "epoch": 19.554649265905383, "grad_norm": 0.007038596551865339, "learning_rate": 1.5107700528960156e-06, "loss": 0.0015, "num_input_tokens_seen": 258926416, "step": 119870 }, { "epoch": 19.55546492659054, "grad_norm": 0.0006149518303573132, "learning_rate": 1.505245961792856e-06, "loss": 0.0015, "num_input_tokens_seen": 258938608, "step": 119875 }, { "epoch": 19.556280587275694, "grad_norm": 0.0030068473424762487, "learning_rate": 1.4997319733182636e-06, "loss": 0.0008, "num_input_tokens_seen": 258948880, "step": 119880 }, { "epoch": 19.55709624796085, "grad_norm": 0.0009674095781520009, "learning_rate": 1.494228087583982e-06, "loss": 0.0012, "num_input_tokens_seen": 258957936, "step": 119885 }, { "epoch": 19.557911908646002, "grad_norm": 0.0016425395151600242, "learning_rate": 1.4887343047016444e-06, "loss": 0.0007, "num_input_tokens_seen": 258969552, "step": 119890 }, { "epoch": 19.558727569331158, "grad_norm": 0.010517501272261143, "learning_rate": 1.4832506247824396e-06, "loss": 0.0051, "num_input_tokens_seen": 258979856, "step": 119895 }, { "epoch": 19.559543230016313, "grad_norm": 0.0007454265141859651, "learning_rate": 1.4777770479376118e-06, "loss": 0.0016, "num_input_tokens_seen": 258990160, "step": 119900 }, { "epoch": 19.56035889070147, "grad_norm": 0.0038657390978187323, "learning_rate": 1.472313574278017e-06, "loss": 0.0009, "num_input_tokens_seen": 259001168, "step": 119905 }, { "epoch": 19.561174551386625, "grad_norm": 0.0014202585443854332, "learning_rate": 1.4668602039144551e-06, "loss": 0.0006, "num_input_tokens_seen": 259012432, "step": 119910 }, { "epoch": 19.561990212071777, "grad_norm": 0.11280523985624313, "learning_rate": 1.4614169369573382e-06, "loss": 0.0048, "num_input_tokens_seen": 259023824, "step": 119915 }, { "epoch": 19.562805872756933, "grad_norm": 0.002691589528694749, "learning_rate": 1.4559837735171333e-06, "loss": 0.0015, "num_input_tokens_seen": 259034896, "step": 119920 }, { "epoch": 19.563621533442088, "grad_norm": 0.001120995031669736, "learning_rate": 1.450560713703808e-06, "loss": 0.0016, "num_input_tokens_seen": 259045680, "step": 119925 }, { "epoch": 19.564437194127244, "grad_norm": 0.14628851413726807, "learning_rate": 1.4451477576273298e-06, "loss": 0.0029, "num_input_tokens_seen": 259056944, "step": 119930 }, { "epoch": 19.5652528548124, "grad_norm": 0.0006048278883099556, "learning_rate": 1.4397449053973888e-06, "loss": 0.0006, "num_input_tokens_seen": 259067120, "step": 119935 }, { "epoch": 19.56606851549755, "grad_norm": 0.005681201349943876, "learning_rate": 1.4343521571235086e-06, "loss": 0.0012, "num_input_tokens_seen": 259077296, "step": 119940 }, { "epoch": 19.566884176182707, "grad_norm": 0.0018342856783419847, "learning_rate": 1.4289695129149349e-06, "loss": 0.0024, "num_input_tokens_seen": 259087632, "step": 119945 }, { "epoch": 19.567699836867863, "grad_norm": 0.005485404282808304, "learning_rate": 1.423596972880803e-06, "loss": 0.0005, "num_input_tokens_seen": 259096656, "step": 119950 }, { "epoch": 19.56851549755302, "grad_norm": 0.25319576263427734, "learning_rate": 1.4182345371299699e-06, "loss": 0.0058, "num_input_tokens_seen": 259108304, "step": 119955 }, { "epoch": 19.569331158238175, "grad_norm": 0.002403180580586195, "learning_rate": 1.412882205771071e-06, "loss": 0.0003, "num_input_tokens_seen": 259117456, "step": 119960 }, { "epoch": 19.570146818923327, "grad_norm": 0.001975890714675188, "learning_rate": 1.4075399789126308e-06, "loss": 0.0006, "num_input_tokens_seen": 259128528, "step": 119965 }, { "epoch": 19.570962479608482, "grad_norm": 0.00043759553227573633, "learning_rate": 1.4022078566629515e-06, "loss": 0.0016, "num_input_tokens_seen": 259140016, "step": 119970 }, { "epoch": 19.571778140293638, "grad_norm": 0.2719343602657318, "learning_rate": 1.396885839130002e-06, "loss": 0.0081, "num_input_tokens_seen": 259150064, "step": 119975 }, { "epoch": 19.572593800978794, "grad_norm": 0.0024640439078211784, "learning_rate": 1.3915739264216964e-06, "loss": 0.0032, "num_input_tokens_seen": 259161360, "step": 119980 }, { "epoch": 19.57340946166395, "grad_norm": 0.009985164739191532, "learning_rate": 1.3862721186456706e-06, "loss": 0.0005, "num_input_tokens_seen": 259172720, "step": 119985 }, { "epoch": 19.5742251223491, "grad_norm": 0.0014474820345640182, "learning_rate": 1.3809804159093386e-06, "loss": 0.0012, "num_input_tokens_seen": 259184336, "step": 119990 }, { "epoch": 19.575040783034257, "grad_norm": 0.0026534050703048706, "learning_rate": 1.3756988183200037e-06, "loss": 0.0004, "num_input_tokens_seen": 259195120, "step": 119995 }, { "epoch": 19.575856443719413, "grad_norm": 0.00017664516053628176, "learning_rate": 1.3704273259847467e-06, "loss": 0.0011, "num_input_tokens_seen": 259207024, "step": 120000 }, { "epoch": 19.57667210440457, "grad_norm": 0.06056229770183563, "learning_rate": 1.36516593901026e-06, "loss": 0.0053, "num_input_tokens_seen": 259217872, "step": 120005 }, { "epoch": 19.57748776508972, "grad_norm": 0.008258544839918613, "learning_rate": 1.3599146575032363e-06, "loss": 0.001, "num_input_tokens_seen": 259228304, "step": 120010 }, { "epoch": 19.578303425774877, "grad_norm": 0.0049018110148608685, "learning_rate": 1.3546734815702012e-06, "loss": 0.0012, "num_input_tokens_seen": 259239632, "step": 120015 }, { "epoch": 19.579119086460032, "grad_norm": 0.0018762719118967652, "learning_rate": 1.349442411317181e-06, "loss": 0.0017, "num_input_tokens_seen": 259251024, "step": 120020 }, { "epoch": 19.579934747145188, "grad_norm": 0.06191835179924965, "learning_rate": 1.3442214468503688e-06, "loss": 0.0029, "num_input_tokens_seen": 259259856, "step": 120025 }, { "epoch": 19.580750407830344, "grad_norm": 0.0006233254680410028, "learning_rate": 1.3390105882754577e-06, "loss": 0.0003, "num_input_tokens_seen": 259269936, "step": 120030 }, { "epoch": 19.581566068515496, "grad_norm": 0.00015882418665569276, "learning_rate": 1.333809835698141e-06, "loss": 0.0005, "num_input_tokens_seen": 259279888, "step": 120035 }, { "epoch": 19.58238172920065, "grad_norm": 0.01703408546745777, "learning_rate": 1.3286191892237231e-06, "loss": 0.0041, "num_input_tokens_seen": 259290608, "step": 120040 }, { "epoch": 19.583197389885807, "grad_norm": 0.00424957275390625, "learning_rate": 1.323438648957509e-06, "loss": 0.0563, "num_input_tokens_seen": 259301808, "step": 120045 }, { "epoch": 19.584013050570963, "grad_norm": 0.04442833736538887, "learning_rate": 1.318268215004359e-06, "loss": 0.0027, "num_input_tokens_seen": 259311472, "step": 120050 }, { "epoch": 19.58482871125612, "grad_norm": 0.006586727686226368, "learning_rate": 1.3131078874691337e-06, "loss": 0.0032, "num_input_tokens_seen": 259322416, "step": 120055 }, { "epoch": 19.58564437194127, "grad_norm": 0.00016416041762568057, "learning_rate": 1.3079576664564163e-06, "loss": 0.0003, "num_input_tokens_seen": 259333616, "step": 120060 }, { "epoch": 19.586460032626427, "grad_norm": 0.002335761673748493, "learning_rate": 1.302817552070623e-06, "loss": 0.0009, "num_input_tokens_seen": 259343248, "step": 120065 }, { "epoch": 19.587275693311582, "grad_norm": 0.0020196493715047836, "learning_rate": 1.297687544415782e-06, "loss": 0.0008, "num_input_tokens_seen": 259353008, "step": 120070 }, { "epoch": 19.588091353996738, "grad_norm": 0.03180438652634621, "learning_rate": 1.292567643596032e-06, "loss": 0.0011, "num_input_tokens_seen": 259365040, "step": 120075 }, { "epoch": 19.588907014681894, "grad_norm": 0.0011726239463314414, "learning_rate": 1.2874578497150125e-06, "loss": 0.0004, "num_input_tokens_seen": 259376688, "step": 120080 }, { "epoch": 19.589722675367046, "grad_norm": 0.0019082275684922934, "learning_rate": 1.282358162876307e-06, "loss": 0.0122, "num_input_tokens_seen": 259388592, "step": 120085 }, { "epoch": 19.5905383360522, "grad_norm": 0.0001999034866457805, "learning_rate": 1.277268583183333e-06, "loss": 0.0011, "num_input_tokens_seen": 259399408, "step": 120090 }, { "epoch": 19.591353996737357, "grad_norm": 0.0022268639877438545, "learning_rate": 1.2721891107391192e-06, "loss": 0.0008, "num_input_tokens_seen": 259410224, "step": 120095 }, { "epoch": 19.592169657422513, "grad_norm": 0.0030834621284157038, "learning_rate": 1.2671197456467497e-06, "loss": 0.0005, "num_input_tokens_seen": 259420048, "step": 120100 }, { "epoch": 19.59298531810767, "grad_norm": 0.0024934473913162947, "learning_rate": 1.2620604880088093e-06, "loss": 0.0017, "num_input_tokens_seen": 259430864, "step": 120105 }, { "epoch": 19.59380097879282, "grad_norm": 0.019052933901548386, "learning_rate": 1.2570113379279936e-06, "loss": 0.0006, "num_input_tokens_seen": 259442128, "step": 120110 }, { "epoch": 19.594616639477977, "grad_norm": 0.051151152700185776, "learning_rate": 1.2519722955064982e-06, "loss": 0.0058, "num_input_tokens_seen": 259453360, "step": 120115 }, { "epoch": 19.595432300163132, "grad_norm": 0.0011440202360972762, "learning_rate": 1.2469433608464642e-06, "loss": 0.0003, "num_input_tokens_seen": 259464112, "step": 120120 }, { "epoch": 19.596247960848288, "grad_norm": 0.0026353909634053707, "learning_rate": 1.2419245340498652e-06, "loss": 0.0007, "num_input_tokens_seen": 259475088, "step": 120125 }, { "epoch": 19.597063621533444, "grad_norm": 0.0006127303349785507, "learning_rate": 1.236915815218398e-06, "loss": 0.023, "num_input_tokens_seen": 259486160, "step": 120130 }, { "epoch": 19.597879282218596, "grad_norm": 0.0013539056526497006, "learning_rate": 1.2319172044535365e-06, "loss": 0.0016, "num_input_tokens_seen": 259496880, "step": 120135 }, { "epoch": 19.59869494290375, "grad_norm": 0.0010810773819684982, "learning_rate": 1.2269287018565888e-06, "loss": 0.0006, "num_input_tokens_seen": 259508336, "step": 120140 }, { "epoch": 19.599510603588907, "grad_norm": 0.00026650080690160394, "learning_rate": 1.2219503075286963e-06, "loss": 0.0007, "num_input_tokens_seen": 259518896, "step": 120145 }, { "epoch": 19.600326264274063, "grad_norm": 0.001171777956187725, "learning_rate": 1.2169820215707228e-06, "loss": 0.0022, "num_input_tokens_seen": 259531184, "step": 120150 }, { "epoch": 19.601141924959215, "grad_norm": 0.0004032772849313915, "learning_rate": 1.2120238440833653e-06, "loss": 0.0067, "num_input_tokens_seen": 259542768, "step": 120155 }, { "epoch": 19.60195758564437, "grad_norm": 0.0009708287543617189, "learning_rate": 1.207075775167099e-06, "loss": 0.0022, "num_input_tokens_seen": 259551856, "step": 120160 }, { "epoch": 19.602773246329527, "grad_norm": 0.0016535356407985091, "learning_rate": 1.2021378149221773e-06, "loss": 0.0008, "num_input_tokens_seen": 259561328, "step": 120165 }, { "epoch": 19.603588907014682, "grad_norm": 0.003574906848371029, "learning_rate": 1.1972099634487422e-06, "loss": 0.0403, "num_input_tokens_seen": 259571248, "step": 120170 }, { "epoch": 19.604404567699838, "grad_norm": 0.0005903760902583599, "learning_rate": 1.1922922208466026e-06, "loss": 0.0007, "num_input_tokens_seen": 259582544, "step": 120175 }, { "epoch": 19.605220228384994, "grad_norm": 0.0017175710527226329, "learning_rate": 1.1873845872154565e-06, "loss": 0.0078, "num_input_tokens_seen": 259593936, "step": 120180 }, { "epoch": 19.606035889070146, "grad_norm": 0.00047921930672600865, "learning_rate": 1.1824870626547247e-06, "loss": 0.0004, "num_input_tokens_seen": 259604336, "step": 120185 }, { "epoch": 19.6068515497553, "grad_norm": 0.006056176032871008, "learning_rate": 1.1775996472637163e-06, "loss": 0.0007, "num_input_tokens_seen": 259615696, "step": 120190 }, { "epoch": 19.607667210440457, "grad_norm": 0.00036932036164216697, "learning_rate": 1.1727223411414078e-06, "loss": 0.0012, "num_input_tokens_seen": 259627536, "step": 120195 }, { "epoch": 19.608482871125613, "grad_norm": 0.001396836363710463, "learning_rate": 1.1678551443867203e-06, "loss": 0.0008, "num_input_tokens_seen": 259637040, "step": 120200 }, { "epoch": 19.609298531810765, "grad_norm": 0.004054496064782143, "learning_rate": 1.1629980570982967e-06, "loss": 0.0018, "num_input_tokens_seen": 259648336, "step": 120205 }, { "epoch": 19.61011419249592, "grad_norm": 0.017525173723697662, "learning_rate": 1.1581510793745032e-06, "loss": 0.0015, "num_input_tokens_seen": 259660304, "step": 120210 }, { "epoch": 19.610929853181077, "grad_norm": 0.00820534024387598, "learning_rate": 1.153314211313594e-06, "loss": 0.0008, "num_input_tokens_seen": 259671120, "step": 120215 }, { "epoch": 19.611745513866232, "grad_norm": 0.0011833886383101344, "learning_rate": 1.1484874530136025e-06, "loss": 0.0006, "num_input_tokens_seen": 259681968, "step": 120220 }, { "epoch": 19.612561174551388, "grad_norm": 0.01561807096004486, "learning_rate": 1.1436708045723388e-06, "loss": 0.001, "num_input_tokens_seen": 259691568, "step": 120225 }, { "epoch": 19.61337683523654, "grad_norm": 0.0010521980002522469, "learning_rate": 1.1388642660875025e-06, "loss": 0.0012, "num_input_tokens_seen": 259703248, "step": 120230 }, { "epoch": 19.614192495921696, "grad_norm": 0.05352751538157463, "learning_rate": 1.1340678376563495e-06, "loss": 0.0025, "num_input_tokens_seen": 259714224, "step": 120235 }, { "epoch": 19.61500815660685, "grad_norm": 0.00045512960059568286, "learning_rate": 1.1292815193761907e-06, "loss": 0.0014, "num_input_tokens_seen": 259725808, "step": 120240 }, { "epoch": 19.615823817292007, "grad_norm": 0.27281224727630615, "learning_rate": 1.1245053113440596e-06, "loss": 0.0098, "num_input_tokens_seen": 259737168, "step": 120245 }, { "epoch": 19.616639477977163, "grad_norm": 0.002132084220647812, "learning_rate": 1.1197392136566565e-06, "loss": 0.0005, "num_input_tokens_seen": 259747760, "step": 120250 }, { "epoch": 19.617455138662315, "grad_norm": 0.038451775908470154, "learning_rate": 1.114983226410571e-06, "loss": 0.0009, "num_input_tokens_seen": 259759632, "step": 120255 }, { "epoch": 19.61827079934747, "grad_norm": 1.293935775756836, "learning_rate": 1.110237349702281e-06, "loss": 0.1449, "num_input_tokens_seen": 259770672, "step": 120260 }, { "epoch": 19.619086460032626, "grad_norm": 0.003990166820585728, "learning_rate": 1.1055015836279326e-06, "loss": 0.0027, "num_input_tokens_seen": 259781360, "step": 120265 }, { "epoch": 19.619902120717782, "grad_norm": 0.0009797315578907728, "learning_rate": 1.1007759282834484e-06, "loss": 0.0006, "num_input_tokens_seen": 259792656, "step": 120270 }, { "epoch": 19.620717781402938, "grad_norm": 0.03197610005736351, "learning_rate": 1.096060383764641e-06, "loss": 0.0007, "num_input_tokens_seen": 259802288, "step": 120275 }, { "epoch": 19.62153344208809, "grad_norm": 0.01582932658493519, "learning_rate": 1.0913549501671004e-06, "loss": 0.0007, "num_input_tokens_seen": 259812400, "step": 120280 }, { "epoch": 19.622349102773246, "grad_norm": 0.0015432540094479918, "learning_rate": 1.0866596275861395e-06, "loss": 0.0071, "num_input_tokens_seen": 259823728, "step": 120285 }, { "epoch": 19.6231647634584, "grad_norm": 0.0003914940753020346, "learning_rate": 1.0819744161169597e-06, "loss": 0.0003, "num_input_tokens_seen": 259835408, "step": 120290 }, { "epoch": 19.623980424143557, "grad_norm": 0.013478913344442844, "learning_rate": 1.0772993158544297e-06, "loss": 0.0032, "num_input_tokens_seen": 259845616, "step": 120295 }, { "epoch": 19.624796084828713, "grad_norm": 0.0010959201026707888, "learning_rate": 1.072634326893418e-06, "loss": 0.0033, "num_input_tokens_seen": 259856464, "step": 120300 }, { "epoch": 19.625611745513865, "grad_norm": 0.005378579255193472, "learning_rate": 1.0679794493284045e-06, "loss": 0.0004, "num_input_tokens_seen": 259868720, "step": 120305 }, { "epoch": 19.62642740619902, "grad_norm": 0.015822935849428177, "learning_rate": 1.0633346832537026e-06, "loss": 0.0024, "num_input_tokens_seen": 259879600, "step": 120310 }, { "epoch": 19.627243066884176, "grad_norm": 0.0004820248286705464, "learning_rate": 1.0587000287634596e-06, "loss": 0.0008, "num_input_tokens_seen": 259889232, "step": 120315 }, { "epoch": 19.628058727569332, "grad_norm": 0.010135025717318058, "learning_rate": 1.0540754859516554e-06, "loss": 0.0008, "num_input_tokens_seen": 259900240, "step": 120320 }, { "epoch": 19.628874388254488, "grad_norm": 0.07188961654901505, "learning_rate": 1.0494610549119377e-06, "loss": 0.0028, "num_input_tokens_seen": 259910992, "step": 120325 }, { "epoch": 19.62969004893964, "grad_norm": 0.010499064810574055, "learning_rate": 1.0448567357378424e-06, "loss": 0.0016, "num_input_tokens_seen": 259922224, "step": 120330 }, { "epoch": 19.630505709624796, "grad_norm": 0.01909225806593895, "learning_rate": 1.0402625285227396e-06, "loss": 0.0015, "num_input_tokens_seen": 259932944, "step": 120335 }, { "epoch": 19.63132137030995, "grad_norm": 0.20919649302959442, "learning_rate": 1.0356784333596658e-06, "loss": 0.0064, "num_input_tokens_seen": 259944272, "step": 120340 }, { "epoch": 19.632137030995107, "grad_norm": 0.1292264610528946, "learning_rate": 1.0311044503415468e-06, "loss": 0.0029, "num_input_tokens_seen": 259953232, "step": 120345 }, { "epoch": 19.63295269168026, "grad_norm": 0.05426434054970741, "learning_rate": 1.026540579561086e-06, "loss": 0.002, "num_input_tokens_seen": 259963856, "step": 120350 }, { "epoch": 19.633768352365415, "grad_norm": 0.00942949764430523, "learning_rate": 1.0219868211108208e-06, "loss": 0.0013, "num_input_tokens_seen": 259974992, "step": 120355 }, { "epoch": 19.63458401305057, "grad_norm": 0.0067382687702775, "learning_rate": 1.0174431750828993e-06, "loss": 0.0065, "num_input_tokens_seen": 259986800, "step": 120360 }, { "epoch": 19.635399673735726, "grad_norm": 0.014512408524751663, "learning_rate": 1.0129096415695816e-06, "loss": 0.0056, "num_input_tokens_seen": 259997552, "step": 120365 }, { "epoch": 19.636215334420882, "grad_norm": 0.006454018875956535, "learning_rate": 1.008386220662627e-06, "loss": 0.0009, "num_input_tokens_seen": 260008912, "step": 120370 }, { "epoch": 19.637030995106034, "grad_norm": 0.0028814023826271296, "learning_rate": 1.0038729124537405e-06, "loss": 0.0027, "num_input_tokens_seen": 260020752, "step": 120375 }, { "epoch": 19.63784665579119, "grad_norm": 0.00044552632607519627, "learning_rate": 9.993697170343485e-07, "loss": 0.0009, "num_input_tokens_seen": 260032176, "step": 120380 }, { "epoch": 19.638662316476346, "grad_norm": 0.07475344091653824, "learning_rate": 9.948766344958227e-07, "loss": 0.003, "num_input_tokens_seen": 260043216, "step": 120385 }, { "epoch": 19.6394779771615, "grad_norm": 0.000331522838678211, "learning_rate": 9.9039366492909e-07, "loss": 0.0008, "num_input_tokens_seen": 260054192, "step": 120390 }, { "epoch": 19.640293637846657, "grad_norm": 0.0012437768746167421, "learning_rate": 9.859208084251337e-07, "loss": 0.0005, "num_input_tokens_seen": 260063792, "step": 120395 }, { "epoch": 19.64110929853181, "grad_norm": 0.0010564016411080956, "learning_rate": 9.81458065074492e-07, "loss": 0.0004, "num_input_tokens_seen": 260075120, "step": 120400 }, { "epoch": 19.641924959216965, "grad_norm": 0.0017424465622752905, "learning_rate": 9.770054349677037e-07, "loss": 0.0006, "num_input_tokens_seen": 260085456, "step": 120405 }, { "epoch": 19.64274061990212, "grad_norm": 0.0012548412196338177, "learning_rate": 9.725629181949192e-07, "loss": 0.001, "num_input_tokens_seen": 260096080, "step": 120410 }, { "epoch": 19.643556280587276, "grad_norm": 0.005961594637483358, "learning_rate": 9.681305148462328e-07, "loss": 0.0012, "num_input_tokens_seen": 260106320, "step": 120415 }, { "epoch": 19.644371941272432, "grad_norm": 0.0003299217496532947, "learning_rate": 9.63708225011406e-07, "loss": 0.0007, "num_input_tokens_seen": 260118096, "step": 120420 }, { "epoch": 19.645187601957584, "grad_norm": 0.0025435383431613445, "learning_rate": 9.59296048780145e-07, "loss": 0.0007, "num_input_tokens_seen": 260129232, "step": 120425 }, { "epoch": 19.64600326264274, "grad_norm": 0.00037977020838297904, "learning_rate": 9.54893986241767e-07, "loss": 0.0079, "num_input_tokens_seen": 260139728, "step": 120430 }, { "epoch": 19.646818923327896, "grad_norm": 0.003552139736711979, "learning_rate": 9.505020374855899e-07, "loss": 0.0009, "num_input_tokens_seen": 260150800, "step": 120435 }, { "epoch": 19.64763458401305, "grad_norm": 0.0030579909216612577, "learning_rate": 9.461202026005978e-07, "loss": 0.0035, "num_input_tokens_seen": 260162224, "step": 120440 }, { "epoch": 19.648450244698207, "grad_norm": 0.010102360509335995, "learning_rate": 9.417484816755528e-07, "loss": 0.0007, "num_input_tokens_seen": 260171984, "step": 120445 }, { "epoch": 19.64926590538336, "grad_norm": 0.003113190643489361, "learning_rate": 9.37386874799051e-07, "loss": 0.0034, "num_input_tokens_seen": 260182672, "step": 120450 }, { "epoch": 19.650081566068515, "grad_norm": 0.002211271785199642, "learning_rate": 9.330353820595217e-07, "loss": 0.0107, "num_input_tokens_seen": 260192912, "step": 120455 }, { "epoch": 19.65089722675367, "grad_norm": 0.00032293720869347453, "learning_rate": 9.286940035451718e-07, "loss": 0.001, "num_input_tokens_seen": 260204912, "step": 120460 }, { "epoch": 19.651712887438826, "grad_norm": 0.07494331896305084, "learning_rate": 9.243627393439313e-07, "loss": 0.0024, "num_input_tokens_seen": 260216976, "step": 120465 }, { "epoch": 19.652528548123982, "grad_norm": 0.0016844982746988535, "learning_rate": 9.200415895436187e-07, "loss": 0.0003, "num_input_tokens_seen": 260227984, "step": 120470 }, { "epoch": 19.653344208809134, "grad_norm": 0.0008055089274421334, "learning_rate": 9.157305542317751e-07, "loss": 0.0028, "num_input_tokens_seen": 260238544, "step": 120475 }, { "epoch": 19.65415986949429, "grad_norm": 0.007196464110165834, "learning_rate": 9.11429633495775e-07, "loss": 0.0132, "num_input_tokens_seen": 260249360, "step": 120480 }, { "epoch": 19.654975530179446, "grad_norm": 0.001205752487294376, "learning_rate": 9.071388274228264e-07, "loss": 0.0009, "num_input_tokens_seen": 260260400, "step": 120485 }, { "epoch": 19.6557911908646, "grad_norm": 0.009604084305465221, "learning_rate": 9.028581360998045e-07, "loss": 0.0031, "num_input_tokens_seen": 260270032, "step": 120490 }, { "epoch": 19.656606851549757, "grad_norm": 0.005923762917518616, "learning_rate": 8.985875596135285e-07, "loss": 0.0006, "num_input_tokens_seen": 260281264, "step": 120495 }, { "epoch": 19.65742251223491, "grad_norm": 0.003814230440184474, "learning_rate": 8.943270980505957e-07, "loss": 0.0009, "num_input_tokens_seen": 260291568, "step": 120500 }, { "epoch": 19.658238172920065, "grad_norm": 0.5724601149559021, "learning_rate": 8.900767514972152e-07, "loss": 0.0137, "num_input_tokens_seen": 260303920, "step": 120505 }, { "epoch": 19.65905383360522, "grad_norm": 0.004697203170508146, "learning_rate": 8.858365200395957e-07, "loss": 0.0005, "num_input_tokens_seen": 260314032, "step": 120510 }, { "epoch": 19.659869494290376, "grad_norm": 0.22248674929141998, "learning_rate": 8.816064037636684e-07, "loss": 0.0052, "num_input_tokens_seen": 260325008, "step": 120515 }, { "epoch": 19.660685154975532, "grad_norm": 0.018114212900400162, "learning_rate": 8.773864027551981e-07, "loss": 0.0021, "num_input_tokens_seen": 260336400, "step": 120520 }, { "epoch": 19.661500815660684, "grad_norm": 0.0033652205020189285, "learning_rate": 8.73176517099672e-07, "loss": 0.0051, "num_input_tokens_seen": 260347888, "step": 120525 }, { "epoch": 19.66231647634584, "grad_norm": 0.019522542133927345, "learning_rate": 8.689767468824105e-07, "loss": 0.0008, "num_input_tokens_seen": 260359216, "step": 120530 }, { "epoch": 19.663132137030995, "grad_norm": 0.023196915164589882, "learning_rate": 8.647870921885126e-07, "loss": 0.004, "num_input_tokens_seen": 260370096, "step": 120535 }, { "epoch": 19.66394779771615, "grad_norm": 0.3506559431552887, "learning_rate": 8.606075531029101e-07, "loss": 0.0067, "num_input_tokens_seen": 260380432, "step": 120540 }, { "epoch": 19.664763458401303, "grad_norm": 0.003931544255465269, "learning_rate": 8.564381297102575e-07, "loss": 0.0171, "num_input_tokens_seen": 260391536, "step": 120545 }, { "epoch": 19.66557911908646, "grad_norm": 0.003058247035369277, "learning_rate": 8.522788220951538e-07, "loss": 0.0005, "num_input_tokens_seen": 260402320, "step": 120550 }, { "epoch": 19.666394779771615, "grad_norm": 0.004594247788190842, "learning_rate": 8.481296303418096e-07, "loss": 0.001, "num_input_tokens_seen": 260413776, "step": 120555 }, { "epoch": 19.66721044045677, "grad_norm": 0.006736138369888067, "learning_rate": 8.439905545343796e-07, "loss": 0.0006, "num_input_tokens_seen": 260425232, "step": 120560 }, { "epoch": 19.668026101141926, "grad_norm": 0.003108308184891939, "learning_rate": 8.398615947566302e-07, "loss": 0.0018, "num_input_tokens_seen": 260435792, "step": 120565 }, { "epoch": 19.66884176182708, "grad_norm": 0.0003533684357535094, "learning_rate": 8.357427510923832e-07, "loss": 0.0004, "num_input_tokens_seen": 260446320, "step": 120570 }, { "epoch": 19.669657422512234, "grad_norm": 0.0026117609813809395, "learning_rate": 8.316340236249609e-07, "loss": 0.0015, "num_input_tokens_seen": 260458160, "step": 120575 }, { "epoch": 19.67047308319739, "grad_norm": 0.0008695587166585028, "learning_rate": 8.275354124377965e-07, "loss": 0.0005, "num_input_tokens_seen": 260469712, "step": 120580 }, { "epoch": 19.671288743882545, "grad_norm": 0.002099714009091258, "learning_rate": 8.234469176138238e-07, "loss": 0.0014, "num_input_tokens_seen": 260481296, "step": 120585 }, { "epoch": 19.6721044045677, "grad_norm": 0.008611966855823994, "learning_rate": 8.193685392359762e-07, "loss": 0.0008, "num_input_tokens_seen": 260490928, "step": 120590 }, { "epoch": 19.672920065252853, "grad_norm": 0.0001795227435650304, "learning_rate": 8.153002773868546e-07, "loss": 0.0005, "num_input_tokens_seen": 260501648, "step": 120595 }, { "epoch": 19.67373572593801, "grad_norm": 0.005488082300871611, "learning_rate": 8.112421321489483e-07, "loss": 0.0007, "num_input_tokens_seen": 260512272, "step": 120600 }, { "epoch": 19.674551386623165, "grad_norm": 0.0002953264338430017, "learning_rate": 8.07194103604525e-07, "loss": 0.0003, "num_input_tokens_seen": 260523088, "step": 120605 }, { "epoch": 19.67536704730832, "grad_norm": 0.009819312021136284, "learning_rate": 8.03156191835519e-07, "loss": 0.0009, "num_input_tokens_seen": 260534512, "step": 120610 }, { "epoch": 19.676182707993476, "grad_norm": 0.0009091253159567714, "learning_rate": 7.99128396923865e-07, "loss": 0.0023, "num_input_tokens_seen": 260543280, "step": 120615 }, { "epoch": 19.67699836867863, "grad_norm": 0.022837691009044647, "learning_rate": 7.951107189511641e-07, "loss": 0.0013, "num_input_tokens_seen": 260553712, "step": 120620 }, { "epoch": 19.677814029363784, "grad_norm": 0.0009270032169297338, "learning_rate": 7.91103157998796e-07, "loss": 0.0028, "num_input_tokens_seen": 260563632, "step": 120625 }, { "epoch": 19.67862969004894, "grad_norm": 0.4371551275253296, "learning_rate": 7.871057141480287e-07, "loss": 0.0252, "num_input_tokens_seen": 260573808, "step": 120630 }, { "epoch": 19.679445350734095, "grad_norm": 0.0009796569356694818, "learning_rate": 7.831183874798531e-07, "loss": 0.0005, "num_input_tokens_seen": 260585808, "step": 120635 }, { "epoch": 19.68026101141925, "grad_norm": 0.00811771210283041, "learning_rate": 7.791411780750935e-07, "loss": 0.0006, "num_input_tokens_seen": 260596400, "step": 120640 }, { "epoch": 19.681076672104403, "grad_norm": 0.20137394964694977, "learning_rate": 7.751740860143519e-07, "loss": 0.0064, "num_input_tokens_seen": 260606640, "step": 120645 }, { "epoch": 19.68189233278956, "grad_norm": 0.013613549061119556, "learning_rate": 7.712171113780086e-07, "loss": 0.0008, "num_input_tokens_seen": 260618000, "step": 120650 }, { "epoch": 19.682707993474715, "grad_norm": 0.0029773954302072525, "learning_rate": 7.672702542462773e-07, "loss": 0.0006, "num_input_tokens_seen": 260629264, "step": 120655 }, { "epoch": 19.68352365415987, "grad_norm": 0.0026561871636658907, "learning_rate": 7.633335146991493e-07, "loss": 0.0004, "num_input_tokens_seen": 260640368, "step": 120660 }, { "epoch": 19.684339314845026, "grad_norm": 0.009900989942252636, "learning_rate": 7.594068928163944e-07, "loss": 0.0137, "num_input_tokens_seen": 260650768, "step": 120665 }, { "epoch": 19.68515497553018, "grad_norm": 0.018907103687524796, "learning_rate": 7.554903886775599e-07, "loss": 0.0014, "num_input_tokens_seen": 260661680, "step": 120670 }, { "epoch": 19.685970636215334, "grad_norm": 0.0027872032951563597, "learning_rate": 7.515840023620824e-07, "loss": 0.0004, "num_input_tokens_seen": 260673584, "step": 120675 }, { "epoch": 19.68678629690049, "grad_norm": 0.011443251743912697, "learning_rate": 7.476877339490651e-07, "loss": 0.0011, "num_input_tokens_seen": 260684912, "step": 120680 }, { "epoch": 19.687601957585645, "grad_norm": 0.06114175543189049, "learning_rate": 7.438015835175005e-07, "loss": 0.0086, "num_input_tokens_seen": 260694864, "step": 120685 }, { "epoch": 19.6884176182708, "grad_norm": 0.0009734915802255273, "learning_rate": 7.399255511461589e-07, "loss": 0.0014, "num_input_tokens_seen": 260704592, "step": 120690 }, { "epoch": 19.689233278955953, "grad_norm": 0.0013609203742817044, "learning_rate": 7.360596369135886e-07, "loss": 0.0005, "num_input_tokens_seen": 260715888, "step": 120695 }, { "epoch": 19.69004893964111, "grad_norm": 0.003440382657572627, "learning_rate": 7.322038408981157e-07, "loss": 0.0044, "num_input_tokens_seen": 260725840, "step": 120700 }, { "epoch": 19.690864600326265, "grad_norm": 0.0012171886628493667, "learning_rate": 7.283581631779002e-07, "loss": 0.0071, "num_input_tokens_seen": 260736368, "step": 120705 }, { "epoch": 19.69168026101142, "grad_norm": 0.015224111266434193, "learning_rate": 7.245226038308794e-07, "loss": 0.0876, "num_input_tokens_seen": 260745648, "step": 120710 }, { "epoch": 19.692495921696576, "grad_norm": 0.007610964123159647, "learning_rate": 7.206971629348246e-07, "loss": 0.0017, "num_input_tokens_seen": 260755888, "step": 120715 }, { "epoch": 19.693311582381728, "grad_norm": 0.0033253964502364397, "learning_rate": 7.16881840567174e-07, "loss": 0.0003, "num_input_tokens_seen": 260766768, "step": 120720 }, { "epoch": 19.694127243066884, "grad_norm": 0.0012907941127195954, "learning_rate": 7.130766368053099e-07, "loss": 0.0003, "num_input_tokens_seen": 260778576, "step": 120725 }, { "epoch": 19.69494290375204, "grad_norm": 0.03420671820640564, "learning_rate": 7.092815517263373e-07, "loss": 0.0015, "num_input_tokens_seen": 260789136, "step": 120730 }, { "epoch": 19.695758564437195, "grad_norm": 0.00766712473705411, "learning_rate": 7.054965854071948e-07, "loss": 0.0003, "num_input_tokens_seen": 260800496, "step": 120735 }, { "epoch": 19.696574225122347, "grad_norm": 0.0032816394232213497, "learning_rate": 7.017217379245433e-07, "loss": 0.0006, "num_input_tokens_seen": 260812048, "step": 120740 }, { "epoch": 19.697389885807503, "grad_norm": 0.0004926707479171455, "learning_rate": 6.979570093548771e-07, "loss": 0.0005, "num_input_tokens_seen": 260823984, "step": 120745 }, { "epoch": 19.69820554649266, "grad_norm": 0.0021867440082132816, "learning_rate": 6.942023997745794e-07, "loss": 0.0431, "num_input_tokens_seen": 260834800, "step": 120750 }, { "epoch": 19.699021207177815, "grad_norm": 0.0003217519260942936, "learning_rate": 6.904579092596452e-07, "loss": 0.0003, "num_input_tokens_seen": 260845744, "step": 120755 }, { "epoch": 19.69983686786297, "grad_norm": 0.003933432046324015, "learning_rate": 6.867235378860137e-07, "loss": 0.0005, "num_input_tokens_seen": 260856112, "step": 120760 }, { "epoch": 19.700652528548122, "grad_norm": 0.0009351072367280722, "learning_rate": 6.829992857293465e-07, "loss": 0.0007, "num_input_tokens_seen": 260867440, "step": 120765 }, { "epoch": 19.701468189233278, "grad_norm": 0.022774334996938705, "learning_rate": 6.792851528651389e-07, "loss": 0.0032, "num_input_tokens_seen": 260877936, "step": 120770 }, { "epoch": 19.702283849918434, "grad_norm": 0.0001839139877120033, "learning_rate": 6.755811393686084e-07, "loss": 0.0005, "num_input_tokens_seen": 260889808, "step": 120775 }, { "epoch": 19.70309951060359, "grad_norm": 0.012218792922794819, "learning_rate": 6.718872453149172e-07, "loss": 0.002, "num_input_tokens_seen": 260901520, "step": 120780 }, { "epoch": 19.703915171288745, "grad_norm": 0.009069071151316166, "learning_rate": 6.682034707788386e-07, "loss": 0.0007, "num_input_tokens_seen": 260911184, "step": 120785 }, { "epoch": 19.704730831973897, "grad_norm": 0.0019039036706089973, "learning_rate": 6.645298158350909e-07, "loss": 0.0007, "num_input_tokens_seen": 260921008, "step": 120790 }, { "epoch": 19.705546492659053, "grad_norm": 0.002577113453298807, "learning_rate": 6.608662805580589e-07, "loss": 0.0004, "num_input_tokens_seen": 260933104, "step": 120795 }, { "epoch": 19.70636215334421, "grad_norm": 0.004389368463307619, "learning_rate": 6.572128650220721e-07, "loss": 0.0044, "num_input_tokens_seen": 260943728, "step": 120800 }, { "epoch": 19.707177814029365, "grad_norm": 0.00035338502493686974, "learning_rate": 6.535695693011268e-07, "loss": 0.0003, "num_input_tokens_seen": 260953008, "step": 120805 }, { "epoch": 19.70799347471452, "grad_norm": 0.0008030373719520867, "learning_rate": 6.499363934690528e-07, "loss": 0.0013, "num_input_tokens_seen": 260964400, "step": 120810 }, { "epoch": 19.708809135399672, "grad_norm": 0.00022977576008997858, "learning_rate": 6.463133375994579e-07, "loss": 0.0008, "num_input_tokens_seen": 260974096, "step": 120815 }, { "epoch": 19.709624796084828, "grad_norm": 0.0035287451464682817, "learning_rate": 6.427004017658389e-07, "loss": 0.0013, "num_input_tokens_seen": 260985296, "step": 120820 }, { "epoch": 19.710440456769984, "grad_norm": 0.011943004094064236, "learning_rate": 6.390975860413594e-07, "loss": 0.003, "num_input_tokens_seen": 260996368, "step": 120825 }, { "epoch": 19.71125611745514, "grad_norm": 0.06614907085895538, "learning_rate": 6.355048904990724e-07, "loss": 0.0653, "num_input_tokens_seen": 261006608, "step": 120830 }, { "epoch": 19.712071778140295, "grad_norm": 0.006899102125316858, "learning_rate": 6.319223152117526e-07, "loss": 0.0007, "num_input_tokens_seen": 261018032, "step": 120835 }, { "epoch": 19.712887438825447, "grad_norm": 0.010936878621578217, "learning_rate": 6.283498602520088e-07, "loss": 0.001, "num_input_tokens_seen": 261029104, "step": 120840 }, { "epoch": 19.713703099510603, "grad_norm": 0.0007386531797237694, "learning_rate": 6.247875256922275e-07, "loss": 0.0011, "num_input_tokens_seen": 261038480, "step": 120845 }, { "epoch": 19.71451876019576, "grad_norm": 0.24150149524211884, "learning_rate": 6.212353116046843e-07, "loss": 0.0099, "num_input_tokens_seen": 261048656, "step": 120850 }, { "epoch": 19.715334420880914, "grad_norm": 0.001007181708700955, "learning_rate": 6.17693218061266e-07, "loss": 0.1039, "num_input_tokens_seen": 261060816, "step": 120855 }, { "epoch": 19.71615008156607, "grad_norm": 0.0009667161502875388, "learning_rate": 6.141612451338596e-07, "loss": 0.0017, "num_input_tokens_seen": 261071568, "step": 120860 }, { "epoch": 19.716965742251222, "grad_norm": 0.019680418074131012, "learning_rate": 6.106393928939635e-07, "loss": 0.0016, "num_input_tokens_seen": 261083184, "step": 120865 }, { "epoch": 19.717781402936378, "grad_norm": 0.000474753585876897, "learning_rate": 6.07127661412965e-07, "loss": 0.0021, "num_input_tokens_seen": 261094928, "step": 120870 }, { "epoch": 19.718597063621534, "grad_norm": 0.0027427682653069496, "learning_rate": 6.036260507620849e-07, "loss": 0.0002, "num_input_tokens_seen": 261105840, "step": 120875 }, { "epoch": 19.71941272430669, "grad_norm": 0.03328926861286163, "learning_rate": 6.001345610122111e-07, "loss": 0.005, "num_input_tokens_seen": 261116336, "step": 120880 }, { "epoch": 19.72022838499184, "grad_norm": 0.0030891122296452522, "learning_rate": 5.966531922341756e-07, "loss": 0.0005, "num_input_tokens_seen": 261128496, "step": 120885 }, { "epoch": 19.721044045676997, "grad_norm": 0.0007997844368219376, "learning_rate": 5.931819444984777e-07, "loss": 0.0016, "num_input_tokens_seen": 261139440, "step": 120890 }, { "epoch": 19.721859706362153, "grad_norm": 0.0012421332066878676, "learning_rate": 5.897208178755054e-07, "loss": 0.0007, "num_input_tokens_seen": 261150192, "step": 120895 }, { "epoch": 19.72267536704731, "grad_norm": 0.0008974650991149247, "learning_rate": 5.862698124353694e-07, "loss": 0.0004, "num_input_tokens_seen": 261159824, "step": 120900 }, { "epoch": 19.723491027732464, "grad_norm": 0.08313114941120148, "learning_rate": 5.828289282480692e-07, "loss": 0.0048, "num_input_tokens_seen": 261170960, "step": 120905 }, { "epoch": 19.724306688417617, "grad_norm": 0.010251346975564957, "learning_rate": 5.793981653832714e-07, "loss": 0.0006, "num_input_tokens_seen": 261182000, "step": 120910 }, { "epoch": 19.725122349102772, "grad_norm": 0.0011222073808312416, "learning_rate": 5.759775239105314e-07, "loss": 0.0007, "num_input_tokens_seen": 261193168, "step": 120915 }, { "epoch": 19.725938009787928, "grad_norm": 0.0014564159791916609, "learning_rate": 5.72567003899127e-07, "loss": 0.0018, "num_input_tokens_seen": 261203856, "step": 120920 }, { "epoch": 19.726753670473084, "grad_norm": 0.0033955418039113283, "learning_rate": 5.691666054182809e-07, "loss": 0.0008, "num_input_tokens_seen": 261215088, "step": 120925 }, { "epoch": 19.72756933115824, "grad_norm": 0.0031751454807817936, "learning_rate": 5.657763285368267e-07, "loss": 0.0011, "num_input_tokens_seen": 261226512, "step": 120930 }, { "epoch": 19.72838499184339, "grad_norm": 0.00040086961234919727, "learning_rate": 5.623961733234873e-07, "loss": 0.0005, "num_input_tokens_seen": 261237040, "step": 120935 }, { "epoch": 19.729200652528547, "grad_norm": 0.005273323971778154, "learning_rate": 5.590261398467633e-07, "loss": 0.0005, "num_input_tokens_seen": 261248624, "step": 120940 }, { "epoch": 19.730016313213703, "grad_norm": 0.00022064868244342506, "learning_rate": 5.556662281749891e-07, "loss": 0.0026, "num_input_tokens_seen": 261259248, "step": 120945 }, { "epoch": 19.73083197389886, "grad_norm": 0.011284386739134789, "learning_rate": 5.523164383762213e-07, "loss": 0.0179, "num_input_tokens_seen": 261269584, "step": 120950 }, { "epoch": 19.731647634584014, "grad_norm": 0.19131779670715332, "learning_rate": 5.489767705183501e-07, "loss": 0.038, "num_input_tokens_seen": 261280688, "step": 120955 }, { "epoch": 19.732463295269167, "grad_norm": 0.0009722855174914002, "learning_rate": 5.456472246690436e-07, "loss": 0.001, "num_input_tokens_seen": 261290512, "step": 120960 }, { "epoch": 19.733278955954322, "grad_norm": 0.03766850382089615, "learning_rate": 5.423278008958032e-07, "loss": 0.0011, "num_input_tokens_seen": 261300880, "step": 120965 }, { "epoch": 19.734094616639478, "grad_norm": 0.05344652757048607, "learning_rate": 5.390184992659641e-07, "loss": 0.0024, "num_input_tokens_seen": 261311760, "step": 120970 }, { "epoch": 19.734910277324634, "grad_norm": 0.002851302269846201, "learning_rate": 5.357193198464727e-07, "loss": 0.001, "num_input_tokens_seen": 261323760, "step": 120975 }, { "epoch": 19.73572593800979, "grad_norm": 0.28350090980529785, "learning_rate": 5.324302627042199e-07, "loss": 0.0059, "num_input_tokens_seen": 261334704, "step": 120980 }, { "epoch": 19.73654159869494, "grad_norm": 0.005798796657472849, "learning_rate": 5.291513279059301e-07, "loss": 0.0009, "num_input_tokens_seen": 261345616, "step": 120985 }, { "epoch": 19.737357259380097, "grad_norm": 0.004781166557222605, "learning_rate": 5.258825155179948e-07, "loss": 0.0006, "num_input_tokens_seen": 261357168, "step": 120990 }, { "epoch": 19.738172920065253, "grad_norm": 0.001854881877079606, "learning_rate": 5.226238256066384e-07, "loss": 0.1473, "num_input_tokens_seen": 261368848, "step": 120995 }, { "epoch": 19.73898858075041, "grad_norm": 0.019145376980304718, "learning_rate": 5.193752582379752e-07, "loss": 0.0018, "num_input_tokens_seen": 261379504, "step": 121000 }, { "epoch": 19.739804241435564, "grad_norm": 0.0002677353622857481, "learning_rate": 5.16136813477841e-07, "loss": 0.0007, "num_input_tokens_seen": 261390224, "step": 121005 }, { "epoch": 19.740619902120716, "grad_norm": 0.0040374575182795525, "learning_rate": 5.129084913917948e-07, "loss": 0.0016, "num_input_tokens_seen": 261401424, "step": 121010 }, { "epoch": 19.741435562805872, "grad_norm": 0.11591839045286179, "learning_rate": 5.096902920453395e-07, "loss": 0.0055, "num_input_tokens_seen": 261412752, "step": 121015 }, { "epoch": 19.742251223491028, "grad_norm": 0.0011770358541980386, "learning_rate": 5.064822155036453e-07, "loss": 0.0003, "num_input_tokens_seen": 261424208, "step": 121020 }, { "epoch": 19.743066884176184, "grad_norm": 0.0017884820699691772, "learning_rate": 5.032842618317157e-07, "loss": 0.0002, "num_input_tokens_seen": 261435344, "step": 121025 }, { "epoch": 19.74388254486134, "grad_norm": 0.2122792750597, "learning_rate": 5.000964310943878e-07, "loss": 0.0038, "num_input_tokens_seen": 261445520, "step": 121030 }, { "epoch": 19.74469820554649, "grad_norm": 0.0012699973303824663, "learning_rate": 4.969187233562767e-07, "loss": 0.0006, "num_input_tokens_seen": 261455728, "step": 121035 }, { "epoch": 19.745513866231647, "grad_norm": 0.0003483338514342904, "learning_rate": 4.937511386817751e-07, "loss": 0.0004, "num_input_tokens_seen": 261466512, "step": 121040 }, { "epoch": 19.746329526916803, "grad_norm": 0.00047351993271149695, "learning_rate": 4.905936771351094e-07, "loss": 0.0016, "num_input_tokens_seen": 261477936, "step": 121045 }, { "epoch": 19.74714518760196, "grad_norm": 0.0007477999897673726, "learning_rate": 4.874463387801731e-07, "loss": 0.0024, "num_input_tokens_seen": 261488912, "step": 121050 }, { "epoch": 19.747960848287114, "grad_norm": 0.032901830971241, "learning_rate": 4.843091236808594e-07, "loss": 0.001, "num_input_tokens_seen": 261499024, "step": 121055 }, { "epoch": 19.748776508972266, "grad_norm": 0.0010255652014166117, "learning_rate": 4.811820319006732e-07, "loss": 0.0029, "num_input_tokens_seen": 261509328, "step": 121060 }, { "epoch": 19.749592169657422, "grad_norm": 0.05027930438518524, "learning_rate": 4.780650635030081e-07, "loss": 0.0014, "num_input_tokens_seen": 261521136, "step": 121065 }, { "epoch": 19.750407830342578, "grad_norm": 0.0001708085328573361, "learning_rate": 4.7495821855109145e-07, "loss": 0.0016, "num_input_tokens_seen": 261531824, "step": 121070 }, { "epoch": 19.751223491027734, "grad_norm": 0.0003755021607503295, "learning_rate": 4.718614971078172e-07, "loss": 0.0016, "num_input_tokens_seen": 261543120, "step": 121075 }, { "epoch": 19.752039151712886, "grad_norm": 0.0013738623820245266, "learning_rate": 4.6877489923596863e-07, "loss": 0.0013, "num_input_tokens_seen": 261552464, "step": 121080 }, { "epoch": 19.75285481239804, "grad_norm": 0.00029178871773183346, "learning_rate": 4.6569842499805113e-07, "loss": 0.0003, "num_input_tokens_seen": 261563088, "step": 121085 }, { "epoch": 19.753670473083197, "grad_norm": 0.0012256011832505465, "learning_rate": 4.626320744565149e-07, "loss": 0.0037, "num_input_tokens_seen": 261574672, "step": 121090 }, { "epoch": 19.754486133768353, "grad_norm": 0.06535517424345016, "learning_rate": 4.5957584767342133e-07, "loss": 0.0027, "num_input_tokens_seen": 261586224, "step": 121095 }, { "epoch": 19.75530179445351, "grad_norm": 0.437174916267395, "learning_rate": 4.5652974471077637e-07, "loss": 0.0119, "num_input_tokens_seen": 261597552, "step": 121100 }, { "epoch": 19.75611745513866, "grad_norm": 0.0011823754757642746, "learning_rate": 4.534937656301974e-07, "loss": 0.0008, "num_input_tokens_seen": 261609264, "step": 121105 }, { "epoch": 19.756933115823816, "grad_norm": 0.00021741993259638548, "learning_rate": 4.5046791049335733e-07, "loss": 0.0019, "num_input_tokens_seen": 261620400, "step": 121110 }, { "epoch": 19.757748776508972, "grad_norm": 0.007211287505924702, "learning_rate": 4.47452179361485e-07, "loss": 0.0036, "num_input_tokens_seen": 261631632, "step": 121115 }, { "epoch": 19.758564437194128, "grad_norm": 0.02904735691845417, "learning_rate": 4.444465722956981e-07, "loss": 0.1387, "num_input_tokens_seen": 261641168, "step": 121120 }, { "epoch": 19.759380097879284, "grad_norm": 0.02278851345181465, "learning_rate": 4.414510893569479e-07, "loss": 0.0011, "num_input_tokens_seen": 261652016, "step": 121125 }, { "epoch": 19.760195758564436, "grad_norm": 0.0007043814403004944, "learning_rate": 4.384657306059636e-07, "loss": 0.0027, "num_input_tokens_seen": 261662544, "step": 121130 }, { "epoch": 19.76101141924959, "grad_norm": 0.0009921282762661576, "learning_rate": 4.354904961031414e-07, "loss": 0.0096, "num_input_tokens_seen": 261671888, "step": 121135 }, { "epoch": 19.761827079934747, "grad_norm": 0.017717812210321426, "learning_rate": 4.3252538590893285e-07, "loss": 0.001, "num_input_tokens_seen": 261682928, "step": 121140 }, { "epoch": 19.762642740619903, "grad_norm": 0.04009336233139038, "learning_rate": 4.2957040008323456e-07, "loss": 0.0018, "num_input_tokens_seen": 261693520, "step": 121145 }, { "epoch": 19.76345840130506, "grad_norm": 0.1430460810661316, "learning_rate": 4.266255386861095e-07, "loss": 0.0026, "num_input_tokens_seen": 261704432, "step": 121150 }, { "epoch": 19.76427406199021, "grad_norm": 0.011360394768416882, "learning_rate": 4.2369080177717676e-07, "loss": 0.0069, "num_input_tokens_seen": 261715568, "step": 121155 }, { "epoch": 19.765089722675366, "grad_norm": 0.006870058830827475, "learning_rate": 4.2076618941588875e-07, "loss": 0.0013, "num_input_tokens_seen": 261725008, "step": 121160 }, { "epoch": 19.765905383360522, "grad_norm": 0.01938176155090332, "learning_rate": 4.178517016615313e-07, "loss": 0.0018, "num_input_tokens_seen": 261734992, "step": 121165 }, { "epoch": 19.766721044045678, "grad_norm": 0.002593178069218993, "learning_rate": 4.1494733857322385e-07, "loss": 0.0121, "num_input_tokens_seen": 261746864, "step": 121170 }, { "epoch": 19.767536704730833, "grad_norm": 0.00021053437376394868, "learning_rate": 4.120531002096972e-07, "loss": 0.0008, "num_input_tokens_seen": 261757648, "step": 121175 }, { "epoch": 19.768352365415986, "grad_norm": 0.0394943468272686, "learning_rate": 4.091689866297377e-07, "loss": 0.0032, "num_input_tokens_seen": 261769104, "step": 121180 }, { "epoch": 19.76916802610114, "grad_norm": 0.02225523442029953, "learning_rate": 4.0629499789174293e-07, "loss": 0.0011, "num_input_tokens_seen": 261780432, "step": 121185 }, { "epoch": 19.769983686786297, "grad_norm": 0.00019127337145619094, "learning_rate": 4.034311340539443e-07, "loss": 0.0002, "num_input_tokens_seen": 261791312, "step": 121190 }, { "epoch": 19.770799347471453, "grad_norm": 0.0008985060267150402, "learning_rate": 4.005773951744063e-07, "loss": 0.0011, "num_input_tokens_seen": 261802416, "step": 121195 }, { "epoch": 19.77161500815661, "grad_norm": 0.208012193441391, "learning_rate": 3.977337813109716e-07, "loss": 0.0042, "num_input_tokens_seen": 261813328, "step": 121200 }, { "epoch": 19.77243066884176, "grad_norm": 0.0006497858557850122, "learning_rate": 3.949002925212053e-07, "loss": 0.0006, "num_input_tokens_seen": 261824464, "step": 121205 }, { "epoch": 19.773246329526916, "grad_norm": 0.0009609381668269634, "learning_rate": 3.920769288626169e-07, "loss": 0.0002, "num_input_tokens_seen": 261834416, "step": 121210 }, { "epoch": 19.774061990212072, "grad_norm": 0.013297447003424168, "learning_rate": 3.8926369039238295e-07, "loss": 0.0395, "num_input_tokens_seen": 261844944, "step": 121215 }, { "epoch": 19.774877650897228, "grad_norm": 0.0035913216415792704, "learning_rate": 3.864605771675134e-07, "loss": 0.0034, "num_input_tokens_seen": 261855120, "step": 121220 }, { "epoch": 19.775693311582383, "grad_norm": 0.0012953771511092782, "learning_rate": 3.8366758924479605e-07, "loss": 0.0014, "num_input_tokens_seen": 261865488, "step": 121225 }, { "epoch": 19.776508972267536, "grad_norm": 0.0031643963884562254, "learning_rate": 3.808847266809079e-07, "loss": 0.0058, "num_input_tokens_seen": 261876752, "step": 121230 }, { "epoch": 19.77732463295269, "grad_norm": 0.0012785486178472638, "learning_rate": 3.781119895321927e-07, "loss": 0.0013, "num_input_tokens_seen": 261887728, "step": 121235 }, { "epoch": 19.778140293637847, "grad_norm": 0.1394270658493042, "learning_rate": 3.753493778548278e-07, "loss": 0.0021, "num_input_tokens_seen": 261899056, "step": 121240 }, { "epoch": 19.778955954323003, "grad_norm": 0.18898367881774902, "learning_rate": 3.725968917048794e-07, "loss": 0.0079, "num_input_tokens_seen": 261909200, "step": 121245 }, { "epoch": 19.77977161500816, "grad_norm": 0.047763291746377945, "learning_rate": 3.6985453113802525e-07, "loss": 0.0452, "num_input_tokens_seen": 261920304, "step": 121250 }, { "epoch": 19.78058727569331, "grad_norm": 0.006899984087795019, "learning_rate": 3.6712229620988744e-07, "loss": 0.0005, "num_input_tokens_seen": 261931952, "step": 121255 }, { "epoch": 19.781402936378466, "grad_norm": 0.0034642857499420643, "learning_rate": 3.644001869758662e-07, "loss": 0.0015, "num_input_tokens_seen": 261944080, "step": 121260 }, { "epoch": 19.782218597063622, "grad_norm": 0.2206059992313385, "learning_rate": 3.616882034911395e-07, "loss": 0.0048, "num_input_tokens_seen": 261955888, "step": 121265 }, { "epoch": 19.783034257748778, "grad_norm": 0.007103486452251673, "learning_rate": 3.58986345810608e-07, "loss": 0.0023, "num_input_tokens_seen": 261966960, "step": 121270 }, { "epoch": 19.78384991843393, "grad_norm": 0.0023222120944410563, "learning_rate": 3.56294613989061e-07, "loss": 0.0003, "num_input_tokens_seen": 261978224, "step": 121275 }, { "epoch": 19.784665579119086, "grad_norm": 0.0007937622140161693, "learning_rate": 3.5361300808106625e-07, "loss": 0.0017, "num_input_tokens_seen": 261988208, "step": 121280 }, { "epoch": 19.78548123980424, "grad_norm": 0.08231380581855774, "learning_rate": 3.509415281409134e-07, "loss": 0.0082, "num_input_tokens_seen": 261998960, "step": 121285 }, { "epoch": 19.786296900489397, "grad_norm": 0.001557769370265305, "learning_rate": 3.4828017422278146e-07, "loss": 0.0012, "num_input_tokens_seen": 262010288, "step": 121290 }, { "epoch": 19.787112561174553, "grad_norm": 0.00967491790652275, "learning_rate": 3.4562894638062727e-07, "loss": 0.0039, "num_input_tokens_seen": 262020816, "step": 121295 }, { "epoch": 19.787928221859705, "grad_norm": 0.06240087002515793, "learning_rate": 3.4298784466818553e-07, "loss": 0.0029, "num_input_tokens_seen": 262032528, "step": 121300 }, { "epoch": 19.78874388254486, "grad_norm": 0.015026670880615711, "learning_rate": 3.403568691389136e-07, "loss": 0.0316, "num_input_tokens_seen": 262043600, "step": 121305 }, { "epoch": 19.789559543230016, "grad_norm": 0.018473364412784576, "learning_rate": 3.3773601984615766e-07, "loss": 0.0026, "num_input_tokens_seen": 262055952, "step": 121310 }, { "epoch": 19.790375203915172, "grad_norm": 0.007226514630019665, "learning_rate": 3.3512529684309736e-07, "loss": 0.0008, "num_input_tokens_seen": 262067280, "step": 121315 }, { "epoch": 19.791190864600328, "grad_norm": 0.004439678508788347, "learning_rate": 3.325247001825793e-07, "loss": 0.0079, "num_input_tokens_seen": 262077968, "step": 121320 }, { "epoch": 19.79200652528548, "grad_norm": 0.15969239175319672, "learning_rate": 3.299342299172836e-07, "loss": 0.0034, "num_input_tokens_seen": 262089072, "step": 121325 }, { "epoch": 19.792822185970635, "grad_norm": 0.004921938292682171, "learning_rate": 3.2735388609977936e-07, "loss": 0.0004, "num_input_tokens_seen": 262099184, "step": 121330 }, { "epoch": 19.79363784665579, "grad_norm": 0.0014474753988906741, "learning_rate": 3.24783668782358e-07, "loss": 0.0012, "num_input_tokens_seen": 262110480, "step": 121335 }, { "epoch": 19.794453507340947, "grad_norm": 0.0009749328601174057, "learning_rate": 3.222235780170335e-07, "loss": 0.0009, "num_input_tokens_seen": 262120240, "step": 121340 }, { "epoch": 19.795269168026103, "grad_norm": 0.0023331588599830866, "learning_rate": 3.196736138557088e-07, "loss": 0.0005, "num_input_tokens_seen": 262132272, "step": 121345 }, { "epoch": 19.796084828711255, "grad_norm": 0.0006216101464815438, "learning_rate": 3.171337763501203e-07, "loss": 0.0071, "num_input_tokens_seen": 262142736, "step": 121350 }, { "epoch": 19.79690048939641, "grad_norm": 0.0431019552052021, "learning_rate": 3.146040655517268e-07, "loss": 0.0047, "num_input_tokens_seen": 262153552, "step": 121355 }, { "epoch": 19.797716150081566, "grad_norm": 0.0003282705438323319, "learning_rate": 3.1208448151176516e-07, "loss": 0.0016, "num_input_tokens_seen": 262164240, "step": 121360 }, { "epoch": 19.798531810766722, "grad_norm": 0.0002833571925293654, "learning_rate": 3.0957502428130557e-07, "loss": 0.0004, "num_input_tokens_seen": 262176656, "step": 121365 }, { "epoch": 19.799347471451878, "grad_norm": 0.0010412463452666998, "learning_rate": 3.070756939111963e-07, "loss": 0.0011, "num_input_tokens_seen": 262186448, "step": 121370 }, { "epoch": 19.80016313213703, "grad_norm": 0.0002267042436869815, "learning_rate": 3.0458649045211895e-07, "loss": 0.0004, "num_input_tokens_seen": 262196592, "step": 121375 }, { "epoch": 19.800978792822185, "grad_norm": 0.03307168558239937, "learning_rate": 3.021074139545332e-07, "loss": 0.0019, "num_input_tokens_seen": 262207728, "step": 121380 }, { "epoch": 19.80179445350734, "grad_norm": 0.007719150744378567, "learning_rate": 2.996384644686212e-07, "loss": 0.0411, "num_input_tokens_seen": 262219760, "step": 121385 }, { "epoch": 19.802610114192497, "grad_norm": 0.0004898576298728585, "learning_rate": 2.971796420444539e-07, "loss": 0.0013, "num_input_tokens_seen": 262230256, "step": 121390 }, { "epoch": 19.803425774877653, "grad_norm": 0.15931858122348785, "learning_rate": 2.947309467318804e-07, "loss": 0.005, "num_input_tokens_seen": 262241776, "step": 121395 }, { "epoch": 19.804241435562805, "grad_norm": 0.0005706818192265928, "learning_rate": 2.922923785804721e-07, "loss": 0.0006, "num_input_tokens_seen": 262252304, "step": 121400 }, { "epoch": 19.80505709624796, "grad_norm": 0.0008127331384457648, "learning_rate": 2.898639376396894e-07, "loss": 0.0013, "num_input_tokens_seen": 262262736, "step": 121405 }, { "epoch": 19.805872756933116, "grad_norm": 0.014661543071269989, "learning_rate": 2.8744562395877083e-07, "loss": 0.0016, "num_input_tokens_seen": 262275280, "step": 121410 }, { "epoch": 19.806688417618272, "grad_norm": 0.00010798094444908202, "learning_rate": 2.850374375866216e-07, "loss": 0.0009, "num_input_tokens_seen": 262284560, "step": 121415 }, { "epoch": 19.807504078303424, "grad_norm": 0.015252824872732162, "learning_rate": 2.826393785722026e-07, "loss": 0.0005, "num_input_tokens_seen": 262294864, "step": 121420 }, { "epoch": 19.80831973898858, "grad_norm": 0.024584434926509857, "learning_rate": 2.80251446963975e-07, "loss": 0.0019, "num_input_tokens_seen": 262305584, "step": 121425 }, { "epoch": 19.809135399673735, "grad_norm": 0.000190110455150716, "learning_rate": 2.778736428104556e-07, "loss": 0.0005, "num_input_tokens_seen": 262317040, "step": 121430 }, { "epoch": 19.80995106035889, "grad_norm": 0.030826276168227196, "learning_rate": 2.75505966159717e-07, "loss": 0.0023, "num_input_tokens_seen": 262328336, "step": 121435 }, { "epoch": 19.810766721044047, "grad_norm": 0.00027534199762158096, "learning_rate": 2.73148417059832e-07, "loss": 0.0045, "num_input_tokens_seen": 262338928, "step": 121440 }, { "epoch": 19.8115823817292, "grad_norm": 0.0006019662832841277, "learning_rate": 2.708009955584845e-07, "loss": 0.0008, "num_input_tokens_seen": 262350896, "step": 121445 }, { "epoch": 19.812398042414355, "grad_norm": 0.00021458462288137525, "learning_rate": 2.684637017033587e-07, "loss": 0.0009, "num_input_tokens_seen": 262360976, "step": 121450 }, { "epoch": 19.81321370309951, "grad_norm": 0.0041677881963551044, "learning_rate": 2.6613653554175e-07, "loss": 0.0008, "num_input_tokens_seen": 262371824, "step": 121455 }, { "epoch": 19.814029363784666, "grad_norm": 0.00046735754585824907, "learning_rate": 2.6381949712089846e-07, "loss": 0.017, "num_input_tokens_seen": 262382320, "step": 121460 }, { "epoch": 19.81484502446982, "grad_norm": 0.0006864400929771364, "learning_rate": 2.6151258648765553e-07, "loss": 0.001, "num_input_tokens_seen": 262392624, "step": 121465 }, { "epoch": 19.815660685154974, "grad_norm": 0.0004593665653374046, "learning_rate": 2.59215803688817e-07, "loss": 0.0009, "num_input_tokens_seen": 262403792, "step": 121470 }, { "epoch": 19.81647634584013, "grad_norm": 0.00036628826637752354, "learning_rate": 2.5692914877090135e-07, "loss": 0.001, "num_input_tokens_seen": 262414800, "step": 121475 }, { "epoch": 19.817292006525285, "grad_norm": 0.002507440047338605, "learning_rate": 2.546526217803713e-07, "loss": 0.0037, "num_input_tokens_seen": 262425776, "step": 121480 }, { "epoch": 19.81810766721044, "grad_norm": 0.001124211703427136, "learning_rate": 2.5238622276319014e-07, "loss": 0.0008, "num_input_tokens_seen": 262435728, "step": 121485 }, { "epoch": 19.818923327895597, "grad_norm": 0.004534002393484116, "learning_rate": 2.501299517654321e-07, "loss": 0.0088, "num_input_tokens_seen": 262446608, "step": 121490 }, { "epoch": 19.81973898858075, "grad_norm": 0.002117312513291836, "learning_rate": 2.4788380883278285e-07, "loss": 0.0007, "num_input_tokens_seen": 262458512, "step": 121495 }, { "epoch": 19.820554649265905, "grad_norm": 0.0016950422432273626, "learning_rate": 2.4564779401070604e-07, "loss": 0.0021, "num_input_tokens_seen": 262468688, "step": 121500 }, { "epoch": 19.82137030995106, "grad_norm": 0.005130813457071781, "learning_rate": 2.434219073445543e-07, "loss": 0.0008, "num_input_tokens_seen": 262478992, "step": 121505 }, { "epoch": 19.822185970636216, "grad_norm": 0.17918558418750763, "learning_rate": 2.412061488795136e-07, "loss": 0.0012, "num_input_tokens_seen": 262490224, "step": 121510 }, { "epoch": 19.82300163132137, "grad_norm": 0.03207606077194214, "learning_rate": 2.390005186603261e-07, "loss": 0.001, "num_input_tokens_seen": 262501040, "step": 121515 }, { "epoch": 19.823817292006524, "grad_norm": 0.0009639065247029066, "learning_rate": 2.3680501673184474e-07, "loss": 0.0007, "num_input_tokens_seen": 262511152, "step": 121520 }, { "epoch": 19.82463295269168, "grad_norm": 0.0005210313247516751, "learning_rate": 2.346196431384784e-07, "loss": 0.0008, "num_input_tokens_seen": 262521744, "step": 121525 }, { "epoch": 19.825448613376835, "grad_norm": 0.0011603363091126084, "learning_rate": 2.324443979245805e-07, "loss": 0.0008, "num_input_tokens_seen": 262532368, "step": 121530 }, { "epoch": 19.82626427406199, "grad_norm": 0.0002771125582512468, "learning_rate": 2.302792811341714e-07, "loss": 0.0008, "num_input_tokens_seen": 262543152, "step": 121535 }, { "epoch": 19.827079934747147, "grad_norm": 0.00139376032166183, "learning_rate": 2.2812429281116043e-07, "loss": 0.0003, "num_input_tokens_seen": 262554512, "step": 121540 }, { "epoch": 19.8278955954323, "grad_norm": 0.017726287245750427, "learning_rate": 2.2597943299923484e-07, "loss": 0.0015, "num_input_tokens_seen": 262564592, "step": 121545 }, { "epoch": 19.828711256117455, "grad_norm": 0.02565399929881096, "learning_rate": 2.2384470174180438e-07, "loss": 0.0028, "num_input_tokens_seen": 262574192, "step": 121550 }, { "epoch": 19.82952691680261, "grad_norm": 0.0008128905319608748, "learning_rate": 2.2172009908216772e-07, "loss": 0.0083, "num_input_tokens_seen": 262585200, "step": 121555 }, { "epoch": 19.830342577487766, "grad_norm": 0.002057405421510339, "learning_rate": 2.1960562506340153e-07, "loss": 0.0007, "num_input_tokens_seen": 262596496, "step": 121560 }, { "epoch": 19.83115823817292, "grad_norm": 0.0012084591435268521, "learning_rate": 2.1750127972836042e-07, "loss": 0.0005, "num_input_tokens_seen": 262608144, "step": 121565 }, { "epoch": 19.831973898858074, "grad_norm": 0.001822744612582028, "learning_rate": 2.1540706311967695e-07, "loss": 0.0005, "num_input_tokens_seen": 262619312, "step": 121570 }, { "epoch": 19.83278955954323, "grad_norm": 0.0852610394358635, "learning_rate": 2.1332297527976164e-07, "loss": 0.006, "num_input_tokens_seen": 262630000, "step": 121575 }, { "epoch": 19.833605220228385, "grad_norm": 0.002231738530099392, "learning_rate": 2.1124901625091397e-07, "loss": 0.0006, "num_input_tokens_seen": 262640784, "step": 121580 }, { "epoch": 19.83442088091354, "grad_norm": 0.029305459931492805, "learning_rate": 2.091851860751004e-07, "loss": 0.0027, "num_input_tokens_seen": 262651664, "step": 121585 }, { "epoch": 19.835236541598697, "grad_norm": 0.00031926666270010173, "learning_rate": 2.071314847941763e-07, "loss": 0.0005, "num_input_tokens_seen": 262662288, "step": 121590 }, { "epoch": 19.83605220228385, "grad_norm": 0.0077890572138130665, "learning_rate": 2.050879124498306e-07, "loss": 0.0006, "num_input_tokens_seen": 262672080, "step": 121595 }, { "epoch": 19.836867862969005, "grad_norm": 0.07575695961713791, "learning_rate": 2.0305446908336355e-07, "loss": 0.0029, "num_input_tokens_seen": 262683952, "step": 121600 }, { "epoch": 19.83768352365416, "grad_norm": 0.010576908476650715, "learning_rate": 2.0103115473601996e-07, "loss": 0.0006, "num_input_tokens_seen": 262693904, "step": 121605 }, { "epoch": 19.838499184339316, "grad_norm": 0.008228904567658901, "learning_rate": 1.9901796944882254e-07, "loss": 0.0011, "num_input_tokens_seen": 262706064, "step": 121610 }, { "epoch": 19.839314845024468, "grad_norm": 0.000713883840944618, "learning_rate": 1.9701491326257203e-07, "loss": 0.0005, "num_input_tokens_seen": 262716688, "step": 121615 }, { "epoch": 19.840130505709624, "grad_norm": 0.0005843836115673184, "learning_rate": 1.9502198621790257e-07, "loss": 0.0027, "num_input_tokens_seen": 262726800, "step": 121620 }, { "epoch": 19.84094616639478, "grad_norm": 0.00699404114857316, "learning_rate": 1.9303918835511526e-07, "loss": 0.0032, "num_input_tokens_seen": 262737968, "step": 121625 }, { "epoch": 19.841761827079935, "grad_norm": 0.0425538644194603, "learning_rate": 1.9106651971445564e-07, "loss": 0.0016, "num_input_tokens_seen": 262747696, "step": 121630 }, { "epoch": 19.84257748776509, "grad_norm": 0.013981866650283337, "learning_rate": 1.8910398033589182e-07, "loss": 0.0991, "num_input_tokens_seen": 262759440, "step": 121635 }, { "epoch": 19.843393148450243, "grad_norm": 0.0002504394797142595, "learning_rate": 1.8715157025916972e-07, "loss": 0.002, "num_input_tokens_seen": 262770896, "step": 121640 }, { "epoch": 19.8442088091354, "grad_norm": 0.00026449389406479895, "learning_rate": 1.8520928952386885e-07, "loss": 0.0005, "num_input_tokens_seen": 262782192, "step": 121645 }, { "epoch": 19.845024469820554, "grad_norm": 0.007747107185423374, "learning_rate": 1.8327713816940207e-07, "loss": 0.0004, "num_input_tokens_seen": 262792624, "step": 121650 }, { "epoch": 19.84584013050571, "grad_norm": 0.0021923938766121864, "learning_rate": 1.8135511623484925e-07, "loss": 0.0006, "num_input_tokens_seen": 262803888, "step": 121655 }, { "epoch": 19.846655791190866, "grad_norm": 0.018954308703541756, "learning_rate": 1.7944322375923472e-07, "loss": 0.0011, "num_input_tokens_seen": 262814480, "step": 121660 }, { "epoch": 19.847471451876018, "grad_norm": 0.0002542664296925068, "learning_rate": 1.7754146078124976e-07, "loss": 0.0003, "num_input_tokens_seen": 262824752, "step": 121665 }, { "epoch": 19.848287112561174, "grad_norm": 0.004432213492691517, "learning_rate": 1.7564982733947465e-07, "loss": 0.001, "num_input_tokens_seen": 262835536, "step": 121670 }, { "epoch": 19.84910277324633, "grad_norm": 0.0007810618262737989, "learning_rate": 1.7376832347221206e-07, "loss": 0.0011, "num_input_tokens_seen": 262846320, "step": 121675 }, { "epoch": 19.849918433931485, "grad_norm": 0.002990703098475933, "learning_rate": 1.7189694921759813e-07, "loss": 0.0007, "num_input_tokens_seen": 262856656, "step": 121680 }, { "epoch": 19.85073409461664, "grad_norm": 0.00043039917363785207, "learning_rate": 1.700357046136025e-07, "loss": 0.0005, "num_input_tokens_seen": 262866128, "step": 121685 }, { "epoch": 19.851549755301793, "grad_norm": 0.09391754120588303, "learning_rate": 1.6818458969786177e-07, "loss": 0.0046, "num_input_tokens_seen": 262876976, "step": 121690 }, { "epoch": 19.85236541598695, "grad_norm": 0.0021796554792672396, "learning_rate": 1.6634360450795694e-07, "loss": 0.0006, "num_input_tokens_seen": 262887664, "step": 121695 }, { "epoch": 19.853181076672104, "grad_norm": 0.004136047791689634, "learning_rate": 1.6451274908124703e-07, "loss": 0.0005, "num_input_tokens_seen": 262899408, "step": 121700 }, { "epoch": 19.85399673735726, "grad_norm": 0.029257941991090775, "learning_rate": 1.6269202345470247e-07, "loss": 0.0119, "num_input_tokens_seen": 262911120, "step": 121705 }, { "epoch": 19.854812398042416, "grad_norm": 0.0014244935009628534, "learning_rate": 1.6088142766529367e-07, "loss": 0.0005, "num_input_tokens_seen": 262923120, "step": 121710 }, { "epoch": 19.855628058727568, "grad_norm": 0.0018149535171687603, "learning_rate": 1.5908096174976904e-07, "loss": 0.0014, "num_input_tokens_seen": 262933552, "step": 121715 }, { "epoch": 19.856443719412724, "grad_norm": 0.0005949955666437745, "learning_rate": 1.5729062574448838e-07, "loss": 0.0006, "num_input_tokens_seen": 262944784, "step": 121720 }, { "epoch": 19.85725938009788, "grad_norm": 0.03684856742620468, "learning_rate": 1.55510419685867e-07, "loss": 0.0007, "num_input_tokens_seen": 262957040, "step": 121725 }, { "epoch": 19.858075040783035, "grad_norm": 0.03020774945616722, "learning_rate": 1.5374034360993162e-07, "loss": 0.0104, "num_input_tokens_seen": 262969136, "step": 121730 }, { "epoch": 19.85889070146819, "grad_norm": 0.0009404148440808058, "learning_rate": 1.5198039755248693e-07, "loss": 0.001, "num_input_tokens_seen": 262980080, "step": 121735 }, { "epoch": 19.859706362153343, "grad_norm": 0.009650173597037792, "learning_rate": 1.5023058154928216e-07, "loss": 0.0013, "num_input_tokens_seen": 262990576, "step": 121740 }, { "epoch": 19.8605220228385, "grad_norm": 0.0007336720591410995, "learning_rate": 1.4849089563578888e-07, "loss": 0.0014, "num_input_tokens_seen": 263000240, "step": 121745 }, { "epoch": 19.861337683523654, "grad_norm": 0.0011980609269812703, "learning_rate": 1.467613398472567e-07, "loss": 0.0031, "num_input_tokens_seen": 263011280, "step": 121750 }, { "epoch": 19.86215334420881, "grad_norm": 0.0014163218438625336, "learning_rate": 1.4504191421865765e-07, "loss": 0.0004, "num_input_tokens_seen": 263021200, "step": 121755 }, { "epoch": 19.862969004893966, "grad_norm": 0.4547708034515381, "learning_rate": 1.433326187849082e-07, "loss": 0.0037, "num_input_tokens_seen": 263032560, "step": 121760 }, { "epoch": 19.863784665579118, "grad_norm": 0.0008637936552986503, "learning_rate": 1.416334535806474e-07, "loss": 0.0148, "num_input_tokens_seen": 263043984, "step": 121765 }, { "epoch": 19.864600326264274, "grad_norm": 0.006428821943700314, "learning_rate": 1.3994441864029206e-07, "loss": 0.0031, "num_input_tokens_seen": 263055152, "step": 121770 }, { "epoch": 19.86541598694943, "grad_norm": 0.0065291267819702625, "learning_rate": 1.3826551399809263e-07, "loss": 0.0008, "num_input_tokens_seen": 263065904, "step": 121775 }, { "epoch": 19.866231647634585, "grad_norm": 0.0003827828913927078, "learning_rate": 1.3659673968802188e-07, "loss": 0.0023, "num_input_tokens_seen": 263077232, "step": 121780 }, { "epoch": 19.86704730831974, "grad_norm": 0.001834007678553462, "learning_rate": 1.3493809574399717e-07, "loss": 0.0004, "num_input_tokens_seen": 263088816, "step": 121785 }, { "epoch": 19.867862969004893, "grad_norm": 0.00888867024332285, "learning_rate": 1.3328958219954724e-07, "loss": 0.0005, "num_input_tokens_seen": 263100912, "step": 121790 }, { "epoch": 19.86867862969005, "grad_norm": 0.006172510329633951, "learning_rate": 1.3165119908808976e-07, "loss": 0.001, "num_input_tokens_seen": 263112176, "step": 121795 }, { "epoch": 19.869494290375204, "grad_norm": 0.025691168382763863, "learning_rate": 1.3002294644287593e-07, "loss": 0.0026, "num_input_tokens_seen": 263123248, "step": 121800 }, { "epoch": 19.87030995106036, "grad_norm": 0.00295065576210618, "learning_rate": 1.284048242968794e-07, "loss": 0.0007, "num_input_tokens_seen": 263135440, "step": 121805 }, { "epoch": 19.871125611745512, "grad_norm": 0.0006291031604632735, "learning_rate": 1.267968326829072e-07, "loss": 0.0061, "num_input_tokens_seen": 263145584, "step": 121810 }, { "epoch": 19.871941272430668, "grad_norm": 0.0017852471210062504, "learning_rate": 1.2519897163348894e-07, "loss": 0.0006, "num_input_tokens_seen": 263156112, "step": 121815 }, { "epoch": 19.872756933115824, "grad_norm": 0.00026627699844539165, "learning_rate": 1.2361124118109856e-07, "loss": 0.0016, "num_input_tokens_seen": 263167856, "step": 121820 }, { "epoch": 19.87357259380098, "grad_norm": 0.0008435967029072344, "learning_rate": 1.220336413578216e-07, "loss": 0.0022, "num_input_tokens_seen": 263179248, "step": 121825 }, { "epoch": 19.874388254486135, "grad_norm": 0.014502918347716331, "learning_rate": 1.204661721956879e-07, "loss": 0.0008, "num_input_tokens_seen": 263191120, "step": 121830 }, { "epoch": 19.875203915171287, "grad_norm": 0.0047426181845366955, "learning_rate": 1.1890883372644989e-07, "loss": 0.0011, "num_input_tokens_seen": 263202352, "step": 121835 }, { "epoch": 19.876019575856443, "grad_norm": 0.006645440123975277, "learning_rate": 1.1736162598163791e-07, "loss": 0.001, "num_input_tokens_seen": 263214032, "step": 121840 }, { "epoch": 19.8768352365416, "grad_norm": 0.026239193975925446, "learning_rate": 1.1582454899267126e-07, "loss": 0.0018, "num_input_tokens_seen": 263225712, "step": 121845 }, { "epoch": 19.877650897226754, "grad_norm": 0.00017447816208004951, "learning_rate": 1.1429760279069168e-07, "loss": 0.0022, "num_input_tokens_seen": 263235632, "step": 121850 }, { "epoch": 19.87846655791191, "grad_norm": 0.014652963727712631, "learning_rate": 1.1278078740656339e-07, "loss": 0.0016, "num_input_tokens_seen": 263245872, "step": 121855 }, { "epoch": 19.879282218597062, "grad_norm": 0.35925230383872986, "learning_rate": 1.1127410287115059e-07, "loss": 0.0068, "num_input_tokens_seen": 263255984, "step": 121860 }, { "epoch": 19.880097879282218, "grad_norm": 0.000630914350040257, "learning_rate": 1.0977754921487337e-07, "loss": 0.0011, "num_input_tokens_seen": 263266800, "step": 121865 }, { "epoch": 19.880913539967374, "grad_norm": 0.003280578413978219, "learning_rate": 1.0829112646809635e-07, "loss": 0.0008, "num_input_tokens_seen": 263277744, "step": 121870 }, { "epoch": 19.88172920065253, "grad_norm": 0.0055033001117408276, "learning_rate": 1.068148346610176e-07, "loss": 0.0103, "num_input_tokens_seen": 263287120, "step": 121875 }, { "epoch": 19.882544861337685, "grad_norm": 0.014537639915943146, "learning_rate": 1.0534867382344659e-07, "loss": 0.0009, "num_input_tokens_seen": 263297744, "step": 121880 }, { "epoch": 19.883360522022837, "grad_norm": 0.010632148012518883, "learning_rate": 1.0389264398519283e-07, "loss": 0.0022, "num_input_tokens_seen": 263308464, "step": 121885 }, { "epoch": 19.884176182707993, "grad_norm": 0.009531443938612938, "learning_rate": 1.024467451756772e-07, "loss": 0.001, "num_input_tokens_seen": 263319504, "step": 121890 }, { "epoch": 19.88499184339315, "grad_norm": 0.0028590448200702667, "learning_rate": 1.0101097742426513e-07, "loss": 0.0019, "num_input_tokens_seen": 263330384, "step": 121895 }, { "epoch": 19.885807504078304, "grad_norm": 0.001724202185869217, "learning_rate": 9.958534075998893e-08, "loss": 0.0034, "num_input_tokens_seen": 263342160, "step": 121900 }, { "epoch": 19.88662316476346, "grad_norm": 0.0016756814438849688, "learning_rate": 9.816983521182543e-08, "loss": 0.0034, "num_input_tokens_seen": 263352944, "step": 121905 }, { "epoch": 19.887438825448612, "grad_norm": 0.021503252908587456, "learning_rate": 9.676446080841839e-08, "loss": 0.0038, "num_input_tokens_seen": 263364752, "step": 121910 }, { "epoch": 19.888254486133768, "grad_norm": 0.06410571932792664, "learning_rate": 9.536921757824502e-08, "loss": 0.0013, "num_input_tokens_seen": 263375536, "step": 121915 }, { "epoch": 19.889070146818923, "grad_norm": 0.0006257572094909847, "learning_rate": 9.39841055495605e-08, "loss": 0.0005, "num_input_tokens_seen": 263386256, "step": 121920 }, { "epoch": 19.88988580750408, "grad_norm": 0.0012954578269273043, "learning_rate": 9.260912475050898e-08, "loss": 0.001, "num_input_tokens_seen": 263396912, "step": 121925 }, { "epoch": 19.890701468189235, "grad_norm": 0.005243930034339428, "learning_rate": 9.124427520890155e-08, "loss": 0.0006, "num_input_tokens_seen": 263408944, "step": 121930 }, { "epoch": 19.891517128874387, "grad_norm": 0.0003207038389518857, "learning_rate": 8.988955695238277e-08, "loss": 0.0062, "num_input_tokens_seen": 263420048, "step": 121935 }, { "epoch": 19.892332789559543, "grad_norm": 0.0029847382102161646, "learning_rate": 8.854497000843065e-08, "loss": 0.0149, "num_input_tokens_seen": 263430480, "step": 121940 }, { "epoch": 19.8931484502447, "grad_norm": 0.007486116606742144, "learning_rate": 8.721051440435668e-08, "loss": 0.0029, "num_input_tokens_seen": 263441136, "step": 121945 }, { "epoch": 19.893964110929854, "grad_norm": 0.0015292003517970443, "learning_rate": 8.588619016708377e-08, "loss": 0.0033, "num_input_tokens_seen": 263450256, "step": 121950 }, { "epoch": 19.894779771615006, "grad_norm": 0.0005196183919906616, "learning_rate": 8.457199732353482e-08, "loss": 0.001, "num_input_tokens_seen": 263461872, "step": 121955 }, { "epoch": 19.895595432300162, "grad_norm": 0.000307762180455029, "learning_rate": 8.32679359003552e-08, "loss": 0.0082, "num_input_tokens_seen": 263473104, "step": 121960 }, { "epoch": 19.896411092985318, "grad_norm": 0.00352225243113935, "learning_rate": 8.197400592391268e-08, "loss": 0.0003, "num_input_tokens_seen": 263482032, "step": 121965 }, { "epoch": 19.897226753670473, "grad_norm": 0.0018733406905084848, "learning_rate": 8.069020742040855e-08, "loss": 0.0066, "num_input_tokens_seen": 263492688, "step": 121970 }, { "epoch": 19.89804241435563, "grad_norm": 0.0029799570329487324, "learning_rate": 7.941654041598856e-08, "loss": 0.0454, "num_input_tokens_seen": 263503504, "step": 121975 }, { "epoch": 19.898858075040785, "grad_norm": 0.0007707338081672788, "learning_rate": 7.815300493635436e-08, "loss": 0.0009, "num_input_tokens_seen": 263514288, "step": 121980 }, { "epoch": 19.899673735725937, "grad_norm": 0.0009643675875850022, "learning_rate": 7.68996010071521e-08, "loss": 0.0007, "num_input_tokens_seen": 263524592, "step": 121985 }, { "epoch": 19.900489396411093, "grad_norm": 0.014110865071415901, "learning_rate": 7.565632865375039e-08, "loss": 0.0021, "num_input_tokens_seen": 263534640, "step": 121990 }, { "epoch": 19.90130505709625, "grad_norm": 0.00029828620608896017, "learning_rate": 7.442318790140679e-08, "loss": 0.0003, "num_input_tokens_seen": 263545488, "step": 121995 }, { "epoch": 19.902120717781404, "grad_norm": 0.02362486906349659, "learning_rate": 7.32001787750458e-08, "loss": 0.0322, "num_input_tokens_seen": 263556592, "step": 122000 }, { "epoch": 19.902936378466556, "grad_norm": 0.024227816611528397, "learning_rate": 7.198730129948094e-08, "loss": 0.0015, "num_input_tokens_seen": 263566960, "step": 122005 }, { "epoch": 19.903752039151712, "grad_norm": 0.025522449985146523, "learning_rate": 7.078455549935914e-08, "loss": 0.0012, "num_input_tokens_seen": 263577968, "step": 122010 }, { "epoch": 19.904567699836868, "grad_norm": 0.0007530459552071989, "learning_rate": 6.959194139893876e-08, "loss": 0.0005, "num_input_tokens_seen": 263588656, "step": 122015 }, { "epoch": 19.905383360522023, "grad_norm": 0.025915952399373055, "learning_rate": 6.840945902242268e-08, "loss": 0.0011, "num_input_tokens_seen": 263599984, "step": 122020 }, { "epoch": 19.90619902120718, "grad_norm": 0.06031077727675438, "learning_rate": 6.723710839384723e-08, "loss": 0.0025, "num_input_tokens_seen": 263611792, "step": 122025 }, { "epoch": 19.90701468189233, "grad_norm": 0.0003311052278149873, "learning_rate": 6.607488953691565e-08, "loss": 0.0014, "num_input_tokens_seen": 263623088, "step": 122030 }, { "epoch": 19.907830342577487, "grad_norm": 0.001318062306381762, "learning_rate": 6.492280247516469e-08, "loss": 0.0004, "num_input_tokens_seen": 263633232, "step": 122035 }, { "epoch": 19.908646003262643, "grad_norm": 0.003854473354294896, "learning_rate": 6.378084723196453e-08, "loss": 0.0022, "num_input_tokens_seen": 263643120, "step": 122040 }, { "epoch": 19.9094616639478, "grad_norm": 0.0028099711053073406, "learning_rate": 6.264902383051885e-08, "loss": 0.0022, "num_input_tokens_seen": 263653712, "step": 122045 }, { "epoch": 19.910277324632954, "grad_norm": 0.008752093650400639, "learning_rate": 6.152733229364272e-08, "loss": 0.0008, "num_input_tokens_seen": 263664752, "step": 122050 }, { "epoch": 19.911092985318106, "grad_norm": 0.002545823808759451, "learning_rate": 6.041577264415122e-08, "loss": 0.0015, "num_input_tokens_seen": 263676272, "step": 122055 }, { "epoch": 19.911908646003262, "grad_norm": 0.0033062314614653587, "learning_rate": 5.9314344904581876e-08, "loss": 0.0006, "num_input_tokens_seen": 263687088, "step": 122060 }, { "epoch": 19.912724306688418, "grad_norm": 0.0010838387534022331, "learning_rate": 5.822304909719467e-08, "loss": 0.0005, "num_input_tokens_seen": 263697072, "step": 122065 }, { "epoch": 19.913539967373573, "grad_norm": 0.07013796269893646, "learning_rate": 5.714188524413855e-08, "loss": 0.0036, "num_input_tokens_seen": 263707728, "step": 122070 }, { "epoch": 19.91435562805873, "grad_norm": 0.004880748223513365, "learning_rate": 5.6070853367284903e-08, "loss": 0.0163, "num_input_tokens_seen": 263718000, "step": 122075 }, { "epoch": 19.91517128874388, "grad_norm": 0.0007239999831654131, "learning_rate": 5.500995348844962e-08, "loss": 0.0101, "num_input_tokens_seen": 263729040, "step": 122080 }, { "epoch": 19.915986949429037, "grad_norm": 0.06177964434027672, "learning_rate": 5.395918562900448e-08, "loss": 0.0057, "num_input_tokens_seen": 263740144, "step": 122085 }, { "epoch": 19.916802610114193, "grad_norm": 0.004266651347279549, "learning_rate": 5.2918549810376806e-08, "loss": 0.0027, "num_input_tokens_seen": 263750448, "step": 122090 }, { "epoch": 19.91761827079935, "grad_norm": 0.16771961748600006, "learning_rate": 5.188804605349429e-08, "loss": 0.0067, "num_input_tokens_seen": 263760080, "step": 122095 }, { "epoch": 19.918433931484504, "grad_norm": 0.013668928295373917, "learning_rate": 5.086767437939566e-08, "loss": 0.0016, "num_input_tokens_seen": 263770160, "step": 122100 }, { "epoch": 19.919249592169656, "grad_norm": 0.005185098387300968, "learning_rate": 4.985743480867555e-08, "loss": 0.0009, "num_input_tokens_seen": 263781104, "step": 122105 }, { "epoch": 19.920065252854812, "grad_norm": 0.019272323697805405, "learning_rate": 4.885732736181758e-08, "loss": 0.0018, "num_input_tokens_seen": 263791440, "step": 122110 }, { "epoch": 19.920880913539968, "grad_norm": 0.0020935176871716976, "learning_rate": 4.7867352059138835e-08, "loss": 0.0008, "num_input_tokens_seen": 263801328, "step": 122115 }, { "epoch": 19.921696574225123, "grad_norm": 0.0010540640214458108, "learning_rate": 4.688750892062332e-08, "loss": 0.001, "num_input_tokens_seen": 263812528, "step": 122120 }, { "epoch": 19.92251223491028, "grad_norm": 0.00029742918559350073, "learning_rate": 4.5917797966144037e-08, "loss": 0.0012, "num_input_tokens_seen": 263823024, "step": 122125 }, { "epoch": 19.92332789559543, "grad_norm": 0.0015542684122920036, "learning_rate": 4.495821921540744e-08, "loss": 0.0011, "num_input_tokens_seen": 263833680, "step": 122130 }, { "epoch": 19.924143556280587, "grad_norm": 0.8919483423233032, "learning_rate": 4.400877268784242e-08, "loss": 0.0234, "num_input_tokens_seen": 263844784, "step": 122135 }, { "epoch": 19.924959216965743, "grad_norm": 0.00047430527047254145, "learning_rate": 4.306945840265586e-08, "loss": 0.0007, "num_input_tokens_seen": 263855984, "step": 122140 }, { "epoch": 19.9257748776509, "grad_norm": 0.001867034356109798, "learning_rate": 4.2140276378943576e-08, "loss": 0.0027, "num_input_tokens_seen": 263867216, "step": 122145 }, { "epoch": 19.92659053833605, "grad_norm": 0.29011815786361694, "learning_rate": 4.1221226635468345e-08, "loss": 0.0101, "num_input_tokens_seen": 263877392, "step": 122150 }, { "epoch": 19.927406199021206, "grad_norm": 0.0008424674742855132, "learning_rate": 4.031230919088191e-08, "loss": 0.0008, "num_input_tokens_seen": 263888464, "step": 122155 }, { "epoch": 19.928221859706362, "grad_norm": 0.0022897582966834307, "learning_rate": 3.941352406361398e-08, "loss": 0.001, "num_input_tokens_seen": 263900144, "step": 122160 }, { "epoch": 19.929037520391518, "grad_norm": 0.003309818683192134, "learning_rate": 3.852487127187221e-08, "loss": 0.0008, "num_input_tokens_seen": 263910800, "step": 122165 }, { "epoch": 19.929853181076673, "grad_norm": 0.021318409591913223, "learning_rate": 3.7646350833697715e-08, "loss": 0.0254, "num_input_tokens_seen": 263921488, "step": 122170 }, { "epoch": 19.930668841761825, "grad_norm": 0.00903343502432108, "learning_rate": 3.677796276685408e-08, "loss": 0.0016, "num_input_tokens_seen": 263932176, "step": 122175 }, { "epoch": 19.93148450244698, "grad_norm": 0.0011127277975901961, "learning_rate": 3.591970708893832e-08, "loss": 0.0008, "num_input_tokens_seen": 263943024, "step": 122180 }, { "epoch": 19.932300163132137, "grad_norm": 0.032411057502031326, "learning_rate": 3.507158381738096e-08, "loss": 0.0025, "num_input_tokens_seen": 263954416, "step": 122185 }, { "epoch": 19.933115823817293, "grad_norm": 0.0004720069991890341, "learning_rate": 3.4233592969334926e-08, "loss": 0.0011, "num_input_tokens_seen": 263962640, "step": 122190 }, { "epoch": 19.93393148450245, "grad_norm": 0.0994650274515152, "learning_rate": 3.340573456184215e-08, "loss": 0.0073, "num_input_tokens_seen": 263973296, "step": 122195 }, { "epoch": 19.9347471451876, "grad_norm": 0.000907588517293334, "learning_rate": 3.258800861155598e-08, "loss": 0.0004, "num_input_tokens_seen": 263984464, "step": 122200 }, { "epoch": 19.935562805872756, "grad_norm": 0.1028270572423935, "learning_rate": 3.178041513518526e-08, "loss": 0.0035, "num_input_tokens_seen": 263994480, "step": 122205 }, { "epoch": 19.936378466557912, "grad_norm": 0.00754641043022275, "learning_rate": 3.098295414899477e-08, "loss": 0.0008, "num_input_tokens_seen": 264005936, "step": 122210 }, { "epoch": 19.937194127243067, "grad_norm": 0.0007687432807870209, "learning_rate": 3.019562566924927e-08, "loss": 0.0038, "num_input_tokens_seen": 264016816, "step": 122215 }, { "epoch": 19.938009787928223, "grad_norm": 0.00819560419768095, "learning_rate": 2.9418429711769445e-08, "loss": 0.0022, "num_input_tokens_seen": 264027952, "step": 122220 }, { "epoch": 19.938825448613375, "grad_norm": 0.044117119163274765, "learning_rate": 2.865136629243148e-08, "loss": 0.0023, "num_input_tokens_seen": 264038992, "step": 122225 }, { "epoch": 19.93964110929853, "grad_norm": 0.005589211825281382, "learning_rate": 2.7894435426722988e-08, "loss": 0.0088, "num_input_tokens_seen": 264049776, "step": 122230 }, { "epoch": 19.940456769983687, "grad_norm": 0.003952317405492067, "learning_rate": 2.7147637130020553e-08, "loss": 0.001, "num_input_tokens_seen": 264060432, "step": 122235 }, { "epoch": 19.941272430668842, "grad_norm": 0.0002749775012489408, "learning_rate": 2.6410971417423214e-08, "loss": 0.0003, "num_input_tokens_seen": 264071856, "step": 122240 }, { "epoch": 19.942088091353998, "grad_norm": 0.012595501728355885, "learning_rate": 2.5684438303807955e-08, "loss": 0.0018, "num_input_tokens_seen": 264083344, "step": 122245 }, { "epoch": 19.94290375203915, "grad_norm": 0.005414798855781555, "learning_rate": 2.496803780405177e-08, "loss": 0.0016, "num_input_tokens_seen": 264094384, "step": 122250 }, { "epoch": 19.943719412724306, "grad_norm": 0.0005905579309910536, "learning_rate": 2.426176993253204e-08, "loss": 0.0005, "num_input_tokens_seen": 264105200, "step": 122255 }, { "epoch": 19.94453507340946, "grad_norm": 0.0037936638109385967, "learning_rate": 2.356563470357065e-08, "loss": 0.0005, "num_input_tokens_seen": 264115472, "step": 122260 }, { "epoch": 19.945350734094617, "grad_norm": 0.0001786290085874498, "learning_rate": 2.287963213137845e-08, "loss": 0.0007, "num_input_tokens_seen": 264125648, "step": 122265 }, { "epoch": 19.946166394779773, "grad_norm": 0.003722716588526964, "learning_rate": 2.2203762229777713e-08, "loss": 0.0033, "num_input_tokens_seen": 264136976, "step": 122270 }, { "epoch": 19.946982055464925, "grad_norm": 0.005986783653497696, "learning_rate": 2.15380250124797e-08, "loss": 0.0033, "num_input_tokens_seen": 264146768, "step": 122275 }, { "epoch": 19.94779771615008, "grad_norm": 0.02588781714439392, "learning_rate": 2.0882420493029132e-08, "loss": 0.0009, "num_input_tokens_seen": 264157616, "step": 122280 }, { "epoch": 19.948613376835237, "grad_norm": 0.14042320847511292, "learning_rate": 2.0236948684582147e-08, "loss": 0.0139, "num_input_tokens_seen": 264168176, "step": 122285 }, { "epoch": 19.949429037520392, "grad_norm": 0.002619031583890319, "learning_rate": 1.96016096003504e-08, "loss": 0.0013, "num_input_tokens_seen": 264177264, "step": 122290 }, { "epoch": 19.950244698205548, "grad_norm": 0.005070169921964407, "learning_rate": 1.8976403253156972e-08, "loss": 0.0006, "num_input_tokens_seen": 264186352, "step": 122295 }, { "epoch": 19.9510603588907, "grad_norm": 0.011310932226479053, "learning_rate": 1.836132965571391e-08, "loss": 0.0018, "num_input_tokens_seen": 264197232, "step": 122300 }, { "epoch": 19.951876019575856, "grad_norm": 0.0019945164676755667, "learning_rate": 1.7756388820400205e-08, "loss": 0.1224, "num_input_tokens_seen": 264207056, "step": 122305 }, { "epoch": 19.95269168026101, "grad_norm": 0.03021303005516529, "learning_rate": 1.716158075953933e-08, "loss": 0.0014, "num_input_tokens_seen": 264218288, "step": 122310 }, { "epoch": 19.953507340946167, "grad_norm": 0.002650508191436529, "learning_rate": 1.6576905485177206e-08, "loss": 0.0152, "num_input_tokens_seen": 264228816, "step": 122315 }, { "epoch": 19.954323001631323, "grad_norm": 0.011295244097709656, "learning_rate": 1.6002363009137712e-08, "loss": 0.0009, "num_input_tokens_seen": 264239856, "step": 122320 }, { "epoch": 19.955138662316475, "grad_norm": 0.02210673689842224, "learning_rate": 1.5437953343078182e-08, "loss": 0.0016, "num_input_tokens_seen": 264248848, "step": 122325 }, { "epoch": 19.95595432300163, "grad_norm": 0.0006285077542997897, "learning_rate": 1.488367649848943e-08, "loss": 0.0005, "num_input_tokens_seen": 264259632, "step": 122330 }, { "epoch": 19.956769983686787, "grad_norm": 0.059016335755586624, "learning_rate": 1.4339532486529195e-08, "loss": 0.0028, "num_input_tokens_seen": 264271152, "step": 122335 }, { "epoch": 19.957585644371942, "grad_norm": 0.000976592069491744, "learning_rate": 1.3805521318244196e-08, "loss": 0.0009, "num_input_tokens_seen": 264281744, "step": 122340 }, { "epoch": 19.958401305057095, "grad_norm": 0.3123403489589691, "learning_rate": 1.3281643004514621e-08, "loss": 0.0512, "num_input_tokens_seen": 264292656, "step": 122345 }, { "epoch": 19.95921696574225, "grad_norm": 0.035995569080114365, "learning_rate": 1.2767897555887587e-08, "loss": 0.0026, "num_input_tokens_seen": 264303664, "step": 122350 }, { "epoch": 19.960032626427406, "grad_norm": 0.0020898154471069574, "learning_rate": 1.2264284982743679e-08, "loss": 0.0009, "num_input_tokens_seen": 264314768, "step": 122355 }, { "epoch": 19.96084828711256, "grad_norm": 0.004529369994997978, "learning_rate": 1.1770805295407972e-08, "loss": 0.001, "num_input_tokens_seen": 264324976, "step": 122360 }, { "epoch": 19.961663947797717, "grad_norm": 0.04026377946138382, "learning_rate": 1.1287458503816961e-08, "loss": 0.0018, "num_input_tokens_seen": 264335792, "step": 122365 }, { "epoch": 19.96247960848287, "grad_norm": 0.10382266342639923, "learning_rate": 1.0814244617740609e-08, "loss": 0.0057, "num_input_tokens_seen": 264346512, "step": 122370 }, { "epoch": 19.963295269168025, "grad_norm": 0.043546292930841446, "learning_rate": 1.0351163646782346e-08, "loss": 0.002, "num_input_tokens_seen": 264356944, "step": 122375 }, { "epoch": 19.96411092985318, "grad_norm": 0.06786137819290161, "learning_rate": 9.898215600379068e-09, "loss": 0.0028, "num_input_tokens_seen": 264367376, "step": 122380 }, { "epoch": 19.964926590538337, "grad_norm": 0.0008918531239032745, "learning_rate": 9.455400487634602e-09, "loss": 0.0003, "num_input_tokens_seen": 264378864, "step": 122385 }, { "epoch": 19.965742251223492, "grad_norm": 0.016499491408467293, "learning_rate": 9.022718317597267e-09, "loss": 0.0013, "num_input_tokens_seen": 264389424, "step": 122390 }, { "epoch": 19.966557911908644, "grad_norm": 0.21509645879268646, "learning_rate": 8.600169098982313e-09, "loss": 0.0066, "num_input_tokens_seen": 264399376, "step": 122395 }, { "epoch": 19.9673735725938, "grad_norm": 0.004423909820616245, "learning_rate": 8.187752840338458e-09, "loss": 0.0013, "num_input_tokens_seen": 264411056, "step": 122400 }, { "epoch": 19.968189233278956, "grad_norm": 0.0020240589510649443, "learning_rate": 7.785469550103397e-09, "loss": 0.0005, "num_input_tokens_seen": 264422640, "step": 122405 }, { "epoch": 19.96900489396411, "grad_norm": 0.0003290712193120271, "learning_rate": 7.393319236326246e-09, "loss": 0.0218, "num_input_tokens_seen": 264434640, "step": 122410 }, { "epoch": 19.969820554649267, "grad_norm": 0.004327591508626938, "learning_rate": 7.011301907056122e-09, "loss": 0.0027, "num_input_tokens_seen": 264444624, "step": 122415 }, { "epoch": 19.97063621533442, "grad_norm": 0.011982999742031097, "learning_rate": 6.639417570009076e-09, "loss": 0.0025, "num_input_tokens_seen": 264454544, "step": 122420 }, { "epoch": 19.971451876019575, "grad_norm": 0.025622902438044548, "learning_rate": 6.2776662326236025e-09, "loss": 0.0612, "num_input_tokens_seen": 264466608, "step": 122425 }, { "epoch": 19.97226753670473, "grad_norm": 0.001789126661606133, "learning_rate": 5.926047902393705e-09, "loss": 0.002, "num_input_tokens_seen": 264477040, "step": 122430 }, { "epoch": 19.973083197389887, "grad_norm": 0.026516800746321678, "learning_rate": 5.584562586313791e-09, "loss": 0.0016, "num_input_tokens_seen": 264487856, "step": 122435 }, { "epoch": 19.973898858075042, "grad_norm": 0.005369944963604212, "learning_rate": 5.253210291322752e-09, "loss": 0.0032, "num_input_tokens_seen": 264497936, "step": 122440 }, { "epoch": 19.974714518760194, "grad_norm": 0.007590974681079388, "learning_rate": 4.93199102419295e-09, "loss": 0.0005, "num_input_tokens_seen": 264507600, "step": 122445 }, { "epoch": 19.97553017944535, "grad_norm": 0.014951630495488644, "learning_rate": 4.620904791419189e-09, "loss": 0.0008, "num_input_tokens_seen": 264519120, "step": 122450 }, { "epoch": 19.976345840130506, "grad_norm": 0.0008193363901227713, "learning_rate": 4.31995159927423e-09, "loss": 0.0005, "num_input_tokens_seen": 264530320, "step": 122455 }, { "epoch": 19.97716150081566, "grad_norm": 0.010377529077231884, "learning_rate": 4.029131453864299e-09, "loss": 0.0036, "num_input_tokens_seen": 264540912, "step": 122460 }, { "epoch": 19.977977161500817, "grad_norm": 0.2446034699678421, "learning_rate": 3.748444361129088e-09, "loss": 0.0049, "num_input_tokens_seen": 264550928, "step": 122465 }, { "epoch": 19.97879282218597, "grad_norm": 0.00831429474055767, "learning_rate": 3.477890326675226e-09, "loss": 0.0021, "num_input_tokens_seen": 264561584, "step": 122470 }, { "epoch": 19.979608482871125, "grad_norm": 0.08868524432182312, "learning_rate": 3.217469356053826e-09, "loss": 0.001, "num_input_tokens_seen": 264572464, "step": 122475 }, { "epoch": 19.98042414355628, "grad_norm": 0.00023401925864163786, "learning_rate": 2.9671814545384477e-09, "loss": 0.0006, "num_input_tokens_seen": 264582672, "step": 122480 }, { "epoch": 19.981239804241437, "grad_norm": 0.000985664431937039, "learning_rate": 2.7270266271806065e-09, "loss": 0.0005, "num_input_tokens_seen": 264593840, "step": 122485 }, { "epoch": 19.982055464926592, "grad_norm": 0.0008842953247949481, "learning_rate": 2.4970048788652833e-09, "loss": 0.0004, "num_input_tokens_seen": 264605136, "step": 122490 }, { "epoch": 19.982871125611744, "grad_norm": 0.00036722770892083645, "learning_rate": 2.2771162141999036e-09, "loss": 0.0005, "num_input_tokens_seen": 264617584, "step": 122495 }, { "epoch": 19.9836867862969, "grad_norm": 0.049424611032009125, "learning_rate": 2.0673606376808707e-09, "loss": 0.0016, "num_input_tokens_seen": 264627760, "step": 122500 }, { "epoch": 19.984502446982056, "grad_norm": 0.01784713752567768, "learning_rate": 1.8677381535825435e-09, "loss": 0.0008, "num_input_tokens_seen": 264638864, "step": 122505 }, { "epoch": 19.98531810766721, "grad_norm": 0.001981490757316351, "learning_rate": 1.6782487659572354e-09, "loss": 0.0005, "num_input_tokens_seen": 264648880, "step": 122510 }, { "epoch": 19.986133768352367, "grad_norm": 0.0023713652044534683, "learning_rate": 1.4988924785797053e-09, "loss": 0.002, "num_input_tokens_seen": 264659984, "step": 122515 }, { "epoch": 19.98694942903752, "grad_norm": 0.1398274153470993, "learning_rate": 1.329669295113689e-09, "loss": 0.0055, "num_input_tokens_seen": 264671504, "step": 122520 }, { "epoch": 19.987765089722675, "grad_norm": 0.3532371520996094, "learning_rate": 1.1705792190008778e-09, "loss": 0.0134, "num_input_tokens_seen": 264682704, "step": 122525 }, { "epoch": 19.98858075040783, "grad_norm": 0.014912966638803482, "learning_rate": 1.0216222534609189e-09, "loss": 0.0028, "num_input_tokens_seen": 264693232, "step": 122530 }, { "epoch": 19.989396411092986, "grad_norm": 0.0006106890505179763, "learning_rate": 8.827984014914137e-10, "loss": 0.0024, "num_input_tokens_seen": 264704400, "step": 122535 }, { "epoch": 19.99021207177814, "grad_norm": 0.0008242643089033663, "learning_rate": 7.541076659234314e-10, "loss": 0.0018, "num_input_tokens_seen": 264713648, "step": 122540 }, { "epoch": 19.991027732463294, "grad_norm": 0.00012639925989788026, "learning_rate": 6.355500494215072e-10, "loss": 0.0094, "num_input_tokens_seen": 264724496, "step": 122545 }, { "epoch": 19.99184339314845, "grad_norm": 0.01107293926179409, "learning_rate": 5.271255543171094e-10, "loss": 0.002, "num_input_tokens_seen": 264735760, "step": 122550 }, { "epoch": 19.992659053833606, "grad_norm": 0.0008368192939087749, "learning_rate": 4.2883418277517293e-10, "loss": 0.0007, "num_input_tokens_seen": 264744688, "step": 122555 }, { "epoch": 19.99347471451876, "grad_norm": 0.09536808729171753, "learning_rate": 3.4067593690512154e-10, "loss": 0.0036, "num_input_tokens_seen": 264755280, "step": 122560 }, { "epoch": 19.994290375203914, "grad_norm": 0.004233692307025194, "learning_rate": 2.6265081837228976e-10, "loss": 0.0017, "num_input_tokens_seen": 264764784, "step": 122565 }, { "epoch": 19.99510603588907, "grad_norm": 0.003642668481916189, "learning_rate": 1.9475882884201212e-10, "loss": 0.0005, "num_input_tokens_seen": 264774864, "step": 122570 }, { "epoch": 19.995921696574225, "grad_norm": 0.00029860870563425124, "learning_rate": 1.3699996964655626e-10, "loss": 0.0003, "num_input_tokens_seen": 264786800, "step": 122575 }, { "epoch": 19.99673735725938, "grad_norm": 0.0004550835001282394, "learning_rate": 8.937424195165634e-11, "loss": 0.001, "num_input_tokens_seen": 264798096, "step": 122580 }, { "epoch": 19.997553017944536, "grad_norm": 0.0013587478315457702, "learning_rate": 5.188164675651308e-11, "loss": 0.006, "num_input_tokens_seen": 264810128, "step": 122585 }, { "epoch": 19.99836867862969, "grad_norm": 0.00026625217287801206, "learning_rate": 2.4522184838282614e-11, "loss": 0.0002, "num_input_tokens_seen": 264819248, "step": 122590 }, { "epoch": 19.999184339314844, "grad_norm": 2.0659940242767334, "learning_rate": 7.295856696565295e-12, "loss": 0.0159, "num_input_tokens_seen": 264831024, "step": 122595 }, { "epoch": 20.0, "grad_norm": 0.0002738155599217862, "learning_rate": 2.0266266442803271e-13, "loss": 0.0014, "num_input_tokens_seen": 264840880, "step": 122600 }, { "epoch": 20.0, "eval_loss": 0.3499123454093933, "eval_runtime": 103.9372, "eval_samples_per_second": 26.218, "eval_steps_per_second": 6.562, "num_input_tokens_seen": 264840880, "step": 122600 }, { "epoch": 20.0, "num_input_tokens_seen": 264840880, "step": 122600, "total_flos": 1.1925665126797148e+19, "train_loss": 0.0669187841472106, "train_runtime": 45650.2803, "train_samples_per_second": 10.742, "train_steps_per_second": 2.686 } ], "logging_steps": 5, "max_steps": 122600, "num_input_tokens_seen": 264840880, "num_train_epochs": 20, "save_steps": 6130, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1925665126797148e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }