{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 5864, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013655372535631989, "grad_norm": 6.500636975566314, "learning_rate": 0.0, "loss": 0.7184, "num_tokens": 781862.0, "step": 1 }, { "epoch": 0.0027310745071263977, "grad_norm": 6.525370569209557, "learning_rate": 2.2727272727272729e-07, "loss": 0.7536, "num_tokens": 1579241.0, "step": 2 }, { "epoch": 0.004096611760689596, "grad_norm": 6.511988733486317, "learning_rate": 4.5454545454545457e-07, "loss": 0.7087, "num_tokens": 2313777.0, "step": 3 }, { "epoch": 0.0054621490142527955, "grad_norm": 6.455978665925424, "learning_rate": 6.818181818181818e-07, "loss": 0.7095, "num_tokens": 2958332.0, "step": 4 }, { "epoch": 0.006827686267815994, "grad_norm": 6.270538221352332, "learning_rate": 9.090909090909091e-07, "loss": 0.7054, "num_tokens": 3751162.0, "step": 5 }, { "epoch": 0.008193223521379193, "grad_norm": 6.163063219929961, "learning_rate": 1.1363636363636364e-06, "loss": 0.7211, "num_tokens": 4522410.0, "step": 6 }, { "epoch": 0.009558760774942391, "grad_norm": 6.37046560566571, "learning_rate": 1.3636363636363636e-06, "loss": 0.7327, "num_tokens": 5246142.0, "step": 7 }, { "epoch": 0.010924298028505591, "grad_norm": 5.814154128378001, "learning_rate": 1.590909090909091e-06, "loss": 0.729, "num_tokens": 6047445.0, "step": 8 }, { "epoch": 0.01228983528206879, "grad_norm": 5.01031229073977, "learning_rate": 1.8181818181818183e-06, "loss": 0.7131, "num_tokens": 6880372.0, "step": 9 }, { "epoch": 0.013655372535631987, "grad_norm": 3.5126170345533496, "learning_rate": 2.0454545454545457e-06, "loss": 0.6907, "num_tokens": 7653932.0, "step": 10 }, { "epoch": 0.015020909789195187, "grad_norm": 3.1232493652732436, "learning_rate": 2.2727272727272728e-06, "loss": 0.693, "num_tokens": 8367645.0, "step": 11 }, { "epoch": 0.016386447042758386, "grad_norm": 2.7971927055641457, "learning_rate": 2.5e-06, "loss": 0.6947, "num_tokens": 9111974.0, "step": 12 }, { "epoch": 0.017751984296321584, "grad_norm": 2.547170072023362, "learning_rate": 2.7272727272727272e-06, "loss": 0.6756, "num_tokens": 9866272.0, "step": 13 }, { "epoch": 0.019117521549884782, "grad_norm": 4.074999473801928, "learning_rate": 2.954545454545455e-06, "loss": 0.6802, "num_tokens": 10557085.0, "step": 14 }, { "epoch": 0.02048305880344798, "grad_norm": 3.9363858863566588, "learning_rate": 3.181818181818182e-06, "loss": 0.6556, "num_tokens": 11350918.0, "step": 15 }, { "epoch": 0.021848596057011182, "grad_norm": 4.111214220610141, "learning_rate": 3.409090909090909e-06, "loss": 0.6879, "num_tokens": 11999096.0, "step": 16 }, { "epoch": 0.02321413331057438, "grad_norm": 3.6960145417665276, "learning_rate": 3.6363636363636366e-06, "loss": 0.6888, "num_tokens": 12834086.0, "step": 17 }, { "epoch": 0.02457967056413758, "grad_norm": 2.8313409890781864, "learning_rate": 3.863636363636364e-06, "loss": 0.6476, "num_tokens": 13567107.0, "step": 18 }, { "epoch": 0.025945207817700777, "grad_norm": 2.351385227441186, "learning_rate": 4.0909090909090915e-06, "loss": 0.6399, "num_tokens": 14306544.0, "step": 19 }, { "epoch": 0.027310745071263975, "grad_norm": 2.301352140931594, "learning_rate": 4.3181818181818185e-06, "loss": 0.64, "num_tokens": 15097194.0, "step": 20 }, { "epoch": 0.028676282324827173, "grad_norm": 2.2561484251140205, "learning_rate": 4.5454545454545455e-06, "loss": 0.6685, "num_tokens": 15881981.0, "step": 21 }, { "epoch": 0.030041819578390375, "grad_norm": 1.731456144696499, "learning_rate": 4.772727272727273e-06, "loss": 0.6382, "num_tokens": 16690050.0, "step": 22 }, { "epoch": 0.03140735683195357, "grad_norm": 1.3070776019872297, "learning_rate": 5e-06, "loss": 0.6245, "num_tokens": 17379495.0, "step": 23 }, { "epoch": 0.03277289408551677, "grad_norm": 1.0264785877265707, "learning_rate": 5.2272727272727274e-06, "loss": 0.6354, "num_tokens": 18147974.0, "step": 24 }, { "epoch": 0.03413843133907997, "grad_norm": 1.0720298763528424, "learning_rate": 5.4545454545454545e-06, "loss": 0.5943, "num_tokens": 18882258.0, "step": 25 }, { "epoch": 0.03550396859264317, "grad_norm": 1.2142069321183426, "learning_rate": 5.681818181818183e-06, "loss": 0.6139, "num_tokens": 19635638.0, "step": 26 }, { "epoch": 0.036869505846206366, "grad_norm": 1.1542798945731323, "learning_rate": 5.90909090909091e-06, "loss": 0.5907, "num_tokens": 20425283.0, "step": 27 }, { "epoch": 0.038235043099769564, "grad_norm": 0.9803952950223598, "learning_rate": 6.136363636363637e-06, "loss": 0.5827, "num_tokens": 21143674.0, "step": 28 }, { "epoch": 0.03960058035333276, "grad_norm": 0.8792837572031468, "learning_rate": 6.363636363636364e-06, "loss": 0.5947, "num_tokens": 21875193.0, "step": 29 }, { "epoch": 0.04096611760689596, "grad_norm": 0.91494722648305, "learning_rate": 6.590909090909091e-06, "loss": 0.6095, "num_tokens": 22612249.0, "step": 30 }, { "epoch": 0.04233165486045916, "grad_norm": 0.9469290750930831, "learning_rate": 6.818181818181818e-06, "loss": 0.5764, "num_tokens": 23359661.0, "step": 31 }, { "epoch": 0.043697192114022364, "grad_norm": 0.9731506885951626, "learning_rate": 7.045454545454546e-06, "loss": 0.6178, "num_tokens": 24201458.0, "step": 32 }, { "epoch": 0.04506272936758556, "grad_norm": 1.0155235852203073, "learning_rate": 7.272727272727273e-06, "loss": 0.5866, "num_tokens": 24973015.0, "step": 33 }, { "epoch": 0.04642826662114876, "grad_norm": 1.0620517451106888, "learning_rate": 7.500000000000001e-06, "loss": 0.5919, "num_tokens": 25790103.0, "step": 34 }, { "epoch": 0.04779380387471196, "grad_norm": 1.0492387119203623, "learning_rate": 7.727272727272727e-06, "loss": 0.5756, "num_tokens": 26605078.0, "step": 35 }, { "epoch": 0.04915934112827516, "grad_norm": 0.6755988447913311, "learning_rate": 7.954545454545455e-06, "loss": 0.5479, "num_tokens": 27311426.0, "step": 36 }, { "epoch": 0.050524878381838355, "grad_norm": 0.4967428784102901, "learning_rate": 8.181818181818183e-06, "loss": 0.5507, "num_tokens": 28028209.0, "step": 37 }, { "epoch": 0.05189041563540155, "grad_norm": 0.5290040067639277, "learning_rate": 8.40909090909091e-06, "loss": 0.5628, "num_tokens": 28813214.0, "step": 38 }, { "epoch": 0.05325595288896475, "grad_norm": 0.520013757122856, "learning_rate": 8.636363636363637e-06, "loss": 0.5589, "num_tokens": 29649897.0, "step": 39 }, { "epoch": 0.05462149014252795, "grad_norm": 0.4905643283623459, "learning_rate": 8.863636363636365e-06, "loss": 0.5821, "num_tokens": 30381412.0, "step": 40 }, { "epoch": 0.05598702739609115, "grad_norm": 0.47850552050856104, "learning_rate": 9.090909090909091e-06, "loss": 0.5595, "num_tokens": 31138037.0, "step": 41 }, { "epoch": 0.057352564649654346, "grad_norm": 0.4697110256945509, "learning_rate": 9.318181818181819e-06, "loss": 0.5506, "num_tokens": 31904977.0, "step": 42 }, { "epoch": 0.058718101903217544, "grad_norm": 0.4306652319277409, "learning_rate": 9.545454545454547e-06, "loss": 0.5833, "num_tokens": 32705553.0, "step": 43 }, { "epoch": 0.06008363915678075, "grad_norm": 0.45841094431529866, "learning_rate": 9.772727272727273e-06, "loss": 0.5684, "num_tokens": 33448701.0, "step": 44 }, { "epoch": 0.06144917641034395, "grad_norm": 0.49986922606926004, "learning_rate": 1e-05, "loss": 0.5452, "num_tokens": 34188110.0, "step": 45 }, { "epoch": 0.06281471366390715, "grad_norm": 0.38624303490456985, "learning_rate": 1.0227272727272729e-05, "loss": 0.5571, "num_tokens": 34917083.0, "step": 46 }, { "epoch": 0.06418025091747034, "grad_norm": 0.46679617916920424, "learning_rate": 1.0454545454545455e-05, "loss": 0.5596, "num_tokens": 35648165.0, "step": 47 }, { "epoch": 0.06554578817103354, "grad_norm": 0.40592112385472606, "learning_rate": 1.0681818181818183e-05, "loss": 0.5575, "num_tokens": 36382540.0, "step": 48 }, { "epoch": 0.06691132542459674, "grad_norm": 0.4273957355642175, "learning_rate": 1.0909090909090909e-05, "loss": 0.5854, "num_tokens": 37185624.0, "step": 49 }, { "epoch": 0.06827686267815994, "grad_norm": 0.4186188193374812, "learning_rate": 1.1136363636363637e-05, "loss": 0.5302, "num_tokens": 37911531.0, "step": 50 }, { "epoch": 0.06964239993172314, "grad_norm": 0.48201222764512286, "learning_rate": 1.1363636363636366e-05, "loss": 0.5797, "num_tokens": 38686844.0, "step": 51 }, { "epoch": 0.07100793718528634, "grad_norm": 0.39683104282321563, "learning_rate": 1.1590909090909093e-05, "loss": 0.5354, "num_tokens": 39376780.0, "step": 52 }, { "epoch": 0.07237347443884953, "grad_norm": 0.45960196607403614, "learning_rate": 1.181818181818182e-05, "loss": 0.5615, "num_tokens": 40177770.0, "step": 53 }, { "epoch": 0.07373901169241273, "grad_norm": 0.4050567459313387, "learning_rate": 1.2045454545454547e-05, "loss": 0.5393, "num_tokens": 40922233.0, "step": 54 }, { "epoch": 0.07510454894597593, "grad_norm": 0.4399453743966374, "learning_rate": 1.2272727272727274e-05, "loss": 0.5704, "num_tokens": 41776457.0, "step": 55 }, { "epoch": 0.07647008619953913, "grad_norm": 0.46260648964718565, "learning_rate": 1.25e-05, "loss": 0.5408, "num_tokens": 42492497.0, "step": 56 }, { "epoch": 0.07783562345310233, "grad_norm": 0.3853245088953156, "learning_rate": 1.2727272727272728e-05, "loss": 0.5434, "num_tokens": 43228499.0, "step": 57 }, { "epoch": 0.07920116070666552, "grad_norm": 0.43635858695217833, "learning_rate": 1.2954545454545455e-05, "loss": 0.5717, "num_tokens": 43953622.0, "step": 58 }, { "epoch": 0.08056669796022872, "grad_norm": 0.4570387785079025, "learning_rate": 1.3181818181818183e-05, "loss": 0.5462, "num_tokens": 44703923.0, "step": 59 }, { "epoch": 0.08193223521379192, "grad_norm": 0.4010384106139643, "learning_rate": 1.3409090909090909e-05, "loss": 0.5365, "num_tokens": 45481633.0, "step": 60 }, { "epoch": 0.08329777246735512, "grad_norm": 0.4455346736386146, "learning_rate": 1.3636363636363637e-05, "loss": 0.5518, "num_tokens": 46179700.0, "step": 61 }, { "epoch": 0.08466330972091832, "grad_norm": 0.38694778936402796, "learning_rate": 1.3863636363636366e-05, "loss": 0.5396, "num_tokens": 46989012.0, "step": 62 }, { "epoch": 0.08602884697448153, "grad_norm": 0.43455394438221334, "learning_rate": 1.4090909090909092e-05, "loss": 0.5667, "num_tokens": 47792009.0, "step": 63 }, { "epoch": 0.08739438422804473, "grad_norm": 0.41604009729364805, "learning_rate": 1.431818181818182e-05, "loss": 0.5401, "num_tokens": 48535837.0, "step": 64 }, { "epoch": 0.08875992148160793, "grad_norm": 0.4294217512978351, "learning_rate": 1.4545454545454546e-05, "loss": 0.5418, "num_tokens": 49300416.0, "step": 65 }, { "epoch": 0.09012545873517112, "grad_norm": 0.427651360697862, "learning_rate": 1.4772727272727274e-05, "loss": 0.5441, "num_tokens": 50027704.0, "step": 66 }, { "epoch": 0.09149099598873432, "grad_norm": 0.46644071357641953, "learning_rate": 1.5000000000000002e-05, "loss": 0.5228, "num_tokens": 50738298.0, "step": 67 }, { "epoch": 0.09285653324229752, "grad_norm": 0.5133638618951397, "learning_rate": 1.5227272727272728e-05, "loss": 0.5486, "num_tokens": 51518049.0, "step": 68 }, { "epoch": 0.09422207049586072, "grad_norm": 0.44035630747762755, "learning_rate": 1.5454545454545454e-05, "loss": 0.5258, "num_tokens": 52202527.0, "step": 69 }, { "epoch": 0.09558760774942392, "grad_norm": 0.503634406418041, "learning_rate": 1.5681818181818182e-05, "loss": 0.5517, "num_tokens": 52936090.0, "step": 70 }, { "epoch": 0.09695314500298712, "grad_norm": 0.4505419400376497, "learning_rate": 1.590909090909091e-05, "loss": 0.5236, "num_tokens": 53700006.0, "step": 71 }, { "epoch": 0.09831868225655031, "grad_norm": 0.43763486691124376, "learning_rate": 1.6136363636363638e-05, "loss": 0.5334, "num_tokens": 54469611.0, "step": 72 }, { "epoch": 0.09968421951011351, "grad_norm": 0.4048029356083782, "learning_rate": 1.6363636363636366e-05, "loss": 0.5201, "num_tokens": 55233428.0, "step": 73 }, { "epoch": 0.10104975676367671, "grad_norm": 0.4778889974829067, "learning_rate": 1.6590909090909094e-05, "loss": 0.5663, "num_tokens": 56002125.0, "step": 74 }, { "epoch": 0.10241529401723991, "grad_norm": 0.4627398557117488, "learning_rate": 1.681818181818182e-05, "loss": 0.5536, "num_tokens": 56831886.0, "step": 75 }, { "epoch": 0.1037808312708031, "grad_norm": 0.4438489919878601, "learning_rate": 1.7045454545454546e-05, "loss": 0.5054, "num_tokens": 57576284.0, "step": 76 }, { "epoch": 0.1051463685243663, "grad_norm": 0.5153508019162509, "learning_rate": 1.7272727272727274e-05, "loss": 0.5268, "num_tokens": 58350348.0, "step": 77 }, { "epoch": 0.1065119057779295, "grad_norm": 0.4620213519428502, "learning_rate": 1.7500000000000002e-05, "loss": 0.536, "num_tokens": 59061243.0, "step": 78 }, { "epoch": 0.1078774430314927, "grad_norm": 0.5377407023474443, "learning_rate": 1.772727272727273e-05, "loss": 0.5079, "num_tokens": 59804038.0, "step": 79 }, { "epoch": 0.1092429802850559, "grad_norm": 0.4566318708592172, "learning_rate": 1.7954545454545454e-05, "loss": 0.5424, "num_tokens": 60563985.0, "step": 80 }, { "epoch": 0.1106085175386191, "grad_norm": 0.5011840945413685, "learning_rate": 1.8181818181818182e-05, "loss": 0.5127, "num_tokens": 61329157.0, "step": 81 }, { "epoch": 0.1119740547921823, "grad_norm": 0.42951195628083, "learning_rate": 1.840909090909091e-05, "loss": 0.5094, "num_tokens": 62190537.0, "step": 82 }, { "epoch": 0.1133395920457455, "grad_norm": 0.4244741747498876, "learning_rate": 1.8636363636363638e-05, "loss": 0.5213, "num_tokens": 62999763.0, "step": 83 }, { "epoch": 0.11470512929930869, "grad_norm": 0.5055059283013258, "learning_rate": 1.8863636363636366e-05, "loss": 0.5169, "num_tokens": 63729924.0, "step": 84 }, { "epoch": 0.11607066655287189, "grad_norm": 0.5111386840160126, "learning_rate": 1.9090909090909094e-05, "loss": 0.527, "num_tokens": 64474072.0, "step": 85 }, { "epoch": 0.11743620380643509, "grad_norm": 0.4826745562225029, "learning_rate": 1.931818181818182e-05, "loss": 0.5218, "num_tokens": 65266387.0, "step": 86 }, { "epoch": 0.11880174105999829, "grad_norm": 0.508374816697989, "learning_rate": 1.9545454545454546e-05, "loss": 0.5365, "num_tokens": 66001793.0, "step": 87 }, { "epoch": 0.1201672783135615, "grad_norm": 0.4879269702139998, "learning_rate": 1.9772727272727274e-05, "loss": 0.5286, "num_tokens": 66749495.0, "step": 88 }, { "epoch": 0.1215328155671247, "grad_norm": 0.43349333336280543, "learning_rate": 2e-05, "loss": 0.5483, "num_tokens": 67564234.0, "step": 89 }, { "epoch": 0.1228983528206879, "grad_norm": 0.39736666585280805, "learning_rate": 2.022727272727273e-05, "loss": 0.5299, "num_tokens": 68330993.0, "step": 90 }, { "epoch": 0.1242638900742511, "grad_norm": 0.43837441298266666, "learning_rate": 2.0454545454545457e-05, "loss": 0.5279, "num_tokens": 69125869.0, "step": 91 }, { "epoch": 0.1256294273278143, "grad_norm": 0.40946718072581995, "learning_rate": 2.0681818181818185e-05, "loss": 0.5291, "num_tokens": 69891218.0, "step": 92 }, { "epoch": 0.1269949645813775, "grad_norm": 0.435976830054044, "learning_rate": 2.090909090909091e-05, "loss": 0.5314, "num_tokens": 70631922.0, "step": 93 }, { "epoch": 0.1283605018349407, "grad_norm": 0.4750134563698448, "learning_rate": 2.113636363636364e-05, "loss": 0.5532, "num_tokens": 71369020.0, "step": 94 }, { "epoch": 0.12972603908850389, "grad_norm": 0.42727220895375145, "learning_rate": 2.1363636363636365e-05, "loss": 0.503, "num_tokens": 72145740.0, "step": 95 }, { "epoch": 0.13109157634206708, "grad_norm": 0.4179579025361044, "learning_rate": 2.1590909090909093e-05, "loss": 0.4959, "num_tokens": 72920824.0, "step": 96 }, { "epoch": 0.13245711359563028, "grad_norm": 0.4215047034262279, "learning_rate": 2.1818181818181818e-05, "loss": 0.5238, "num_tokens": 73771316.0, "step": 97 }, { "epoch": 0.13382265084919348, "grad_norm": 0.4619903244552818, "learning_rate": 2.204545454545455e-05, "loss": 0.5049, "num_tokens": 74515867.0, "step": 98 }, { "epoch": 0.13518818810275668, "grad_norm": 0.4723486300399272, "learning_rate": 2.2272727272727274e-05, "loss": 0.5294, "num_tokens": 75280980.0, "step": 99 }, { "epoch": 0.13655372535631988, "grad_norm": 0.48511046620031884, "learning_rate": 2.25e-05, "loss": 0.5299, "num_tokens": 76045199.0, "step": 100 }, { "epoch": 0.13791926260988308, "grad_norm": 0.40897118853023023, "learning_rate": 2.2727272727272733e-05, "loss": 0.5063, "num_tokens": 76836546.0, "step": 101 }, { "epoch": 0.13928479986344627, "grad_norm": 0.5709098640579996, "learning_rate": 2.2954545454545457e-05, "loss": 0.5285, "num_tokens": 77610906.0, "step": 102 }, { "epoch": 0.14065033711700947, "grad_norm": 0.4724914627358824, "learning_rate": 2.3181818181818185e-05, "loss": 0.5197, "num_tokens": 78465370.0, "step": 103 }, { "epoch": 0.14201587437057267, "grad_norm": 0.5653428812750941, "learning_rate": 2.340909090909091e-05, "loss": 0.5159, "num_tokens": 79261238.0, "step": 104 }, { "epoch": 0.14338141162413587, "grad_norm": 0.5179508911757918, "learning_rate": 2.363636363636364e-05, "loss": 0.52, "num_tokens": 80056201.0, "step": 105 }, { "epoch": 0.14474694887769907, "grad_norm": 0.5167913546242338, "learning_rate": 2.3863636363636365e-05, "loss": 0.5036, "num_tokens": 80790155.0, "step": 106 }, { "epoch": 0.14611248613126226, "grad_norm": 0.5042511791643816, "learning_rate": 2.4090909090909093e-05, "loss": 0.5266, "num_tokens": 81520104.0, "step": 107 }, { "epoch": 0.14747802338482546, "grad_norm": 0.5845215069040914, "learning_rate": 2.4318181818181818e-05, "loss": 0.5369, "num_tokens": 82261359.0, "step": 108 }, { "epoch": 0.14884356063838866, "grad_norm": 0.4999602802594102, "learning_rate": 2.454545454545455e-05, "loss": 0.5249, "num_tokens": 83051862.0, "step": 109 }, { "epoch": 0.15020909789195186, "grad_norm": 0.5558951693840845, "learning_rate": 2.4772727272727273e-05, "loss": 0.5178, "num_tokens": 83800921.0, "step": 110 }, { "epoch": 0.15157463514551506, "grad_norm": 0.527803161522279, "learning_rate": 2.5e-05, "loss": 0.5004, "num_tokens": 84463933.0, "step": 111 }, { "epoch": 0.15294017239907826, "grad_norm": 0.4984199227342967, "learning_rate": 2.5227272727272732e-05, "loss": 0.4981, "num_tokens": 85234195.0, "step": 112 }, { "epoch": 0.15430570965264145, "grad_norm": 0.5557718686262743, "learning_rate": 2.5454545454545457e-05, "loss": 0.523, "num_tokens": 85990575.0, "step": 113 }, { "epoch": 0.15567124690620465, "grad_norm": 0.5559994111610083, "learning_rate": 2.5681818181818185e-05, "loss": 0.5568, "num_tokens": 86796478.0, "step": 114 }, { "epoch": 0.15703678415976785, "grad_norm": 0.6228049878945804, "learning_rate": 2.590909090909091e-05, "loss": 0.5269, "num_tokens": 87587191.0, "step": 115 }, { "epoch": 0.15840232141333105, "grad_norm": 0.47631371943621176, "learning_rate": 2.613636363636364e-05, "loss": 0.5112, "num_tokens": 88318757.0, "step": 116 }, { "epoch": 0.15976785866689425, "grad_norm": 0.5858639712348372, "learning_rate": 2.6363636363636365e-05, "loss": 0.5009, "num_tokens": 89117031.0, "step": 117 }, { "epoch": 0.16113339592045745, "grad_norm": 0.5995576294816305, "learning_rate": 2.6590909090909093e-05, "loss": 0.5286, "num_tokens": 89863782.0, "step": 118 }, { "epoch": 0.16249893317402064, "grad_norm": 0.46399614143536966, "learning_rate": 2.6818181818181817e-05, "loss": 0.4915, "num_tokens": 90626599.0, "step": 119 }, { "epoch": 0.16386447042758384, "grad_norm": 0.6651177358944861, "learning_rate": 2.704545454545455e-05, "loss": 0.5235, "num_tokens": 91426268.0, "step": 120 }, { "epoch": 0.16523000768114704, "grad_norm": 0.4822627497970511, "learning_rate": 2.7272727272727273e-05, "loss": 0.5067, "num_tokens": 92168484.0, "step": 121 }, { "epoch": 0.16659554493471024, "grad_norm": 0.5527346404425151, "learning_rate": 2.75e-05, "loss": 0.5221, "num_tokens": 93022732.0, "step": 122 }, { "epoch": 0.16796108218827344, "grad_norm": 0.42215576266743204, "learning_rate": 2.7727272727272732e-05, "loss": 0.4952, "num_tokens": 93780799.0, "step": 123 }, { "epoch": 0.16932661944183663, "grad_norm": 0.6070308256007652, "learning_rate": 2.7954545454545457e-05, "loss": 0.5422, "num_tokens": 94536625.0, "step": 124 }, { "epoch": 0.17069215669539983, "grad_norm": 0.46434297348014014, "learning_rate": 2.8181818181818185e-05, "loss": 0.5222, "num_tokens": 95331936.0, "step": 125 }, { "epoch": 0.17205769394896306, "grad_norm": 0.5789034363301886, "learning_rate": 2.840909090909091e-05, "loss": 0.5002, "num_tokens": 96013577.0, "step": 126 }, { "epoch": 0.17342323120252626, "grad_norm": 0.5516000511986088, "learning_rate": 2.863636363636364e-05, "loss": 0.5468, "num_tokens": 96777177.0, "step": 127 }, { "epoch": 0.17478876845608946, "grad_norm": 0.5262697679118624, "learning_rate": 2.8863636363636365e-05, "loss": 0.5073, "num_tokens": 97649984.0, "step": 128 }, { "epoch": 0.17615430570965265, "grad_norm": 0.6109584406157388, "learning_rate": 2.9090909090909093e-05, "loss": 0.5227, "num_tokens": 98426131.0, "step": 129 }, { "epoch": 0.17751984296321585, "grad_norm": 0.5054252373654107, "learning_rate": 2.931818181818182e-05, "loss": 0.4952, "num_tokens": 99280265.0, "step": 130 }, { "epoch": 0.17888538021677905, "grad_norm": 0.567820805646932, "learning_rate": 2.954545454545455e-05, "loss": 0.5261, "num_tokens": 100068369.0, "step": 131 }, { "epoch": 0.18025091747034225, "grad_norm": 0.44921931971494333, "learning_rate": 2.9772727272727273e-05, "loss": 0.5105, "num_tokens": 100866816.0, "step": 132 }, { "epoch": 0.18161645472390545, "grad_norm": 0.6181480858497694, "learning_rate": 3.0000000000000004e-05, "loss": 0.5154, "num_tokens": 101644474.0, "step": 133 }, { "epoch": 0.18298199197746864, "grad_norm": 0.5109641844332039, "learning_rate": 3.0227272727272732e-05, "loss": 0.5123, "num_tokens": 102393514.0, "step": 134 }, { "epoch": 0.18434752923103184, "grad_norm": 0.5744589035773703, "learning_rate": 3.0454545454545456e-05, "loss": 0.5053, "num_tokens": 103111951.0, "step": 135 }, { "epoch": 0.18571306648459504, "grad_norm": 0.5827588535564215, "learning_rate": 3.068181818181819e-05, "loss": 0.5224, "num_tokens": 103864022.0, "step": 136 }, { "epoch": 0.18707860373815824, "grad_norm": 0.6209055790859109, "learning_rate": 3.090909090909091e-05, "loss": 0.5311, "num_tokens": 104563581.0, "step": 137 }, { "epoch": 0.18844414099172144, "grad_norm": 0.46766239646851604, "learning_rate": 3.1136363636363643e-05, "loss": 0.5226, "num_tokens": 105282648.0, "step": 138 }, { "epoch": 0.18980967824528464, "grad_norm": 0.6837695625778015, "learning_rate": 3.1363636363636365e-05, "loss": 0.5131, "num_tokens": 105977886.0, "step": 139 }, { "epoch": 0.19117521549884783, "grad_norm": 0.5371715088718392, "learning_rate": 3.159090909090909e-05, "loss": 0.5417, "num_tokens": 106783806.0, "step": 140 }, { "epoch": 0.19254075275241103, "grad_norm": 0.59711169202769, "learning_rate": 3.181818181818182e-05, "loss": 0.4746, "num_tokens": 107499922.0, "step": 141 }, { "epoch": 0.19390629000597423, "grad_norm": 0.5445751961435984, "learning_rate": 3.204545454545455e-05, "loss": 0.5068, "num_tokens": 108277422.0, "step": 142 }, { "epoch": 0.19527182725953743, "grad_norm": 0.6003499215638751, "learning_rate": 3.2272727272727276e-05, "loss": 0.5049, "num_tokens": 109114032.0, "step": 143 }, { "epoch": 0.19663736451310063, "grad_norm": 0.5652915329529774, "learning_rate": 3.2500000000000004e-05, "loss": 0.4971, "num_tokens": 109899718.0, "step": 144 }, { "epoch": 0.19800290176666382, "grad_norm": 0.5825652148545489, "learning_rate": 3.272727272727273e-05, "loss": 0.5143, "num_tokens": 110717613.0, "step": 145 }, { "epoch": 0.19936843902022702, "grad_norm": 0.5077504725104915, "learning_rate": 3.295454545454546e-05, "loss": 0.513, "num_tokens": 111439912.0, "step": 146 }, { "epoch": 0.20073397627379022, "grad_norm": 0.623724020920785, "learning_rate": 3.318181818181819e-05, "loss": 0.5149, "num_tokens": 112180564.0, "step": 147 }, { "epoch": 0.20209951352735342, "grad_norm": 0.5672137756505236, "learning_rate": 3.340909090909091e-05, "loss": 0.534, "num_tokens": 112954449.0, "step": 148 }, { "epoch": 0.20346505078091662, "grad_norm": 0.5355988782729278, "learning_rate": 3.363636363636364e-05, "loss": 0.5041, "num_tokens": 113701483.0, "step": 149 }, { "epoch": 0.20483058803447982, "grad_norm": 0.5720815715867144, "learning_rate": 3.3863636363636364e-05, "loss": 0.5124, "num_tokens": 114385292.0, "step": 150 }, { "epoch": 0.20619612528804301, "grad_norm": 0.561208087187484, "learning_rate": 3.409090909090909e-05, "loss": 0.5156, "num_tokens": 115144805.0, "step": 151 }, { "epoch": 0.2075616625416062, "grad_norm": 0.5228809168641877, "learning_rate": 3.431818181818182e-05, "loss": 0.4914, "num_tokens": 115906063.0, "step": 152 }, { "epoch": 0.2089271997951694, "grad_norm": 0.5799427624859983, "learning_rate": 3.454545454545455e-05, "loss": 0.4982, "num_tokens": 116680256.0, "step": 153 }, { "epoch": 0.2102927370487326, "grad_norm": 0.5120664897627983, "learning_rate": 3.4772727272727276e-05, "loss": 0.5036, "num_tokens": 117453273.0, "step": 154 }, { "epoch": 0.2116582743022958, "grad_norm": 0.5600108724534937, "learning_rate": 3.5000000000000004e-05, "loss": 0.4883, "num_tokens": 118305135.0, "step": 155 }, { "epoch": 0.213023811555859, "grad_norm": 0.5999854542015981, "learning_rate": 3.522727272727273e-05, "loss": 0.5235, "num_tokens": 119052182.0, "step": 156 }, { "epoch": 0.2143893488094222, "grad_norm": 0.5807371549871507, "learning_rate": 3.545454545454546e-05, "loss": 0.4804, "num_tokens": 119755320.0, "step": 157 }, { "epoch": 0.2157548860629854, "grad_norm": 0.6532245738857998, "learning_rate": 3.568181818181819e-05, "loss": 0.535, "num_tokens": 120582682.0, "step": 158 }, { "epoch": 0.2171204233165486, "grad_norm": 0.514465980197611, "learning_rate": 3.590909090909091e-05, "loss": 0.5147, "num_tokens": 121414797.0, "step": 159 }, { "epoch": 0.2184859605701118, "grad_norm": 0.6744142111399054, "learning_rate": 3.613636363636364e-05, "loss": 0.4908, "num_tokens": 122173384.0, "step": 160 }, { "epoch": 0.219851497823675, "grad_norm": 0.6094443726996054, "learning_rate": 3.6363636363636364e-05, "loss": 0.5108, "num_tokens": 122918408.0, "step": 161 }, { "epoch": 0.2212170350772382, "grad_norm": 0.5337238498970742, "learning_rate": 3.659090909090909e-05, "loss": 0.5117, "num_tokens": 123669596.0, "step": 162 }, { "epoch": 0.2225825723308014, "grad_norm": 0.5519849176071898, "learning_rate": 3.681818181818182e-05, "loss": 0.5305, "num_tokens": 124429571.0, "step": 163 }, { "epoch": 0.2239481095843646, "grad_norm": 0.5022822122495851, "learning_rate": 3.704545454545455e-05, "loss": 0.5138, "num_tokens": 125196963.0, "step": 164 }, { "epoch": 0.2253136468379278, "grad_norm": 0.6368693429950223, "learning_rate": 3.7272727272727276e-05, "loss": 0.492, "num_tokens": 125949052.0, "step": 165 }, { "epoch": 0.226679184091491, "grad_norm": 0.44901498439288545, "learning_rate": 3.7500000000000003e-05, "loss": 0.4873, "num_tokens": 126742770.0, "step": 166 }, { "epoch": 0.22804472134505419, "grad_norm": 0.724543202176017, "learning_rate": 3.772727272727273e-05, "loss": 0.4951, "num_tokens": 127558819.0, "step": 167 }, { "epoch": 0.22941025859861738, "grad_norm": 0.5283152656497417, "learning_rate": 3.795454545454546e-05, "loss": 0.5101, "num_tokens": 128286834.0, "step": 168 }, { "epoch": 0.23077579585218058, "grad_norm": 0.787761705822389, "learning_rate": 3.818181818181819e-05, "loss": 0.5031, "num_tokens": 129132344.0, "step": 169 }, { "epoch": 0.23214133310574378, "grad_norm": 0.6727679749189965, "learning_rate": 3.840909090909091e-05, "loss": 0.5307, "num_tokens": 129824101.0, "step": 170 }, { "epoch": 0.23350687035930698, "grad_norm": 0.6991331592511054, "learning_rate": 3.863636363636364e-05, "loss": 0.503, "num_tokens": 130556473.0, "step": 171 }, { "epoch": 0.23487240761287018, "grad_norm": 0.6604649316729178, "learning_rate": 3.8863636363636364e-05, "loss": 0.5188, "num_tokens": 131291044.0, "step": 172 }, { "epoch": 0.23623794486643337, "grad_norm": 0.64544600690735, "learning_rate": 3.909090909090909e-05, "loss": 0.5503, "num_tokens": 132084966.0, "step": 173 }, { "epoch": 0.23760348211999657, "grad_norm": 0.654093547842762, "learning_rate": 3.931818181818182e-05, "loss": 0.5128, "num_tokens": 132902353.0, "step": 174 }, { "epoch": 0.23896901937355977, "grad_norm": 0.6416395108452609, "learning_rate": 3.954545454545455e-05, "loss": 0.5276, "num_tokens": 133622462.0, "step": 175 }, { "epoch": 0.240334556627123, "grad_norm": 0.5949715966073831, "learning_rate": 3.9772727272727275e-05, "loss": 0.4961, "num_tokens": 134319877.0, "step": 176 }, { "epoch": 0.2417000938806862, "grad_norm": 0.6462681475286273, "learning_rate": 4e-05, "loss": 0.4948, "num_tokens": 135041196.0, "step": 177 }, { "epoch": 0.2430656311342494, "grad_norm": 0.5734996266190384, "learning_rate": 3.999999725448976e-05, "loss": 0.4953, "num_tokens": 135748184.0, "step": 178 }, { "epoch": 0.2444311683878126, "grad_norm": 0.538660585711184, "learning_rate": 3.999998901795987e-05, "loss": 0.4902, "num_tokens": 136581030.0, "step": 179 }, { "epoch": 0.2457967056413758, "grad_norm": 0.6855689734119854, "learning_rate": 3.9999975290412844e-05, "loss": 0.5291, "num_tokens": 137373989.0, "step": 180 }, { "epoch": 0.247162242894939, "grad_norm": 0.6385756004518256, "learning_rate": 3.999995607185286e-05, "loss": 0.4963, "num_tokens": 138117615.0, "step": 181 }, { "epoch": 0.2485277801485022, "grad_norm": 0.5582389907979056, "learning_rate": 3.9999931362285805e-05, "loss": 0.4936, "num_tokens": 138849889.0, "step": 182 }, { "epoch": 0.24989331740206538, "grad_norm": 0.7072581801313, "learning_rate": 3.999990116171919e-05, "loss": 0.4963, "num_tokens": 139648257.0, "step": 183 }, { "epoch": 0.2512588546556286, "grad_norm": 0.5995859842863248, "learning_rate": 3.9999865470162247e-05, "loss": 0.491, "num_tokens": 140300071.0, "step": 184 }, { "epoch": 0.2526243919091918, "grad_norm": 0.5677888739020435, "learning_rate": 3.9999824287625855e-05, "loss": 0.5077, "num_tokens": 141035858.0, "step": 185 }, { "epoch": 0.253989929162755, "grad_norm": 0.5072707978633081, "learning_rate": 3.9999777614122576e-05, "loss": 0.4885, "num_tokens": 141908782.0, "step": 186 }, { "epoch": 0.2553554664163182, "grad_norm": 0.6727066576961708, "learning_rate": 3.9999725449666655e-05, "loss": 0.5536, "num_tokens": 142717216.0, "step": 187 }, { "epoch": 0.2567210036698814, "grad_norm": 0.5036135643676283, "learning_rate": 3.9999667794274e-05, "loss": 0.5153, "num_tokens": 143499035.0, "step": 188 }, { "epoch": 0.2580865409234446, "grad_norm": 0.554107681293757, "learning_rate": 3.99996046479622e-05, "loss": 0.4928, "num_tokens": 144262477.0, "step": 189 }, { "epoch": 0.25945207817700777, "grad_norm": 0.5549062598268762, "learning_rate": 3.999953601075052e-05, "loss": 0.4944, "num_tokens": 144972290.0, "step": 190 }, { "epoch": 0.26081761543057097, "grad_norm": 0.4935553878814046, "learning_rate": 3.9999461882659894e-05, "loss": 0.4912, "num_tokens": 145734994.0, "step": 191 }, { "epoch": 0.26218315268413417, "grad_norm": 0.577878252900415, "learning_rate": 3.999938226371294e-05, "loss": 0.4912, "num_tokens": 146518634.0, "step": 192 }, { "epoch": 0.26354868993769737, "grad_norm": 0.5386099294389277, "learning_rate": 3.999929715393395e-05, "loss": 0.4948, "num_tokens": 147199487.0, "step": 193 }, { "epoch": 0.26491422719126057, "grad_norm": 0.54883578605865, "learning_rate": 3.999920655334887e-05, "loss": 0.4908, "num_tokens": 147964517.0, "step": 194 }, { "epoch": 0.26627976444482376, "grad_norm": 0.5289863568841632, "learning_rate": 3.999911046198536e-05, "loss": 0.5109, "num_tokens": 148768364.0, "step": 195 }, { "epoch": 0.26764530169838696, "grad_norm": 0.5983582871155441, "learning_rate": 3.9999008879872716e-05, "loss": 0.5201, "num_tokens": 149581041.0, "step": 196 }, { "epoch": 0.26901083895195016, "grad_norm": 0.4921622038136214, "learning_rate": 3.999890180704194e-05, "loss": 0.5016, "num_tokens": 150376574.0, "step": 197 }, { "epoch": 0.27037637620551336, "grad_norm": 0.6004839596454482, "learning_rate": 3.999878924352568e-05, "loss": 0.4962, "num_tokens": 151084782.0, "step": 198 }, { "epoch": 0.27174191345907656, "grad_norm": 0.6734274634936434, "learning_rate": 3.999867118935829e-05, "loss": 0.5143, "num_tokens": 151869753.0, "step": 199 }, { "epoch": 0.27310745071263975, "grad_norm": 0.5133783299601595, "learning_rate": 3.999854764457578e-05, "loss": 0.4987, "num_tokens": 152631222.0, "step": 200 }, { "epoch": 0.27447298796620295, "grad_norm": 0.6860695306002675, "learning_rate": 3.9998418609215826e-05, "loss": 0.4763, "num_tokens": 153384826.0, "step": 201 }, { "epoch": 0.27583852521976615, "grad_norm": 0.5229030008230252, "learning_rate": 3.9998284083317795e-05, "loss": 0.4916, "num_tokens": 154123081.0, "step": 202 }, { "epoch": 0.27720406247332935, "grad_norm": 0.7125707517482281, "learning_rate": 3.9998144066922744e-05, "loss": 0.5193, "num_tokens": 154974829.0, "step": 203 }, { "epoch": 0.27856959972689255, "grad_norm": 0.6048899683444284, "learning_rate": 3.9997998560073364e-05, "loss": 0.4986, "num_tokens": 155689814.0, "step": 204 }, { "epoch": 0.27993513698045575, "grad_norm": 0.5700685868417094, "learning_rate": 3.999784756281405e-05, "loss": 0.5113, "num_tokens": 156498336.0, "step": 205 }, { "epoch": 0.28130067423401894, "grad_norm": 0.6294781714246721, "learning_rate": 3.9997691075190864e-05, "loss": 0.5144, "num_tokens": 157211030.0, "step": 206 }, { "epoch": 0.28266621148758214, "grad_norm": 0.547648549722091, "learning_rate": 3.999752909725154e-05, "loss": 0.484, "num_tokens": 157935970.0, "step": 207 }, { "epoch": 0.28403174874114534, "grad_norm": 0.6793519312728593, "learning_rate": 3.999736162904551e-05, "loss": 0.4834, "num_tokens": 158711701.0, "step": 208 }, { "epoch": 0.28539728599470854, "grad_norm": 0.5858990947356446, "learning_rate": 3.999718867062384e-05, "loss": 0.511, "num_tokens": 159464623.0, "step": 209 }, { "epoch": 0.28676282324827174, "grad_norm": 0.5725871886575743, "learning_rate": 3.9997010222039297e-05, "loss": 0.5163, "num_tokens": 160241725.0, "step": 210 }, { "epoch": 0.28812836050183493, "grad_norm": 0.6560115247748611, "learning_rate": 3.9996826283346325e-05, "loss": 0.4906, "num_tokens": 160978940.0, "step": 211 }, { "epoch": 0.28949389775539813, "grad_norm": 0.4771214718756491, "learning_rate": 3.999663685460102e-05, "loss": 0.4957, "num_tokens": 161779321.0, "step": 212 }, { "epoch": 0.29085943500896133, "grad_norm": 0.7700597335901385, "learning_rate": 3.999644193586118e-05, "loss": 0.5179, "num_tokens": 162530909.0, "step": 213 }, { "epoch": 0.29222497226252453, "grad_norm": 0.6053780680750889, "learning_rate": 3.999624152718627e-05, "loss": 0.5249, "num_tokens": 163320760.0, "step": 214 }, { "epoch": 0.2935905095160877, "grad_norm": 0.6993885319703983, "learning_rate": 3.9996035628637425e-05, "loss": 0.4938, "num_tokens": 164174798.0, "step": 215 }, { "epoch": 0.2949560467696509, "grad_norm": 0.6354395075675292, "learning_rate": 3.999582424027744e-05, "loss": 0.4793, "num_tokens": 164897363.0, "step": 216 }, { "epoch": 0.2963215840232141, "grad_norm": 0.5760389658544773, "learning_rate": 3.9995607362170823e-05, "loss": 0.4819, "num_tokens": 165686427.0, "step": 217 }, { "epoch": 0.2976871212767773, "grad_norm": 0.6706519747961608, "learning_rate": 3.999538499438372e-05, "loss": 0.4849, "num_tokens": 166498716.0, "step": 218 }, { "epoch": 0.2990526585303405, "grad_norm": 0.5840226220048302, "learning_rate": 3.9995157136983966e-05, "loss": 0.4705, "num_tokens": 167265170.0, "step": 219 }, { "epoch": 0.3004181957839037, "grad_norm": 0.7938818570248787, "learning_rate": 3.9994923790041076e-05, "loss": 0.51, "num_tokens": 168029418.0, "step": 220 }, { "epoch": 0.3017837330374669, "grad_norm": 0.6589705190826542, "learning_rate": 3.999468495362623e-05, "loss": 0.5015, "num_tokens": 168831970.0, "step": 221 }, { "epoch": 0.3031492702910301, "grad_norm": 0.7675927016958526, "learning_rate": 3.999444062781229e-05, "loss": 0.5228, "num_tokens": 169643664.0, "step": 222 }, { "epoch": 0.3045148075445933, "grad_norm": 0.5557217186252527, "learning_rate": 3.9994190812673784e-05, "loss": 0.5016, "num_tokens": 170389390.0, "step": 223 }, { "epoch": 0.3058803447981565, "grad_norm": 0.7219958365041722, "learning_rate": 3.9993935508286934e-05, "loss": 0.4872, "num_tokens": 171120690.0, "step": 224 }, { "epoch": 0.3072458820517197, "grad_norm": 0.5431083805372404, "learning_rate": 3.99936747147296e-05, "loss": 0.479, "num_tokens": 171934188.0, "step": 225 }, { "epoch": 0.3086114193052829, "grad_norm": 0.6979375711774498, "learning_rate": 3.999340843208135e-05, "loss": 0.4873, "num_tokens": 172755877.0, "step": 226 }, { "epoch": 0.3099769565588461, "grad_norm": 0.6248267711743879, "learning_rate": 3.999313666042342e-05, "loss": 0.5069, "num_tokens": 173513956.0, "step": 227 }, { "epoch": 0.3113424938124093, "grad_norm": 0.7452779965321701, "learning_rate": 3.999285939983871e-05, "loss": 0.5001, "num_tokens": 174298361.0, "step": 228 }, { "epoch": 0.3127080310659725, "grad_norm": 0.632773471329681, "learning_rate": 3.99925766504118e-05, "loss": 0.4987, "num_tokens": 175161082.0, "step": 229 }, { "epoch": 0.3140735683195357, "grad_norm": 0.673477350154261, "learning_rate": 3.9992288412228954e-05, "loss": 0.4806, "num_tokens": 175888547.0, "step": 230 }, { "epoch": 0.3154391055730989, "grad_norm": 0.5470344789476095, "learning_rate": 3.999199468537809e-05, "loss": 0.4872, "num_tokens": 176701182.0, "step": 231 }, { "epoch": 0.3168046428266621, "grad_norm": 0.8084304807514564, "learning_rate": 3.9991695469948806e-05, "loss": 0.4655, "num_tokens": 177456235.0, "step": 232 }, { "epoch": 0.3181701800802253, "grad_norm": 0.7231563732142144, "learning_rate": 3.99913907660324e-05, "loss": 0.4846, "num_tokens": 178193394.0, "step": 233 }, { "epoch": 0.3195357173337885, "grad_norm": 0.680325924904496, "learning_rate": 3.9991080573721805e-05, "loss": 0.4893, "num_tokens": 178913206.0, "step": 234 }, { "epoch": 0.3209012545873517, "grad_norm": 0.72640272392484, "learning_rate": 3.999076489311165e-05, "loss": 0.5026, "num_tokens": 179715440.0, "step": 235 }, { "epoch": 0.3222667918409149, "grad_norm": 0.5386293123759659, "learning_rate": 3.999044372429825e-05, "loss": 0.5106, "num_tokens": 180486851.0, "step": 236 }, { "epoch": 0.3236323290944781, "grad_norm": 0.6617698719752889, "learning_rate": 3.999011706737957e-05, "loss": 0.5209, "num_tokens": 181295035.0, "step": 237 }, { "epoch": 0.3249978663480413, "grad_norm": 0.49695575175404827, "learning_rate": 3.998978492245525e-05, "loss": 0.4684, "num_tokens": 182036842.0, "step": 238 }, { "epoch": 0.3263634036016045, "grad_norm": 0.567109798090299, "learning_rate": 3.998944728962662e-05, "loss": 0.4998, "num_tokens": 182764703.0, "step": 239 }, { "epoch": 0.3277289408551677, "grad_norm": 0.43932918609149063, "learning_rate": 3.998910416899669e-05, "loss": 0.4938, "num_tokens": 183540023.0, "step": 240 }, { "epoch": 0.3290944781087309, "grad_norm": 0.516182525584623, "learning_rate": 3.998875556067011e-05, "loss": 0.4941, "num_tokens": 184271977.0, "step": 241 }, { "epoch": 0.3304600153622941, "grad_norm": 0.46349439363180944, "learning_rate": 3.9988401464753244e-05, "loss": 0.5052, "num_tokens": 185074936.0, "step": 242 }, { "epoch": 0.3318255526158573, "grad_norm": 0.478761784248709, "learning_rate": 3.9988041881354095e-05, "loss": 0.5187, "num_tokens": 185912258.0, "step": 243 }, { "epoch": 0.3331910898694205, "grad_norm": 0.4582417941488063, "learning_rate": 3.9987676810582366e-05, "loss": 0.4915, "num_tokens": 186708027.0, "step": 244 }, { "epoch": 0.3345566271229837, "grad_norm": 0.49187308145850994, "learning_rate": 3.9987306252549425e-05, "loss": 0.5126, "num_tokens": 187484562.0, "step": 245 }, { "epoch": 0.3359221643765469, "grad_norm": 0.5134629851402658, "learning_rate": 3.9986930207368306e-05, "loss": 0.4896, "num_tokens": 188214559.0, "step": 246 }, { "epoch": 0.33728770163011007, "grad_norm": 0.43294471187455175, "learning_rate": 3.998654867515373e-05, "loss": 0.4893, "num_tokens": 189003396.0, "step": 247 }, { "epoch": 0.33865323888367327, "grad_norm": 0.490735593391187, "learning_rate": 3.998616165602209e-05, "loss": 0.4737, "num_tokens": 189712206.0, "step": 248 }, { "epoch": 0.34001877613723647, "grad_norm": 0.47448278816527695, "learning_rate": 3.9985769150091434e-05, "loss": 0.5024, "num_tokens": 190557446.0, "step": 249 }, { "epoch": 0.34138431339079967, "grad_norm": 0.4886706738297699, "learning_rate": 3.998537115748151e-05, "loss": 0.4887, "num_tokens": 191296974.0, "step": 250 }, { "epoch": 0.3427498506443629, "grad_norm": 0.43620199339665017, "learning_rate": 3.9984967678313736e-05, "loss": 0.4816, "num_tokens": 192090517.0, "step": 251 }, { "epoch": 0.3441153878979261, "grad_norm": 0.49147474766905286, "learning_rate": 3.998455871271117e-05, "loss": 0.49, "num_tokens": 192824923.0, "step": 252 }, { "epoch": 0.3454809251514893, "grad_norm": 0.4669267285802188, "learning_rate": 3.9984144260798594e-05, "loss": 0.4895, "num_tokens": 193592604.0, "step": 253 }, { "epoch": 0.3468464624050525, "grad_norm": 0.47311573408637914, "learning_rate": 3.9983724322702436e-05, "loss": 0.4993, "num_tokens": 194343790.0, "step": 254 }, { "epoch": 0.3482119996586157, "grad_norm": 0.43905730158481016, "learning_rate": 3.9983298898550794e-05, "loss": 0.4926, "num_tokens": 195144936.0, "step": 255 }, { "epoch": 0.3495775369121789, "grad_norm": 0.5057197637424525, "learning_rate": 3.9982867988473446e-05, "loss": 0.4764, "num_tokens": 195911042.0, "step": 256 }, { "epoch": 0.3509430741657421, "grad_norm": 0.452903816017546, "learning_rate": 3.998243159260184e-05, "loss": 0.4776, "num_tokens": 196669972.0, "step": 257 }, { "epoch": 0.3523086114193053, "grad_norm": 0.5079060892630048, "learning_rate": 3.998198971106912e-05, "loss": 0.4984, "num_tokens": 197435742.0, "step": 258 }, { "epoch": 0.3536741486728685, "grad_norm": 0.4906686516048972, "learning_rate": 3.998154234401006e-05, "loss": 0.488, "num_tokens": 198199606.0, "step": 259 }, { "epoch": 0.3550396859264317, "grad_norm": 0.5031600942369784, "learning_rate": 3.998108949156116e-05, "loss": 0.4666, "num_tokens": 198987046.0, "step": 260 }, { "epoch": 0.3564052231799949, "grad_norm": 0.48862394713257107, "learning_rate": 3.998063115386054e-05, "loss": 0.489, "num_tokens": 199762693.0, "step": 261 }, { "epoch": 0.3577707604335581, "grad_norm": 0.5689848299589474, "learning_rate": 3.998016733104803e-05, "loss": 0.5313, "num_tokens": 200504594.0, "step": 262 }, { "epoch": 0.3591362976871213, "grad_norm": 0.4816870988206345, "learning_rate": 3.997969802326512e-05, "loss": 0.4947, "num_tokens": 201206950.0, "step": 263 }, { "epoch": 0.3605018349406845, "grad_norm": 0.5485784744038639, "learning_rate": 3.9979223230654994e-05, "loss": 0.5089, "num_tokens": 201958474.0, "step": 264 }, { "epoch": 0.3618673721942477, "grad_norm": 0.5374968121617392, "learning_rate": 3.997874295336246e-05, "loss": 0.482, "num_tokens": 202668583.0, "step": 265 }, { "epoch": 0.3632329094478109, "grad_norm": 0.47870250067055453, "learning_rate": 3.997825719153405e-05, "loss": 0.4837, "num_tokens": 203335965.0, "step": 266 }, { "epoch": 0.3645984467013741, "grad_norm": 0.49047233107470933, "learning_rate": 3.9977765945317934e-05, "loss": 0.4919, "num_tokens": 204113580.0, "step": 267 }, { "epoch": 0.3659639839549373, "grad_norm": 0.47560958666819675, "learning_rate": 3.997726921486399e-05, "loss": 0.4962, "num_tokens": 204930237.0, "step": 268 }, { "epoch": 0.3673295212085005, "grad_norm": 0.48173860616042197, "learning_rate": 3.997676700032373e-05, "loss": 0.4846, "num_tokens": 205713593.0, "step": 269 }, { "epoch": 0.3686950584620637, "grad_norm": 0.4602373430016164, "learning_rate": 3.9976259301850375e-05, "loss": 0.4894, "num_tokens": 206518217.0, "step": 270 }, { "epoch": 0.3700605957156269, "grad_norm": 0.4607603099301469, "learning_rate": 3.9975746119598785e-05, "loss": 0.488, "num_tokens": 207298768.0, "step": 271 }, { "epoch": 0.3714261329691901, "grad_norm": 0.5173783422135553, "learning_rate": 3.9975227453725514e-05, "loss": 0.4875, "num_tokens": 208115943.0, "step": 272 }, { "epoch": 0.3727916702227533, "grad_norm": 0.4869702811816523, "learning_rate": 3.997470330438879e-05, "loss": 0.4822, "num_tokens": 208902284.0, "step": 273 }, { "epoch": 0.3741572074763165, "grad_norm": 0.523771122813269, "learning_rate": 3.997417367174851e-05, "loss": 0.497, "num_tokens": 209613804.0, "step": 274 }, { "epoch": 0.3755227447298797, "grad_norm": 0.4742050795684263, "learning_rate": 3.997363855596624e-05, "loss": 0.4789, "num_tokens": 210340571.0, "step": 275 }, { "epoch": 0.3768882819834429, "grad_norm": 0.49868761653789145, "learning_rate": 3.997309795720521e-05, "loss": 0.4978, "num_tokens": 211039158.0, "step": 276 }, { "epoch": 0.3782538192370061, "grad_norm": 0.5151394842217513, "learning_rate": 3.9972551875630355e-05, "loss": 0.4931, "num_tokens": 211825565.0, "step": 277 }, { "epoch": 0.37961935649056927, "grad_norm": 0.4699123034699604, "learning_rate": 3.997200031140824e-05, "loss": 0.4878, "num_tokens": 212582971.0, "step": 278 }, { "epoch": 0.38098489374413247, "grad_norm": 0.5672294964348898, "learning_rate": 3.997144326470713e-05, "loss": 0.4793, "num_tokens": 213266102.0, "step": 279 }, { "epoch": 0.38235043099769567, "grad_norm": 0.5436851273904073, "learning_rate": 3.997088073569696e-05, "loss": 0.5053, "num_tokens": 214010120.0, "step": 280 }, { "epoch": 0.38371596825125887, "grad_norm": 0.562938290331741, "learning_rate": 3.9970312724549325e-05, "loss": 0.4865, "num_tokens": 214823555.0, "step": 281 }, { "epoch": 0.38508150550482206, "grad_norm": 0.5492037169225029, "learning_rate": 3.9969739231437514e-05, "loss": 0.5227, "num_tokens": 215565525.0, "step": 282 }, { "epoch": 0.38644704275838526, "grad_norm": 0.47260732981011927, "learning_rate": 3.996916025653646e-05, "loss": 0.4978, "num_tokens": 216359979.0, "step": 283 }, { "epoch": 0.38781258001194846, "grad_norm": 0.5912603654529168, "learning_rate": 3.9968575800022795e-05, "loss": 0.4909, "num_tokens": 217119930.0, "step": 284 }, { "epoch": 0.38917811726551166, "grad_norm": 0.4800289158770075, "learning_rate": 3.9967985862074795e-05, "loss": 0.4904, "num_tokens": 217887396.0, "step": 285 }, { "epoch": 0.39054365451907486, "grad_norm": 0.5349153593031989, "learning_rate": 3.996739044287244e-05, "loss": 0.4989, "num_tokens": 218687938.0, "step": 286 }, { "epoch": 0.39190919177263805, "grad_norm": 0.449385979140907, "learning_rate": 3.9966789542597375e-05, "loss": 0.4769, "num_tokens": 219500101.0, "step": 287 }, { "epoch": 0.39327472902620125, "grad_norm": 0.48298372362963354, "learning_rate": 3.996618316143288e-05, "loss": 0.4807, "num_tokens": 220242171.0, "step": 288 }, { "epoch": 0.39464026627976445, "grad_norm": 0.46851999868688576, "learning_rate": 3.9965571299563966e-05, "loss": 0.4933, "num_tokens": 221055384.0, "step": 289 }, { "epoch": 0.39600580353332765, "grad_norm": 0.4873132541161622, "learning_rate": 3.996495395717726e-05, "loss": 0.5077, "num_tokens": 221792382.0, "step": 290 }, { "epoch": 0.39737134078689085, "grad_norm": 0.5078636272600695, "learning_rate": 3.9964331134461104e-05, "loss": 0.5007, "num_tokens": 222572810.0, "step": 291 }, { "epoch": 0.39873687804045405, "grad_norm": 0.46168761576308515, "learning_rate": 3.9963702831605486e-05, "loss": 0.4797, "num_tokens": 223262344.0, "step": 292 }, { "epoch": 0.40010241529401724, "grad_norm": 0.44228730938342975, "learning_rate": 3.996306904880208e-05, "loss": 0.4853, "num_tokens": 224015030.0, "step": 293 }, { "epoch": 0.40146795254758044, "grad_norm": 0.6235384180584469, "learning_rate": 3.996242978624421e-05, "loss": 0.4766, "num_tokens": 224756295.0, "step": 294 }, { "epoch": 0.40283348980114364, "grad_norm": 0.4830690704975599, "learning_rate": 3.996178504412691e-05, "loss": 0.4852, "num_tokens": 225558938.0, "step": 295 }, { "epoch": 0.40419902705470684, "grad_norm": 0.4520578281129281, "learning_rate": 3.9961134822646854e-05, "loss": 0.4609, "num_tokens": 226245787.0, "step": 296 }, { "epoch": 0.40556456430827004, "grad_norm": 0.6063703451993918, "learning_rate": 3.9960479122002384e-05, "loss": 0.5141, "num_tokens": 227039605.0, "step": 297 }, { "epoch": 0.40693010156183324, "grad_norm": 0.5153840176242849, "learning_rate": 3.995981794239354e-05, "loss": 0.4749, "num_tokens": 227815552.0, "step": 298 }, { "epoch": 0.40829563881539643, "grad_norm": 0.5859640723172536, "learning_rate": 3.995915128402202e-05, "loss": 0.4905, "num_tokens": 228659573.0, "step": 299 }, { "epoch": 0.40966117606895963, "grad_norm": 0.5646089849592923, "learning_rate": 3.995847914709118e-05, "loss": 0.4858, "num_tokens": 229433563.0, "step": 300 }, { "epoch": 0.41102671332252283, "grad_norm": 0.4954418681787531, "learning_rate": 3.995780153180607e-05, "loss": 0.4751, "num_tokens": 230233993.0, "step": 301 }, { "epoch": 0.41239225057608603, "grad_norm": 0.5430796117492785, "learning_rate": 3.9957118438373404e-05, "loss": 0.5074, "num_tokens": 230960817.0, "step": 302 }, { "epoch": 0.4137577878296492, "grad_norm": 0.5391473251476868, "learning_rate": 3.9956429867001554e-05, "loss": 0.497, "num_tokens": 231745708.0, "step": 303 }, { "epoch": 0.4151233250832124, "grad_norm": 0.5185051650699583, "learning_rate": 3.995573581790058e-05, "loss": 0.5022, "num_tokens": 232542634.0, "step": 304 }, { "epoch": 0.4164888623367756, "grad_norm": 0.48934291109529016, "learning_rate": 3.99550362912822e-05, "loss": 0.5089, "num_tokens": 233333236.0, "step": 305 }, { "epoch": 0.4178543995903388, "grad_norm": 0.5892698346573997, "learning_rate": 3.9954331287359827e-05, "loss": 0.4814, "num_tokens": 234089042.0, "step": 306 }, { "epoch": 0.419219936843902, "grad_norm": 0.5204378489494141, "learning_rate": 3.99536208063485e-05, "loss": 0.5129, "num_tokens": 234830707.0, "step": 307 }, { "epoch": 0.4205854740974652, "grad_norm": 0.6550487233172577, "learning_rate": 3.995290484846498e-05, "loss": 0.4779, "num_tokens": 235545529.0, "step": 308 }, { "epoch": 0.4219510113510284, "grad_norm": 0.5779261735841767, "learning_rate": 3.995218341392766e-05, "loss": 0.4983, "num_tokens": 236370933.0, "step": 309 }, { "epoch": 0.4233165486045916, "grad_norm": 0.6604961435384231, "learning_rate": 3.995145650295662e-05, "loss": 0.5076, "num_tokens": 237102492.0, "step": 310 }, { "epoch": 0.4246820858581548, "grad_norm": 0.5163723599098748, "learning_rate": 3.9950724115773625e-05, "loss": 0.4775, "num_tokens": 237880756.0, "step": 311 }, { "epoch": 0.426047623111718, "grad_norm": 0.5684809735347632, "learning_rate": 3.9949986252602074e-05, "loss": 0.5043, "num_tokens": 238626595.0, "step": 312 }, { "epoch": 0.4274131603652812, "grad_norm": 0.597672936806328, "learning_rate": 3.9949242913667066e-05, "loss": 0.484, "num_tokens": 239367594.0, "step": 313 }, { "epoch": 0.4287786976188444, "grad_norm": 0.48796550975446823, "learning_rate": 3.994849409919536e-05, "loss": 0.4915, "num_tokens": 240116791.0, "step": 314 }, { "epoch": 0.4301442348724076, "grad_norm": 0.5919701984930172, "learning_rate": 3.9947739809415384e-05, "loss": 0.5008, "num_tokens": 240817505.0, "step": 315 }, { "epoch": 0.4315097721259708, "grad_norm": 0.5237862744297895, "learning_rate": 3.994698004455725e-05, "loss": 0.5117, "num_tokens": 241564572.0, "step": 316 }, { "epoch": 0.432875309379534, "grad_norm": 0.5296513681897442, "learning_rate": 3.994621480485271e-05, "loss": 0.5133, "num_tokens": 242315939.0, "step": 317 }, { "epoch": 0.4342408466330972, "grad_norm": 0.56202180262192, "learning_rate": 3.994544409053523e-05, "loss": 0.4899, "num_tokens": 243121592.0, "step": 318 }, { "epoch": 0.4356063838866604, "grad_norm": 0.5448259045707802, "learning_rate": 3.99446679018399e-05, "loss": 0.4855, "num_tokens": 243893474.0, "step": 319 }, { "epoch": 0.4369719211402236, "grad_norm": 0.5747667726804877, "learning_rate": 3.994388623900352e-05, "loss": 0.4573, "num_tokens": 244667700.0, "step": 320 }, { "epoch": 0.4383374583937868, "grad_norm": 0.5764729377439064, "learning_rate": 3.994309910226453e-05, "loss": 0.5025, "num_tokens": 245524894.0, "step": 321 }, { "epoch": 0.43970299564735, "grad_norm": 0.5048255162628326, "learning_rate": 3.994230649186305e-05, "loss": 0.523, "num_tokens": 246318623.0, "step": 322 }, { "epoch": 0.4410685329009132, "grad_norm": 0.550673153247694, "learning_rate": 3.9941508408040876e-05, "loss": 0.4954, "num_tokens": 247063112.0, "step": 323 }, { "epoch": 0.4424340701544764, "grad_norm": 0.4642961646310446, "learning_rate": 3.9940704851041466e-05, "loss": 0.4699, "num_tokens": 247731589.0, "step": 324 }, { "epoch": 0.4437996074080396, "grad_norm": 0.5469113799525428, "learning_rate": 3.993989582110995e-05, "loss": 0.491, "num_tokens": 248444602.0, "step": 325 }, { "epoch": 0.4451651446616028, "grad_norm": 0.5737981714092051, "learning_rate": 3.9939081318493136e-05, "loss": 0.4856, "num_tokens": 249230640.0, "step": 326 }, { "epoch": 0.446530681915166, "grad_norm": 0.4697325206048375, "learning_rate": 3.993826134343949e-05, "loss": 0.4943, "num_tokens": 249979267.0, "step": 327 }, { "epoch": 0.4478962191687292, "grad_norm": 0.6474493100543741, "learning_rate": 3.9937435896199135e-05, "loss": 0.4812, "num_tokens": 250713432.0, "step": 328 }, { "epoch": 0.4492617564222924, "grad_norm": 0.4902907316539322, "learning_rate": 3.99366049770239e-05, "loss": 0.4981, "num_tokens": 251499076.0, "step": 329 }, { "epoch": 0.4506272936758556, "grad_norm": 0.5506501501663551, "learning_rate": 3.9935768586167254e-05, "loss": 0.4595, "num_tokens": 252294504.0, "step": 330 }, { "epoch": 0.4519928309294188, "grad_norm": 0.48070553273722855, "learning_rate": 3.9934926723884346e-05, "loss": 0.5062, "num_tokens": 253098070.0, "step": 331 }, { "epoch": 0.453358368182982, "grad_norm": 0.5004326009818928, "learning_rate": 3.993407939043199e-05, "loss": 0.4771, "num_tokens": 253773906.0, "step": 332 }, { "epoch": 0.4547239054365452, "grad_norm": 0.534594255558685, "learning_rate": 3.9933226586068666e-05, "loss": 0.4853, "num_tokens": 254497202.0, "step": 333 }, { "epoch": 0.45608944269010837, "grad_norm": 0.4352210178111366, "learning_rate": 3.9932368311054544e-05, "loss": 0.4827, "num_tokens": 255311898.0, "step": 334 }, { "epoch": 0.45745497994367157, "grad_norm": 0.4907729570932215, "learning_rate": 3.993150456565143e-05, "loss": 0.5027, "num_tokens": 256101427.0, "step": 335 }, { "epoch": 0.45882051719723477, "grad_norm": 0.5144077983826008, "learning_rate": 3.9930635350122806e-05, "loss": 0.472, "num_tokens": 256854702.0, "step": 336 }, { "epoch": 0.46018605445079797, "grad_norm": 0.5606896343894453, "learning_rate": 3.992976066473386e-05, "loss": 0.4892, "num_tokens": 257665665.0, "step": 337 }, { "epoch": 0.46155159170436116, "grad_norm": 0.48695035884727483, "learning_rate": 3.992888050975141e-05, "loss": 0.4677, "num_tokens": 258421412.0, "step": 338 }, { "epoch": 0.46291712895792436, "grad_norm": 0.527472312681286, "learning_rate": 3.9927994885443945e-05, "loss": 0.4883, "num_tokens": 259281491.0, "step": 339 }, { "epoch": 0.46428266621148756, "grad_norm": 0.45117863540697983, "learning_rate": 3.992710379208164e-05, "loss": 0.5018, "num_tokens": 260132318.0, "step": 340 }, { "epoch": 0.46564820346505076, "grad_norm": 0.48465403874237994, "learning_rate": 3.992620722993632e-05, "loss": 0.4737, "num_tokens": 260859367.0, "step": 341 }, { "epoch": 0.46701374071861396, "grad_norm": 0.4596209801145746, "learning_rate": 3.99253051992815e-05, "loss": 0.4734, "num_tokens": 261592897.0, "step": 342 }, { "epoch": 0.46837927797217715, "grad_norm": 0.4593572134908672, "learning_rate": 3.992439770039233e-05, "loss": 0.4689, "num_tokens": 262343138.0, "step": 343 }, { "epoch": 0.46974481522574035, "grad_norm": 0.4604074469165245, "learning_rate": 3.9923484733545665e-05, "loss": 0.4697, "num_tokens": 263134383.0, "step": 344 }, { "epoch": 0.47111035247930355, "grad_norm": 0.5160139149659917, "learning_rate": 3.9922566299020015e-05, "loss": 0.5054, "num_tokens": 263964936.0, "step": 345 }, { "epoch": 0.47247588973286675, "grad_norm": 0.46357178785093156, "learning_rate": 3.992164239709554e-05, "loss": 0.4815, "num_tokens": 264686760.0, "step": 346 }, { "epoch": 0.47384142698642995, "grad_norm": 0.47324934389624457, "learning_rate": 3.99207130280541e-05, "loss": 0.4956, "num_tokens": 265443028.0, "step": 347 }, { "epoch": 0.47520696423999315, "grad_norm": 0.4913174919893693, "learning_rate": 3.991977819217919e-05, "loss": 0.5038, "num_tokens": 266231883.0, "step": 348 }, { "epoch": 0.47657250149355634, "grad_norm": 0.4957208452314514, "learning_rate": 3.9918837889755984e-05, "loss": 0.4689, "num_tokens": 267006260.0, "step": 349 }, { "epoch": 0.47793803874711954, "grad_norm": 0.5072798772469029, "learning_rate": 3.991789212107135e-05, "loss": 0.49, "num_tokens": 267769431.0, "step": 350 }, { "epoch": 0.4793035760006828, "grad_norm": 0.46864455109070774, "learning_rate": 3.991694088641377e-05, "loss": 0.5033, "num_tokens": 268531278.0, "step": 351 }, { "epoch": 0.480669113254246, "grad_norm": 0.5747280800220212, "learning_rate": 3.9915984186073465e-05, "loss": 0.4965, "num_tokens": 269329197.0, "step": 352 }, { "epoch": 0.4820346505078092, "grad_norm": 0.4390375860604689, "learning_rate": 3.991502202034225e-05, "loss": 0.4917, "num_tokens": 270106226.0, "step": 353 }, { "epoch": 0.4834001877613724, "grad_norm": 0.5700398819847046, "learning_rate": 3.991405438951365e-05, "loss": 0.5005, "num_tokens": 270869973.0, "step": 354 }, { "epoch": 0.4847657250149356, "grad_norm": 0.5130044963087405, "learning_rate": 3.991308129388284e-05, "loss": 0.4955, "num_tokens": 271641818.0, "step": 355 }, { "epoch": 0.4861312622684988, "grad_norm": 0.5259473921415438, "learning_rate": 3.991210273374669e-05, "loss": 0.5044, "num_tokens": 272466578.0, "step": 356 }, { "epoch": 0.487496799522062, "grad_norm": 0.6347716313056088, "learning_rate": 3.9911118709403694e-05, "loss": 0.5006, "num_tokens": 273263173.0, "step": 357 }, { "epoch": 0.4888623367756252, "grad_norm": 0.4444280255959215, "learning_rate": 3.991012922115405e-05, "loss": 0.4943, "num_tokens": 274072129.0, "step": 358 }, { "epoch": 0.4902278740291884, "grad_norm": 0.5522705227617446, "learning_rate": 3.9909134269299604e-05, "loss": 0.4896, "num_tokens": 274788487.0, "step": 359 }, { "epoch": 0.4915934112827516, "grad_norm": 0.5017637598606743, "learning_rate": 3.9908133854143865e-05, "loss": 0.5029, "num_tokens": 275517372.0, "step": 360 }, { "epoch": 0.4929589485363148, "grad_norm": 0.5030773067498427, "learning_rate": 3.990712797599202e-05, "loss": 0.4652, "num_tokens": 276221573.0, "step": 361 }, { "epoch": 0.494324485789878, "grad_norm": 0.4841077191137425, "learning_rate": 3.990611663515093e-05, "loss": 0.462, "num_tokens": 276933000.0, "step": 362 }, { "epoch": 0.4956900230434412, "grad_norm": 0.5069308468527598, "learning_rate": 3.99050998319291e-05, "loss": 0.487, "num_tokens": 277707476.0, "step": 363 }, { "epoch": 0.4970555602970044, "grad_norm": 0.4288821717327987, "learning_rate": 3.990407756663671e-05, "loss": 0.48, "num_tokens": 278432728.0, "step": 364 }, { "epoch": 0.49842109755056757, "grad_norm": 0.4668401484382509, "learning_rate": 3.990304983958562e-05, "loss": 0.4928, "num_tokens": 279161715.0, "step": 365 }, { "epoch": 0.49978663480413077, "grad_norm": 0.42056436081783427, "learning_rate": 3.990201665108934e-05, "loss": 0.4785, "num_tokens": 279998957.0, "step": 366 }, { "epoch": 0.5011521720576939, "grad_norm": 0.5175055164048528, "learning_rate": 3.9900978001463046e-05, "loss": 0.4741, "num_tokens": 280724323.0, "step": 367 }, { "epoch": 0.5025177093112572, "grad_norm": 0.47280730860140063, "learning_rate": 3.9899933891023594e-05, "loss": 0.4771, "num_tokens": 281517033.0, "step": 368 }, { "epoch": 0.5038832465648203, "grad_norm": 0.5111939030248641, "learning_rate": 3.989888432008948e-05, "loss": 0.4971, "num_tokens": 282370345.0, "step": 369 }, { "epoch": 0.5052487838183836, "grad_norm": 0.5070666833029053, "learning_rate": 3.989782928898091e-05, "loss": 0.4699, "num_tokens": 283115713.0, "step": 370 }, { "epoch": 0.5066143210719467, "grad_norm": 0.5760406233121458, "learning_rate": 3.9896768798019704e-05, "loss": 0.4582, "num_tokens": 283864860.0, "step": 371 }, { "epoch": 0.50797985832551, "grad_norm": 0.4830604277691185, "learning_rate": 3.989570284752939e-05, "loss": 0.4891, "num_tokens": 284665733.0, "step": 372 }, { "epoch": 0.5093453955790731, "grad_norm": 0.4984428105359956, "learning_rate": 3.989463143783512e-05, "loss": 0.5056, "num_tokens": 285469958.0, "step": 373 }, { "epoch": 0.5107109328326364, "grad_norm": 0.46213491602598655, "learning_rate": 3.9893554569263754e-05, "loss": 0.474, "num_tokens": 286222903.0, "step": 374 }, { "epoch": 0.5120764700861995, "grad_norm": 0.5661329533120816, "learning_rate": 3.98924722421438e-05, "loss": 0.5142, "num_tokens": 287019550.0, "step": 375 }, { "epoch": 0.5134420073397628, "grad_norm": 0.4631304684837866, "learning_rate": 3.9891384456805416e-05, "loss": 0.4622, "num_tokens": 287752277.0, "step": 376 }, { "epoch": 0.5148075445933259, "grad_norm": 0.5637703245184947, "learning_rate": 3.9890291213580444e-05, "loss": 0.4618, "num_tokens": 288496766.0, "step": 377 }, { "epoch": 0.5161730818468891, "grad_norm": 0.5267649957835265, "learning_rate": 3.9889192512802395e-05, "loss": 0.4902, "num_tokens": 289277216.0, "step": 378 }, { "epoch": 0.5175386191004523, "grad_norm": 0.4660815404655567, "learning_rate": 3.988808835480641e-05, "loss": 0.4803, "num_tokens": 290000884.0, "step": 379 }, { "epoch": 0.5189041563540155, "grad_norm": 0.5105028965336454, "learning_rate": 3.9886978739929344e-05, "loss": 0.4943, "num_tokens": 290694042.0, "step": 380 }, { "epoch": 0.5202696936075787, "grad_norm": 0.44509236780406597, "learning_rate": 3.988586366850968e-05, "loss": 0.51, "num_tokens": 291467310.0, "step": 381 }, { "epoch": 0.5216352308611419, "grad_norm": 0.4756032509027699, "learning_rate": 3.9884743140887585e-05, "loss": 0.4731, "num_tokens": 292263453.0, "step": 382 }, { "epoch": 0.5230007681147051, "grad_norm": 0.4906395781248726, "learning_rate": 3.9883617157404877e-05, "loss": 0.4887, "num_tokens": 293045342.0, "step": 383 }, { "epoch": 0.5243663053682683, "grad_norm": 0.4785116606410976, "learning_rate": 3.9882485718405054e-05, "loss": 0.4786, "num_tokens": 293752202.0, "step": 384 }, { "epoch": 0.5257318426218315, "grad_norm": 0.4266397404166461, "learning_rate": 3.988134882423326e-05, "loss": 0.4704, "num_tokens": 294509326.0, "step": 385 }, { "epoch": 0.5270973798753947, "grad_norm": 0.5754530912512139, "learning_rate": 3.9880206475236307e-05, "loss": 0.4983, "num_tokens": 295286096.0, "step": 386 }, { "epoch": 0.5284629171289579, "grad_norm": 0.5115189045854828, "learning_rate": 3.9879058671762695e-05, "loss": 0.4699, "num_tokens": 296024065.0, "step": 387 }, { "epoch": 0.5298284543825211, "grad_norm": 0.47086845495889185, "learning_rate": 3.9877905414162553e-05, "loss": 0.4983, "num_tokens": 296814855.0, "step": 388 }, { "epoch": 0.5311939916360843, "grad_norm": 0.591427887495622, "learning_rate": 3.987674670278769e-05, "loss": 0.4821, "num_tokens": 297601966.0, "step": 389 }, { "epoch": 0.5325595288896475, "grad_norm": 0.4161106956692572, "learning_rate": 3.98755825379916e-05, "loss": 0.4882, "num_tokens": 298298993.0, "step": 390 }, { "epoch": 0.5339250661432107, "grad_norm": 0.5741324906208105, "learning_rate": 3.9874412920129395e-05, "loss": 0.4877, "num_tokens": 299084362.0, "step": 391 }, { "epoch": 0.5352906033967739, "grad_norm": 0.4727972293014399, "learning_rate": 3.987323784955788e-05, "loss": 0.4813, "num_tokens": 299866430.0, "step": 392 }, { "epoch": 0.5366561406503371, "grad_norm": 0.46409408551721953, "learning_rate": 3.9872057326635525e-05, "loss": 0.4761, "num_tokens": 300572297.0, "step": 393 }, { "epoch": 0.5380216779039003, "grad_norm": 0.5208065454522826, "learning_rate": 3.9870871351722454e-05, "loss": 0.4625, "num_tokens": 301280163.0, "step": 394 }, { "epoch": 0.5393872151574635, "grad_norm": 0.45697258941149027, "learning_rate": 3.986967992518045e-05, "loss": 0.4554, "num_tokens": 302007061.0, "step": 395 }, { "epoch": 0.5407527524110267, "grad_norm": 0.6052560216213235, "learning_rate": 3.9868483047372974e-05, "loss": 0.4861, "num_tokens": 302793860.0, "step": 396 }, { "epoch": 0.5421182896645899, "grad_norm": 0.511649810458573, "learning_rate": 3.986728071866514e-05, "loss": 0.5089, "num_tokens": 303539228.0, "step": 397 }, { "epoch": 0.5434838269181531, "grad_norm": 0.5259982659494048, "learning_rate": 3.986607293942372e-05, "loss": 0.4793, "num_tokens": 304271393.0, "step": 398 }, { "epoch": 0.5448493641717163, "grad_norm": 0.5423519835977575, "learning_rate": 3.986485971001716e-05, "loss": 0.4947, "num_tokens": 305007034.0, "step": 399 }, { "epoch": 0.5462149014252795, "grad_norm": 0.4560831817770411, "learning_rate": 3.986364103081557e-05, "loss": 0.4837, "num_tokens": 305738401.0, "step": 400 }, { "epoch": 0.5475804386788428, "grad_norm": 0.5899617266854076, "learning_rate": 3.9862416902190706e-05, "loss": 0.4983, "num_tokens": 306532200.0, "step": 401 }, { "epoch": 0.5489459759324059, "grad_norm": 0.5120345769321453, "learning_rate": 3.9861187324516e-05, "loss": 0.5076, "num_tokens": 307244307.0, "step": 402 }, { "epoch": 0.5503115131859692, "grad_norm": 0.5149555655372379, "learning_rate": 3.9859952298166544e-05, "loss": 0.4945, "num_tokens": 308006043.0, "step": 403 }, { "epoch": 0.5516770504395323, "grad_norm": 0.5239808522632581, "learning_rate": 3.985871182351909e-05, "loss": 0.4771, "num_tokens": 308785558.0, "step": 404 }, { "epoch": 0.5530425876930956, "grad_norm": 0.45704414154776035, "learning_rate": 3.985746590095206e-05, "loss": 0.4752, "num_tokens": 309553286.0, "step": 405 }, { "epoch": 0.5544081249466587, "grad_norm": 0.5290380191704342, "learning_rate": 3.9856214530845514e-05, "loss": 0.4645, "num_tokens": 310326933.0, "step": 406 }, { "epoch": 0.555773662200222, "grad_norm": 0.4612293404514629, "learning_rate": 3.985495771358121e-05, "loss": 0.4973, "num_tokens": 311116677.0, "step": 407 }, { "epoch": 0.5571391994537851, "grad_norm": 0.4497635622358786, "learning_rate": 3.985369544954254e-05, "loss": 0.4673, "num_tokens": 311856460.0, "step": 408 }, { "epoch": 0.5585047367073483, "grad_norm": 0.4899936406615019, "learning_rate": 3.985242773911456e-05, "loss": 0.4852, "num_tokens": 312526419.0, "step": 409 }, { "epoch": 0.5598702739609115, "grad_norm": 0.4054784650318478, "learning_rate": 3.985115458268401e-05, "loss": 0.4756, "num_tokens": 313315050.0, "step": 410 }, { "epoch": 0.5612358112144747, "grad_norm": 0.4870565410907852, "learning_rate": 3.984987598063925e-05, "loss": 0.484, "num_tokens": 314118654.0, "step": 411 }, { "epoch": 0.5626013484680379, "grad_norm": 0.4049085989883703, "learning_rate": 3.984859193337035e-05, "loss": 0.4686, "num_tokens": 314876435.0, "step": 412 }, { "epoch": 0.5639668857216011, "grad_norm": 0.5339295509559463, "learning_rate": 3.984730244126901e-05, "loss": 0.4939, "num_tokens": 315647483.0, "step": 413 }, { "epoch": 0.5653324229751643, "grad_norm": 0.44776850167426663, "learning_rate": 3.9846007504728593e-05, "loss": 0.4888, "num_tokens": 316454197.0, "step": 414 }, { "epoch": 0.5666979602287275, "grad_norm": 0.4991384198763151, "learning_rate": 3.9844707124144124e-05, "loss": 0.4732, "num_tokens": 317113360.0, "step": 415 }, { "epoch": 0.5680634974822907, "grad_norm": 0.5090395802571818, "learning_rate": 3.984340129991231e-05, "loss": 0.4885, "num_tokens": 317893602.0, "step": 416 }, { "epoch": 0.5694290347358539, "grad_norm": 0.4465326597662841, "learning_rate": 3.984209003243149e-05, "loss": 0.4911, "num_tokens": 318737749.0, "step": 417 }, { "epoch": 0.5707945719894171, "grad_norm": 0.42807781926618743, "learning_rate": 3.984077332210168e-05, "loss": 0.4537, "num_tokens": 319398186.0, "step": 418 }, { "epoch": 0.5721601092429803, "grad_norm": 0.4343952473262664, "learning_rate": 3.983945116932454e-05, "loss": 0.5032, "num_tokens": 320235448.0, "step": 419 }, { "epoch": 0.5735256464965435, "grad_norm": 0.4968746264998327, "learning_rate": 3.983812357450341e-05, "loss": 0.4703, "num_tokens": 320994453.0, "step": 420 }, { "epoch": 0.5748911837501067, "grad_norm": 0.47287435527711746, "learning_rate": 3.983679053804328e-05, "loss": 0.4581, "num_tokens": 321722682.0, "step": 421 }, { "epoch": 0.5762567210036699, "grad_norm": 0.4872812773604365, "learning_rate": 3.98354520603508e-05, "loss": 0.4816, "num_tokens": 322491853.0, "step": 422 }, { "epoch": 0.5776222582572331, "grad_norm": 0.51676102964061, "learning_rate": 3.98341081418343e-05, "loss": 0.4746, "num_tokens": 323230647.0, "step": 423 }, { "epoch": 0.5789877955107963, "grad_norm": 0.45384092063920883, "learning_rate": 3.983275878290372e-05, "loss": 0.504, "num_tokens": 323998089.0, "step": 424 }, { "epoch": 0.5803533327643595, "grad_norm": 0.494457986086883, "learning_rate": 3.983140398397071e-05, "loss": 0.4643, "num_tokens": 324679142.0, "step": 425 }, { "epoch": 0.5817188700179227, "grad_norm": 0.5333808869915433, "learning_rate": 3.983004374544856e-05, "loss": 0.4545, "num_tokens": 325360054.0, "step": 426 }, { "epoch": 0.5830844072714859, "grad_norm": 0.47935847595409664, "learning_rate": 3.9828678067752214e-05, "loss": 0.5009, "num_tokens": 326095180.0, "step": 427 }, { "epoch": 0.5844499445250491, "grad_norm": 0.4999135781937812, "learning_rate": 3.982730695129828e-05, "loss": 0.4856, "num_tokens": 326905415.0, "step": 428 }, { "epoch": 0.5858154817786123, "grad_norm": 0.4766950884608175, "learning_rate": 3.9825930396505036e-05, "loss": 0.4936, "num_tokens": 327711008.0, "step": 429 }, { "epoch": 0.5871810190321755, "grad_norm": 0.500768211934355, "learning_rate": 3.9824548403792406e-05, "loss": 0.4744, "num_tokens": 328403684.0, "step": 430 }, { "epoch": 0.5885465562857387, "grad_norm": 0.4806758865830102, "learning_rate": 3.9823160973581966e-05, "loss": 0.4938, "num_tokens": 329242640.0, "step": 431 }, { "epoch": 0.5899120935393019, "grad_norm": 0.4783154009695625, "learning_rate": 3.9821768106296975e-05, "loss": 0.4644, "num_tokens": 329985268.0, "step": 432 }, { "epoch": 0.5912776307928651, "grad_norm": 0.4656742593118784, "learning_rate": 3.9820369802362325e-05, "loss": 0.4702, "num_tokens": 330815185.0, "step": 433 }, { "epoch": 0.5926431680464282, "grad_norm": 0.4801418810887073, "learning_rate": 3.981896606220458e-05, "loss": 0.4443, "num_tokens": 331567995.0, "step": 434 }, { "epoch": 0.5940087052999915, "grad_norm": 0.4288022969554381, "learning_rate": 3.981755688625197e-05, "loss": 0.49, "num_tokens": 332334214.0, "step": 435 }, { "epoch": 0.5953742425535546, "grad_norm": 0.4922718783393102, "learning_rate": 3.981614227493437e-05, "loss": 0.4819, "num_tokens": 333131199.0, "step": 436 }, { "epoch": 0.5967397798071179, "grad_norm": 0.43214036848042836, "learning_rate": 3.981472222868331e-05, "loss": 0.4726, "num_tokens": 333900299.0, "step": 437 }, { "epoch": 0.598105317060681, "grad_norm": 0.5804589503796131, "learning_rate": 3.981329674793198e-05, "loss": 0.4762, "num_tokens": 334642801.0, "step": 438 }, { "epoch": 0.5994708543142443, "grad_norm": 0.45804747031368637, "learning_rate": 3.981186583311525e-05, "loss": 0.4631, "num_tokens": 335423971.0, "step": 439 }, { "epoch": 0.6008363915678074, "grad_norm": 0.5559116003464653, "learning_rate": 3.981042948466962e-05, "loss": 0.4452, "num_tokens": 336156368.0, "step": 440 }, { "epoch": 0.6022019288213707, "grad_norm": 0.5289039981094046, "learning_rate": 3.9808987703033256e-05, "loss": 0.4598, "num_tokens": 336842360.0, "step": 441 }, { "epoch": 0.6035674660749338, "grad_norm": 0.49416955157247566, "learning_rate": 3.9807540488645986e-05, "loss": 0.4937, "num_tokens": 337607796.0, "step": 442 }, { "epoch": 0.6049330033284971, "grad_norm": 0.6193399746000244, "learning_rate": 3.980608784194929e-05, "loss": 0.5016, "num_tokens": 338378413.0, "step": 443 }, { "epoch": 0.6062985405820602, "grad_norm": 0.4755398279963758, "learning_rate": 3.980462976338631e-05, "loss": 0.4794, "num_tokens": 339163790.0, "step": 444 }, { "epoch": 0.6076640778356235, "grad_norm": 0.6785116536076674, "learning_rate": 3.9803166253401845e-05, "loss": 0.4768, "num_tokens": 339913682.0, "step": 445 }, { "epoch": 0.6090296150891866, "grad_norm": 0.5997429345284826, "learning_rate": 3.980169731244235e-05, "loss": 0.4991, "num_tokens": 340667280.0, "step": 446 }, { "epoch": 0.6103951523427499, "grad_norm": 0.6817076247037134, "learning_rate": 3.980022294095592e-05, "loss": 0.4897, "num_tokens": 341402387.0, "step": 447 }, { "epoch": 0.611760689596313, "grad_norm": 0.5426713387291994, "learning_rate": 3.9798743139392336e-05, "loss": 0.4624, "num_tokens": 342126377.0, "step": 448 }, { "epoch": 0.6131262268498763, "grad_norm": 0.6944770837800442, "learning_rate": 3.979725790820302e-05, "loss": 0.4577, "num_tokens": 342944624.0, "step": 449 }, { "epoch": 0.6144917641034394, "grad_norm": 0.5849066630355068, "learning_rate": 3.979576724784105e-05, "loss": 0.4925, "num_tokens": 343793644.0, "step": 450 }, { "epoch": 0.6158573013570027, "grad_norm": 0.5704537334141339, "learning_rate": 3.979427115876116e-05, "loss": 0.4761, "num_tokens": 344553381.0, "step": 451 }, { "epoch": 0.6172228386105658, "grad_norm": 0.5493800641706457, "learning_rate": 3.979276964141974e-05, "loss": 0.5086, "num_tokens": 345386637.0, "step": 452 }, { "epoch": 0.6185883758641291, "grad_norm": 0.5007939104102442, "learning_rate": 3.9791262696274854e-05, "loss": 0.4875, "num_tokens": 346134370.0, "step": 453 }, { "epoch": 0.6199539131176922, "grad_norm": 0.49897061099466966, "learning_rate": 3.9789750323786184e-05, "loss": 0.4669, "num_tokens": 346892468.0, "step": 454 }, { "epoch": 0.6213194503712555, "grad_norm": 0.5153622300953542, "learning_rate": 3.9788232524415107e-05, "loss": 0.4816, "num_tokens": 347629735.0, "step": 455 }, { "epoch": 0.6226849876248186, "grad_norm": 0.5465835801042559, "learning_rate": 3.978670929862462e-05, "loss": 0.4993, "num_tokens": 348412290.0, "step": 456 }, { "epoch": 0.6240505248783819, "grad_norm": 0.4721067620205064, "learning_rate": 3.9785180646879406e-05, "loss": 0.4727, "num_tokens": 349089958.0, "step": 457 }, { "epoch": 0.625416062131945, "grad_norm": 0.5946349278890641, "learning_rate": 3.978364656964579e-05, "loss": 0.4722, "num_tokens": 349823834.0, "step": 458 }, { "epoch": 0.6267815993855083, "grad_norm": 0.48766206722610494, "learning_rate": 3.978210706739175e-05, "loss": 0.4942, "num_tokens": 350632107.0, "step": 459 }, { "epoch": 0.6281471366390714, "grad_norm": 0.582812754830379, "learning_rate": 3.978056214058692e-05, "loss": 0.448, "num_tokens": 351357486.0, "step": 460 }, { "epoch": 0.6295126738926347, "grad_norm": 0.442284843244288, "learning_rate": 3.977901178970259e-05, "loss": 0.4721, "num_tokens": 352152235.0, "step": 461 }, { "epoch": 0.6308782111461978, "grad_norm": 0.5879303919756329, "learning_rate": 3.977745601521171e-05, "loss": 0.4769, "num_tokens": 352899563.0, "step": 462 }, { "epoch": 0.632243748399761, "grad_norm": 0.5299565869689957, "learning_rate": 3.977589481758887e-05, "loss": 0.4772, "num_tokens": 353641255.0, "step": 463 }, { "epoch": 0.6336092856533242, "grad_norm": 0.5243481099851344, "learning_rate": 3.9774328197310334e-05, "loss": 0.4629, "num_tokens": 354413544.0, "step": 464 }, { "epoch": 0.6349748229068874, "grad_norm": 0.547631516931638, "learning_rate": 3.977275615485401e-05, "loss": 0.4762, "num_tokens": 355172169.0, "step": 465 }, { "epoch": 0.6363403601604506, "grad_norm": 0.5384145981441301, "learning_rate": 3.977117869069944e-05, "loss": 0.4631, "num_tokens": 355974796.0, "step": 466 }, { "epoch": 0.6377058974140138, "grad_norm": 0.5188264074469684, "learning_rate": 3.976959580532787e-05, "loss": 0.4871, "num_tokens": 356653202.0, "step": 467 }, { "epoch": 0.639071434667577, "grad_norm": 0.5035764751217029, "learning_rate": 3.976800749922215e-05, "loss": 0.5131, "num_tokens": 357453524.0, "step": 468 }, { "epoch": 0.6404369719211402, "grad_norm": 0.5348127752755452, "learning_rate": 3.976641377286682e-05, "loss": 0.496, "num_tokens": 358205430.0, "step": 469 }, { "epoch": 0.6418025091747034, "grad_norm": 0.5174658053186324, "learning_rate": 3.976481462674804e-05, "loss": 0.4882, "num_tokens": 358969175.0, "step": 470 }, { "epoch": 0.6431680464282666, "grad_norm": 0.4162864931180461, "learning_rate": 3.976321006135363e-05, "loss": 0.4466, "num_tokens": 359732045.0, "step": 471 }, { "epoch": 0.6445335836818298, "grad_norm": 0.4933173177698445, "learning_rate": 3.9761600077173106e-05, "loss": 0.4733, "num_tokens": 360503104.0, "step": 472 }, { "epoch": 0.645899120935393, "grad_norm": 0.5340386594041048, "learning_rate": 3.9759984674697584e-05, "loss": 0.4683, "num_tokens": 361190644.0, "step": 473 }, { "epoch": 0.6472646581889562, "grad_norm": 0.4816816344094416, "learning_rate": 3.975836385441986e-05, "loss": 0.4801, "num_tokens": 361964681.0, "step": 474 }, { "epoch": 0.6486301954425194, "grad_norm": 0.4856620169503082, "learning_rate": 3.975673761683437e-05, "loss": 0.471, "num_tokens": 362712524.0, "step": 475 }, { "epoch": 0.6499957326960826, "grad_norm": 0.4652739290011492, "learning_rate": 3.975510596243721e-05, "loss": 0.4878, "num_tokens": 363444809.0, "step": 476 }, { "epoch": 0.6513612699496458, "grad_norm": 0.4719934721718086, "learning_rate": 3.975346889172613e-05, "loss": 0.4734, "num_tokens": 364223553.0, "step": 477 }, { "epoch": 0.652726807203209, "grad_norm": 0.4119698438919456, "learning_rate": 3.975182640520054e-05, "loss": 0.4598, "num_tokens": 364930413.0, "step": 478 }, { "epoch": 0.6540923444567722, "grad_norm": 0.5745170669898011, "learning_rate": 3.975017850336147e-05, "loss": 0.468, "num_tokens": 365627313.0, "step": 479 }, { "epoch": 0.6554578817103354, "grad_norm": 0.4225381144399828, "learning_rate": 3.974852518671163e-05, "loss": 0.4506, "num_tokens": 366390560.0, "step": 480 }, { "epoch": 0.6568234189638986, "grad_norm": 0.45006636251458837, "learning_rate": 3.9746866455755385e-05, "loss": 0.4756, "num_tokens": 367203785.0, "step": 481 }, { "epoch": 0.6581889562174618, "grad_norm": 0.45740534325007376, "learning_rate": 3.974520231099874e-05, "loss": 0.4931, "num_tokens": 367972346.0, "step": 482 }, { "epoch": 0.659554493471025, "grad_norm": 0.4904581626251555, "learning_rate": 3.9743532752949336e-05, "loss": 0.4871, "num_tokens": 368704810.0, "step": 483 }, { "epoch": 0.6609200307245882, "grad_norm": 0.46389921379307203, "learning_rate": 3.974185778211651e-05, "loss": 0.4862, "num_tokens": 369463250.0, "step": 484 }, { "epoch": 0.6622855679781514, "grad_norm": 0.4733532420883715, "learning_rate": 3.97401773990112e-05, "loss": 0.4741, "num_tokens": 370240745.0, "step": 485 }, { "epoch": 0.6636511052317146, "grad_norm": 0.40897562260181447, "learning_rate": 3.973849160414603e-05, "loss": 0.4511, "num_tokens": 370918463.0, "step": 486 }, { "epoch": 0.6650166424852778, "grad_norm": 0.4527201742376145, "learning_rate": 3.973680039803526e-05, "loss": 0.4628, "num_tokens": 371669825.0, "step": 487 }, { "epoch": 0.666382179738841, "grad_norm": 0.45327278190057124, "learning_rate": 3.9735103781194804e-05, "loss": 0.4665, "num_tokens": 372455157.0, "step": 488 }, { "epoch": 0.6677477169924042, "grad_norm": 0.4538002115295007, "learning_rate": 3.973340175414222e-05, "loss": 0.4878, "num_tokens": 373292474.0, "step": 489 }, { "epoch": 0.6691132542459673, "grad_norm": 0.4379668052864278, "learning_rate": 3.9731694317396745e-05, "loss": 0.4769, "num_tokens": 374081092.0, "step": 490 }, { "epoch": 0.6704787914995306, "grad_norm": 0.4598889492616591, "learning_rate": 3.9729981471479215e-05, "loss": 0.471, "num_tokens": 374866303.0, "step": 491 }, { "epoch": 0.6718443287530937, "grad_norm": 0.4856378106388182, "learning_rate": 3.9728263216912164e-05, "loss": 0.4578, "num_tokens": 375564177.0, "step": 492 }, { "epoch": 0.673209866006657, "grad_norm": 0.48259393891970465, "learning_rate": 3.972653955421975e-05, "loss": 0.4501, "num_tokens": 376352567.0, "step": 493 }, { "epoch": 0.6745754032602201, "grad_norm": 0.5290604352381708, "learning_rate": 3.972481048392779e-05, "loss": 0.4508, "num_tokens": 377105995.0, "step": 494 }, { "epoch": 0.6759409405137834, "grad_norm": 0.4662231652129594, "learning_rate": 3.972307600656374e-05, "loss": 0.4923, "num_tokens": 377839271.0, "step": 495 }, { "epoch": 0.6773064777673465, "grad_norm": 0.5770653450538742, "learning_rate": 3.972133612265673e-05, "loss": 0.4693, "num_tokens": 378614568.0, "step": 496 }, { "epoch": 0.6786720150209098, "grad_norm": 0.47484204146662723, "learning_rate": 3.9719590832737516e-05, "loss": 0.4341, "num_tokens": 379355838.0, "step": 497 }, { "epoch": 0.6800375522744729, "grad_norm": 0.5185500356233075, "learning_rate": 3.97178401373385e-05, "loss": 0.4785, "num_tokens": 380106924.0, "step": 498 }, { "epoch": 0.6814030895280362, "grad_norm": 0.4833135252692059, "learning_rate": 3.9716084036993765e-05, "loss": 0.5023, "num_tokens": 380873032.0, "step": 499 }, { "epoch": 0.6827686267815993, "grad_norm": 0.5500453811554759, "learning_rate": 3.9714322532239e-05, "loss": 0.4551, "num_tokens": 381611002.0, "step": 500 }, { "epoch": 0.6841341640351626, "grad_norm": 0.5332407493334498, "learning_rate": 3.9712555623611575e-05, "loss": 0.4929, "num_tokens": 382420963.0, "step": 501 }, { "epoch": 0.6854997012887258, "grad_norm": 0.47912849913679745, "learning_rate": 3.971078331165049e-05, "loss": 0.4861, "num_tokens": 383164139.0, "step": 502 }, { "epoch": 0.686865238542289, "grad_norm": 0.5050391995579695, "learning_rate": 3.9709005596896416e-05, "loss": 0.4626, "num_tokens": 383873544.0, "step": 503 }, { "epoch": 0.6882307757958522, "grad_norm": 0.48422344880565593, "learning_rate": 3.970722247989164e-05, "loss": 0.4637, "num_tokens": 384672596.0, "step": 504 }, { "epoch": 0.6895963130494154, "grad_norm": 0.5634173446065428, "learning_rate": 3.9705433961180115e-05, "loss": 0.4756, "num_tokens": 385339695.0, "step": 505 }, { "epoch": 0.6909618503029786, "grad_norm": 0.44695432261192397, "learning_rate": 3.970364004130746e-05, "loss": 0.4811, "num_tokens": 386117976.0, "step": 506 }, { "epoch": 0.6923273875565418, "grad_norm": 0.5679365518333147, "learning_rate": 3.9701840720820894e-05, "loss": 0.4841, "num_tokens": 386905846.0, "step": 507 }, { "epoch": 0.693692924810105, "grad_norm": 0.49235080872688747, "learning_rate": 3.970003600026933e-05, "loss": 0.4584, "num_tokens": 387557365.0, "step": 508 }, { "epoch": 0.6950584620636682, "grad_norm": 0.5144472913642976, "learning_rate": 3.96982258802033e-05, "loss": 0.4754, "num_tokens": 388355725.0, "step": 509 }, { "epoch": 0.6964239993172314, "grad_norm": 0.5054907519898635, "learning_rate": 3.9696410361175014e-05, "loss": 0.461, "num_tokens": 389089982.0, "step": 510 }, { "epoch": 0.6977895365707946, "grad_norm": 0.4827245024540012, "learning_rate": 3.969458944373828e-05, "loss": 0.456, "num_tokens": 389842970.0, "step": 511 }, { "epoch": 0.6991550738243578, "grad_norm": 0.5461340405098468, "learning_rate": 3.96927631284486e-05, "loss": 0.5059, "num_tokens": 390674832.0, "step": 512 }, { "epoch": 0.700520611077921, "grad_norm": 0.532700582008266, "learning_rate": 3.96909314158631e-05, "loss": 0.4894, "num_tokens": 391477961.0, "step": 513 }, { "epoch": 0.7018861483314842, "grad_norm": 0.4854496792565718, "learning_rate": 3.968909430654054e-05, "loss": 0.4611, "num_tokens": 392221351.0, "step": 514 }, { "epoch": 0.7032516855850474, "grad_norm": 0.5547230387752475, "learning_rate": 3.968725180104137e-05, "loss": 0.4676, "num_tokens": 392928524.0, "step": 515 }, { "epoch": 0.7046172228386106, "grad_norm": 0.5779438501875088, "learning_rate": 3.968540389992765e-05, "loss": 0.4951, "num_tokens": 393638153.0, "step": 516 }, { "epoch": 0.7059827600921738, "grad_norm": 0.5792255318730097, "learning_rate": 3.9683550603763076e-05, "loss": 0.4722, "num_tokens": 394382313.0, "step": 517 }, { "epoch": 0.707348297345737, "grad_norm": 0.4888588193626351, "learning_rate": 3.968169191311303e-05, "loss": 0.4916, "num_tokens": 395182565.0, "step": 518 }, { "epoch": 0.7087138345993002, "grad_norm": 0.5291413525969777, "learning_rate": 3.967982782854451e-05, "loss": 0.4723, "num_tokens": 395904917.0, "step": 519 }, { "epoch": 0.7100793718528634, "grad_norm": 0.4688728551018723, "learning_rate": 3.9677958350626165e-05, "loss": 0.5068, "num_tokens": 396652902.0, "step": 520 }, { "epoch": 0.7114449091064265, "grad_norm": 0.5771387136811214, "learning_rate": 3.96760834799283e-05, "loss": 0.4702, "num_tokens": 397450311.0, "step": 521 }, { "epoch": 0.7128104463599898, "grad_norm": 0.42393258283633606, "learning_rate": 3.967420321702285e-05, "loss": 0.4521, "num_tokens": 398205440.0, "step": 522 }, { "epoch": 0.714175983613553, "grad_norm": 0.537116869706251, "learning_rate": 3.967231756248339e-05, "loss": 0.4756, "num_tokens": 398927088.0, "step": 523 }, { "epoch": 0.7155415208671162, "grad_norm": 0.453932524617442, "learning_rate": 3.9670426516885176e-05, "loss": 0.4823, "num_tokens": 399686370.0, "step": 524 }, { "epoch": 0.7169070581206793, "grad_norm": 0.5582546770838522, "learning_rate": 3.9668530080805076e-05, "loss": 0.487, "num_tokens": 400413458.0, "step": 525 }, { "epoch": 0.7182725953742426, "grad_norm": 0.49879083777937055, "learning_rate": 3.96666282548216e-05, "loss": 0.4743, "num_tokens": 401149225.0, "step": 526 }, { "epoch": 0.7196381326278057, "grad_norm": 0.5493017119791027, "learning_rate": 3.9664721039514925e-05, "loss": 0.4528, "num_tokens": 401892171.0, "step": 527 }, { "epoch": 0.721003669881369, "grad_norm": 0.5333817000275781, "learning_rate": 3.966280843546685e-05, "loss": 0.4798, "num_tokens": 402646606.0, "step": 528 }, { "epoch": 0.7223692071349321, "grad_norm": 0.6338952493234897, "learning_rate": 3.9660890443260836e-05, "loss": 0.4572, "num_tokens": 403408801.0, "step": 529 }, { "epoch": 0.7237347443884954, "grad_norm": 0.5822438407311255, "learning_rate": 3.965896706348198e-05, "loss": 0.4776, "num_tokens": 404167900.0, "step": 530 }, { "epoch": 0.7251002816420585, "grad_norm": 0.605590502324497, "learning_rate": 3.965703829671701e-05, "loss": 0.4612, "num_tokens": 404939438.0, "step": 531 }, { "epoch": 0.7264658188956218, "grad_norm": 0.5990537685100394, "learning_rate": 3.965510414355432e-05, "loss": 0.4697, "num_tokens": 405642199.0, "step": 532 }, { "epoch": 0.7278313561491849, "grad_norm": 0.4498974776802742, "learning_rate": 3.965316460458394e-05, "loss": 0.4989, "num_tokens": 406455367.0, "step": 533 }, { "epoch": 0.7291968934027482, "grad_norm": 0.5637726182371167, "learning_rate": 3.965121968039753e-05, "loss": 0.4731, "num_tokens": 407269972.0, "step": 534 }, { "epoch": 0.7305624306563113, "grad_norm": 0.4479143265636202, "learning_rate": 3.9649269371588396e-05, "loss": 0.471, "num_tokens": 408061462.0, "step": 535 }, { "epoch": 0.7319279679098746, "grad_norm": 0.5128422761827114, "learning_rate": 3.964731367875151e-05, "loss": 0.4876, "num_tokens": 408786224.0, "step": 536 }, { "epoch": 0.7332935051634377, "grad_norm": 0.47423187352731655, "learning_rate": 3.9645352602483456e-05, "loss": 0.4673, "num_tokens": 409563687.0, "step": 537 }, { "epoch": 0.734659042417001, "grad_norm": 0.5520937509421888, "learning_rate": 3.9643386143382474e-05, "loss": 0.4876, "num_tokens": 410341422.0, "step": 538 }, { "epoch": 0.7360245796705641, "grad_norm": 0.5374416437043311, "learning_rate": 3.964141430204846e-05, "loss": 0.4794, "num_tokens": 411058975.0, "step": 539 }, { "epoch": 0.7373901169241274, "grad_norm": 0.4672122437102688, "learning_rate": 3.9639437079082926e-05, "loss": 0.4742, "num_tokens": 411855286.0, "step": 540 }, { "epoch": 0.7387556541776905, "grad_norm": 0.4826558808858983, "learning_rate": 3.963745447508903e-05, "loss": 0.4926, "num_tokens": 412665147.0, "step": 541 }, { "epoch": 0.7401211914312538, "grad_norm": 0.47948338725696416, "learning_rate": 3.9635466490671594e-05, "loss": 0.487, "num_tokens": 413455885.0, "step": 542 }, { "epoch": 0.7414867286848169, "grad_norm": 0.49318631026028215, "learning_rate": 3.9633473126437054e-05, "loss": 0.4736, "num_tokens": 414187350.0, "step": 543 }, { "epoch": 0.7428522659383802, "grad_norm": 0.48476244057397383, "learning_rate": 3.963147438299351e-05, "loss": 0.4651, "num_tokens": 414928387.0, "step": 544 }, { "epoch": 0.7442178031919433, "grad_norm": 0.478121225952017, "learning_rate": 3.962947026095069e-05, "loss": 0.4838, "num_tokens": 415837678.0, "step": 545 }, { "epoch": 0.7455833404455066, "grad_norm": 0.4211015886895058, "learning_rate": 3.962746076091994e-05, "loss": 0.4847, "num_tokens": 416571596.0, "step": 546 }, { "epoch": 0.7469488776990697, "grad_norm": 0.42789914208683405, "learning_rate": 3.9625445883514316e-05, "loss": 0.4655, "num_tokens": 417331981.0, "step": 547 }, { "epoch": 0.748314414952633, "grad_norm": 0.446523671347052, "learning_rate": 3.9623425629348433e-05, "loss": 0.449, "num_tokens": 418077619.0, "step": 548 }, { "epoch": 0.7496799522061961, "grad_norm": 0.4309541255492509, "learning_rate": 3.96213999990386e-05, "loss": 0.4549, "num_tokens": 418753245.0, "step": 549 }, { "epoch": 0.7510454894597594, "grad_norm": 0.44418998825872996, "learning_rate": 3.961936899320274e-05, "loss": 0.4789, "num_tokens": 419512346.0, "step": 550 }, { "epoch": 0.7524110267133225, "grad_norm": 0.4520091409315816, "learning_rate": 3.961733261246043e-05, "loss": 0.4602, "num_tokens": 420240679.0, "step": 551 }, { "epoch": 0.7537765639668857, "grad_norm": 0.47473760924801794, "learning_rate": 3.961529085743289e-05, "loss": 0.4632, "num_tokens": 420969087.0, "step": 552 }, { "epoch": 0.7551421012204489, "grad_norm": 0.5477137782943853, "learning_rate": 3.961324372874296e-05, "loss": 0.4808, "num_tokens": 421664482.0, "step": 553 }, { "epoch": 0.7565076384740121, "grad_norm": 0.44555670983648543, "learning_rate": 3.9611191227015136e-05, "loss": 0.4578, "num_tokens": 422390017.0, "step": 554 }, { "epoch": 0.7578731757275753, "grad_norm": 0.5425864695529296, "learning_rate": 3.960913335287554e-05, "loss": 0.4704, "num_tokens": 423141101.0, "step": 555 }, { "epoch": 0.7592387129811385, "grad_norm": 0.5642569601308794, "learning_rate": 3.9607070106951943e-05, "loss": 0.47, "num_tokens": 423911241.0, "step": 556 }, { "epoch": 0.7606042502347017, "grad_norm": 0.44926150398138187, "learning_rate": 3.960500148987376e-05, "loss": 0.4478, "num_tokens": 424629204.0, "step": 557 }, { "epoch": 0.7619697874882649, "grad_norm": 0.48874425326038123, "learning_rate": 3.9602927502272025e-05, "loss": 0.4747, "num_tokens": 425332262.0, "step": 558 }, { "epoch": 0.7633353247418281, "grad_norm": 0.40908438161009714, "learning_rate": 3.960084814477943e-05, "loss": 0.4587, "num_tokens": 426135873.0, "step": 559 }, { "epoch": 0.7647008619953913, "grad_norm": 0.5072914605396213, "learning_rate": 3.9598763418030285e-05, "loss": 0.4604, "num_tokens": 426854958.0, "step": 560 }, { "epoch": 0.7660663992489545, "grad_norm": 0.4694137178485916, "learning_rate": 3.959667332266057e-05, "loss": 0.4634, "num_tokens": 427580781.0, "step": 561 }, { "epoch": 0.7674319365025177, "grad_norm": 0.46773364362373393, "learning_rate": 3.959457785930787e-05, "loss": 0.4651, "num_tokens": 428332677.0, "step": 562 }, { "epoch": 0.7687974737560809, "grad_norm": 0.5526178169682466, "learning_rate": 3.959247702861142e-05, "loss": 0.466, "num_tokens": 429196780.0, "step": 563 }, { "epoch": 0.7701630110096441, "grad_norm": 0.40076559540016415, "learning_rate": 3.959037083121209e-05, "loss": 0.463, "num_tokens": 429959680.0, "step": 564 }, { "epoch": 0.7715285482632073, "grad_norm": 0.5191719998871398, "learning_rate": 3.9588259267752396e-05, "loss": 0.4696, "num_tokens": 430737865.0, "step": 565 }, { "epoch": 0.7728940855167705, "grad_norm": 0.414799497762362, "learning_rate": 3.958614233887649e-05, "loss": 0.4659, "num_tokens": 431481363.0, "step": 566 }, { "epoch": 0.7742596227703337, "grad_norm": 0.4860164931245809, "learning_rate": 3.958402004523014e-05, "loss": 0.4728, "num_tokens": 432280602.0, "step": 567 }, { "epoch": 0.7756251600238969, "grad_norm": 0.5081621986490338, "learning_rate": 3.958189238746077e-05, "loss": 0.4745, "num_tokens": 433046763.0, "step": 568 }, { "epoch": 0.7769906972774601, "grad_norm": 0.44823873600296077, "learning_rate": 3.9579759366217444e-05, "loss": 0.4685, "num_tokens": 433830679.0, "step": 569 }, { "epoch": 0.7783562345310233, "grad_norm": 0.47415282125042174, "learning_rate": 3.957762098215085e-05, "loss": 0.4471, "num_tokens": 434524050.0, "step": 570 }, { "epoch": 0.7797217717845865, "grad_norm": 0.5180023129781258, "learning_rate": 3.957547723591332e-05, "loss": 0.4704, "num_tokens": 435330086.0, "step": 571 }, { "epoch": 0.7810873090381497, "grad_norm": 0.5023577936771879, "learning_rate": 3.95733281281588e-05, "loss": 0.4716, "num_tokens": 436139120.0, "step": 572 }, { "epoch": 0.7824528462917129, "grad_norm": 0.47022065560109316, "learning_rate": 3.957117365954292e-05, "loss": 0.4551, "num_tokens": 436860759.0, "step": 573 }, { "epoch": 0.7838183835452761, "grad_norm": 0.45410445321713266, "learning_rate": 3.956901383072289e-05, "loss": 0.4676, "num_tokens": 437661182.0, "step": 574 }, { "epoch": 0.7851839207988393, "grad_norm": 0.49570513614762524, "learning_rate": 3.956684864235759e-05, "loss": 0.469, "num_tokens": 438449444.0, "step": 575 }, { "epoch": 0.7865494580524025, "grad_norm": 0.484642493379703, "learning_rate": 3.956467809510753e-05, "loss": 0.4838, "num_tokens": 439327369.0, "step": 576 }, { "epoch": 0.7879149953059656, "grad_norm": 0.4803400816541847, "learning_rate": 3.9562502189634846e-05, "loss": 0.4781, "num_tokens": 440150905.0, "step": 577 }, { "epoch": 0.7892805325595289, "grad_norm": 0.40315058926848013, "learning_rate": 3.9560320926603306e-05, "loss": 0.4732, "num_tokens": 440966144.0, "step": 578 }, { "epoch": 0.790646069813092, "grad_norm": 0.5100400755862433, "learning_rate": 3.955813430667833e-05, "loss": 0.484, "num_tokens": 441775252.0, "step": 579 }, { "epoch": 0.7920116070666553, "grad_norm": 0.45129938419921367, "learning_rate": 3.9555942330526954e-05, "loss": 0.4784, "num_tokens": 442486015.0, "step": 580 }, { "epoch": 0.7933771443202184, "grad_norm": 0.500803667159872, "learning_rate": 3.955374499881785e-05, "loss": 0.4629, "num_tokens": 443232818.0, "step": 581 }, { "epoch": 0.7947426815737817, "grad_norm": 0.4658117515802527, "learning_rate": 3.955154231222134e-05, "loss": 0.4719, "num_tokens": 443987483.0, "step": 582 }, { "epoch": 0.7961082188273448, "grad_norm": 0.5477763559452692, "learning_rate": 3.954933427140936e-05, "loss": 0.4796, "num_tokens": 444714986.0, "step": 583 }, { "epoch": 0.7974737560809081, "grad_norm": 0.4405682952251638, "learning_rate": 3.9547120877055496e-05, "loss": 0.4751, "num_tokens": 445546278.0, "step": 584 }, { "epoch": 0.7988392933344712, "grad_norm": 0.4887036270464366, "learning_rate": 3.9544902129834945e-05, "loss": 0.4548, "num_tokens": 446283839.0, "step": 585 }, { "epoch": 0.8002048305880345, "grad_norm": 0.5473689496644931, "learning_rate": 3.9542678030424566e-05, "loss": 0.465, "num_tokens": 446984657.0, "step": 586 }, { "epoch": 0.8015703678415976, "grad_norm": 0.45074724497326313, "learning_rate": 3.9540448579502825e-05, "loss": 0.4789, "num_tokens": 447723211.0, "step": 587 }, { "epoch": 0.8029359050951609, "grad_norm": 0.42316796342388496, "learning_rate": 3.953821377774984e-05, "loss": 0.4705, "num_tokens": 448502325.0, "step": 588 }, { "epoch": 0.804301442348724, "grad_norm": 0.4887798895112784, "learning_rate": 3.9535973625847344e-05, "loss": 0.4986, "num_tokens": 449250620.0, "step": 589 }, { "epoch": 0.8056669796022873, "grad_norm": 0.5239204544698899, "learning_rate": 3.953372812447871e-05, "loss": 0.4876, "num_tokens": 450121084.0, "step": 590 }, { "epoch": 0.8070325168558504, "grad_norm": 0.4233961244227466, "learning_rate": 3.953147727432896e-05, "loss": 0.4681, "num_tokens": 450902615.0, "step": 591 }, { "epoch": 0.8083980541094137, "grad_norm": 0.5840141647684108, "learning_rate": 3.95292210760847e-05, "loss": 0.478, "num_tokens": 451655810.0, "step": 592 }, { "epoch": 0.8097635913629768, "grad_norm": 0.46786161437655205, "learning_rate": 3.952695953043422e-05, "loss": 0.4563, "num_tokens": 452387810.0, "step": 593 }, { "epoch": 0.8111291286165401, "grad_norm": 0.5026226114029837, "learning_rate": 3.952469263806742e-05, "loss": 0.4555, "num_tokens": 453195850.0, "step": 594 }, { "epoch": 0.8124946658701032, "grad_norm": 0.45330771740079673, "learning_rate": 3.952242039967582e-05, "loss": 0.4769, "num_tokens": 453924672.0, "step": 595 }, { "epoch": 0.8138602031236665, "grad_norm": 0.6074304679832, "learning_rate": 3.9520142815952594e-05, "loss": 0.4614, "num_tokens": 454674914.0, "step": 596 }, { "epoch": 0.8152257403772296, "grad_norm": 0.39053780270912936, "learning_rate": 3.951785988759253e-05, "loss": 0.4476, "num_tokens": 455425901.0, "step": 597 }, { "epoch": 0.8165912776307929, "grad_norm": 0.5255648616285411, "learning_rate": 3.9515571615292037e-05, "loss": 0.4628, "num_tokens": 456187892.0, "step": 598 }, { "epoch": 0.817956814884356, "grad_norm": 0.4182547831247378, "learning_rate": 3.951327799974919e-05, "loss": 0.479, "num_tokens": 456952090.0, "step": 599 }, { "epoch": 0.8193223521379193, "grad_norm": 0.5410376716955405, "learning_rate": 3.951097904166366e-05, "loss": 0.4837, "num_tokens": 457759460.0, "step": 600 }, { "epoch": 0.8206878893914825, "grad_norm": 0.46071953046220326, "learning_rate": 3.950867474173675e-05, "loss": 0.4832, "num_tokens": 458501714.0, "step": 601 }, { "epoch": 0.8220534266450457, "grad_norm": 0.5702356788111037, "learning_rate": 3.950636510067142e-05, "loss": 0.4796, "num_tokens": 459274331.0, "step": 602 }, { "epoch": 0.8234189638986089, "grad_norm": 0.4961319378463832, "learning_rate": 3.950405011917223e-05, "loss": 0.4782, "num_tokens": 459976740.0, "step": 603 }, { "epoch": 0.8247845011521721, "grad_norm": 0.5541096097930213, "learning_rate": 3.950172979794539e-05, "loss": 0.4634, "num_tokens": 460698062.0, "step": 604 }, { "epoch": 0.8261500384057353, "grad_norm": 0.47393612893391857, "learning_rate": 3.9499404137698725e-05, "loss": 0.4533, "num_tokens": 461482739.0, "step": 605 }, { "epoch": 0.8275155756592985, "grad_norm": 0.5372255034045017, "learning_rate": 3.949707313914169e-05, "loss": 0.4764, "num_tokens": 462328274.0, "step": 606 }, { "epoch": 0.8288811129128617, "grad_norm": 0.4098237624287098, "learning_rate": 3.949473680298537e-05, "loss": 0.4498, "num_tokens": 463065850.0, "step": 607 }, { "epoch": 0.8302466501664248, "grad_norm": 0.5184614454355081, "learning_rate": 3.9492395129942485e-05, "loss": 0.4674, "num_tokens": 463809036.0, "step": 608 }, { "epoch": 0.8316121874199881, "grad_norm": 0.431397348160034, "learning_rate": 3.949004812072738e-05, "loss": 0.4646, "num_tokens": 464564339.0, "step": 609 }, { "epoch": 0.8329777246735512, "grad_norm": 0.4169164799007455, "learning_rate": 3.948769577605603e-05, "loss": 0.4753, "num_tokens": 465398163.0, "step": 610 }, { "epoch": 0.8343432619271145, "grad_norm": 0.4398030701411054, "learning_rate": 3.9485338096646016e-05, "loss": 0.4784, "num_tokens": 466189270.0, "step": 611 }, { "epoch": 0.8357087991806776, "grad_norm": 0.46317171245088, "learning_rate": 3.9482975083216575e-05, "loss": 0.4728, "num_tokens": 467007726.0, "step": 612 }, { "epoch": 0.8370743364342409, "grad_norm": 0.407175158300312, "learning_rate": 3.948060673648857e-05, "loss": 0.4332, "num_tokens": 467673955.0, "step": 613 }, { "epoch": 0.838439873687804, "grad_norm": 0.4564866928397701, "learning_rate": 3.9478233057184456e-05, "loss": 0.4627, "num_tokens": 468434142.0, "step": 614 }, { "epoch": 0.8398054109413673, "grad_norm": 0.440860644759429, "learning_rate": 3.947585404602836e-05, "loss": 0.4718, "num_tokens": 469207037.0, "step": 615 }, { "epoch": 0.8411709481949304, "grad_norm": 0.4288716636414013, "learning_rate": 3.947346970374601e-05, "loss": 0.4573, "num_tokens": 469900771.0, "step": 616 }, { "epoch": 0.8425364854484937, "grad_norm": 0.4257032074256119, "learning_rate": 3.9471080031064765e-05, "loss": 0.4646, "num_tokens": 470685956.0, "step": 617 }, { "epoch": 0.8439020227020568, "grad_norm": 0.4371683388035611, "learning_rate": 3.9468685028713607e-05, "loss": 0.4731, "num_tokens": 471399655.0, "step": 618 }, { "epoch": 0.8452675599556201, "grad_norm": 0.45883867216705343, "learning_rate": 3.946628469742316e-05, "loss": 0.4681, "num_tokens": 472232989.0, "step": 619 }, { "epoch": 0.8466330972091832, "grad_norm": 0.4013662543792506, "learning_rate": 3.9463879037925645e-05, "loss": 0.4515, "num_tokens": 472988964.0, "step": 620 }, { "epoch": 0.8479986344627465, "grad_norm": 0.45296860662131583, "learning_rate": 3.9461468050954935e-05, "loss": 0.4801, "num_tokens": 473815786.0, "step": 621 }, { "epoch": 0.8493641717163096, "grad_norm": 0.5068200630086297, "learning_rate": 3.945905173724651e-05, "loss": 0.4724, "num_tokens": 474550755.0, "step": 622 }, { "epoch": 0.8507297089698729, "grad_norm": 0.4594830294106729, "learning_rate": 3.94566300975375e-05, "loss": 0.4865, "num_tokens": 475315571.0, "step": 623 }, { "epoch": 0.852095246223436, "grad_norm": 0.5416616437117524, "learning_rate": 3.945420313256661e-05, "loss": 0.4621, "num_tokens": 476008579.0, "step": 624 }, { "epoch": 0.8534607834769993, "grad_norm": 0.45924305214084293, "learning_rate": 3.945177084307423e-05, "loss": 0.4851, "num_tokens": 476811527.0, "step": 625 }, { "epoch": 0.8548263207305624, "grad_norm": 0.4985981606089528, "learning_rate": 3.944933322980234e-05, "loss": 0.4746, "num_tokens": 477563397.0, "step": 626 }, { "epoch": 0.8561918579841257, "grad_norm": 0.42033039190705046, "learning_rate": 3.9446890293494554e-05, "loss": 0.4654, "num_tokens": 478362280.0, "step": 627 }, { "epoch": 0.8575573952376888, "grad_norm": 0.5495451374334568, "learning_rate": 3.944444203489609e-05, "loss": 0.4841, "num_tokens": 479230529.0, "step": 628 }, { "epoch": 0.8589229324912521, "grad_norm": 0.5121610609441379, "learning_rate": 3.9441988454753815e-05, "loss": 0.476, "num_tokens": 480010501.0, "step": 629 }, { "epoch": 0.8602884697448152, "grad_norm": 0.5045961512881247, "learning_rate": 3.943952955381622e-05, "loss": 0.4757, "num_tokens": 480747299.0, "step": 630 }, { "epoch": 0.8616540069983785, "grad_norm": 0.46151418626338553, "learning_rate": 3.943706533283339e-05, "loss": 0.4568, "num_tokens": 481482682.0, "step": 631 }, { "epoch": 0.8630195442519416, "grad_norm": 0.45872687981812055, "learning_rate": 3.943459579255707e-05, "loss": 0.4685, "num_tokens": 482249063.0, "step": 632 }, { "epoch": 0.8643850815055049, "grad_norm": 0.44376016343503205, "learning_rate": 3.94321209337406e-05, "loss": 0.4704, "num_tokens": 482979943.0, "step": 633 }, { "epoch": 0.865750618759068, "grad_norm": 0.5360478522443336, "learning_rate": 3.942964075713895e-05, "loss": 0.4894, "num_tokens": 483814857.0, "step": 634 }, { "epoch": 0.8671161560126313, "grad_norm": 0.4727280778718502, "learning_rate": 3.942715526350872e-05, "loss": 0.4873, "num_tokens": 484562127.0, "step": 635 }, { "epoch": 0.8684816932661944, "grad_norm": 0.5296154772954001, "learning_rate": 3.942466445360813e-05, "loss": 0.4649, "num_tokens": 485331121.0, "step": 636 }, { "epoch": 0.8698472305197577, "grad_norm": 0.4074959834339882, "learning_rate": 3.942216832819701e-05, "loss": 0.4569, "num_tokens": 486141888.0, "step": 637 }, { "epoch": 0.8712127677733208, "grad_norm": 0.5475257826938416, "learning_rate": 3.941966688803682e-05, "loss": 0.4676, "num_tokens": 486843415.0, "step": 638 }, { "epoch": 0.872578305026884, "grad_norm": 0.4977712454491238, "learning_rate": 3.941716013389065e-05, "loss": 0.4552, "num_tokens": 487664048.0, "step": 639 }, { "epoch": 0.8739438422804472, "grad_norm": 0.4850642134048509, "learning_rate": 3.941464806652319e-05, "loss": 0.4507, "num_tokens": 488443278.0, "step": 640 }, { "epoch": 0.8753093795340104, "grad_norm": 0.5107342026633191, "learning_rate": 3.941213068670078e-05, "loss": 0.4637, "num_tokens": 489205290.0, "step": 641 }, { "epoch": 0.8766749167875736, "grad_norm": 0.43692925509307423, "learning_rate": 3.940960799519134e-05, "loss": 0.4581, "num_tokens": 489948809.0, "step": 642 }, { "epoch": 0.8780404540411368, "grad_norm": 0.45211154768413797, "learning_rate": 3.940707999276445e-05, "loss": 0.4623, "num_tokens": 490776253.0, "step": 643 }, { "epoch": 0.8794059912947, "grad_norm": 0.5036342478645346, "learning_rate": 3.940454668019129e-05, "loss": 0.4617, "num_tokens": 491574723.0, "step": 644 }, { "epoch": 0.8807715285482632, "grad_norm": 0.5166431210513591, "learning_rate": 3.940200805824468e-05, "loss": 0.4704, "num_tokens": 492268808.0, "step": 645 }, { "epoch": 0.8821370658018264, "grad_norm": 0.49254829108521714, "learning_rate": 3.939946412769901e-05, "loss": 0.4681, "num_tokens": 493102054.0, "step": 646 }, { "epoch": 0.8835026030553896, "grad_norm": 0.5060969054102933, "learning_rate": 3.9396914889330344e-05, "loss": 0.4918, "num_tokens": 493896107.0, "step": 647 }, { "epoch": 0.8848681403089528, "grad_norm": 0.4655312492096488, "learning_rate": 3.939436034391634e-05, "loss": 0.4811, "num_tokens": 494753487.0, "step": 648 }, { "epoch": 0.886233677562516, "grad_norm": 0.48899043597920056, "learning_rate": 3.939180049223629e-05, "loss": 0.4388, "num_tokens": 495525209.0, "step": 649 }, { "epoch": 0.8875992148160792, "grad_norm": 0.5008324358240948, "learning_rate": 3.9389235335071084e-05, "loss": 0.4686, "num_tokens": 496280567.0, "step": 650 }, { "epoch": 0.8889647520696424, "grad_norm": 0.5080439151295014, "learning_rate": 3.938666487320323e-05, "loss": 0.4715, "num_tokens": 497059019.0, "step": 651 }, { "epoch": 0.8903302893232056, "grad_norm": 0.515699739295865, "learning_rate": 3.938408910741689e-05, "loss": 0.4762, "num_tokens": 497862274.0, "step": 652 }, { "epoch": 0.8916958265767688, "grad_norm": 0.469219746878478, "learning_rate": 3.9381508038497794e-05, "loss": 0.4583, "num_tokens": 498687395.0, "step": 653 }, { "epoch": 0.893061363830332, "grad_norm": 0.4946370530387173, "learning_rate": 3.937892166723332e-05, "loss": 0.4667, "num_tokens": 499457325.0, "step": 654 }, { "epoch": 0.8944269010838952, "grad_norm": 0.4804268908486646, "learning_rate": 3.9376329994412476e-05, "loss": 0.4575, "num_tokens": 500233706.0, "step": 655 }, { "epoch": 0.8957924383374584, "grad_norm": 0.47262546262707933, "learning_rate": 3.937373302082585e-05, "loss": 0.4762, "num_tokens": 501092806.0, "step": 656 }, { "epoch": 0.8971579755910216, "grad_norm": 0.4930785061827512, "learning_rate": 3.937113074726567e-05, "loss": 0.4716, "num_tokens": 501834992.0, "step": 657 }, { "epoch": 0.8985235128445848, "grad_norm": 0.4538775057228869, "learning_rate": 3.9368523174525776e-05, "loss": 0.4628, "num_tokens": 502690794.0, "step": 658 }, { "epoch": 0.899889050098148, "grad_norm": 0.47299881278953193, "learning_rate": 3.9365910303401634e-05, "loss": 0.4697, "num_tokens": 503426763.0, "step": 659 }, { "epoch": 0.9012545873517112, "grad_norm": 0.44130549483745996, "learning_rate": 3.9363292134690315e-05, "loss": 0.456, "num_tokens": 504173303.0, "step": 660 }, { "epoch": 0.9026201246052744, "grad_norm": 0.4432540292404384, "learning_rate": 3.9360668669190496e-05, "loss": 0.4613, "num_tokens": 504997886.0, "step": 661 }, { "epoch": 0.9039856618588376, "grad_norm": 0.4603611215814994, "learning_rate": 3.9358039907702494e-05, "loss": 0.4738, "num_tokens": 505734378.0, "step": 662 }, { "epoch": 0.9053511991124008, "grad_norm": 0.45272271381739754, "learning_rate": 3.935540585102824e-05, "loss": 0.464, "num_tokens": 506499275.0, "step": 663 }, { "epoch": 0.906716736365964, "grad_norm": 0.47141393110280416, "learning_rate": 3.935276649997125e-05, "loss": 0.4624, "num_tokens": 507255021.0, "step": 664 }, { "epoch": 0.9080822736195272, "grad_norm": 0.5006185078824834, "learning_rate": 3.9350121855336694e-05, "loss": 0.4642, "num_tokens": 507963839.0, "step": 665 }, { "epoch": 0.9094478108730903, "grad_norm": 0.4444770881198114, "learning_rate": 3.9347471917931316e-05, "loss": 0.4662, "num_tokens": 508688312.0, "step": 666 }, { "epoch": 0.9108133481266536, "grad_norm": 0.5671652764283049, "learning_rate": 3.934481668856352e-05, "loss": 0.4673, "num_tokens": 509488948.0, "step": 667 }, { "epoch": 0.9121788853802167, "grad_norm": 0.3834308374292545, "learning_rate": 3.934215616804329e-05, "loss": 0.4694, "num_tokens": 510270165.0, "step": 668 }, { "epoch": 0.91354442263378, "grad_norm": 0.5472818216510836, "learning_rate": 3.933949035718224e-05, "loss": 0.4672, "num_tokens": 511054964.0, "step": 669 }, { "epoch": 0.9149099598873431, "grad_norm": 0.4860662590090909, "learning_rate": 3.9336819256793594e-05, "loss": 0.4978, "num_tokens": 511833692.0, "step": 670 }, { "epoch": 0.9162754971409064, "grad_norm": 0.526814329939499, "learning_rate": 3.933414286769218e-05, "loss": 0.4629, "num_tokens": 512558816.0, "step": 671 }, { "epoch": 0.9176410343944695, "grad_norm": 0.4994199180860422, "learning_rate": 3.933146119069446e-05, "loss": 0.4348, "num_tokens": 513311507.0, "step": 672 }, { "epoch": 0.9190065716480328, "grad_norm": 0.5089380503124683, "learning_rate": 3.9328774226618485e-05, "loss": 0.4593, "num_tokens": 514067137.0, "step": 673 }, { "epoch": 0.9203721089015959, "grad_norm": 0.5381527773499052, "learning_rate": 3.9326081976283945e-05, "loss": 0.4551, "num_tokens": 514786276.0, "step": 674 }, { "epoch": 0.9217376461551592, "grad_norm": 0.5680261832179578, "learning_rate": 3.932338444051213e-05, "loss": 0.506, "num_tokens": 515568565.0, "step": 675 }, { "epoch": 0.9231031834087223, "grad_norm": 0.44894566247146483, "learning_rate": 3.932068162012592e-05, "loss": 0.458, "num_tokens": 516327301.0, "step": 676 }, { "epoch": 0.9244687206622856, "grad_norm": 0.523804657466907, "learning_rate": 3.931797351594985e-05, "loss": 0.4968, "num_tokens": 517075995.0, "step": 677 }, { "epoch": 0.9258342579158487, "grad_norm": 0.45797667595771496, "learning_rate": 3.931526012881004e-05, "loss": 0.4679, "num_tokens": 517845488.0, "step": 678 }, { "epoch": 0.927199795169412, "grad_norm": 0.44975348878399535, "learning_rate": 3.9312541459534224e-05, "loss": 0.454, "num_tokens": 518561677.0, "step": 679 }, { "epoch": 0.9285653324229751, "grad_norm": 0.5020693074929422, "learning_rate": 3.930981750895174e-05, "loss": 0.4864, "num_tokens": 519310106.0, "step": 680 }, { "epoch": 0.9299308696765384, "grad_norm": 0.4730271258927459, "learning_rate": 3.930708827789357e-05, "loss": 0.4499, "num_tokens": 520024498.0, "step": 681 }, { "epoch": 0.9312964069301015, "grad_norm": 0.47398286077108126, "learning_rate": 3.930435376719227e-05, "loss": 0.4612, "num_tokens": 520781116.0, "step": 682 }, { "epoch": 0.9326619441836648, "grad_norm": 0.4745454833807532, "learning_rate": 3.9301613977682025e-05, "loss": 0.458, "num_tokens": 521535126.0, "step": 683 }, { "epoch": 0.9340274814372279, "grad_norm": 0.43791167832453387, "learning_rate": 3.929886891019862e-05, "loss": 0.4387, "num_tokens": 522310489.0, "step": 684 }, { "epoch": 0.9353930186907912, "grad_norm": 0.4251827305110499, "learning_rate": 3.929611856557947e-05, "loss": 0.4844, "num_tokens": 523133176.0, "step": 685 }, { "epoch": 0.9367585559443543, "grad_norm": 0.4977598678619739, "learning_rate": 3.929336294466357e-05, "loss": 0.468, "num_tokens": 523874078.0, "step": 686 }, { "epoch": 0.9381240931979176, "grad_norm": 0.5063608845226183, "learning_rate": 3.929060204829155e-05, "loss": 0.4923, "num_tokens": 524699884.0, "step": 687 }, { "epoch": 0.9394896304514807, "grad_norm": 0.4571405263405637, "learning_rate": 3.9287835877305636e-05, "loss": 0.4612, "num_tokens": 525514167.0, "step": 688 }, { "epoch": 0.940855167705044, "grad_norm": 0.39397263343350747, "learning_rate": 3.928506443254967e-05, "loss": 0.4629, "num_tokens": 526284972.0, "step": 689 }, { "epoch": 0.9422207049586071, "grad_norm": 0.477452421906753, "learning_rate": 3.928228771486909e-05, "loss": 0.4719, "num_tokens": 527011742.0, "step": 690 }, { "epoch": 0.9435862422121704, "grad_norm": 0.37692017506950853, "learning_rate": 3.9279505725110966e-05, "loss": 0.4569, "num_tokens": 527839006.0, "step": 691 }, { "epoch": 0.9449517794657335, "grad_norm": 0.47170557775935523, "learning_rate": 3.927671846412396e-05, "loss": 0.4864, "num_tokens": 528615716.0, "step": 692 }, { "epoch": 0.9463173167192968, "grad_norm": 0.47333716903271766, "learning_rate": 3.927392593275834e-05, "loss": 0.4639, "num_tokens": 529357369.0, "step": 693 }, { "epoch": 0.9476828539728599, "grad_norm": 0.4251545960114551, "learning_rate": 3.927112813186599e-05, "loss": 0.479, "num_tokens": 530095535.0, "step": 694 }, { "epoch": 0.9490483912264231, "grad_norm": 0.4375566346502254, "learning_rate": 3.926832506230039e-05, "loss": 0.4565, "num_tokens": 530804954.0, "step": 695 }, { "epoch": 0.9504139284799863, "grad_norm": 0.4613176664969383, "learning_rate": 3.926551672491665e-05, "loss": 0.4739, "num_tokens": 531557175.0, "step": 696 }, { "epoch": 0.9517794657335495, "grad_norm": 0.4279803661330633, "learning_rate": 3.926270312057145e-05, "loss": 0.4851, "num_tokens": 532314504.0, "step": 697 }, { "epoch": 0.9531450029871127, "grad_norm": 0.4343846236797042, "learning_rate": 3.925988425012313e-05, "loss": 0.4696, "num_tokens": 533088218.0, "step": 698 }, { "epoch": 0.9545105402406759, "grad_norm": 0.5044653652911529, "learning_rate": 3.925706011443157e-05, "loss": 0.4646, "num_tokens": 533834601.0, "step": 699 }, { "epoch": 0.9558760774942391, "grad_norm": 0.4578598802597193, "learning_rate": 3.925423071435831e-05, "loss": 0.4524, "num_tokens": 534588437.0, "step": 700 }, { "epoch": 0.9572416147478023, "grad_norm": 0.47786552907883006, "learning_rate": 3.925139605076649e-05, "loss": 0.4386, "num_tokens": 535343643.0, "step": 701 }, { "epoch": 0.9586071520013656, "grad_norm": 0.39282176919980105, "learning_rate": 3.924855612452082e-05, "loss": 0.4671, "num_tokens": 536171406.0, "step": 702 }, { "epoch": 0.9599726892549287, "grad_norm": 0.45388867197186383, "learning_rate": 3.9245710936487646e-05, "loss": 0.4488, "num_tokens": 536964518.0, "step": 703 }, { "epoch": 0.961338226508492, "grad_norm": 0.417800644793328, "learning_rate": 3.924286048753492e-05, "loss": 0.4813, "num_tokens": 537756351.0, "step": 704 }, { "epoch": 0.9627037637620551, "grad_norm": 0.47122135888613437, "learning_rate": 3.924000477853218e-05, "loss": 0.4442, "num_tokens": 538470208.0, "step": 705 }, { "epoch": 0.9640693010156184, "grad_norm": 0.46644814746637975, "learning_rate": 3.923714381035058e-05, "loss": 0.4714, "num_tokens": 539278841.0, "step": 706 }, { "epoch": 0.9654348382691815, "grad_norm": 0.46210133663912467, "learning_rate": 3.923427758386289e-05, "loss": 0.4519, "num_tokens": 539923394.0, "step": 707 }, { "epoch": 0.9668003755227448, "grad_norm": 0.4444281456089531, "learning_rate": 3.923140609994346e-05, "loss": 0.4837, "num_tokens": 540659009.0, "step": 708 }, { "epoch": 0.9681659127763079, "grad_norm": 0.44927428779625855, "learning_rate": 3.922852935946825e-05, "loss": 0.4618, "num_tokens": 541383570.0, "step": 709 }, { "epoch": 0.9695314500298712, "grad_norm": 0.5210352720663831, "learning_rate": 3.922564736331485e-05, "loss": 0.4634, "num_tokens": 542145576.0, "step": 710 }, { "epoch": 0.9708969872834343, "grad_norm": 0.4350715097484779, "learning_rate": 3.92227601123624e-05, "loss": 0.4649, "num_tokens": 542939007.0, "step": 711 }, { "epoch": 0.9722625245369976, "grad_norm": 0.6069281662147145, "learning_rate": 3.921986760749171e-05, "loss": 0.5131, "num_tokens": 543712051.0, "step": 712 }, { "epoch": 0.9736280617905607, "grad_norm": 0.5295315170639708, "learning_rate": 3.9216969849585135e-05, "loss": 0.4501, "num_tokens": 544447877.0, "step": 713 }, { "epoch": 0.974993599044124, "grad_norm": 0.641207852895196, "learning_rate": 3.921406683952666e-05, "loss": 0.4719, "num_tokens": 545124888.0, "step": 714 }, { "epoch": 0.9763591362976871, "grad_norm": 0.5358920022480104, "learning_rate": 3.9211158578201874e-05, "loss": 0.4868, "num_tokens": 545890233.0, "step": 715 }, { "epoch": 0.9777246735512504, "grad_norm": 0.6737023987445071, "learning_rate": 3.920824506649796e-05, "loss": 0.4538, "num_tokens": 546603773.0, "step": 716 }, { "epoch": 0.9790902108048135, "grad_norm": 0.5587180642875973, "learning_rate": 3.9205326305303695e-05, "loss": 0.466, "num_tokens": 547453280.0, "step": 717 }, { "epoch": 0.9804557480583768, "grad_norm": 0.5201546028739243, "learning_rate": 3.920240229550948e-05, "loss": 0.4702, "num_tokens": 548172445.0, "step": 718 }, { "epoch": 0.9818212853119399, "grad_norm": 0.548420405251202, "learning_rate": 3.919947303800729e-05, "loss": 0.4698, "num_tokens": 548891111.0, "step": 719 }, { "epoch": 0.9831868225655032, "grad_norm": 0.5034975289957422, "learning_rate": 3.9196538533690734e-05, "loss": 0.4577, "num_tokens": 549684049.0, "step": 720 }, { "epoch": 0.9845523598190663, "grad_norm": 0.49130786407992894, "learning_rate": 3.9193598783454984e-05, "loss": 0.4517, "num_tokens": 550381777.0, "step": 721 }, { "epoch": 0.9859178970726296, "grad_norm": 0.505548515838037, "learning_rate": 3.9190653788196835e-05, "loss": 0.4792, "num_tokens": 551248951.0, "step": 722 }, { "epoch": 0.9872834343261927, "grad_norm": 0.48285392157186124, "learning_rate": 3.918770354881468e-05, "loss": 0.4576, "num_tokens": 551973185.0, "step": 723 }, { "epoch": 0.988648971579756, "grad_norm": 0.4497793253414394, "learning_rate": 3.918474806620851e-05, "loss": 0.4499, "num_tokens": 552740161.0, "step": 724 }, { "epoch": 0.9900145088333191, "grad_norm": 0.42180485730265405, "learning_rate": 3.918178734127991e-05, "loss": 0.4576, "num_tokens": 553526563.0, "step": 725 }, { "epoch": 0.9913800460868823, "grad_norm": 0.4429506359996404, "learning_rate": 3.9178821374932076e-05, "loss": 0.454, "num_tokens": 554245398.0, "step": 726 }, { "epoch": 0.9927455833404455, "grad_norm": 0.4737521353906329, "learning_rate": 3.917585016806979e-05, "loss": 0.4764, "num_tokens": 554985005.0, "step": 727 }, { "epoch": 0.9941111205940087, "grad_norm": 0.5759144895270836, "learning_rate": 3.9172873721599435e-05, "loss": 0.4888, "num_tokens": 555748935.0, "step": 728 }, { "epoch": 0.9954766578475719, "grad_norm": 0.42808185380328745, "learning_rate": 3.9169892036429014e-05, "loss": 0.4567, "num_tokens": 556485927.0, "step": 729 }, { "epoch": 0.9968421951011351, "grad_norm": 0.459789684070546, "learning_rate": 3.9166905113468086e-05, "loss": 0.4556, "num_tokens": 557294696.0, "step": 730 }, { "epoch": 0.9982077323546983, "grad_norm": 0.4383321116532218, "learning_rate": 3.9163912953627855e-05, "loss": 0.4687, "num_tokens": 558033148.0, "step": 731 }, { "epoch": 0.9995732696082615, "grad_norm": 0.491762051044002, "learning_rate": 3.916091555782108e-05, "loss": 0.4609, "num_tokens": 558794487.0, "step": 732 }, { "epoch": 1.0, "grad_norm": 0.491762051044002, "learning_rate": 3.915791292696214e-05, "loss": 0.4708, "num_tokens": 559042136.0, "step": 733 }, { "epoch": 1.0013655372535633, "grad_norm": 0.8121377102555924, "learning_rate": 3.915490506196702e-05, "loss": 0.4341, "num_tokens": 559798424.0, "step": 734 }, { "epoch": 1.0027310745071265, "grad_norm": 0.48925385916402647, "learning_rate": 3.915189196375327e-05, "loss": 0.4472, "num_tokens": 560570310.0, "step": 735 }, { "epoch": 1.0040966117606895, "grad_norm": 0.4998742683721622, "learning_rate": 3.9148873633240076e-05, "loss": 0.435, "num_tokens": 561257328.0, "step": 736 }, { "epoch": 1.0054621490142528, "grad_norm": 0.43667329716661696, "learning_rate": 3.91458500713482e-05, "loss": 0.4525, "num_tokens": 562035083.0, "step": 737 }, { "epoch": 1.006827686267816, "grad_norm": 0.4029338832262982, "learning_rate": 3.914282127899998e-05, "loss": 0.4307, "num_tokens": 562778400.0, "step": 738 }, { "epoch": 1.0081932235213793, "grad_norm": 0.5096707056112747, "learning_rate": 3.9139787257119384e-05, "loss": 0.4285, "num_tokens": 563508421.0, "step": 739 }, { "epoch": 1.0095587607749423, "grad_norm": 0.4325388828596305, "learning_rate": 3.913674800663195e-05, "loss": 0.4461, "num_tokens": 564286882.0, "step": 740 }, { "epoch": 1.0109242980285056, "grad_norm": 0.42105364912743815, "learning_rate": 3.913370352846483e-05, "loss": 0.4483, "num_tokens": 565065347.0, "step": 741 }, { "epoch": 1.0122898352820688, "grad_norm": 0.5033981500398075, "learning_rate": 3.913065382354677e-05, "loss": 0.4248, "num_tokens": 565906059.0, "step": 742 }, { "epoch": 1.013655372535632, "grad_norm": 0.4239943933293519, "learning_rate": 3.912759889280808e-05, "loss": 0.4764, "num_tokens": 566665307.0, "step": 743 }, { "epoch": 1.0150209097891951, "grad_norm": 0.4481233084540988, "learning_rate": 3.9124538737180714e-05, "loss": 0.4352, "num_tokens": 567409795.0, "step": 744 }, { "epoch": 1.0163864470427584, "grad_norm": 0.400340647100443, "learning_rate": 3.9121473357598175e-05, "loss": 0.4359, "num_tokens": 568121859.0, "step": 745 }, { "epoch": 1.0177519842963216, "grad_norm": 0.4029081271748649, "learning_rate": 3.911840275499558e-05, "loss": 0.4237, "num_tokens": 568949621.0, "step": 746 }, { "epoch": 1.0191175215498849, "grad_norm": 0.38391961098355015, "learning_rate": 3.911532693030965e-05, "loss": 0.4412, "num_tokens": 569716202.0, "step": 747 }, { "epoch": 1.020483058803448, "grad_norm": 0.4921075117856507, "learning_rate": 3.911224588447866e-05, "loss": 0.4473, "num_tokens": 570522506.0, "step": 748 }, { "epoch": 1.0218485960570112, "grad_norm": 0.40834526860538434, "learning_rate": 3.910915961844253e-05, "loss": 0.461, "num_tokens": 571313681.0, "step": 749 }, { "epoch": 1.0232141333105744, "grad_norm": 0.4360144132350624, "learning_rate": 3.910606813314273e-05, "loss": 0.4483, "num_tokens": 572099039.0, "step": 750 }, { "epoch": 1.0245796705641377, "grad_norm": 0.4752505332101611, "learning_rate": 3.9102971429522346e-05, "loss": 0.4347, "num_tokens": 572867223.0, "step": 751 }, { "epoch": 1.0259452078177007, "grad_norm": 0.3691526826046018, "learning_rate": 3.9099869508526045e-05, "loss": 0.4214, "num_tokens": 573561839.0, "step": 752 }, { "epoch": 1.027310745071264, "grad_norm": 0.42668700674125704, "learning_rate": 3.9096762371100094e-05, "loss": 0.4382, "num_tokens": 574292688.0, "step": 753 }, { "epoch": 1.0286762823248272, "grad_norm": 0.4247955515014704, "learning_rate": 3.9093650018192334e-05, "loss": 0.4569, "num_tokens": 575096073.0, "step": 754 }, { "epoch": 1.0300418195783905, "grad_norm": 0.4021613835252284, "learning_rate": 3.9090532450752224e-05, "loss": 0.4345, "num_tokens": 575916595.0, "step": 755 }, { "epoch": 1.0314073568319535, "grad_norm": 0.4205416861819565, "learning_rate": 3.9087409669730777e-05, "loss": 0.4373, "num_tokens": 576712138.0, "step": 756 }, { "epoch": 1.0327728940855168, "grad_norm": 0.4064976952602666, "learning_rate": 3.908428167608065e-05, "loss": 0.4373, "num_tokens": 577507085.0, "step": 757 }, { "epoch": 1.03413843133908, "grad_norm": 0.41639213349201437, "learning_rate": 3.908114847075603e-05, "loss": 0.4567, "num_tokens": 578262098.0, "step": 758 }, { "epoch": 1.0355039685926433, "grad_norm": 0.48843043289469396, "learning_rate": 3.907801005471273e-05, "loss": 0.4273, "num_tokens": 579137978.0, "step": 759 }, { "epoch": 1.0368695058462063, "grad_norm": 0.38638136163158737, "learning_rate": 3.907486642890816e-05, "loss": 0.4396, "num_tokens": 579892754.0, "step": 760 }, { "epoch": 1.0382350430997696, "grad_norm": 0.4806005846307003, "learning_rate": 3.907171759430128e-05, "loss": 0.4441, "num_tokens": 580625847.0, "step": 761 }, { "epoch": 1.0396005803533328, "grad_norm": 0.4355648812293466, "learning_rate": 3.906856355185269e-05, "loss": 0.4653, "num_tokens": 581386690.0, "step": 762 }, { "epoch": 1.040966117606896, "grad_norm": 0.5098650176079161, "learning_rate": 3.9065404302524524e-05, "loss": 0.4301, "num_tokens": 582155426.0, "step": 763 }, { "epoch": 1.042331654860459, "grad_norm": 0.41536307753926033, "learning_rate": 3.906223984728055e-05, "loss": 0.4263, "num_tokens": 582902124.0, "step": 764 }, { "epoch": 1.0436971921140223, "grad_norm": 0.46484142949210466, "learning_rate": 3.90590701870861e-05, "loss": 0.438, "num_tokens": 583659563.0, "step": 765 }, { "epoch": 1.0450627293675856, "grad_norm": 0.4244815337737881, "learning_rate": 3.905589532290811e-05, "loss": 0.4422, "num_tokens": 584429340.0, "step": 766 }, { "epoch": 1.0464282666211489, "grad_norm": 0.4357803517791892, "learning_rate": 3.905271525571507e-05, "loss": 0.4633, "num_tokens": 585223552.0, "step": 767 }, { "epoch": 1.0477938038747119, "grad_norm": 0.43000300531895325, "learning_rate": 3.904952998647711e-05, "loss": 0.4483, "num_tokens": 585982078.0, "step": 768 }, { "epoch": 1.0491593411282751, "grad_norm": 0.42892189453706486, "learning_rate": 3.904633951616589e-05, "loss": 0.4301, "num_tokens": 586703202.0, "step": 769 }, { "epoch": 1.0505248783818384, "grad_norm": 0.4471723622570435, "learning_rate": 3.904314384575471e-05, "loss": 0.4644, "num_tokens": 587497570.0, "step": 770 }, { "epoch": 1.0518904156354016, "grad_norm": 0.43159153932380584, "learning_rate": 3.9039942976218415e-05, "loss": 0.4242, "num_tokens": 588184932.0, "step": 771 }, { "epoch": 1.0532559528889647, "grad_norm": 0.4535444144801732, "learning_rate": 3.903673690853345e-05, "loss": 0.4311, "num_tokens": 588950129.0, "step": 772 }, { "epoch": 1.054621490142528, "grad_norm": 0.3901978909703293, "learning_rate": 3.903352564367785e-05, "loss": 0.433, "num_tokens": 589714638.0, "step": 773 }, { "epoch": 1.0559870273960912, "grad_norm": 0.43691445927940825, "learning_rate": 3.903030918263124e-05, "loss": 0.4254, "num_tokens": 590509231.0, "step": 774 }, { "epoch": 1.0573525646496544, "grad_norm": 0.476562361943942, "learning_rate": 3.902708752637483e-05, "loss": 0.4318, "num_tokens": 591277832.0, "step": 775 }, { "epoch": 1.0587181019032175, "grad_norm": 0.366116729639446, "learning_rate": 3.902386067589138e-05, "loss": 0.4149, "num_tokens": 591950717.0, "step": 776 }, { "epoch": 1.0600836391567807, "grad_norm": 0.47902465500635705, "learning_rate": 3.902062863216528e-05, "loss": 0.469, "num_tokens": 592801890.0, "step": 777 }, { "epoch": 1.061449176410344, "grad_norm": 0.45027238647074785, "learning_rate": 3.9017391396182496e-05, "loss": 0.4337, "num_tokens": 593614528.0, "step": 778 }, { "epoch": 1.0628147136639072, "grad_norm": 0.4116999276930426, "learning_rate": 3.901414896893055e-05, "loss": 0.4178, "num_tokens": 594406685.0, "step": 779 }, { "epoch": 1.0641802509174703, "grad_norm": 0.5187487742924063, "learning_rate": 3.9010901351398576e-05, "loss": 0.4441, "num_tokens": 595158188.0, "step": 780 }, { "epoch": 1.0655457881710335, "grad_norm": 0.40270885275956736, "learning_rate": 3.9007648544577276e-05, "loss": 0.4516, "num_tokens": 595927943.0, "step": 781 }, { "epoch": 1.0669113254245968, "grad_norm": 0.48221252053495545, "learning_rate": 3.9004390549458944e-05, "loss": 0.4436, "num_tokens": 596795301.0, "step": 782 }, { "epoch": 1.06827686267816, "grad_norm": 0.4635293985427226, "learning_rate": 3.900112736703746e-05, "loss": 0.4368, "num_tokens": 597598411.0, "step": 783 }, { "epoch": 1.069642399931723, "grad_norm": 0.43518940260174493, "learning_rate": 3.899785899830826e-05, "loss": 0.4505, "num_tokens": 598380624.0, "step": 784 }, { "epoch": 1.0710079371852863, "grad_norm": 0.46859958263725865, "learning_rate": 3.899458544426841e-05, "loss": 0.4234, "num_tokens": 599130592.0, "step": 785 }, { "epoch": 1.0723734744388496, "grad_norm": 0.46605131616102197, "learning_rate": 3.89913067059165e-05, "loss": 0.4311, "num_tokens": 599807665.0, "step": 786 }, { "epoch": 1.0737390116924128, "grad_norm": 0.48951345936268387, "learning_rate": 3.898802278425275e-05, "loss": 0.4271, "num_tokens": 600571353.0, "step": 787 }, { "epoch": 1.0751045489459758, "grad_norm": 0.46485367797377053, "learning_rate": 3.8984733680278936e-05, "loss": 0.453, "num_tokens": 601282480.0, "step": 788 }, { "epoch": 1.076470086199539, "grad_norm": 0.5152546478613031, "learning_rate": 3.8981439394998425e-05, "loss": 0.4524, "num_tokens": 602059188.0, "step": 789 }, { "epoch": 1.0778356234531024, "grad_norm": 0.47371003212034984, "learning_rate": 3.8978139929416156e-05, "loss": 0.4613, "num_tokens": 602849615.0, "step": 790 }, { "epoch": 1.0792011607066656, "grad_norm": 0.3747290439942359, "learning_rate": 3.8974835284538656e-05, "loss": 0.4399, "num_tokens": 603610326.0, "step": 791 }, { "epoch": 1.0805666979602286, "grad_norm": 0.5330482956804511, "learning_rate": 3.897152546137403e-05, "loss": 0.4285, "num_tokens": 604338380.0, "step": 792 }, { "epoch": 1.081932235213792, "grad_norm": 0.3866145933274817, "learning_rate": 3.8968210460931957e-05, "loss": 0.4507, "num_tokens": 605077064.0, "step": 793 }, { "epoch": 1.0832977724673551, "grad_norm": 0.5178576297062101, "learning_rate": 3.8964890284223704e-05, "loss": 0.4426, "num_tokens": 605809585.0, "step": 794 }, { "epoch": 1.0846633097209184, "grad_norm": 0.4205527739424006, "learning_rate": 3.896156493226211e-05, "loss": 0.428, "num_tokens": 606598541.0, "step": 795 }, { "epoch": 1.0860288469744814, "grad_norm": 0.5871431330998984, "learning_rate": 3.895823440606161e-05, "loss": 0.4288, "num_tokens": 607302298.0, "step": 796 }, { "epoch": 1.0873943842280447, "grad_norm": 0.39698409163612886, "learning_rate": 3.895489870663818e-05, "loss": 0.4479, "num_tokens": 608046682.0, "step": 797 }, { "epoch": 1.088759921481608, "grad_norm": 0.5455861157607177, "learning_rate": 3.895155783500941e-05, "loss": 0.459, "num_tokens": 608810756.0, "step": 798 }, { "epoch": 1.0901254587351712, "grad_norm": 0.5091232577364292, "learning_rate": 3.894821179219446e-05, "loss": 0.4506, "num_tokens": 609603922.0, "step": 799 }, { "epoch": 1.0914909959887342, "grad_norm": 0.485837240742762, "learning_rate": 3.894486057921405e-05, "loss": 0.4191, "num_tokens": 610306230.0, "step": 800 }, { "epoch": 1.0928565332422975, "grad_norm": 0.46427364587040015, "learning_rate": 3.89415041970905e-05, "loss": 0.4275, "num_tokens": 611015690.0, "step": 801 }, { "epoch": 1.0942220704958607, "grad_norm": 0.41982897269812797, "learning_rate": 3.893814264684769e-05, "loss": 0.4256, "num_tokens": 611790112.0, "step": 802 }, { "epoch": 1.095587607749424, "grad_norm": 0.4643508381657446, "learning_rate": 3.893477592951109e-05, "loss": 0.44, "num_tokens": 612578023.0, "step": 803 }, { "epoch": 1.096953145002987, "grad_norm": 0.4211445911137373, "learning_rate": 3.893140404610773e-05, "loss": 0.4622, "num_tokens": 613357473.0, "step": 804 }, { "epoch": 1.0983186822565503, "grad_norm": 0.4375074098773992, "learning_rate": 3.8928026997666246e-05, "loss": 0.4256, "num_tokens": 614079911.0, "step": 805 }, { "epoch": 1.0996842195101135, "grad_norm": 0.3930616669478453, "learning_rate": 3.8924644785216805e-05, "loss": 0.4411, "num_tokens": 614976527.0, "step": 806 }, { "epoch": 1.1010497567636768, "grad_norm": 0.4299246450100229, "learning_rate": 3.892125740979119e-05, "loss": 0.4123, "num_tokens": 615739303.0, "step": 807 }, { "epoch": 1.1024152940172398, "grad_norm": 0.4131635504804379, "learning_rate": 3.891786487242273e-05, "loss": 0.435, "num_tokens": 616511728.0, "step": 808 }, { "epoch": 1.103780831270803, "grad_norm": 0.4642327007214311, "learning_rate": 3.891446717414635e-05, "loss": 0.4563, "num_tokens": 617274800.0, "step": 809 }, { "epoch": 1.1051463685243663, "grad_norm": 0.4519143345097616, "learning_rate": 3.891106431599854e-05, "loss": 0.4356, "num_tokens": 618008106.0, "step": 810 }, { "epoch": 1.1065119057779296, "grad_norm": 0.42509294931530206, "learning_rate": 3.890765629901737e-05, "loss": 0.4316, "num_tokens": 618700596.0, "step": 811 }, { "epoch": 1.1078774430314926, "grad_norm": 0.5297898938250063, "learning_rate": 3.8904243124242464e-05, "loss": 0.447, "num_tokens": 619527542.0, "step": 812 }, { "epoch": 1.1092429802850559, "grad_norm": 0.4228866175816603, "learning_rate": 3.890082479271504e-05, "loss": 0.428, "num_tokens": 620334333.0, "step": 813 }, { "epoch": 1.1106085175386191, "grad_norm": 0.4469784903687682, "learning_rate": 3.8897401305477886e-05, "loss": 0.4325, "num_tokens": 621031584.0, "step": 814 }, { "epoch": 1.1119740547921824, "grad_norm": 0.4588472320623494, "learning_rate": 3.889397266357536e-05, "loss": 0.4331, "num_tokens": 621769147.0, "step": 815 }, { "epoch": 1.1133395920457454, "grad_norm": 0.4720956371733323, "learning_rate": 3.889053886805339e-05, "loss": 0.4238, "num_tokens": 622519658.0, "step": 816 }, { "epoch": 1.1147051292993087, "grad_norm": 0.4767496734490056, "learning_rate": 3.8887099919959475e-05, "loss": 0.4633, "num_tokens": 623293585.0, "step": 817 }, { "epoch": 1.116070666552872, "grad_norm": 0.43225174891916784, "learning_rate": 3.8883655820342694e-05, "loss": 0.4431, "num_tokens": 624144270.0, "step": 818 }, { "epoch": 1.1174362038064352, "grad_norm": 0.5443119091285017, "learning_rate": 3.8880206570253694e-05, "loss": 0.45, "num_tokens": 624944787.0, "step": 819 }, { "epoch": 1.1188017410599982, "grad_norm": 0.4022850942634031, "learning_rate": 3.887675217074468e-05, "loss": 0.4467, "num_tokens": 625748395.0, "step": 820 }, { "epoch": 1.1201672783135614, "grad_norm": 0.5389410162975303, "learning_rate": 3.887329262286946e-05, "loss": 0.4259, "num_tokens": 626474429.0, "step": 821 }, { "epoch": 1.1215328155671247, "grad_norm": 0.47675303378017997, "learning_rate": 3.886982792768338e-05, "loss": 0.4463, "num_tokens": 627243813.0, "step": 822 }, { "epoch": 1.122898352820688, "grad_norm": 0.5168722924197839, "learning_rate": 3.8866358086243366e-05, "loss": 0.439, "num_tokens": 628020340.0, "step": 823 }, { "epoch": 1.1242638900742512, "grad_norm": 0.43599995819779147, "learning_rate": 3.886288309960792e-05, "loss": 0.4393, "num_tokens": 628762821.0, "step": 824 }, { "epoch": 1.1256294273278142, "grad_norm": 0.48943027087360613, "learning_rate": 3.8859402968837106e-05, "loss": 0.4186, "num_tokens": 629539587.0, "step": 825 }, { "epoch": 1.1269949645813775, "grad_norm": 0.4299872780879716, "learning_rate": 3.885591769499256e-05, "loss": 0.4577, "num_tokens": 630258493.0, "step": 826 }, { "epoch": 1.1283605018349407, "grad_norm": 0.4633283371187902, "learning_rate": 3.885242727913751e-05, "loss": 0.4359, "num_tokens": 631009914.0, "step": 827 }, { "epoch": 1.1297260390885038, "grad_norm": 0.4798398870841648, "learning_rate": 3.8848931722336705e-05, "loss": 0.4386, "num_tokens": 631759759.0, "step": 828 }, { "epoch": 1.131091576342067, "grad_norm": 0.4335774211088016, "learning_rate": 3.8845431025656496e-05, "loss": 0.4208, "num_tokens": 632434149.0, "step": 829 }, { "epoch": 1.1324571135956303, "grad_norm": 0.5344776792060464, "learning_rate": 3.884192519016479e-05, "loss": 0.401, "num_tokens": 633144885.0, "step": 830 }, { "epoch": 1.1338226508491935, "grad_norm": 0.43533898329344556, "learning_rate": 3.8838414216931076e-05, "loss": 0.4203, "num_tokens": 633828564.0, "step": 831 }, { "epoch": 1.1351881881027568, "grad_norm": 0.4594259524651011, "learning_rate": 3.88348981070264e-05, "loss": 0.4325, "num_tokens": 634554740.0, "step": 832 }, { "epoch": 1.1365537253563198, "grad_norm": 0.41015553761419493, "learning_rate": 3.883137686152336e-05, "loss": 0.4362, "num_tokens": 635271883.0, "step": 833 }, { "epoch": 1.137919262609883, "grad_norm": 0.45647470586570504, "learning_rate": 3.882785048149615e-05, "loss": 0.4466, "num_tokens": 635992085.0, "step": 834 }, { "epoch": 1.1392847998634463, "grad_norm": 0.38960898917300535, "learning_rate": 3.882431896802051e-05, "loss": 0.4502, "num_tokens": 636717912.0, "step": 835 }, { "epoch": 1.1406503371170094, "grad_norm": 0.4757193592469901, "learning_rate": 3.882078232217374e-05, "loss": 0.4292, "num_tokens": 637491230.0, "step": 836 }, { "epoch": 1.1420158743705726, "grad_norm": 0.41761609249347204, "learning_rate": 3.881724054503474e-05, "loss": 0.4363, "num_tokens": 638285530.0, "step": 837 }, { "epoch": 1.1433814116241359, "grad_norm": 0.444201813286371, "learning_rate": 3.881369363768394e-05, "loss": 0.4291, "num_tokens": 639051804.0, "step": 838 }, { "epoch": 1.1447469488776991, "grad_norm": 0.4187709065338737, "learning_rate": 3.8810141601203354e-05, "loss": 0.4467, "num_tokens": 639875837.0, "step": 839 }, { "epoch": 1.1461124861312624, "grad_norm": 0.4181100114231964, "learning_rate": 3.880658443667655e-05, "loss": 0.4467, "num_tokens": 640615235.0, "step": 840 }, { "epoch": 1.1474780233848254, "grad_norm": 0.42295200137203803, "learning_rate": 3.880302214518866e-05, "loss": 0.4361, "num_tokens": 641400053.0, "step": 841 }, { "epoch": 1.1488435606383887, "grad_norm": 0.3876021377308778, "learning_rate": 3.87994547278264e-05, "loss": 0.4299, "num_tokens": 642184918.0, "step": 842 }, { "epoch": 1.150209097891952, "grad_norm": 0.41770132810374416, "learning_rate": 3.879588218567802e-05, "loss": 0.4397, "num_tokens": 642971503.0, "step": 843 }, { "epoch": 1.151574635145515, "grad_norm": 0.39565484550611607, "learning_rate": 3.879230451983335e-05, "loss": 0.4572, "num_tokens": 643755043.0, "step": 844 }, { "epoch": 1.1529401723990782, "grad_norm": 0.3960609115180172, "learning_rate": 3.878872173138379e-05, "loss": 0.4496, "num_tokens": 644582058.0, "step": 845 }, { "epoch": 1.1543057096526415, "grad_norm": 0.40577196971555873, "learning_rate": 3.878513382142228e-05, "loss": 0.4272, "num_tokens": 645381828.0, "step": 846 }, { "epoch": 1.1556712469062047, "grad_norm": 0.4167109295380741, "learning_rate": 3.878154079104335e-05, "loss": 0.4304, "num_tokens": 646206644.0, "step": 847 }, { "epoch": 1.157036784159768, "grad_norm": 0.3938793514251331, "learning_rate": 3.8777942641343066e-05, "loss": 0.4393, "num_tokens": 646917687.0, "step": 848 }, { "epoch": 1.158402321413331, "grad_norm": 0.40972632868318126, "learning_rate": 3.877433937341908e-05, "loss": 0.4427, "num_tokens": 647662144.0, "step": 849 }, { "epoch": 1.1597678586668942, "grad_norm": 0.4178142564526794, "learning_rate": 3.877073098837058e-05, "loss": 0.4476, "num_tokens": 648424916.0, "step": 850 }, { "epoch": 1.1611333959204575, "grad_norm": 0.4432162862897293, "learning_rate": 3.8767117487298336e-05, "loss": 0.4575, "num_tokens": 649156600.0, "step": 851 }, { "epoch": 1.1624989331740205, "grad_norm": 0.468271160189557, "learning_rate": 3.876349887130467e-05, "loss": 0.4508, "num_tokens": 649917916.0, "step": 852 }, { "epoch": 1.1638644704275838, "grad_norm": 0.4253071064729915, "learning_rate": 3.8759875141493465e-05, "loss": 0.4333, "num_tokens": 650642497.0, "step": 853 }, { "epoch": 1.165230007681147, "grad_norm": 0.47090039849794796, "learning_rate": 3.8756246298970164e-05, "loss": 0.44, "num_tokens": 651386219.0, "step": 854 }, { "epoch": 1.1665955449347103, "grad_norm": 0.46039044141037355, "learning_rate": 3.875261234484176e-05, "loss": 0.46, "num_tokens": 652102809.0, "step": 855 }, { "epoch": 1.1679610821882735, "grad_norm": 0.4279016766376169, "learning_rate": 3.874897328021683e-05, "loss": 0.4373, "num_tokens": 652873368.0, "step": 856 }, { "epoch": 1.1693266194418366, "grad_norm": 0.46950933283171165, "learning_rate": 3.8745329106205486e-05, "loss": 0.4581, "num_tokens": 653648340.0, "step": 857 }, { "epoch": 1.1706921566953998, "grad_norm": 0.3877482601251663, "learning_rate": 3.8741679823919415e-05, "loss": 0.4256, "num_tokens": 654442924.0, "step": 858 }, { "epoch": 1.172057693948963, "grad_norm": 0.4548929926344495, "learning_rate": 3.8738025434471844e-05, "loss": 0.4309, "num_tokens": 655195472.0, "step": 859 }, { "epoch": 1.1734232312025263, "grad_norm": 0.40651013915134426, "learning_rate": 3.8734365938977585e-05, "loss": 0.4519, "num_tokens": 655973657.0, "step": 860 }, { "epoch": 1.1747887684560894, "grad_norm": 0.5496614428240026, "learning_rate": 3.8730701338552965e-05, "loss": 0.4672, "num_tokens": 656753462.0, "step": 861 }, { "epoch": 1.1761543057096526, "grad_norm": 0.4445061523416159, "learning_rate": 3.8727031634315925e-05, "loss": 0.4355, "num_tokens": 657577877.0, "step": 862 }, { "epoch": 1.1775198429632159, "grad_norm": 0.5542265070479905, "learning_rate": 3.872335682738592e-05, "loss": 0.4322, "num_tokens": 658374789.0, "step": 863 }, { "epoch": 1.1788853802167791, "grad_norm": 0.4230868759070302, "learning_rate": 3.871967691888397e-05, "loss": 0.4146, "num_tokens": 659115923.0, "step": 864 }, { "epoch": 1.1802509174703422, "grad_norm": 0.4832209574318927, "learning_rate": 3.871599190993266e-05, "loss": 0.4302, "num_tokens": 659873530.0, "step": 865 }, { "epoch": 1.1816164547239054, "grad_norm": 0.4199172319966955, "learning_rate": 3.871230180165612e-05, "loss": 0.4614, "num_tokens": 660687369.0, "step": 866 }, { "epoch": 1.1829819919774687, "grad_norm": 0.5174595748169207, "learning_rate": 3.870860659518006e-05, "loss": 0.4431, "num_tokens": 661457954.0, "step": 867 }, { "epoch": 1.184347529231032, "grad_norm": 0.4067481927902078, "learning_rate": 3.870490629163171e-05, "loss": 0.4623, "num_tokens": 662210300.0, "step": 868 }, { "epoch": 1.185713066484595, "grad_norm": 0.4376650487717711, "learning_rate": 3.870120089213988e-05, "loss": 0.4239, "num_tokens": 662931119.0, "step": 869 }, { "epoch": 1.1870786037381582, "grad_norm": 0.48978368790912935, "learning_rate": 3.869749039783492e-05, "loss": 0.4428, "num_tokens": 663663455.0, "step": 870 }, { "epoch": 1.1884441409917215, "grad_norm": 0.4566472971174033, "learning_rate": 3.869377480984876e-05, "loss": 0.4339, "num_tokens": 664534798.0, "step": 871 }, { "epoch": 1.1898096782452847, "grad_norm": 0.5283951421896452, "learning_rate": 3.869005412931484e-05, "loss": 0.4375, "num_tokens": 665300874.0, "step": 872 }, { "epoch": 1.1911752154988478, "grad_norm": 0.4959818983489737, "learning_rate": 3.868632835736819e-05, "loss": 0.4698, "num_tokens": 666005267.0, "step": 873 }, { "epoch": 1.192540752752411, "grad_norm": 0.45650222761977455, "learning_rate": 3.868259749514538e-05, "loss": 0.4289, "num_tokens": 666734740.0, "step": 874 }, { "epoch": 1.1939062900059743, "grad_norm": 0.5194679540677515, "learning_rate": 3.867886154378454e-05, "loss": 0.4374, "num_tokens": 667492731.0, "step": 875 }, { "epoch": 1.1952718272595375, "grad_norm": 0.4618108879091837, "learning_rate": 3.8675120504425345e-05, "loss": 0.4191, "num_tokens": 668243503.0, "step": 876 }, { "epoch": 1.1966373645131005, "grad_norm": 0.45676358645265014, "learning_rate": 3.867137437820902e-05, "loss": 0.4478, "num_tokens": 669092752.0, "step": 877 }, { "epoch": 1.1980029017666638, "grad_norm": 0.5870598423551459, "learning_rate": 3.8667623166278346e-05, "loss": 0.4544, "num_tokens": 669851085.0, "step": 878 }, { "epoch": 1.199368439020227, "grad_norm": 0.4127785098037018, "learning_rate": 3.866386686977766e-05, "loss": 0.4205, "num_tokens": 670602523.0, "step": 879 }, { "epoch": 1.2007339762737903, "grad_norm": 0.40290506911728385, "learning_rate": 3.866010548985285e-05, "loss": 0.4371, "num_tokens": 671411552.0, "step": 880 }, { "epoch": 1.2020995135273533, "grad_norm": 0.44799030782748234, "learning_rate": 3.865633902765133e-05, "loss": 0.4378, "num_tokens": 672193651.0, "step": 881 }, { "epoch": 1.2034650507809166, "grad_norm": 0.42075684555788107, "learning_rate": 3.86525674843221e-05, "loss": 0.4494, "num_tokens": 672976492.0, "step": 882 }, { "epoch": 1.2048305880344798, "grad_norm": 0.4340245137001704, "learning_rate": 3.8648790861015694e-05, "loss": 0.4292, "num_tokens": 673659643.0, "step": 883 }, { "epoch": 1.206196125288043, "grad_norm": 0.4569419916294036, "learning_rate": 3.8645009158884195e-05, "loss": 0.4221, "num_tokens": 674453313.0, "step": 884 }, { "epoch": 1.2075616625416061, "grad_norm": 0.47238660303137436, "learning_rate": 3.864122237908123e-05, "loss": 0.4277, "num_tokens": 675212051.0, "step": 885 }, { "epoch": 1.2089271997951694, "grad_norm": 0.43486634198241764, "learning_rate": 3.8637430522762e-05, "loss": 0.4634, "num_tokens": 675930971.0, "step": 886 }, { "epoch": 1.2102927370487326, "grad_norm": 0.44652830595554277, "learning_rate": 3.863363359108322e-05, "loss": 0.4533, "num_tokens": 676632984.0, "step": 887 }, { "epoch": 1.211658274302296, "grad_norm": 0.5059655902086873, "learning_rate": 3.862983158520316e-05, "loss": 0.4294, "num_tokens": 677333340.0, "step": 888 }, { "epoch": 1.213023811555859, "grad_norm": 0.47386129539239985, "learning_rate": 3.862602450628167e-05, "loss": 0.4538, "num_tokens": 678129113.0, "step": 889 }, { "epoch": 1.2143893488094222, "grad_norm": 0.4488491149292043, "learning_rate": 3.862221235548012e-05, "loss": 0.4435, "num_tokens": 678869608.0, "step": 890 }, { "epoch": 1.2157548860629854, "grad_norm": 0.4785526547547321, "learning_rate": 3.861839513396142e-05, "loss": 0.4456, "num_tokens": 679553554.0, "step": 891 }, { "epoch": 1.2171204233165487, "grad_norm": 0.45549715221529313, "learning_rate": 3.8614572842890046e-05, "loss": 0.4352, "num_tokens": 680422050.0, "step": 892 }, { "epoch": 1.2184859605701117, "grad_norm": 0.4015480692509989, "learning_rate": 3.8610745483432016e-05, "loss": 0.4488, "num_tokens": 681202885.0, "step": 893 }, { "epoch": 1.219851497823675, "grad_norm": 0.4332695923152951, "learning_rate": 3.860691305675489e-05, "loss": 0.4321, "num_tokens": 681955579.0, "step": 894 }, { "epoch": 1.2212170350772382, "grad_norm": 0.398674079102588, "learning_rate": 3.860307556402777e-05, "loss": 0.4258, "num_tokens": 682668266.0, "step": 895 }, { "epoch": 1.2225825723308015, "grad_norm": 0.5087434629325828, "learning_rate": 3.8599233006421314e-05, "loss": 0.4409, "num_tokens": 683406434.0, "step": 896 }, { "epoch": 1.2239481095843645, "grad_norm": 0.4012836744986672, "learning_rate": 3.859538538510772e-05, "loss": 0.4392, "num_tokens": 684209879.0, "step": 897 }, { "epoch": 1.2253136468379278, "grad_norm": 0.4545753048235115, "learning_rate": 3.859153270126074e-05, "loss": 0.4402, "num_tokens": 684902408.0, "step": 898 }, { "epoch": 1.226679184091491, "grad_norm": 0.3398434644682556, "learning_rate": 3.858767495605564e-05, "loss": 0.4414, "num_tokens": 685761392.0, "step": 899 }, { "epoch": 1.2280447213450543, "grad_norm": 0.40311335487469535, "learning_rate": 3.858381215066926e-05, "loss": 0.4149, "num_tokens": 686535283.0, "step": 900 }, { "epoch": 1.2294102585986173, "grad_norm": 0.38160498777824103, "learning_rate": 3.8579944286279986e-05, "loss": 0.4415, "num_tokens": 687252380.0, "step": 901 }, { "epoch": 1.2307757958521806, "grad_norm": 0.3996822913492072, "learning_rate": 3.857607136406772e-05, "loss": 0.4332, "num_tokens": 688013226.0, "step": 902 }, { "epoch": 1.2321413331057438, "grad_norm": 0.3759956708690642, "learning_rate": 3.857219338521393e-05, "loss": 0.4464, "num_tokens": 688722910.0, "step": 903 }, { "epoch": 1.233506870359307, "grad_norm": 0.43443453782912317, "learning_rate": 3.856831035090162e-05, "loss": 0.4624, "num_tokens": 689480695.0, "step": 904 }, { "epoch": 1.23487240761287, "grad_norm": 0.4614138134219246, "learning_rate": 3.8564422262315336e-05, "loss": 0.45, "num_tokens": 690231910.0, "step": 905 }, { "epoch": 1.2362379448664333, "grad_norm": 0.4271374089203699, "learning_rate": 3.856052912064116e-05, "loss": 0.4489, "num_tokens": 691017158.0, "step": 906 }, { "epoch": 1.2376034821199966, "grad_norm": 0.46070931072814525, "learning_rate": 3.855663092706673e-05, "loss": 0.4406, "num_tokens": 691701734.0, "step": 907 }, { "epoch": 1.2389690193735599, "grad_norm": 0.4682872171372227, "learning_rate": 3.8552727682781205e-05, "loss": 0.4319, "num_tokens": 692576491.0, "step": 908 }, { "epoch": 1.2403345566271229, "grad_norm": 0.4346757985124618, "learning_rate": 3.854881938897531e-05, "loss": 0.4458, "num_tokens": 693393118.0, "step": 909 }, { "epoch": 1.2417000938806861, "grad_norm": 0.3891559311446127, "learning_rate": 3.854490604684128e-05, "loss": 0.4243, "num_tokens": 694149888.0, "step": 910 }, { "epoch": 1.2430656311342494, "grad_norm": 0.43613243139894864, "learning_rate": 3.8540987657572914e-05, "loss": 0.442, "num_tokens": 694908938.0, "step": 911 }, { "epoch": 1.2444311683878126, "grad_norm": 0.372365224152791, "learning_rate": 3.8537064222365545e-05, "loss": 0.4301, "num_tokens": 695717201.0, "step": 912 }, { "epoch": 1.245796705641376, "grad_norm": 0.3917650641932707, "learning_rate": 3.8533135742416045e-05, "loss": 0.4217, "num_tokens": 696518422.0, "step": 913 }, { "epoch": 1.247162242894939, "grad_norm": 0.37545353704274637, "learning_rate": 3.8529202218922823e-05, "loss": 0.4221, "num_tokens": 697253730.0, "step": 914 }, { "epoch": 1.2485277801485022, "grad_norm": 0.4393876897667239, "learning_rate": 3.852526365308581e-05, "loss": 0.4202, "num_tokens": 698006536.0, "step": 915 }, { "epoch": 1.2498933174020654, "grad_norm": 0.4269885038242418, "learning_rate": 3.852132004610651e-05, "loss": 0.416, "num_tokens": 698712459.0, "step": 916 }, { "epoch": 1.2512588546556285, "grad_norm": 0.4409957015994956, "learning_rate": 3.8517371399187945e-05, "loss": 0.4155, "num_tokens": 699421995.0, "step": 917 }, { "epoch": 1.2526243919091917, "grad_norm": 0.533852395430057, "learning_rate": 3.8513417713534676e-05, "loss": 0.455, "num_tokens": 700254656.0, "step": 918 }, { "epoch": 1.253989929162755, "grad_norm": 0.4041325026538078, "learning_rate": 3.8509458990352785e-05, "loss": 0.4148, "num_tokens": 700967683.0, "step": 919 }, { "epoch": 1.2553554664163182, "grad_norm": 0.4509619198759386, "learning_rate": 3.850549523084993e-05, "loss": 0.4177, "num_tokens": 701725427.0, "step": 920 }, { "epoch": 1.2567210036698815, "grad_norm": 0.48136221608793, "learning_rate": 3.850152643623527e-05, "loss": 0.4206, "num_tokens": 702515753.0, "step": 921 }, { "epoch": 1.2580865409234445, "grad_norm": 0.4969833663675131, "learning_rate": 3.849755260771951e-05, "loss": 0.4391, "num_tokens": 703312570.0, "step": 922 }, { "epoch": 1.2594520781770078, "grad_norm": 0.4437427750285396, "learning_rate": 3.84935737465149e-05, "loss": 0.4398, "num_tokens": 704075881.0, "step": 923 }, { "epoch": 1.260817615430571, "grad_norm": 0.5350554176725208, "learning_rate": 3.848958985383522e-05, "loss": 0.4446, "num_tokens": 704881821.0, "step": 924 }, { "epoch": 1.262183152684134, "grad_norm": 0.4650146054848901, "learning_rate": 3.848560093089578e-05, "loss": 0.456, "num_tokens": 705636460.0, "step": 925 }, { "epoch": 1.2635486899376973, "grad_norm": 0.4694508678213324, "learning_rate": 3.848160697891343e-05, "loss": 0.4319, "num_tokens": 706282807.0, "step": 926 }, { "epoch": 1.2649142271912606, "grad_norm": 0.4851398734980709, "learning_rate": 3.847760799910654e-05, "loss": 0.4447, "num_tokens": 707084446.0, "step": 927 }, { "epoch": 1.2662797644448238, "grad_norm": 0.4088021042498704, "learning_rate": 3.8473603992695027e-05, "loss": 0.4261, "num_tokens": 707811973.0, "step": 928 }, { "epoch": 1.267645301698387, "grad_norm": 0.38562630274807036, "learning_rate": 3.846959496090036e-05, "loss": 0.4421, "num_tokens": 708637101.0, "step": 929 }, { "epoch": 1.26901083895195, "grad_norm": 0.4238819736708451, "learning_rate": 3.84655809049455e-05, "loss": 0.4344, "num_tokens": 709439690.0, "step": 930 }, { "epoch": 1.2703763762055134, "grad_norm": 0.37868014441352676, "learning_rate": 3.846156182605497e-05, "loss": 0.4271, "num_tokens": 710157194.0, "step": 931 }, { "epoch": 1.2717419134590766, "grad_norm": 0.479747970324989, "learning_rate": 3.845753772545482e-05, "loss": 0.4466, "num_tokens": 710876038.0, "step": 932 }, { "epoch": 1.2731074507126396, "grad_norm": 0.3564118823357316, "learning_rate": 3.8453508604372627e-05, "loss": 0.412, "num_tokens": 711629721.0, "step": 933 }, { "epoch": 1.274472987966203, "grad_norm": 0.39749865131153056, "learning_rate": 3.84494744640375e-05, "loss": 0.4322, "num_tokens": 712408387.0, "step": 934 }, { "epoch": 1.2758385252197662, "grad_norm": 0.4244705855760407, "learning_rate": 3.844543530568008e-05, "loss": 0.4492, "num_tokens": 713231205.0, "step": 935 }, { "epoch": 1.2772040624733294, "grad_norm": 0.43341641971834677, "learning_rate": 3.844139113053253e-05, "loss": 0.4247, "num_tokens": 714025276.0, "step": 936 }, { "epoch": 1.2785695997268927, "grad_norm": 0.45430589482888484, "learning_rate": 3.843734193982857e-05, "loss": 0.4427, "num_tokens": 714757577.0, "step": 937 }, { "epoch": 1.2799351369804557, "grad_norm": 0.413278661095314, "learning_rate": 3.843328773480342e-05, "loss": 0.4279, "num_tokens": 715553807.0, "step": 938 }, { "epoch": 1.281300674234019, "grad_norm": 0.5027923915234472, "learning_rate": 3.8429228516693856e-05, "loss": 0.4347, "num_tokens": 716320648.0, "step": 939 }, { "epoch": 1.2826662114875822, "grad_norm": 0.4216735819945775, "learning_rate": 3.8425164286738157e-05, "loss": 0.4295, "num_tokens": 717066607.0, "step": 940 }, { "epoch": 1.2840317487411452, "grad_norm": 0.4612241017012475, "learning_rate": 3.842109504617615e-05, "loss": 0.4603, "num_tokens": 717798341.0, "step": 941 }, { "epoch": 1.2853972859947085, "grad_norm": 0.45722780961158643, "learning_rate": 3.841702079624918e-05, "loss": 0.4359, "num_tokens": 718655059.0, "step": 942 }, { "epoch": 1.2867628232482717, "grad_norm": 0.40863169579814923, "learning_rate": 3.8412941538200125e-05, "loss": 0.4093, "num_tokens": 719398983.0, "step": 943 }, { "epoch": 1.288128360501835, "grad_norm": 0.4992769545437139, "learning_rate": 3.840885727327339e-05, "loss": 0.4439, "num_tokens": 720137181.0, "step": 944 }, { "epoch": 1.2894938977553982, "grad_norm": 0.4647637372274971, "learning_rate": 3.8404768002714904e-05, "loss": 0.4244, "num_tokens": 720887396.0, "step": 945 }, { "epoch": 1.2908594350089613, "grad_norm": 0.47062067096390414, "learning_rate": 3.840067372777214e-05, "loss": 0.4336, "num_tokens": 721650294.0, "step": 946 }, { "epoch": 1.2922249722625245, "grad_norm": 0.48024813896887497, "learning_rate": 3.839657444969407e-05, "loss": 0.425, "num_tokens": 722358014.0, "step": 947 }, { "epoch": 1.2935905095160878, "grad_norm": 0.5059680348883336, "learning_rate": 3.8392470169731206e-05, "loss": 0.4663, "num_tokens": 723174884.0, "step": 948 }, { "epoch": 1.2949560467696508, "grad_norm": 0.4478953447806408, "learning_rate": 3.838836088913559e-05, "loss": 0.4346, "num_tokens": 723980683.0, "step": 949 }, { "epoch": 1.296321584023214, "grad_norm": 0.45629690711702714, "learning_rate": 3.8384246609160797e-05, "loss": 0.4592, "num_tokens": 724784950.0, "step": 950 }, { "epoch": 1.2976871212767773, "grad_norm": 0.4093615152182829, "learning_rate": 3.8380127331061895e-05, "loss": 0.4446, "num_tokens": 725552248.0, "step": 951 }, { "epoch": 1.2990526585303406, "grad_norm": 0.45223512049145315, "learning_rate": 3.837600305609552e-05, "loss": 0.4522, "num_tokens": 726376218.0, "step": 952 }, { "epoch": 1.3004181957839038, "grad_norm": 0.48132539483722364, "learning_rate": 3.8371873785519785e-05, "loss": 0.4585, "num_tokens": 727105372.0, "step": 953 }, { "epoch": 1.3017837330374669, "grad_norm": 0.3623612316684303, "learning_rate": 3.836773952059436e-05, "loss": 0.4416, "num_tokens": 727818025.0, "step": 954 }, { "epoch": 1.3031492702910301, "grad_norm": 0.5138661347902559, "learning_rate": 3.8363600262580444e-05, "loss": 0.4503, "num_tokens": 728555653.0, "step": 955 }, { "epoch": 1.3045148075445934, "grad_norm": 0.3996513711108716, "learning_rate": 3.8359456012740735e-05, "loss": 0.4243, "num_tokens": 729381507.0, "step": 956 }, { "epoch": 1.3058803447981564, "grad_norm": 0.4406568977440799, "learning_rate": 3.835530677233946e-05, "loss": 0.4376, "num_tokens": 730115528.0, "step": 957 }, { "epoch": 1.3072458820517197, "grad_norm": 0.5427262974108464, "learning_rate": 3.8351152542642376e-05, "loss": 0.4334, "num_tokens": 730878062.0, "step": 958 }, { "epoch": 1.308611419305283, "grad_norm": 0.4364884769223424, "learning_rate": 3.834699332491676e-05, "loss": 0.4545, "num_tokens": 731664199.0, "step": 959 }, { "epoch": 1.3099769565588462, "grad_norm": 0.47126774654421394, "learning_rate": 3.834282912043141e-05, "loss": 0.4161, "num_tokens": 732329125.0, "step": 960 }, { "epoch": 1.3113424938124094, "grad_norm": 0.41903594569177266, "learning_rate": 3.833865993045665e-05, "loss": 0.4444, "num_tokens": 733128714.0, "step": 961 }, { "epoch": 1.3127080310659724, "grad_norm": 0.4953182766986741, "learning_rate": 3.83344857562643e-05, "loss": 0.4568, "num_tokens": 733937293.0, "step": 962 }, { "epoch": 1.3140735683195357, "grad_norm": 0.4163203471590676, "learning_rate": 3.833030659912774e-05, "loss": 0.4495, "num_tokens": 734734607.0, "step": 963 }, { "epoch": 1.315439105573099, "grad_norm": 0.4731816239428989, "learning_rate": 3.8326122460321836e-05, "loss": 0.4274, "num_tokens": 735442588.0, "step": 964 }, { "epoch": 1.316804642826662, "grad_norm": 0.4337427011445074, "learning_rate": 3.8321933341123004e-05, "loss": 0.4479, "num_tokens": 736237349.0, "step": 965 }, { "epoch": 1.3181701800802252, "grad_norm": 0.457760898355076, "learning_rate": 3.8317739242809144e-05, "loss": 0.4498, "num_tokens": 736953126.0, "step": 966 }, { "epoch": 1.3195357173337885, "grad_norm": 0.46478777971720164, "learning_rate": 3.83135401666597e-05, "loss": 0.4371, "num_tokens": 737782878.0, "step": 967 }, { "epoch": 1.3209012545873517, "grad_norm": 0.5272263177009271, "learning_rate": 3.830933611395564e-05, "loss": 0.4452, "num_tokens": 738571143.0, "step": 968 }, { "epoch": 1.322266791840915, "grad_norm": 0.41401264735198107, "learning_rate": 3.830512708597942e-05, "loss": 0.4627, "num_tokens": 739373505.0, "step": 969 }, { "epoch": 1.323632329094478, "grad_norm": 0.5266413364312164, "learning_rate": 3.830091308401504e-05, "loss": 0.4216, "num_tokens": 740070006.0, "step": 970 }, { "epoch": 1.3249978663480413, "grad_norm": 0.48714217465429677, "learning_rate": 3.829669410934802e-05, "loss": 0.4563, "num_tokens": 740862451.0, "step": 971 }, { "epoch": 1.3263634036016045, "grad_norm": 0.47487231106147076, "learning_rate": 3.8292470163265375e-05, "loss": 0.4051, "num_tokens": 741583865.0, "step": 972 }, { "epoch": 1.3277289408551676, "grad_norm": 0.4312978541778128, "learning_rate": 3.828824124705565e-05, "loss": 0.4244, "num_tokens": 742408521.0, "step": 973 }, { "epoch": 1.3290944781087308, "grad_norm": 0.43406287130507176, "learning_rate": 3.82840073620089e-05, "loss": 0.4401, "num_tokens": 743160985.0, "step": 974 }, { "epoch": 1.330460015362294, "grad_norm": 0.568997102053163, "learning_rate": 3.8279768509416705e-05, "loss": 0.4541, "num_tokens": 743904992.0, "step": 975 }, { "epoch": 1.3318255526158573, "grad_norm": 0.406810281486347, "learning_rate": 3.8275524690572154e-05, "loss": 0.434, "num_tokens": 744663505.0, "step": 976 }, { "epoch": 1.3331910898694206, "grad_norm": 0.4754373795957301, "learning_rate": 3.827127590676986e-05, "loss": 0.449, "num_tokens": 745491376.0, "step": 977 }, { "epoch": 1.3345566271229836, "grad_norm": 0.44490396661741766, "learning_rate": 3.826702215930593e-05, "loss": 0.4169, "num_tokens": 746254832.0, "step": 978 }, { "epoch": 1.3359221643765469, "grad_norm": 0.5200100769448649, "learning_rate": 3.8262763449478e-05, "loss": 0.4397, "num_tokens": 747070818.0, "step": 979 }, { "epoch": 1.3372877016301101, "grad_norm": 0.46143095440136533, "learning_rate": 3.825849977858523e-05, "loss": 0.4287, "num_tokens": 747768715.0, "step": 980 }, { "epoch": 1.3386532388836732, "grad_norm": 0.4961569806130796, "learning_rate": 3.825423114792826e-05, "loss": 0.4693, "num_tokens": 748595355.0, "step": 981 }, { "epoch": 1.3400187761372364, "grad_norm": 0.4989027308694235, "learning_rate": 3.8249957558809296e-05, "loss": 0.4302, "num_tokens": 749387982.0, "step": 982 }, { "epoch": 1.3413843133907997, "grad_norm": 0.5005178059161008, "learning_rate": 3.8245679012531996e-05, "loss": 0.4191, "num_tokens": 750111930.0, "step": 983 }, { "epoch": 1.342749850644363, "grad_norm": 0.47879232606898486, "learning_rate": 3.824139551040157e-05, "loss": 0.4365, "num_tokens": 750837799.0, "step": 984 }, { "epoch": 1.3441153878979262, "grad_norm": 0.4738355369170936, "learning_rate": 3.823710705372473e-05, "loss": 0.4315, "num_tokens": 751591219.0, "step": 985 }, { "epoch": 1.3454809251514894, "grad_norm": 0.49871129627509725, "learning_rate": 3.82328136438097e-05, "loss": 0.429, "num_tokens": 752313245.0, "step": 986 }, { "epoch": 1.3468464624050525, "grad_norm": 0.45275667891013643, "learning_rate": 3.822851528196621e-05, "loss": 0.46, "num_tokens": 753064329.0, "step": 987 }, { "epoch": 1.3482119996586157, "grad_norm": 0.4137836107221253, "learning_rate": 3.8224211969505505e-05, "loss": 0.4521, "num_tokens": 753952656.0, "step": 988 }, { "epoch": 1.349577536912179, "grad_norm": 0.4111216437098058, "learning_rate": 3.8219903707740335e-05, "loss": 0.4069, "num_tokens": 754612682.0, "step": 989 }, { "epoch": 1.350943074165742, "grad_norm": 0.3968562119416692, "learning_rate": 3.821559049798497e-05, "loss": 0.4442, "num_tokens": 755433330.0, "step": 990 }, { "epoch": 1.3523086114193053, "grad_norm": 0.3943818083311655, "learning_rate": 3.821127234155518e-05, "loss": 0.4262, "num_tokens": 756189698.0, "step": 991 }, { "epoch": 1.3536741486728685, "grad_norm": 0.3527648177377545, "learning_rate": 3.820694923976825e-05, "loss": 0.4201, "num_tokens": 756913299.0, "step": 992 }, { "epoch": 1.3550396859264318, "grad_norm": 0.4180370991393135, "learning_rate": 3.8202621193942976e-05, "loss": 0.4331, "num_tokens": 757647161.0, "step": 993 }, { "epoch": 1.356405223179995, "grad_norm": 0.36523908606306454, "learning_rate": 3.8198288205399646e-05, "loss": 0.4354, "num_tokens": 758342215.0, "step": 994 }, { "epoch": 1.357770760433558, "grad_norm": 0.41246495634388036, "learning_rate": 3.819395027546008e-05, "loss": 0.4337, "num_tokens": 759153206.0, "step": 995 }, { "epoch": 1.3591362976871213, "grad_norm": 0.38193960568580565, "learning_rate": 3.818960740544757e-05, "loss": 0.4235, "num_tokens": 759885538.0, "step": 996 }, { "epoch": 1.3605018349406846, "grad_norm": 0.4530052029411208, "learning_rate": 3.818525959668697e-05, "loss": 0.4362, "num_tokens": 760625195.0, "step": 997 }, { "epoch": 1.3618673721942476, "grad_norm": 0.38844270685310833, "learning_rate": 3.818090685050458e-05, "loss": 0.446, "num_tokens": 761329022.0, "step": 998 }, { "epoch": 1.3632329094478108, "grad_norm": 0.4231592957561708, "learning_rate": 3.817654916822825e-05, "loss": 0.4309, "num_tokens": 762100819.0, "step": 999 }, { "epoch": 1.364598446701374, "grad_norm": 0.4190184047442605, "learning_rate": 3.817218655118731e-05, "loss": 0.4498, "num_tokens": 762939361.0, "step": 1000 }, { "epoch": 1.3659639839549373, "grad_norm": 0.40527424141349416, "learning_rate": 3.8167819000712615e-05, "loss": 0.449, "num_tokens": 763711552.0, "step": 1001 }, { "epoch": 1.3673295212085006, "grad_norm": 0.3966323356923854, "learning_rate": 3.816344651813651e-05, "loss": 0.4092, "num_tokens": 764443102.0, "step": 1002 }, { "epoch": 1.3686950584620636, "grad_norm": 0.36115745259642895, "learning_rate": 3.815906910479286e-05, "loss": 0.4431, "num_tokens": 765222188.0, "step": 1003 }, { "epoch": 1.3700605957156269, "grad_norm": 0.431780487031608, "learning_rate": 3.815468676201701e-05, "loss": 0.4421, "num_tokens": 766059302.0, "step": 1004 }, { "epoch": 1.3714261329691901, "grad_norm": 0.4029647688176794, "learning_rate": 3.8150299491145824e-05, "loss": 0.4352, "num_tokens": 766772992.0, "step": 1005 }, { "epoch": 1.3727916702227532, "grad_norm": 0.36704113054534754, "learning_rate": 3.8145907293517675e-05, "loss": 0.4352, "num_tokens": 767544810.0, "step": 1006 }, { "epoch": 1.3741572074763164, "grad_norm": 0.4009233523707378, "learning_rate": 3.8141510170472436e-05, "loss": 0.4471, "num_tokens": 768293750.0, "step": 1007 }, { "epoch": 1.3755227447298797, "grad_norm": 0.4362890869452277, "learning_rate": 3.813710812335147e-05, "loss": 0.4377, "num_tokens": 769080589.0, "step": 1008 }, { "epoch": 1.376888281983443, "grad_norm": 0.42104015345774637, "learning_rate": 3.8132701153497655e-05, "loss": 0.4347, "num_tokens": 769795210.0, "step": 1009 }, { "epoch": 1.3782538192370062, "grad_norm": 0.3694048990283339, "learning_rate": 3.812828926225537e-05, "loss": 0.4313, "num_tokens": 770598994.0, "step": 1010 }, { "epoch": 1.3796193564905692, "grad_norm": 0.5176870969856636, "learning_rate": 3.812387245097048e-05, "loss": 0.4466, "num_tokens": 771444771.0, "step": 1011 }, { "epoch": 1.3809848937441325, "grad_norm": 0.42559269790433224, "learning_rate": 3.8119450720990375e-05, "loss": 0.462, "num_tokens": 772224971.0, "step": 1012 }, { "epoch": 1.3823504309976957, "grad_norm": 0.39242895553386187, "learning_rate": 3.8115024073663924e-05, "loss": 0.4393, "num_tokens": 773053413.0, "step": 1013 }, { "epoch": 1.3837159682512588, "grad_norm": 0.46341088688564347, "learning_rate": 3.811059251034152e-05, "loss": 0.4431, "num_tokens": 773815635.0, "step": 1014 }, { "epoch": 1.385081505504822, "grad_norm": 0.39211284752304243, "learning_rate": 3.8106156032375026e-05, "loss": 0.4333, "num_tokens": 774577434.0, "step": 1015 }, { "epoch": 1.3864470427583853, "grad_norm": 0.4330779471060922, "learning_rate": 3.810171464111782e-05, "loss": 0.4312, "num_tokens": 775300403.0, "step": 1016 }, { "epoch": 1.3878125800119485, "grad_norm": 0.41629563036871564, "learning_rate": 3.809726833792479e-05, "loss": 0.4417, "num_tokens": 776072863.0, "step": 1017 }, { "epoch": 1.3891781172655118, "grad_norm": 0.36073877008457833, "learning_rate": 3.8092817124152305e-05, "loss": 0.4194, "num_tokens": 776809411.0, "step": 1018 }, { "epoch": 1.3905436545190748, "grad_norm": 0.4204060818417228, "learning_rate": 3.808836100115823e-05, "loss": 0.4341, "num_tokens": 777602078.0, "step": 1019 }, { "epoch": 1.391909191772638, "grad_norm": 0.41466698258044843, "learning_rate": 3.8083899970301944e-05, "loss": 0.4339, "num_tokens": 778411563.0, "step": 1020 }, { "epoch": 1.3932747290262013, "grad_norm": 0.3748405882037072, "learning_rate": 3.8079434032944316e-05, "loss": 0.4339, "num_tokens": 779214139.0, "step": 1021 }, { "epoch": 1.3946402662797643, "grad_norm": 0.40637885120271966, "learning_rate": 3.80749631904477e-05, "loss": 0.4311, "num_tokens": 779924509.0, "step": 1022 }, { "epoch": 1.3960058035333276, "grad_norm": 0.38678973325928456, "learning_rate": 3.8070487444175965e-05, "loss": 0.4331, "num_tokens": 780712634.0, "step": 1023 }, { "epoch": 1.3973713407868908, "grad_norm": 0.42223982203297183, "learning_rate": 3.8066006795494456e-05, "loss": 0.4376, "num_tokens": 781535885.0, "step": 1024 }, { "epoch": 1.398736878040454, "grad_norm": 0.3922017829031399, "learning_rate": 3.8061521245770044e-05, "loss": 0.4397, "num_tokens": 782325858.0, "step": 1025 }, { "epoch": 1.4001024152940174, "grad_norm": 0.4297986221189276, "learning_rate": 3.805703079637106e-05, "loss": 0.4662, "num_tokens": 783162812.0, "step": 1026 }, { "epoch": 1.4014679525475804, "grad_norm": 0.37883160696931656, "learning_rate": 3.805253544866735e-05, "loss": 0.4227, "num_tokens": 783981985.0, "step": 1027 }, { "epoch": 1.4028334898011436, "grad_norm": 0.3990875235321775, "learning_rate": 3.8048035204030255e-05, "loss": 0.4309, "num_tokens": 784679182.0, "step": 1028 }, { "epoch": 1.404199027054707, "grad_norm": 0.4329019375551469, "learning_rate": 3.80435300638326e-05, "loss": 0.4398, "num_tokens": 785526390.0, "step": 1029 }, { "epoch": 1.40556456430827, "grad_norm": 0.390344862501062, "learning_rate": 3.80390200294487e-05, "loss": 0.4219, "num_tokens": 786326316.0, "step": 1030 }, { "epoch": 1.4069301015618332, "grad_norm": 0.42099599027034584, "learning_rate": 3.803450510225439e-05, "loss": 0.4465, "num_tokens": 787084834.0, "step": 1031 }, { "epoch": 1.4082956388153964, "grad_norm": 0.44732195539013453, "learning_rate": 3.802998528362697e-05, "loss": 0.4454, "num_tokens": 787885803.0, "step": 1032 }, { "epoch": 1.4096611760689597, "grad_norm": 0.3687748315001251, "learning_rate": 3.802546057494524e-05, "loss": 0.4268, "num_tokens": 788689073.0, "step": 1033 }, { "epoch": 1.411026713322523, "grad_norm": 0.38242853248483977, "learning_rate": 3.802093097758948e-05, "loss": 0.4277, "num_tokens": 789398548.0, "step": 1034 }, { "epoch": 1.412392250576086, "grad_norm": 0.45465378088436637, "learning_rate": 3.8016396492941496e-05, "loss": 0.4559, "num_tokens": 790140215.0, "step": 1035 }, { "epoch": 1.4137577878296492, "grad_norm": 0.46627479061339727, "learning_rate": 3.801185712238455e-05, "loss": 0.4441, "num_tokens": 790841488.0, "step": 1036 }, { "epoch": 1.4151233250832125, "grad_norm": 0.42050645818590804, "learning_rate": 3.800731286730342e-05, "loss": 0.4299, "num_tokens": 791541324.0, "step": 1037 }, { "epoch": 1.4164888623367755, "grad_norm": 0.40612228395309286, "learning_rate": 3.8002763729084335e-05, "loss": 0.4387, "num_tokens": 792334607.0, "step": 1038 }, { "epoch": 1.4178543995903388, "grad_norm": 0.3793121125288589, "learning_rate": 3.799820970911507e-05, "loss": 0.4453, "num_tokens": 793048243.0, "step": 1039 }, { "epoch": 1.419219936843902, "grad_norm": 0.44064874761682643, "learning_rate": 3.799365080878484e-05, "loss": 0.4331, "num_tokens": 793734926.0, "step": 1040 }, { "epoch": 1.4205854740974653, "grad_norm": 0.48023749314886055, "learning_rate": 3.798908702948437e-05, "loss": 0.4618, "num_tokens": 794421277.0, "step": 1041 }, { "epoch": 1.4219510113510285, "grad_norm": 0.420764939612907, "learning_rate": 3.798451837260589e-05, "loss": 0.4324, "num_tokens": 795229793.0, "step": 1042 }, { "epoch": 1.4233165486045916, "grad_norm": 0.4092932134137612, "learning_rate": 3.7979944839543074e-05, "loss": 0.4404, "num_tokens": 795997880.0, "step": 1043 }, { "epoch": 1.4246820858581548, "grad_norm": 0.43468260174964707, "learning_rate": 3.7975366431691124e-05, "loss": 0.4274, "num_tokens": 796782476.0, "step": 1044 }, { "epoch": 1.426047623111718, "grad_norm": 0.4148163303867054, "learning_rate": 3.797078315044672e-05, "loss": 0.4288, "num_tokens": 797568445.0, "step": 1045 }, { "epoch": 1.427413160365281, "grad_norm": 0.43776859094200377, "learning_rate": 3.796619499720799e-05, "loss": 0.4113, "num_tokens": 798369710.0, "step": 1046 }, { "epoch": 1.4287786976188444, "grad_norm": 0.37333322326751006, "learning_rate": 3.796160197337462e-05, "loss": 0.4351, "num_tokens": 799170218.0, "step": 1047 }, { "epoch": 1.4301442348724076, "grad_norm": 0.4112412411480293, "learning_rate": 3.795700408034772e-05, "loss": 0.4339, "num_tokens": 799962100.0, "step": 1048 }, { "epoch": 1.4315097721259709, "grad_norm": 0.4146936985951953, "learning_rate": 3.795240131952992e-05, "loss": 0.445, "num_tokens": 800736291.0, "step": 1049 }, { "epoch": 1.4328753093795341, "grad_norm": 0.4028002895572046, "learning_rate": 3.794779369232531e-05, "loss": 0.4096, "num_tokens": 801479180.0, "step": 1050 }, { "epoch": 1.4342408466330971, "grad_norm": 0.4262752604145639, "learning_rate": 3.794318120013949e-05, "loss": 0.4314, "num_tokens": 802317711.0, "step": 1051 }, { "epoch": 1.4356063838866604, "grad_norm": 0.4155309907082243, "learning_rate": 3.793856384437953e-05, "loss": 0.4224, "num_tokens": 803208138.0, "step": 1052 }, { "epoch": 1.4369719211402237, "grad_norm": 0.4319615269769592, "learning_rate": 3.7933941626453965e-05, "loss": 0.4421, "num_tokens": 803986681.0, "step": 1053 }, { "epoch": 1.4383374583937867, "grad_norm": 0.49289917847869175, "learning_rate": 3.792931454777286e-05, "loss": 0.4627, "num_tokens": 804736757.0, "step": 1054 }, { "epoch": 1.43970299564735, "grad_norm": 0.4160772812479961, "learning_rate": 3.792468260974773e-05, "loss": 0.4264, "num_tokens": 805457947.0, "step": 1055 }, { "epoch": 1.4410685329009132, "grad_norm": 0.5014542658243606, "learning_rate": 3.792004581379157e-05, "loss": 0.4311, "num_tokens": 806170211.0, "step": 1056 }, { "epoch": 1.4424340701544764, "grad_norm": 0.42353347790166634, "learning_rate": 3.791540416131887e-05, "loss": 0.458, "num_tokens": 806919908.0, "step": 1057 }, { "epoch": 1.4437996074080397, "grad_norm": 0.4852229753321058, "learning_rate": 3.791075765374561e-05, "loss": 0.4489, "num_tokens": 807742448.0, "step": 1058 }, { "epoch": 1.4451651446616027, "grad_norm": 0.43380141358746704, "learning_rate": 3.7906106292489214e-05, "loss": 0.4106, "num_tokens": 808524618.0, "step": 1059 }, { "epoch": 1.446530681915166, "grad_norm": 0.4483976362249972, "learning_rate": 3.790145007896863e-05, "loss": 0.4358, "num_tokens": 809284663.0, "step": 1060 }, { "epoch": 1.4478962191687292, "grad_norm": 0.49314072659816116, "learning_rate": 3.7896789014604253e-05, "loss": 0.4465, "num_tokens": 810034681.0, "step": 1061 }, { "epoch": 1.4492617564222923, "grad_norm": 0.4110168018004797, "learning_rate": 3.789212310081798e-05, "loss": 0.4236, "num_tokens": 810838475.0, "step": 1062 }, { "epoch": 1.4506272936758555, "grad_norm": 0.5104756678956625, "learning_rate": 3.7887452339033185e-05, "loss": 0.4248, "num_tokens": 811591952.0, "step": 1063 }, { "epoch": 1.4519928309294188, "grad_norm": 0.4523561338079344, "learning_rate": 3.788277673067471e-05, "loss": 0.4387, "num_tokens": 812338606.0, "step": 1064 }, { "epoch": 1.453358368182982, "grad_norm": 0.4801378354311215, "learning_rate": 3.787809627716887e-05, "loss": 0.4409, "num_tokens": 813084600.0, "step": 1065 }, { "epoch": 1.4547239054365453, "grad_norm": 0.47058147258047345, "learning_rate": 3.787341097994348e-05, "loss": 0.4356, "num_tokens": 813846474.0, "step": 1066 }, { "epoch": 1.4560894426901083, "grad_norm": 0.37557446949377726, "learning_rate": 3.786872084042782e-05, "loss": 0.4213, "num_tokens": 814593720.0, "step": 1067 }, { "epoch": 1.4574549799436716, "grad_norm": 0.4216300870212667, "learning_rate": 3.786402586005264e-05, "loss": 0.453, "num_tokens": 815413507.0, "step": 1068 }, { "epoch": 1.4588205171972348, "grad_norm": 0.3585109055720238, "learning_rate": 3.7859326040250196e-05, "loss": 0.4347, "num_tokens": 816145472.0, "step": 1069 }, { "epoch": 1.4601860544507979, "grad_norm": 0.4732920249827573, "learning_rate": 3.785462138245417e-05, "loss": 0.4341, "num_tokens": 816887616.0, "step": 1070 }, { "epoch": 1.461551591704361, "grad_norm": 0.42959163621782526, "learning_rate": 3.784991188809979e-05, "loss": 0.4452, "num_tokens": 817636306.0, "step": 1071 }, { "epoch": 1.4629171289579244, "grad_norm": 0.5092876051928621, "learning_rate": 3.784519755862367e-05, "loss": 0.4075, "num_tokens": 818354838.0, "step": 1072 }, { "epoch": 1.4642826662114876, "grad_norm": 0.4569806638309128, "learning_rate": 3.784047839546399e-05, "loss": 0.4115, "num_tokens": 819108556.0, "step": 1073 }, { "epoch": 1.4656482034650509, "grad_norm": 0.5057588476245893, "learning_rate": 3.783575440006033e-05, "loss": 0.4292, "num_tokens": 819931541.0, "step": 1074 }, { "epoch": 1.467013740718614, "grad_norm": 0.4808634651929069, "learning_rate": 3.7831025573853796e-05, "loss": 0.4709, "num_tokens": 820754766.0, "step": 1075 }, { "epoch": 1.4683792779721772, "grad_norm": 0.4843343299147391, "learning_rate": 3.782629191828695e-05, "loss": 0.4278, "num_tokens": 821496785.0, "step": 1076 }, { "epoch": 1.4697448152257404, "grad_norm": 0.5144967861840841, "learning_rate": 3.7821553434803806e-05, "loss": 0.4567, "num_tokens": 822266906.0, "step": 1077 }, { "epoch": 1.4711103524793034, "grad_norm": 0.43761518687415996, "learning_rate": 3.78168101248499e-05, "loss": 0.4106, "num_tokens": 822963253.0, "step": 1078 }, { "epoch": 1.4724758897328667, "grad_norm": 0.4725000350608277, "learning_rate": 3.7812061989872176e-05, "loss": 0.428, "num_tokens": 823680894.0, "step": 1079 }, { "epoch": 1.47384142698643, "grad_norm": 0.4139911659811226, "learning_rate": 3.78073090313191e-05, "loss": 0.4324, "num_tokens": 824416461.0, "step": 1080 }, { "epoch": 1.4752069642399932, "grad_norm": 0.445260561886843, "learning_rate": 3.780255125064061e-05, "loss": 0.4375, "num_tokens": 825187052.0, "step": 1081 }, { "epoch": 1.4765725014935565, "grad_norm": 0.4220405733646303, "learning_rate": 3.779778864928807e-05, "loss": 0.4561, "num_tokens": 825948377.0, "step": 1082 }, { "epoch": 1.4779380387471195, "grad_norm": 0.44473091707488926, "learning_rate": 3.7793021228714366e-05, "loss": 0.4148, "num_tokens": 826675665.0, "step": 1083 }, { "epoch": 1.4793035760006827, "grad_norm": 0.41991011498307307, "learning_rate": 3.7788248990373815e-05, "loss": 0.4296, "num_tokens": 827442330.0, "step": 1084 }, { "epoch": 1.480669113254246, "grad_norm": 0.5156577368691494, "learning_rate": 3.778347193572223e-05, "loss": 0.4379, "num_tokens": 828225665.0, "step": 1085 }, { "epoch": 1.4820346505078092, "grad_norm": 0.4019235029306686, "learning_rate": 3.7778690066216886e-05, "loss": 0.4612, "num_tokens": 829039240.0, "step": 1086 }, { "epoch": 1.4834001877613723, "grad_norm": 0.4933709481263319, "learning_rate": 3.7773903383316516e-05, "loss": 0.4555, "num_tokens": 829803133.0, "step": 1087 }, { "epoch": 1.4847657250149355, "grad_norm": 0.4141499239517742, "learning_rate": 3.776911188848133e-05, "loss": 0.4179, "num_tokens": 830477988.0, "step": 1088 }, { "epoch": 1.4861312622684988, "grad_norm": 0.40010453673844637, "learning_rate": 3.776431558317302e-05, "loss": 0.4093, "num_tokens": 831142418.0, "step": 1089 }, { "epoch": 1.487496799522062, "grad_norm": 0.4707656827351541, "learning_rate": 3.775951446885471e-05, "loss": 0.4407, "num_tokens": 831952350.0, "step": 1090 }, { "epoch": 1.4888623367756253, "grad_norm": 0.41221651159186484, "learning_rate": 3.775470854699103e-05, "loss": 0.4433, "num_tokens": 832718894.0, "step": 1091 }, { "epoch": 1.4902278740291883, "grad_norm": 0.4093361099895309, "learning_rate": 3.7749897819048044e-05, "loss": 0.4339, "num_tokens": 833518145.0, "step": 1092 }, { "epoch": 1.4915934112827516, "grad_norm": 0.3963182877258251, "learning_rate": 3.77450822864933e-05, "loss": 0.4303, "num_tokens": 834282263.0, "step": 1093 }, { "epoch": 1.4929589485363148, "grad_norm": 0.4712393733283107, "learning_rate": 3.774026195079582e-05, "loss": 0.4489, "num_tokens": 835071186.0, "step": 1094 }, { "epoch": 1.4943244857898779, "grad_norm": 0.45582341786926583, "learning_rate": 3.773543681342607e-05, "loss": 0.4702, "num_tokens": 835844139.0, "step": 1095 }, { "epoch": 1.4956900230434411, "grad_norm": 0.42853810190888675, "learning_rate": 3.773060687585599e-05, "loss": 0.4352, "num_tokens": 836667359.0, "step": 1096 }, { "epoch": 1.4970555602970044, "grad_norm": 0.3978761442523462, "learning_rate": 3.7725772139559e-05, "loss": 0.4433, "num_tokens": 837465077.0, "step": 1097 }, { "epoch": 1.4984210975505676, "grad_norm": 0.39723406248404164, "learning_rate": 3.772093260600993e-05, "loss": 0.4342, "num_tokens": 838278586.0, "step": 1098 }, { "epoch": 1.4997866348041309, "grad_norm": 0.39269984942902064, "learning_rate": 3.771608827668516e-05, "loss": 0.4378, "num_tokens": 839039986.0, "step": 1099 }, { "epoch": 1.501152172057694, "grad_norm": 0.4578783289724579, "learning_rate": 3.771123915306245e-05, "loss": 0.4443, "num_tokens": 839892474.0, "step": 1100 }, { "epoch": 1.5025177093112572, "grad_norm": 0.4274875699975632, "learning_rate": 3.7706385236621074e-05, "loss": 0.4433, "num_tokens": 840661420.0, "step": 1101 }, { "epoch": 1.5038832465648202, "grad_norm": 0.4722936941718131, "learning_rate": 3.770152652884176e-05, "loss": 0.423, "num_tokens": 841366700.0, "step": 1102 }, { "epoch": 1.5052487838183835, "grad_norm": 0.410874939772818, "learning_rate": 3.769666303120665e-05, "loss": 0.4401, "num_tokens": 842157945.0, "step": 1103 }, { "epoch": 1.5066143210719467, "grad_norm": 0.34813342746383763, "learning_rate": 3.7691794745199435e-05, "loss": 0.4241, "num_tokens": 842903525.0, "step": 1104 }, { "epoch": 1.50797985832551, "grad_norm": 0.4112327106010894, "learning_rate": 3.768692167230519e-05, "loss": 0.4262, "num_tokens": 843642274.0, "step": 1105 }, { "epoch": 1.5093453955790732, "grad_norm": 0.4226420911712803, "learning_rate": 3.768204381401049e-05, "loss": 0.4353, "num_tokens": 844456405.0, "step": 1106 }, { "epoch": 1.5107109328326365, "grad_norm": 0.4107826681731726, "learning_rate": 3.7677161171803346e-05, "loss": 0.4507, "num_tokens": 845222915.0, "step": 1107 }, { "epoch": 1.5120764700861995, "grad_norm": 0.36933461531108974, "learning_rate": 3.767227374717325e-05, "loss": 0.4198, "num_tokens": 846018937.0, "step": 1108 }, { "epoch": 1.5134420073397628, "grad_norm": 0.38644285230034553, "learning_rate": 3.7667381541611155e-05, "loss": 0.4465, "num_tokens": 846726028.0, "step": 1109 }, { "epoch": 1.5148075445933258, "grad_norm": 0.3781939501407643, "learning_rate": 3.766248455660944e-05, "loss": 0.4415, "num_tokens": 847427544.0, "step": 1110 }, { "epoch": 1.516173081846889, "grad_norm": 0.42589296309532454, "learning_rate": 3.765758279366197e-05, "loss": 0.4278, "num_tokens": 848120260.0, "step": 1111 }, { "epoch": 1.5175386191004523, "grad_norm": 0.35697637080879385, "learning_rate": 3.765267625426406e-05, "loss": 0.4336, "num_tokens": 848934638.0, "step": 1112 }, { "epoch": 1.5189041563540155, "grad_norm": 0.4224901590000625, "learning_rate": 3.764776493991249e-05, "loss": 0.44, "num_tokens": 849700977.0, "step": 1113 }, { "epoch": 1.5202696936075788, "grad_norm": 0.40656401691771354, "learning_rate": 3.7642848852105485e-05, "loss": 0.4315, "num_tokens": 850492645.0, "step": 1114 }, { "epoch": 1.521635230861142, "grad_norm": 0.43317642303000176, "learning_rate": 3.763792799234272e-05, "loss": 0.4452, "num_tokens": 851235954.0, "step": 1115 }, { "epoch": 1.523000768114705, "grad_norm": 0.3819585960900694, "learning_rate": 3.763300236212535e-05, "loss": 0.4385, "num_tokens": 852020802.0, "step": 1116 }, { "epoch": 1.5243663053682683, "grad_norm": 0.42946230349127673, "learning_rate": 3.762807196295597e-05, "loss": 0.4416, "num_tokens": 852794546.0, "step": 1117 }, { "epoch": 1.5257318426218314, "grad_norm": 0.4206833636996143, "learning_rate": 3.762313679633863e-05, "loss": 0.4379, "num_tokens": 853507662.0, "step": 1118 }, { "epoch": 1.5270973798753946, "grad_norm": 0.37568393235093056, "learning_rate": 3.761819686377882e-05, "loss": 0.4433, "num_tokens": 854281205.0, "step": 1119 }, { "epoch": 1.5284629171289579, "grad_norm": 0.39164299003912123, "learning_rate": 3.761325216678353e-05, "loss": 0.4141, "num_tokens": 854976425.0, "step": 1120 }, { "epoch": 1.5298284543825211, "grad_norm": 0.39530669844226013, "learning_rate": 3.7608302706861154e-05, "loss": 0.4415, "num_tokens": 855758022.0, "step": 1121 }, { "epoch": 1.5311939916360844, "grad_norm": 0.398299027534764, "learning_rate": 3.7603348485521546e-05, "loss": 0.4477, "num_tokens": 856591959.0, "step": 1122 }, { "epoch": 1.5325595288896476, "grad_norm": 0.3728133828364313, "learning_rate": 3.7598389504276056e-05, "loss": 0.4428, "num_tokens": 857389990.0, "step": 1123 }, { "epoch": 1.5339250661432107, "grad_norm": 0.40077286125757067, "learning_rate": 3.759342576463743e-05, "loss": 0.4189, "num_tokens": 858127252.0, "step": 1124 }, { "epoch": 1.535290603396774, "grad_norm": 0.4824554583169786, "learning_rate": 3.75884572681199e-05, "loss": 0.4599, "num_tokens": 858900327.0, "step": 1125 }, { "epoch": 1.536656140650337, "grad_norm": 0.3836318124387484, "learning_rate": 3.758348401623913e-05, "loss": 0.4226, "num_tokens": 859666194.0, "step": 1126 }, { "epoch": 1.5380216779039002, "grad_norm": 0.5265314130531245, "learning_rate": 3.757850601051226e-05, "loss": 0.457, "num_tokens": 860532261.0, "step": 1127 }, { "epoch": 1.5393872151574635, "grad_norm": 0.4492247669323924, "learning_rate": 3.757352325245784e-05, "loss": 0.4243, "num_tokens": 861225131.0, "step": 1128 }, { "epoch": 1.5407527524110267, "grad_norm": 0.5350689869022015, "learning_rate": 3.756853574359592e-05, "loss": 0.4268, "num_tokens": 861989442.0, "step": 1129 }, { "epoch": 1.54211828966459, "grad_norm": 0.5022342182137443, "learning_rate": 3.7563543485447956e-05, "loss": 0.4487, "num_tokens": 862774724.0, "step": 1130 }, { "epoch": 1.5434838269181532, "grad_norm": 0.4001766279417761, "learning_rate": 3.755854647953687e-05, "loss": 0.4214, "num_tokens": 863576784.0, "step": 1131 }, { "epoch": 1.5448493641717163, "grad_norm": 0.405710851586757, "learning_rate": 3.755354472738705e-05, "loss": 0.4404, "num_tokens": 864374330.0, "step": 1132 }, { "epoch": 1.5462149014252795, "grad_norm": 0.49921313624964203, "learning_rate": 3.7548538230524284e-05, "loss": 0.4457, "num_tokens": 865134067.0, "step": 1133 }, { "epoch": 1.5475804386788428, "grad_norm": 0.3905867455333946, "learning_rate": 3.7543526990475864e-05, "loss": 0.4301, "num_tokens": 865912923.0, "step": 1134 }, { "epoch": 1.5489459759324058, "grad_norm": 0.5569679704381019, "learning_rate": 3.753851100877049e-05, "loss": 0.4479, "num_tokens": 866644649.0, "step": 1135 }, { "epoch": 1.550311513185969, "grad_norm": 0.47205675013186804, "learning_rate": 3.7533490286938315e-05, "loss": 0.4668, "num_tokens": 867427608.0, "step": 1136 }, { "epoch": 1.5516770504395323, "grad_norm": 0.43819652273440535, "learning_rate": 3.7528464826510965e-05, "loss": 0.4325, "num_tokens": 868137649.0, "step": 1137 }, { "epoch": 1.5530425876930956, "grad_norm": 0.4488236148503356, "learning_rate": 3.7523434629021466e-05, "loss": 0.4627, "num_tokens": 868969239.0, "step": 1138 }, { "epoch": 1.5544081249466588, "grad_norm": 0.4107858362084565, "learning_rate": 3.7518399696004324e-05, "loss": 0.4346, "num_tokens": 869732977.0, "step": 1139 }, { "epoch": 1.555773662200222, "grad_norm": 0.48961165036909315, "learning_rate": 3.751336002899549e-05, "loss": 0.4461, "num_tokens": 870544496.0, "step": 1140 }, { "epoch": 1.557139199453785, "grad_norm": 0.4061195567711272, "learning_rate": 3.750831562953234e-05, "loss": 0.4223, "num_tokens": 871386951.0, "step": 1141 }, { "epoch": 1.5585047367073483, "grad_norm": 0.4712075636913604, "learning_rate": 3.750326649915368e-05, "loss": 0.4538, "num_tokens": 872218398.0, "step": 1142 }, { "epoch": 1.5598702739609114, "grad_norm": 0.4073332298009698, "learning_rate": 3.7498212639399814e-05, "loss": 0.433, "num_tokens": 872948013.0, "step": 1143 }, { "epoch": 1.5612358112144746, "grad_norm": 0.4401726718383672, "learning_rate": 3.749315405181244e-05, "loss": 0.4204, "num_tokens": 873685991.0, "step": 1144 }, { "epoch": 1.5626013484680379, "grad_norm": 0.3628889004680459, "learning_rate": 3.7488090737934714e-05, "loss": 0.4559, "num_tokens": 874395948.0, "step": 1145 }, { "epoch": 1.5639668857216011, "grad_norm": 0.4118273489238772, "learning_rate": 3.748302269931124e-05, "loss": 0.4551, "num_tokens": 875178449.0, "step": 1146 }, { "epoch": 1.5653324229751644, "grad_norm": 0.3937966225628666, "learning_rate": 3.747794993748805e-05, "loss": 0.4427, "num_tokens": 875987020.0, "step": 1147 }, { "epoch": 1.5666979602287276, "grad_norm": 0.41558435991675596, "learning_rate": 3.7472872454012636e-05, "loss": 0.4286, "num_tokens": 876714590.0, "step": 1148 }, { "epoch": 1.5680634974822907, "grad_norm": 0.3465657378340947, "learning_rate": 3.7467790250433903e-05, "loss": 0.4188, "num_tokens": 877502963.0, "step": 1149 }, { "epoch": 1.569429034735854, "grad_norm": 0.3740813064063447, "learning_rate": 3.7462703328302216e-05, "loss": 0.4442, "num_tokens": 878295248.0, "step": 1150 }, { "epoch": 1.570794571989417, "grad_norm": 0.4036495484960055, "learning_rate": 3.745761168916937e-05, "loss": 0.4251, "num_tokens": 878948915.0, "step": 1151 }, { "epoch": 1.5721601092429802, "grad_norm": 0.4014685562969714, "learning_rate": 3.745251533458863e-05, "loss": 0.4499, "num_tokens": 879733990.0, "step": 1152 }, { "epoch": 1.5735256464965435, "grad_norm": 0.3587342974371942, "learning_rate": 3.744741426611464e-05, "loss": 0.446, "num_tokens": 880509444.0, "step": 1153 }, { "epoch": 1.5748911837501067, "grad_norm": 0.3737183228419222, "learning_rate": 3.744230848530352e-05, "loss": 0.4354, "num_tokens": 881299960.0, "step": 1154 }, { "epoch": 1.57625672100367, "grad_norm": 0.34889604086846626, "learning_rate": 3.743719799371284e-05, "loss": 0.4436, "num_tokens": 882061339.0, "step": 1155 }, { "epoch": 1.5776222582572332, "grad_norm": 0.40167959919778407, "learning_rate": 3.743208279290158e-05, "loss": 0.4641, "num_tokens": 882765973.0, "step": 1156 }, { "epoch": 1.5789877955107963, "grad_norm": 0.40143051986469724, "learning_rate": 3.742696288443017e-05, "loss": 0.4598, "num_tokens": 883570192.0, "step": 1157 }, { "epoch": 1.5803533327643595, "grad_norm": 0.3772809618515918, "learning_rate": 3.742183826986046e-05, "loss": 0.4386, "num_tokens": 884336114.0, "step": 1158 }, { "epoch": 1.5817188700179226, "grad_norm": 0.43123157211273067, "learning_rate": 3.741670895075576e-05, "loss": 0.4652, "num_tokens": 885059560.0, "step": 1159 }, { "epoch": 1.5830844072714858, "grad_norm": 0.3946725591520069, "learning_rate": 3.7411574928680806e-05, "loss": 0.4498, "num_tokens": 885846501.0, "step": 1160 }, { "epoch": 1.584449944525049, "grad_norm": 0.3897003199716668, "learning_rate": 3.7406436205201755e-05, "loss": 0.4187, "num_tokens": 886561332.0, "step": 1161 }, { "epoch": 1.5858154817786123, "grad_norm": 0.4371359183652179, "learning_rate": 3.7401292781886215e-05, "loss": 0.428, "num_tokens": 887321622.0, "step": 1162 }, { "epoch": 1.5871810190321756, "grad_norm": 0.32891914752104057, "learning_rate": 3.739614466030322e-05, "loss": 0.4303, "num_tokens": 888137265.0, "step": 1163 }, { "epoch": 1.5885465562857388, "grad_norm": 0.34071658335049554, "learning_rate": 3.739099184202324e-05, "loss": 0.4261, "num_tokens": 888917315.0, "step": 1164 }, { "epoch": 1.5899120935393019, "grad_norm": 0.38482442628806957, "learning_rate": 3.738583432861817e-05, "loss": 0.4363, "num_tokens": 889663794.0, "step": 1165 }, { "epoch": 1.591277630792865, "grad_norm": 0.44444041214426416, "learning_rate": 3.7380672121661364e-05, "loss": 0.4341, "num_tokens": 890416078.0, "step": 1166 }, { "epoch": 1.5926431680464281, "grad_norm": 0.35356270137497137, "learning_rate": 3.737550522272756e-05, "loss": 0.4345, "num_tokens": 891292450.0, "step": 1167 }, { "epoch": 1.5940087052999914, "grad_norm": 0.3801145601601044, "learning_rate": 3.7370333633392977e-05, "loss": 0.4303, "num_tokens": 892049989.0, "step": 1168 }, { "epoch": 1.5953742425535546, "grad_norm": 0.45800367583824214, "learning_rate": 3.736515735523523e-05, "loss": 0.4215, "num_tokens": 892786184.0, "step": 1169 }, { "epoch": 1.596739779807118, "grad_norm": 0.40186142497903976, "learning_rate": 3.7359976389833384e-05, "loss": 0.4415, "num_tokens": 893542058.0, "step": 1170 }, { "epoch": 1.5981053170606812, "grad_norm": 0.3987261921206058, "learning_rate": 3.735479073876793e-05, "loss": 0.425, "num_tokens": 894276344.0, "step": 1171 }, { "epoch": 1.5994708543142444, "grad_norm": 0.3565742090197278, "learning_rate": 3.734960040362078e-05, "loss": 0.435, "num_tokens": 895049576.0, "step": 1172 }, { "epoch": 1.6008363915678074, "grad_norm": 0.4466263281470545, "learning_rate": 3.734440538597528e-05, "loss": 0.4381, "num_tokens": 895841118.0, "step": 1173 }, { "epoch": 1.6022019288213707, "grad_norm": 0.3959780830342691, "learning_rate": 3.73392056874162e-05, "loss": 0.4401, "num_tokens": 896636430.0, "step": 1174 }, { "epoch": 1.6035674660749337, "grad_norm": 0.4374467095941515, "learning_rate": 3.7334001309529754e-05, "loss": 0.4297, "num_tokens": 897414539.0, "step": 1175 }, { "epoch": 1.604933003328497, "grad_norm": 0.3729094067478619, "learning_rate": 3.732879225390357e-05, "loss": 0.4293, "num_tokens": 898158005.0, "step": 1176 }, { "epoch": 1.6062985405820602, "grad_norm": 0.3994689748823303, "learning_rate": 3.7323578522126704e-05, "loss": 0.4384, "num_tokens": 898883706.0, "step": 1177 }, { "epoch": 1.6076640778356235, "grad_norm": 0.4411446122093727, "learning_rate": 3.7318360115789637e-05, "loss": 0.4488, "num_tokens": 899662132.0, "step": 1178 }, { "epoch": 1.6090296150891867, "grad_norm": 0.48193833513141876, "learning_rate": 3.731313703648427e-05, "loss": 0.4297, "num_tokens": 900428985.0, "step": 1179 }, { "epoch": 1.61039515234275, "grad_norm": 0.39527451587073287, "learning_rate": 3.730790928580397e-05, "loss": 0.4449, "num_tokens": 901132358.0, "step": 1180 }, { "epoch": 1.611760689596313, "grad_norm": 0.472631423560088, "learning_rate": 3.730267686534346e-05, "loss": 0.4396, "num_tokens": 901834067.0, "step": 1181 }, { "epoch": 1.6131262268498763, "grad_norm": 0.41093459920861375, "learning_rate": 3.729743977669895e-05, "loss": 0.4342, "num_tokens": 902657804.0, "step": 1182 }, { "epoch": 1.6144917641034393, "grad_norm": 0.44601737534029023, "learning_rate": 3.729219802146803e-05, "loss": 0.4304, "num_tokens": 903405433.0, "step": 1183 }, { "epoch": 1.6158573013570026, "grad_norm": 0.40563813234551294, "learning_rate": 3.7286951601249746e-05, "loss": 0.4616, "num_tokens": 904184629.0, "step": 1184 }, { "epoch": 1.6172228386105658, "grad_norm": 0.44176247106705635, "learning_rate": 3.7281700517644554e-05, "loss": 0.4204, "num_tokens": 904955451.0, "step": 1185 }, { "epoch": 1.618588375864129, "grad_norm": 0.496030785993317, "learning_rate": 3.7276444772254325e-05, "loss": 0.4568, "num_tokens": 905696793.0, "step": 1186 }, { "epoch": 1.6199539131176923, "grad_norm": 0.4157974439673121, "learning_rate": 3.7271184366682365e-05, "loss": 0.4145, "num_tokens": 906438038.0, "step": 1187 }, { "epoch": 1.6213194503712556, "grad_norm": 0.43785134299124745, "learning_rate": 3.726591930253339e-05, "loss": 0.4457, "num_tokens": 907270607.0, "step": 1188 }, { "epoch": 1.6226849876248186, "grad_norm": 0.39634775519417814, "learning_rate": 3.726064958141355e-05, "loss": 0.4401, "num_tokens": 908090745.0, "step": 1189 }, { "epoch": 1.6240505248783819, "grad_norm": 0.42206072603064937, "learning_rate": 3.72553752049304e-05, "loss": 0.4367, "num_tokens": 908865997.0, "step": 1190 }, { "epoch": 1.625416062131945, "grad_norm": 0.3950833259376444, "learning_rate": 3.7250096174692936e-05, "loss": 0.447, "num_tokens": 909640165.0, "step": 1191 }, { "epoch": 1.6267815993855081, "grad_norm": 0.41317452778829866, "learning_rate": 3.7244812492311554e-05, "loss": 0.427, "num_tokens": 910419507.0, "step": 1192 }, { "epoch": 1.6281471366390714, "grad_norm": 0.3727013073466636, "learning_rate": 3.7239524159398076e-05, "loss": 0.4298, "num_tokens": 911224684.0, "step": 1193 }, { "epoch": 1.6295126738926347, "grad_norm": 0.3737946619663177, "learning_rate": 3.723423117756574e-05, "loss": 0.4214, "num_tokens": 911996270.0, "step": 1194 }, { "epoch": 1.630878211146198, "grad_norm": 0.38837823338121696, "learning_rate": 3.722893354842921e-05, "loss": 0.4437, "num_tokens": 912769798.0, "step": 1195 }, { "epoch": 1.6322437483997612, "grad_norm": 0.36066506479324645, "learning_rate": 3.722363127360457e-05, "loss": 0.4424, "num_tokens": 913646571.0, "step": 1196 }, { "epoch": 1.6336092856533242, "grad_norm": 0.47248207532884423, "learning_rate": 3.7218324354709305e-05, "loss": 0.4446, "num_tokens": 914384512.0, "step": 1197 }, { "epoch": 1.6349748229068874, "grad_norm": 0.3747098510754133, "learning_rate": 3.721301279336233e-05, "loss": 0.4232, "num_tokens": 915134562.0, "step": 1198 }, { "epoch": 1.6363403601604505, "grad_norm": 0.44658359851108975, "learning_rate": 3.720769659118397e-05, "loss": 0.4483, "num_tokens": 915937181.0, "step": 1199 }, { "epoch": 1.6377058974140137, "grad_norm": 0.38103572736744035, "learning_rate": 3.720237574979597e-05, "loss": 0.4223, "num_tokens": 916696280.0, "step": 1200 }, { "epoch": 1.639071434667577, "grad_norm": 0.47166855075810143, "learning_rate": 3.7197050270821485e-05, "loss": 0.4159, "num_tokens": 917412503.0, "step": 1201 }, { "epoch": 1.6404369719211402, "grad_norm": 0.455155603747887, "learning_rate": 3.719172015588509e-05, "loss": 0.4253, "num_tokens": 918117121.0, "step": 1202 }, { "epoch": 1.6418025091747035, "grad_norm": 0.3945823063069175, "learning_rate": 3.718638540661278e-05, "loss": 0.4504, "num_tokens": 918937113.0, "step": 1203 }, { "epoch": 1.6431680464282667, "grad_norm": 0.47746632238146747, "learning_rate": 3.718104602463194e-05, "loss": 0.4322, "num_tokens": 919612274.0, "step": 1204 }, { "epoch": 1.6445335836818298, "grad_norm": 0.4067143466038313, "learning_rate": 3.71757020115714e-05, "loss": 0.4169, "num_tokens": 920432589.0, "step": 1205 }, { "epoch": 1.645899120935393, "grad_norm": 0.44899955488235815, "learning_rate": 3.717035336906138e-05, "loss": 0.439, "num_tokens": 921231719.0, "step": 1206 }, { "epoch": 1.647264658188956, "grad_norm": 0.42282771542227215, "learning_rate": 3.7165000098733515e-05, "loss": 0.4392, "num_tokens": 921997319.0, "step": 1207 }, { "epoch": 1.6486301954425193, "grad_norm": 0.42628861966429293, "learning_rate": 3.715964220222087e-05, "loss": 0.4288, "num_tokens": 922727822.0, "step": 1208 }, { "epoch": 1.6499957326960826, "grad_norm": 0.40605877008869795, "learning_rate": 3.715427968115789e-05, "loss": 0.4459, "num_tokens": 923479628.0, "step": 1209 }, { "epoch": 1.6513612699496458, "grad_norm": 0.4424546177212002, "learning_rate": 3.7148912537180454e-05, "loss": 0.4285, "num_tokens": 924180362.0, "step": 1210 }, { "epoch": 1.652726807203209, "grad_norm": 0.4420655134475249, "learning_rate": 3.714354077192585e-05, "loss": 0.4315, "num_tokens": 925028872.0, "step": 1211 }, { "epoch": 1.6540923444567723, "grad_norm": 0.42224194343263083, "learning_rate": 3.713816438703276e-05, "loss": 0.4288, "num_tokens": 925812614.0, "step": 1212 }, { "epoch": 1.6554578817103354, "grad_norm": 0.36875269053847587, "learning_rate": 3.71327833841413e-05, "loss": 0.4429, "num_tokens": 926595468.0, "step": 1213 }, { "epoch": 1.6568234189638986, "grad_norm": 0.4466544277712264, "learning_rate": 3.712739776489297e-05, "loss": 0.4286, "num_tokens": 927392118.0, "step": 1214 }, { "epoch": 1.6581889562174617, "grad_norm": 0.3818484560711353, "learning_rate": 3.71220075309307e-05, "loss": 0.4297, "num_tokens": 928157989.0, "step": 1215 }, { "epoch": 1.659554493471025, "grad_norm": 0.47757093251386, "learning_rate": 3.711661268389881e-05, "loss": 0.4058, "num_tokens": 928846657.0, "step": 1216 }, { "epoch": 1.6609200307245882, "grad_norm": 0.38647052938399473, "learning_rate": 3.711121322544302e-05, "loss": 0.4318, "num_tokens": 929638962.0, "step": 1217 }, { "epoch": 1.6622855679781514, "grad_norm": 0.4668656091156422, "learning_rate": 3.710580915721051e-05, "loss": 0.4444, "num_tokens": 930366074.0, "step": 1218 }, { "epoch": 1.6636511052317147, "grad_norm": 0.39609511749846404, "learning_rate": 3.710040048084979e-05, "loss": 0.4205, "num_tokens": 931085956.0, "step": 1219 }, { "epoch": 1.665016642485278, "grad_norm": 0.3615473278653633, "learning_rate": 3.709498719801083e-05, "loss": 0.4184, "num_tokens": 931840275.0, "step": 1220 }, { "epoch": 1.666382179738841, "grad_norm": 0.39651641105053564, "learning_rate": 3.708956931034498e-05, "loss": 0.435, "num_tokens": 932575611.0, "step": 1221 }, { "epoch": 1.6677477169924042, "grad_norm": 0.39346222857871616, "learning_rate": 3.708414681950501e-05, "loss": 0.445, "num_tokens": 933317306.0, "step": 1222 }, { "epoch": 1.6691132542459672, "grad_norm": 0.42122411957596384, "learning_rate": 3.707871972714509e-05, "loss": 0.4284, "num_tokens": 934017808.0, "step": 1223 }, { "epoch": 1.6704787914995305, "grad_norm": 0.4008540762038735, "learning_rate": 3.707328803492077e-05, "loss": 0.4293, "num_tokens": 934747849.0, "step": 1224 }, { "epoch": 1.6718443287530937, "grad_norm": 0.3638222387770411, "learning_rate": 3.706785174448905e-05, "loss": 0.405, "num_tokens": 935398512.0, "step": 1225 }, { "epoch": 1.673209866006657, "grad_norm": 0.4233364474326722, "learning_rate": 3.706241085750829e-05, "loss": 0.4408, "num_tokens": 936201755.0, "step": 1226 }, { "epoch": 1.6745754032602203, "grad_norm": 0.3753797621467007, "learning_rate": 3.705696537563828e-05, "loss": 0.43, "num_tokens": 937026108.0, "step": 1227 }, { "epoch": 1.6759409405137835, "grad_norm": 0.4111324893079923, "learning_rate": 3.70515153005402e-05, "loss": 0.4335, "num_tokens": 937830227.0, "step": 1228 }, { "epoch": 1.6773064777673465, "grad_norm": 0.4170484213701709, "learning_rate": 3.704606063387662e-05, "loss": 0.4217, "num_tokens": 938602809.0, "step": 1229 }, { "epoch": 1.6786720150209098, "grad_norm": 0.3972111616293787, "learning_rate": 3.7040601377311526e-05, "loss": 0.4219, "num_tokens": 939307550.0, "step": 1230 }, { "epoch": 1.6800375522744728, "grad_norm": 0.40089761752121894, "learning_rate": 3.703513753251032e-05, "loss": 0.4268, "num_tokens": 940033170.0, "step": 1231 }, { "epoch": 1.681403089528036, "grad_norm": 0.38198188035404834, "learning_rate": 3.7029669101139756e-05, "loss": 0.4404, "num_tokens": 940818366.0, "step": 1232 }, { "epoch": 1.6827686267815993, "grad_norm": 0.39487053409055123, "learning_rate": 3.702419608486803e-05, "loss": 0.4475, "num_tokens": 941562733.0, "step": 1233 }, { "epoch": 1.6841341640351626, "grad_norm": 0.4002062762478148, "learning_rate": 3.701871848536473e-05, "loss": 0.4179, "num_tokens": 942309901.0, "step": 1234 }, { "epoch": 1.6854997012887258, "grad_norm": 0.387509331432347, "learning_rate": 3.701323630430082e-05, "loss": 0.4576, "num_tokens": 943082992.0, "step": 1235 }, { "epoch": 1.686865238542289, "grad_norm": 0.3491419263490325, "learning_rate": 3.700774954334868e-05, "loss": 0.4239, "num_tokens": 943902619.0, "step": 1236 }, { "epoch": 1.6882307757958523, "grad_norm": 0.4328842843291413, "learning_rate": 3.7002258204182084e-05, "loss": 0.4343, "num_tokens": 944742807.0, "step": 1237 }, { "epoch": 1.6895963130494154, "grad_norm": 0.39969691629815, "learning_rate": 3.69967622884762e-05, "loss": 0.4118, "num_tokens": 945491535.0, "step": 1238 }, { "epoch": 1.6909618503029786, "grad_norm": 0.47257393044053647, "learning_rate": 3.6991261797907604e-05, "loss": 0.4313, "num_tokens": 946259346.0, "step": 1239 }, { "epoch": 1.6923273875565417, "grad_norm": 0.39630515407796274, "learning_rate": 3.698575673415425e-05, "loss": 0.4488, "num_tokens": 947042346.0, "step": 1240 }, { "epoch": 1.693692924810105, "grad_norm": 0.40874020786102544, "learning_rate": 3.6980247098895485e-05, "loss": 0.4094, "num_tokens": 947742050.0, "step": 1241 }, { "epoch": 1.6950584620636682, "grad_norm": 0.3937491962396805, "learning_rate": 3.6974732893812074e-05, "loss": 0.4361, "num_tokens": 948460727.0, "step": 1242 }, { "epoch": 1.6964239993172314, "grad_norm": 0.44257586139991395, "learning_rate": 3.696921412058617e-05, "loss": 0.4277, "num_tokens": 949212962.0, "step": 1243 }, { "epoch": 1.6977895365707947, "grad_norm": 0.4085460314626642, "learning_rate": 3.696369078090128e-05, "loss": 0.4405, "num_tokens": 950015170.0, "step": 1244 }, { "epoch": 1.699155073824358, "grad_norm": 0.4318784124269646, "learning_rate": 3.695816287644236e-05, "loss": 0.43, "num_tokens": 950789340.0, "step": 1245 }, { "epoch": 1.700520611077921, "grad_norm": 0.3773451448818903, "learning_rate": 3.6952630408895734e-05, "loss": 0.4421, "num_tokens": 951538216.0, "step": 1246 }, { "epoch": 1.7018861483314842, "grad_norm": 0.4179482992580424, "learning_rate": 3.6947093379949115e-05, "loss": 0.4375, "num_tokens": 952324531.0, "step": 1247 }, { "epoch": 1.7032516855850472, "grad_norm": 0.4076549635525947, "learning_rate": 3.6941551791291604e-05, "loss": 0.4421, "num_tokens": 953075087.0, "step": 1248 }, { "epoch": 1.7046172228386105, "grad_norm": 0.42469087294999724, "learning_rate": 3.693600564461371e-05, "loss": 0.4197, "num_tokens": 953891104.0, "step": 1249 }, { "epoch": 1.7059827600921738, "grad_norm": 0.44445267478692735, "learning_rate": 3.6930454941607316e-05, "loss": 0.4355, "num_tokens": 954650868.0, "step": 1250 }, { "epoch": 1.707348297345737, "grad_norm": 0.433674830422921, "learning_rate": 3.69248996839657e-05, "loss": 0.4233, "num_tokens": 955350949.0, "step": 1251 }, { "epoch": 1.7087138345993003, "grad_norm": 0.3985915485089401, "learning_rate": 3.6919339873383537e-05, "loss": 0.4465, "num_tokens": 956108107.0, "step": 1252 }, { "epoch": 1.7100793718528635, "grad_norm": 0.4202137935753078, "learning_rate": 3.691377551155687e-05, "loss": 0.41, "num_tokens": 956824755.0, "step": 1253 }, { "epoch": 1.7114449091064265, "grad_norm": 0.4282785582296837, "learning_rate": 3.690820660018317e-05, "loss": 0.4319, "num_tokens": 957591891.0, "step": 1254 }, { "epoch": 1.7128104463599898, "grad_norm": 0.43147183765777436, "learning_rate": 3.690263314096125e-05, "loss": 0.4328, "num_tokens": 958328498.0, "step": 1255 }, { "epoch": 1.7141759836135528, "grad_norm": 0.3675209788842465, "learning_rate": 3.689705513559133e-05, "loss": 0.3988, "num_tokens": 959035921.0, "step": 1256 }, { "epoch": 1.715541520867116, "grad_norm": 0.4487551839516968, "learning_rate": 3.6891472585775034e-05, "loss": 0.4391, "num_tokens": 959763911.0, "step": 1257 }, { "epoch": 1.7169070581206793, "grad_norm": 0.39894681592377607, "learning_rate": 3.688588549321534e-05, "loss": 0.4408, "num_tokens": 960581633.0, "step": 1258 }, { "epoch": 1.7182725953742426, "grad_norm": 0.4555491950260509, "learning_rate": 3.6880293859616646e-05, "loss": 0.417, "num_tokens": 961311201.0, "step": 1259 }, { "epoch": 1.7196381326278058, "grad_norm": 0.3902813899779041, "learning_rate": 3.687469768668469e-05, "loss": 0.4317, "num_tokens": 962135110.0, "step": 1260 }, { "epoch": 1.721003669881369, "grad_norm": 0.4444574517286021, "learning_rate": 3.686909697612665e-05, "loss": 0.419, "num_tokens": 962877982.0, "step": 1261 }, { "epoch": 1.7223692071349321, "grad_norm": 0.38551070431496526, "learning_rate": 3.6863491729651046e-05, "loss": 0.4424, "num_tokens": 963664778.0, "step": 1262 }, { "epoch": 1.7237347443884954, "grad_norm": 0.4304875722720706, "learning_rate": 3.685788194896779e-05, "loss": 0.4325, "num_tokens": 964383183.0, "step": 1263 }, { "epoch": 1.7251002816420584, "grad_norm": 0.3759388078302131, "learning_rate": 3.6852267635788205e-05, "loss": 0.4226, "num_tokens": 965127212.0, "step": 1264 }, { "epoch": 1.7264658188956217, "grad_norm": 0.45301133716542163, "learning_rate": 3.684664879182496e-05, "loss": 0.4372, "num_tokens": 965943491.0, "step": 1265 }, { "epoch": 1.727831356149185, "grad_norm": 0.3732552341805434, "learning_rate": 3.6841025418792114e-05, "loss": 0.4241, "num_tokens": 966704554.0, "step": 1266 }, { "epoch": 1.7291968934027482, "grad_norm": 0.5096694461153144, "learning_rate": 3.6835397518405124e-05, "loss": 0.4078, "num_tokens": 967443941.0, "step": 1267 }, { "epoch": 1.7305624306563114, "grad_norm": 0.38719814256779617, "learning_rate": 3.682976509238082e-05, "loss": 0.4087, "num_tokens": 968170723.0, "step": 1268 }, { "epoch": 1.7319279679098747, "grad_norm": 0.4703993441009247, "learning_rate": 3.682412814243741e-05, "loss": 0.4198, "num_tokens": 968917602.0, "step": 1269 }, { "epoch": 1.7332935051634377, "grad_norm": 0.45418216457421196, "learning_rate": 3.6818486670294476e-05, "loss": 0.4215, "num_tokens": 969663786.0, "step": 1270 }, { "epoch": 1.734659042417001, "grad_norm": 0.4346729186295309, "learning_rate": 3.6812840677672996e-05, "loss": 0.4288, "num_tokens": 970455867.0, "step": 1271 }, { "epoch": 1.736024579670564, "grad_norm": 0.45163951458757595, "learning_rate": 3.6807190166295324e-05, "loss": 0.4609, "num_tokens": 971248699.0, "step": 1272 }, { "epoch": 1.7373901169241273, "grad_norm": 0.4341893067833592, "learning_rate": 3.680153513788517e-05, "loss": 0.4318, "num_tokens": 972016614.0, "step": 1273 }, { "epoch": 1.7387556541776905, "grad_norm": 0.4868502607185428, "learning_rate": 3.679587559416764e-05, "loss": 0.441, "num_tokens": 972771512.0, "step": 1274 }, { "epoch": 1.7401211914312538, "grad_norm": 0.4325605933186321, "learning_rate": 3.679021153686923e-05, "loss": 0.4503, "num_tokens": 973502735.0, "step": 1275 }, { "epoch": 1.741486728684817, "grad_norm": 0.4485728409028035, "learning_rate": 3.678454296771778e-05, "loss": 0.4499, "num_tokens": 974196212.0, "step": 1276 }, { "epoch": 1.7428522659383803, "grad_norm": 0.4282565224241514, "learning_rate": 3.677886988844255e-05, "loss": 0.4285, "num_tokens": 974944799.0, "step": 1277 }, { "epoch": 1.7442178031919433, "grad_norm": 0.341653193471189, "learning_rate": 3.677319230077412e-05, "loss": 0.4236, "num_tokens": 975748225.0, "step": 1278 }, { "epoch": 1.7455833404455066, "grad_norm": 0.38682730905759927, "learning_rate": 3.67675102064445e-05, "loss": 0.4403, "num_tokens": 976552779.0, "step": 1279 }, { "epoch": 1.7469488776990696, "grad_norm": 0.4134881219246007, "learning_rate": 3.676182360718704e-05, "loss": 0.4366, "num_tokens": 977323948.0, "step": 1280 }, { "epoch": 1.7483144149526328, "grad_norm": 0.3379204004806738, "learning_rate": 3.675613250473648e-05, "loss": 0.4167, "num_tokens": 978154226.0, "step": 1281 }, { "epoch": 1.749679952206196, "grad_norm": 0.38431913992483663, "learning_rate": 3.675043690082892e-05, "loss": 0.428, "num_tokens": 978900281.0, "step": 1282 }, { "epoch": 1.7510454894597594, "grad_norm": 0.3270010572575604, "learning_rate": 3.6744736797201856e-05, "loss": 0.4364, "num_tokens": 979717687.0, "step": 1283 }, { "epoch": 1.7524110267133226, "grad_norm": 0.3914799446154672, "learning_rate": 3.6739032195594134e-05, "loss": 0.4324, "num_tokens": 980482708.0, "step": 1284 }, { "epoch": 1.7537765639668859, "grad_norm": 0.33644346902133015, "learning_rate": 3.673332309774598e-05, "loss": 0.4314, "num_tokens": 981324398.0, "step": 1285 }, { "epoch": 1.755142101220449, "grad_norm": 0.35280925352273823, "learning_rate": 3.6727609505398995e-05, "loss": 0.445, "num_tokens": 982189022.0, "step": 1286 }, { "epoch": 1.7565076384740121, "grad_norm": 0.3929253579176977, "learning_rate": 3.672189142029614e-05, "loss": 0.4324, "num_tokens": 982907827.0, "step": 1287 }, { "epoch": 1.7578731757275752, "grad_norm": 0.37111295141544676, "learning_rate": 3.671616884418178e-05, "loss": 0.4428, "num_tokens": 983702106.0, "step": 1288 }, { "epoch": 1.7592387129811384, "grad_norm": 0.45314197539394024, "learning_rate": 3.67104417788016e-05, "loss": 0.4306, "num_tokens": 984378865.0, "step": 1289 }, { "epoch": 1.7606042502347017, "grad_norm": 0.3875055090887739, "learning_rate": 3.670471022590269e-05, "loss": 0.453, "num_tokens": 985168260.0, "step": 1290 }, { "epoch": 1.761969787488265, "grad_norm": 0.39706729117850575, "learning_rate": 3.66989741872335e-05, "loss": 0.4281, "num_tokens": 985833549.0, "step": 1291 }, { "epoch": 1.7633353247418282, "grad_norm": 0.42721882842178116, "learning_rate": 3.6693233664543846e-05, "loss": 0.4439, "num_tokens": 986548571.0, "step": 1292 }, { "epoch": 1.7647008619953914, "grad_norm": 0.39284975355351126, "learning_rate": 3.66874886595849e-05, "loss": 0.4367, "num_tokens": 987277391.0, "step": 1293 }, { "epoch": 1.7660663992489545, "grad_norm": 0.4511402124763085, "learning_rate": 3.668173917410923e-05, "loss": 0.419, "num_tokens": 988038211.0, "step": 1294 }, { "epoch": 1.7674319365025177, "grad_norm": 0.4211806019581649, "learning_rate": 3.667598520987075e-05, "loss": 0.4472, "num_tokens": 988843265.0, "step": 1295 }, { "epoch": 1.7687974737560808, "grad_norm": 0.4639412958618325, "learning_rate": 3.6670226768624745e-05, "loss": 0.4378, "num_tokens": 989739636.0, "step": 1296 }, { "epoch": 1.770163011009644, "grad_norm": 0.47329094827012846, "learning_rate": 3.666446385212787e-05, "loss": 0.4445, "num_tokens": 990541518.0, "step": 1297 }, { "epoch": 1.7715285482632073, "grad_norm": 0.49285066639381153, "learning_rate": 3.665869646213814e-05, "loss": 0.4221, "num_tokens": 991288069.0, "step": 1298 }, { "epoch": 1.7728940855167705, "grad_norm": 0.4503032847222511, "learning_rate": 3.665292460041492e-05, "loss": 0.4185, "num_tokens": 991987588.0, "step": 1299 }, { "epoch": 1.7742596227703338, "grad_norm": 0.3804828788301973, "learning_rate": 3.664714826871898e-05, "loss": 0.4107, "num_tokens": 992744334.0, "step": 1300 }, { "epoch": 1.775625160023897, "grad_norm": 0.4677432343244054, "learning_rate": 3.6641367468812415e-05, "loss": 0.462, "num_tokens": 993497890.0, "step": 1301 }, { "epoch": 1.77699069727746, "grad_norm": 0.38316486910609054, "learning_rate": 3.66355822024587e-05, "loss": 0.4303, "num_tokens": 994238417.0, "step": 1302 }, { "epoch": 1.7783562345310233, "grad_norm": 0.42612092293562, "learning_rate": 3.662979247142266e-05, "loss": 0.4199, "num_tokens": 994963096.0, "step": 1303 }, { "epoch": 1.7797217717845863, "grad_norm": 0.45188760020875446, "learning_rate": 3.662399827747051e-05, "loss": 0.4228, "num_tokens": 995640603.0, "step": 1304 }, { "epoch": 1.7810873090381496, "grad_norm": 0.38947315493174417, "learning_rate": 3.66181996223698e-05, "loss": 0.4193, "num_tokens": 996328265.0, "step": 1305 }, { "epoch": 1.7824528462917129, "grad_norm": 0.3981479954159484, "learning_rate": 3.661239650788944e-05, "loss": 0.4354, "num_tokens": 997089711.0, "step": 1306 }, { "epoch": 1.783818383545276, "grad_norm": 0.3412004123459722, "learning_rate": 3.6606588935799706e-05, "loss": 0.4344, "num_tokens": 997803333.0, "step": 1307 }, { "epoch": 1.7851839207988394, "grad_norm": 0.33729341978311056, "learning_rate": 3.660077690787226e-05, "loss": 0.4172, "num_tokens": 998555301.0, "step": 1308 }, { "epoch": 1.7865494580524026, "grad_norm": 0.3866042618346756, "learning_rate": 3.659496042588008e-05, "loss": 0.4214, "num_tokens": 999278977.0, "step": 1309 }, { "epoch": 1.7879149953059656, "grad_norm": 0.3205351897468973, "learning_rate": 3.658913949159752e-05, "loss": 0.4201, "num_tokens": 1000014672.0, "step": 1310 }, { "epoch": 1.789280532559529, "grad_norm": 0.40523825900959853, "learning_rate": 3.658331410680031e-05, "loss": 0.437, "num_tokens": 1000741773.0, "step": 1311 }, { "epoch": 1.790646069813092, "grad_norm": 0.39834312011753414, "learning_rate": 3.657748427326551e-05, "loss": 0.4309, "num_tokens": 1001494940.0, "step": 1312 }, { "epoch": 1.7920116070666552, "grad_norm": 0.3470342983573062, "learning_rate": 3.657164999277156e-05, "loss": 0.4634, "num_tokens": 1002252893.0, "step": 1313 }, { "epoch": 1.7933771443202184, "grad_norm": 0.4013549569387024, "learning_rate": 3.6565811267098245e-05, "loss": 0.4194, "num_tokens": 1002977777.0, "step": 1314 }, { "epoch": 1.7947426815737817, "grad_norm": 0.3914645238894948, "learning_rate": 3.6559968098026704e-05, "loss": 0.4325, "num_tokens": 1003740831.0, "step": 1315 }, { "epoch": 1.796108218827345, "grad_norm": 0.3670788224543238, "learning_rate": 3.655412048733943e-05, "loss": 0.4322, "num_tokens": 1004414611.0, "step": 1316 }, { "epoch": 1.7974737560809082, "grad_norm": 0.42865235625487763, "learning_rate": 3.654826843682028e-05, "loss": 0.4414, "num_tokens": 1005144139.0, "step": 1317 }, { "epoch": 1.7988392933344712, "grad_norm": 0.4114719744192057, "learning_rate": 3.654241194825446e-05, "loss": 0.4244, "num_tokens": 1005913575.0, "step": 1318 }, { "epoch": 1.8002048305880345, "grad_norm": 0.3904819881487266, "learning_rate": 3.653655102342854e-05, "loss": 0.444, "num_tokens": 1006592989.0, "step": 1319 }, { "epoch": 1.8015703678415975, "grad_norm": 0.35086332997298525, "learning_rate": 3.653068566413043e-05, "loss": 0.4187, "num_tokens": 1007310808.0, "step": 1320 }, { "epoch": 1.8029359050951608, "grad_norm": 0.378870962976585, "learning_rate": 3.652481587214939e-05, "loss": 0.4305, "num_tokens": 1007965480.0, "step": 1321 }, { "epoch": 1.804301442348724, "grad_norm": 0.38903746546002976, "learning_rate": 3.6518941649276035e-05, "loss": 0.4203, "num_tokens": 1008622295.0, "step": 1322 }, { "epoch": 1.8056669796022873, "grad_norm": 0.3612569313136121, "learning_rate": 3.6513062997302356e-05, "loss": 0.4464, "num_tokens": 1009364283.0, "step": 1323 }, { "epoch": 1.8070325168558505, "grad_norm": 0.33761318667786805, "learning_rate": 3.6507179918021665e-05, "loss": 0.4327, "num_tokens": 1010141126.0, "step": 1324 }, { "epoch": 1.8083980541094138, "grad_norm": 0.3710295893560384, "learning_rate": 3.650129241322863e-05, "loss": 0.4607, "num_tokens": 1010884857.0, "step": 1325 }, { "epoch": 1.8097635913629768, "grad_norm": 0.3925915644976262, "learning_rate": 3.649540048471927e-05, "loss": 0.4253, "num_tokens": 1011570362.0, "step": 1326 }, { "epoch": 1.81112912861654, "grad_norm": 0.3903976329190139, "learning_rate": 3.648950413429097e-05, "loss": 0.4264, "num_tokens": 1012365637.0, "step": 1327 }, { "epoch": 1.812494665870103, "grad_norm": 0.3847411120251657, "learning_rate": 3.648360336374245e-05, "loss": 0.4295, "num_tokens": 1013157884.0, "step": 1328 }, { "epoch": 1.8138602031236664, "grad_norm": 0.34721004308380593, "learning_rate": 3.647769817487377e-05, "loss": 0.4179, "num_tokens": 1013881776.0, "step": 1329 }, { "epoch": 1.8152257403772296, "grad_norm": 0.4312740355905501, "learning_rate": 3.647178856948635e-05, "loss": 0.4408, "num_tokens": 1014637454.0, "step": 1330 }, { "epoch": 1.8165912776307929, "grad_norm": 0.3829952159347743, "learning_rate": 3.646587454938296e-05, "loss": 0.418, "num_tokens": 1015365856.0, "step": 1331 }, { "epoch": 1.8179568148843561, "grad_norm": 0.3623552645471658, "learning_rate": 3.645995611636772e-05, "loss": 0.423, "num_tokens": 1016121239.0, "step": 1332 }, { "epoch": 1.8193223521379194, "grad_norm": 0.38095126071193486, "learning_rate": 3.645403327224606e-05, "loss": 0.4423, "num_tokens": 1016913224.0, "step": 1333 }, { "epoch": 1.8206878893914826, "grad_norm": 0.42696583747420747, "learning_rate": 3.644810601882481e-05, "loss": 0.425, "num_tokens": 1017707092.0, "step": 1334 }, { "epoch": 1.8220534266450457, "grad_norm": 0.33276735985836675, "learning_rate": 3.64421743579121e-05, "loss": 0.4464, "num_tokens": 1018451466.0, "step": 1335 }, { "epoch": 1.823418963898609, "grad_norm": 0.3892321004003309, "learning_rate": 3.643623829131744e-05, "loss": 0.4174, "num_tokens": 1019208986.0, "step": 1336 }, { "epoch": 1.824784501152172, "grad_norm": 0.375900408824364, "learning_rate": 3.643029782085165e-05, "loss": 0.4221, "num_tokens": 1019981657.0, "step": 1337 }, { "epoch": 1.8261500384057352, "grad_norm": 0.311267303173057, "learning_rate": 3.642435294832692e-05, "loss": 0.4221, "num_tokens": 1020766462.0, "step": 1338 }, { "epoch": 1.8275155756592985, "grad_norm": 0.37760932870105257, "learning_rate": 3.641840367555677e-05, "loss": 0.4265, "num_tokens": 1021524094.0, "step": 1339 }, { "epoch": 1.8288811129128617, "grad_norm": 0.4031154603744908, "learning_rate": 3.6412450004356074e-05, "loss": 0.4241, "num_tokens": 1022304667.0, "step": 1340 }, { "epoch": 1.830246650166425, "grad_norm": 0.3689930350942328, "learning_rate": 3.640649193654102e-05, "loss": 0.4427, "num_tokens": 1023037930.0, "step": 1341 }, { "epoch": 1.8316121874199882, "grad_norm": 0.3730987160059288, "learning_rate": 3.6400529473929175e-05, "loss": 0.4508, "num_tokens": 1023873838.0, "step": 1342 }, { "epoch": 1.8329777246735512, "grad_norm": 0.40345713644089326, "learning_rate": 3.639456261833942e-05, "loss": 0.4328, "num_tokens": 1024651306.0, "step": 1343 }, { "epoch": 1.8343432619271145, "grad_norm": 0.3514499709032294, "learning_rate": 3.638859137159199e-05, "loss": 0.4564, "num_tokens": 1025393758.0, "step": 1344 }, { "epoch": 1.8357087991806775, "grad_norm": 0.4007709149592754, "learning_rate": 3.638261573550844e-05, "loss": 0.4219, "num_tokens": 1026091695.0, "step": 1345 }, { "epoch": 1.8370743364342408, "grad_norm": 0.36639783695059436, "learning_rate": 3.63766357119117e-05, "loss": 0.4197, "num_tokens": 1026878436.0, "step": 1346 }, { "epoch": 1.838439873687804, "grad_norm": 0.36738749385587754, "learning_rate": 3.637065130262599e-05, "loss": 0.4433, "num_tokens": 1027662285.0, "step": 1347 }, { "epoch": 1.8398054109413673, "grad_norm": 0.3353072748351123, "learning_rate": 3.636466250947692e-05, "loss": 0.4171, "num_tokens": 1028417573.0, "step": 1348 }, { "epoch": 1.8411709481949305, "grad_norm": 0.40227966449912483, "learning_rate": 3.6358669334291395e-05, "loss": 0.4563, "num_tokens": 1029154389.0, "step": 1349 }, { "epoch": 1.8425364854484938, "grad_norm": 0.4049942830616662, "learning_rate": 3.635267177889767e-05, "loss": 0.4349, "num_tokens": 1029957505.0, "step": 1350 }, { "epoch": 1.8439020227020568, "grad_norm": 0.3643460114160466, "learning_rate": 3.634666984512536e-05, "loss": 0.4351, "num_tokens": 1030745318.0, "step": 1351 }, { "epoch": 1.84526755995562, "grad_norm": 0.39264975506633437, "learning_rate": 3.634066353480536e-05, "loss": 0.4404, "num_tokens": 1031487740.0, "step": 1352 }, { "epoch": 1.8466330972091831, "grad_norm": 0.37926060721673627, "learning_rate": 3.633465284976998e-05, "loss": 0.4307, "num_tokens": 1032266420.0, "step": 1353 }, { "epoch": 1.8479986344627464, "grad_norm": 0.41375605594460674, "learning_rate": 3.632863779185279e-05, "loss": 0.4456, "num_tokens": 1033022760.0, "step": 1354 }, { "epoch": 1.8493641717163096, "grad_norm": 0.42105313762419133, "learning_rate": 3.632261836288872e-05, "loss": 0.4247, "num_tokens": 1033757383.0, "step": 1355 }, { "epoch": 1.8507297089698729, "grad_norm": 0.3868017690287465, "learning_rate": 3.6316594564714055e-05, "loss": 0.4535, "num_tokens": 1034547784.0, "step": 1356 }, { "epoch": 1.8520952462234361, "grad_norm": 0.39415356066035095, "learning_rate": 3.631056639916639e-05, "loss": 0.4398, "num_tokens": 1035322866.0, "step": 1357 }, { "epoch": 1.8534607834769994, "grad_norm": 0.33474155472208494, "learning_rate": 3.630453386808465e-05, "loss": 0.4404, "num_tokens": 1036103208.0, "step": 1358 }, { "epoch": 1.8548263207305624, "grad_norm": 0.45636267247617907, "learning_rate": 3.629849697330911e-05, "loss": 0.433, "num_tokens": 1036808074.0, "step": 1359 }, { "epoch": 1.8561918579841257, "grad_norm": 0.3588594313293287, "learning_rate": 3.629245571668135e-05, "loss": 0.4491, "num_tokens": 1037655441.0, "step": 1360 }, { "epoch": 1.8575573952376887, "grad_norm": 0.4137563324041265, "learning_rate": 3.628641010004431e-05, "loss": 0.4289, "num_tokens": 1038422815.0, "step": 1361 }, { "epoch": 1.858922932491252, "grad_norm": 0.3905651693458075, "learning_rate": 3.6280360125242234e-05, "loss": 0.4373, "num_tokens": 1039246967.0, "step": 1362 }, { "epoch": 1.8602884697448152, "grad_norm": 0.4059703177652625, "learning_rate": 3.6274305794120716e-05, "loss": 0.4249, "num_tokens": 1039984720.0, "step": 1363 }, { "epoch": 1.8616540069983785, "grad_norm": 0.38612473446033346, "learning_rate": 3.6268247108526674e-05, "loss": 0.441, "num_tokens": 1040678355.0, "step": 1364 }, { "epoch": 1.8630195442519417, "grad_norm": 0.398369353700162, "learning_rate": 3.626218407030834e-05, "loss": 0.4315, "num_tokens": 1041440149.0, "step": 1365 }, { "epoch": 1.864385081505505, "grad_norm": 0.38537777649284055, "learning_rate": 3.6256116681315285e-05, "loss": 0.4366, "num_tokens": 1042158039.0, "step": 1366 }, { "epoch": 1.865750618759068, "grad_norm": 0.40611508903832094, "learning_rate": 3.6250044943398416e-05, "loss": 0.4389, "num_tokens": 1042927338.0, "step": 1367 }, { "epoch": 1.8671161560126313, "grad_norm": 0.38519756626401264, "learning_rate": 3.6243968858409955e-05, "loss": 0.4434, "num_tokens": 1043710526.0, "step": 1368 }, { "epoch": 1.8684816932661943, "grad_norm": 0.435085943289413, "learning_rate": 3.623788842820344e-05, "loss": 0.446, "num_tokens": 1044452119.0, "step": 1369 }, { "epoch": 1.8698472305197575, "grad_norm": 0.37590653899047816, "learning_rate": 3.6231803654633766e-05, "loss": 0.4192, "num_tokens": 1045251019.0, "step": 1370 }, { "epoch": 1.8712127677733208, "grad_norm": 0.3794397235913343, "learning_rate": 3.622571453955712e-05, "loss": 0.4261, "num_tokens": 1046008119.0, "step": 1371 }, { "epoch": 1.872578305026884, "grad_norm": 0.43706443891858887, "learning_rate": 3.6219621084831026e-05, "loss": 0.4333, "num_tokens": 1046755215.0, "step": 1372 }, { "epoch": 1.8739438422804473, "grad_norm": 0.3615675834929493, "learning_rate": 3.621352329231434e-05, "loss": 0.4407, "num_tokens": 1047551893.0, "step": 1373 }, { "epoch": 1.8753093795340106, "grad_norm": 0.33188903972209294, "learning_rate": 3.620742116386724e-05, "loss": 0.4337, "num_tokens": 1048327530.0, "step": 1374 }, { "epoch": 1.8766749167875736, "grad_norm": 0.3663255732407586, "learning_rate": 3.620131470135121e-05, "loss": 0.4305, "num_tokens": 1049156457.0, "step": 1375 }, { "epoch": 1.8780404540411368, "grad_norm": 0.347732650642205, "learning_rate": 3.619520390662907e-05, "loss": 0.4324, "num_tokens": 1049866305.0, "step": 1376 }, { "epoch": 1.8794059912946999, "grad_norm": 0.375426282172125, "learning_rate": 3.618908878156496e-05, "loss": 0.4427, "num_tokens": 1050527086.0, "step": 1377 }, { "epoch": 1.8807715285482631, "grad_norm": 0.40755311059738164, "learning_rate": 3.6182969328024335e-05, "loss": 0.4103, "num_tokens": 1051300919.0, "step": 1378 }, { "epoch": 1.8821370658018264, "grad_norm": 0.3486059165452465, "learning_rate": 3.6176845547873994e-05, "loss": 0.4174, "num_tokens": 1052052554.0, "step": 1379 }, { "epoch": 1.8835026030553896, "grad_norm": 0.42067427931325574, "learning_rate": 3.617071744298201e-05, "loss": 0.4308, "num_tokens": 1052805026.0, "step": 1380 }, { "epoch": 1.8848681403089529, "grad_norm": 0.37223477623721823, "learning_rate": 3.616458501521782e-05, "loss": 0.4516, "num_tokens": 1053635092.0, "step": 1381 }, { "epoch": 1.8862336775625161, "grad_norm": 0.38905062564204673, "learning_rate": 3.6158448266452164e-05, "loss": 0.4158, "num_tokens": 1054399214.0, "step": 1382 }, { "epoch": 1.8875992148160792, "grad_norm": 0.36732730655080703, "learning_rate": 3.615230719855709e-05, "loss": 0.4294, "num_tokens": 1055161105.0, "step": 1383 }, { "epoch": 1.8889647520696424, "grad_norm": 0.3630398567412825, "learning_rate": 3.6146161813405966e-05, "loss": 0.4129, "num_tokens": 1055888181.0, "step": 1384 }, { "epoch": 1.8903302893232055, "grad_norm": 0.36713177773832734, "learning_rate": 3.61400121128735e-05, "loss": 0.4202, "num_tokens": 1056594511.0, "step": 1385 }, { "epoch": 1.8916958265767687, "grad_norm": 0.3717382769577863, "learning_rate": 3.613385809883569e-05, "loss": 0.4217, "num_tokens": 1057373681.0, "step": 1386 }, { "epoch": 1.893061363830332, "grad_norm": 0.400369274707793, "learning_rate": 3.6127699773169845e-05, "loss": 0.4248, "num_tokens": 1058148174.0, "step": 1387 }, { "epoch": 1.8944269010838952, "grad_norm": 0.33648128730058996, "learning_rate": 3.6121537137754626e-05, "loss": 0.4298, "num_tokens": 1058931524.0, "step": 1388 }, { "epoch": 1.8957924383374585, "grad_norm": 0.38324765364356594, "learning_rate": 3.611537019446999e-05, "loss": 0.4268, "num_tokens": 1059745866.0, "step": 1389 }, { "epoch": 1.8971579755910217, "grad_norm": 0.359020880052292, "learning_rate": 3.610919894519718e-05, "loss": 0.4088, "num_tokens": 1060462436.0, "step": 1390 }, { "epoch": 1.8985235128445848, "grad_norm": 0.42863794571861985, "learning_rate": 3.610302339181879e-05, "loss": 0.43, "num_tokens": 1061121671.0, "step": 1391 }, { "epoch": 1.899889050098148, "grad_norm": 0.42040434144688615, "learning_rate": 3.6096843536218714e-05, "loss": 0.4429, "num_tokens": 1061955157.0, "step": 1392 }, { "epoch": 1.901254587351711, "grad_norm": 0.3935388669374965, "learning_rate": 3.609065938028216e-05, "loss": 0.4494, "num_tokens": 1062734127.0, "step": 1393 }, { "epoch": 1.9026201246052743, "grad_norm": 0.3788574416547319, "learning_rate": 3.6084470925895646e-05, "loss": 0.436, "num_tokens": 1063546649.0, "step": 1394 }, { "epoch": 1.9039856618588376, "grad_norm": 0.4399012830595921, "learning_rate": 3.607827817494699e-05, "loss": 0.4343, "num_tokens": 1064301713.0, "step": 1395 }, { "epoch": 1.9053511991124008, "grad_norm": 0.40152427253247236, "learning_rate": 3.607208112932535e-05, "loss": 0.4351, "num_tokens": 1065069050.0, "step": 1396 }, { "epoch": 1.906716736365964, "grad_norm": 0.389267606747491, "learning_rate": 3.606587979092116e-05, "loss": 0.4324, "num_tokens": 1065867837.0, "step": 1397 }, { "epoch": 1.9080822736195273, "grad_norm": 0.4374833713958801, "learning_rate": 3.605967416162619e-05, "loss": 0.432, "num_tokens": 1066635576.0, "step": 1398 }, { "epoch": 1.9094478108730903, "grad_norm": 0.369258536153923, "learning_rate": 3.6053464243333515e-05, "loss": 0.4155, "num_tokens": 1067400109.0, "step": 1399 }, { "epoch": 1.9108133481266536, "grad_norm": 0.4232859232067434, "learning_rate": 3.60472500379375e-05, "loss": 0.4382, "num_tokens": 1068169071.0, "step": 1400 }, { "epoch": 1.9121788853802166, "grad_norm": 0.38481846405325837, "learning_rate": 3.604103154733382e-05, "loss": 0.4316, "num_tokens": 1068973662.0, "step": 1401 }, { "epoch": 1.9135444226337799, "grad_norm": 0.3922899532025941, "learning_rate": 3.6034808773419494e-05, "loss": 0.4083, "num_tokens": 1069687808.0, "step": 1402 }, { "epoch": 1.9149099598873431, "grad_norm": 0.40328130921197125, "learning_rate": 3.60285817180928e-05, "loss": 0.4318, "num_tokens": 1070490203.0, "step": 1403 }, { "epoch": 1.9162754971409064, "grad_norm": 0.40194911055404453, "learning_rate": 3.602235038325335e-05, "loss": 0.4447, "num_tokens": 1071259380.0, "step": 1404 }, { "epoch": 1.9176410343944696, "grad_norm": 0.4242785553547215, "learning_rate": 3.6016114770802056e-05, "loss": 0.4248, "num_tokens": 1071981465.0, "step": 1405 }, { "epoch": 1.919006571648033, "grad_norm": 0.4043805919992712, "learning_rate": 3.600987488264114e-05, "loss": 0.4499, "num_tokens": 1072766456.0, "step": 1406 }, { "epoch": 1.920372108901596, "grad_norm": 0.3684685193139591, "learning_rate": 3.60036307206741e-05, "loss": 0.4544, "num_tokens": 1073561309.0, "step": 1407 }, { "epoch": 1.9217376461551592, "grad_norm": 0.49192094064725056, "learning_rate": 3.5997382286805775e-05, "loss": 0.4248, "num_tokens": 1074288618.0, "step": 1408 }, { "epoch": 1.9231031834087222, "grad_norm": 0.3547404583365754, "learning_rate": 3.599112958294229e-05, "loss": 0.4265, "num_tokens": 1075024005.0, "step": 1409 }, { "epoch": 1.9244687206622855, "grad_norm": 0.402865172272666, "learning_rate": 3.598487261099108e-05, "loss": 0.4255, "num_tokens": 1075820395.0, "step": 1410 }, { "epoch": 1.9258342579158487, "grad_norm": 0.3751543923032575, "learning_rate": 3.5978611372860866e-05, "loss": 0.4268, "num_tokens": 1076589499.0, "step": 1411 }, { "epoch": 1.927199795169412, "grad_norm": 0.4264087763552726, "learning_rate": 3.5972345870461686e-05, "loss": 0.4274, "num_tokens": 1077334267.0, "step": 1412 }, { "epoch": 1.9285653324229752, "grad_norm": 0.34709502546193, "learning_rate": 3.596607610570487e-05, "loss": 0.4213, "num_tokens": 1078129944.0, "step": 1413 }, { "epoch": 1.9299308696765385, "grad_norm": 0.4446448915188761, "learning_rate": 3.595980208050306e-05, "loss": 0.4128, "num_tokens": 1078836245.0, "step": 1414 }, { "epoch": 1.9312964069301015, "grad_norm": 0.36205308447354745, "learning_rate": 3.595352379677017e-05, "loss": 0.4452, "num_tokens": 1079587809.0, "step": 1415 }, { "epoch": 1.9326619441836648, "grad_norm": 0.46641518953100636, "learning_rate": 3.594724125642145e-05, "loss": 0.4431, "num_tokens": 1080361621.0, "step": 1416 }, { "epoch": 1.9340274814372278, "grad_norm": 0.3759664235355608, "learning_rate": 3.594095446137343e-05, "loss": 0.4432, "num_tokens": 1081039509.0, "step": 1417 }, { "epoch": 1.935393018690791, "grad_norm": 0.48955142038035776, "learning_rate": 3.593466341354394e-05, "loss": 0.4408, "num_tokens": 1081839421.0, "step": 1418 }, { "epoch": 1.9367585559443543, "grad_norm": 0.3647108141184802, "learning_rate": 3.592836811485209e-05, "loss": 0.4237, "num_tokens": 1082569390.0, "step": 1419 }, { "epoch": 1.9381240931979176, "grad_norm": 0.423823213024988, "learning_rate": 3.592206856721832e-05, "loss": 0.4425, "num_tokens": 1083292577.0, "step": 1420 }, { "epoch": 1.9394896304514808, "grad_norm": 0.35957851686073883, "learning_rate": 3.5915764772564346e-05, "loss": 0.4358, "num_tokens": 1084064757.0, "step": 1421 }, { "epoch": 1.940855167705044, "grad_norm": 0.4319916377572871, "learning_rate": 3.5909456732813175e-05, "loss": 0.4107, "num_tokens": 1084831561.0, "step": 1422 }, { "epoch": 1.942220704958607, "grad_norm": 0.3717907855915832, "learning_rate": 3.590314444988913e-05, "loss": 0.442, "num_tokens": 1085676633.0, "step": 1423 }, { "epoch": 1.9435862422121704, "grad_norm": 0.4604102383141899, "learning_rate": 3.58968279257178e-05, "loss": 0.4431, "num_tokens": 1086491645.0, "step": 1424 }, { "epoch": 1.9449517794657334, "grad_norm": 0.41645497767310297, "learning_rate": 3.589050716222609e-05, "loss": 0.4334, "num_tokens": 1087234735.0, "step": 1425 }, { "epoch": 1.9463173167192966, "grad_norm": 0.39219121405606006, "learning_rate": 3.5884182161342194e-05, "loss": 0.4294, "num_tokens": 1087992936.0, "step": 1426 }, { "epoch": 1.94768285397286, "grad_norm": 0.42546174350560567, "learning_rate": 3.5877852924995594e-05, "loss": 0.4346, "num_tokens": 1088704460.0, "step": 1427 }, { "epoch": 1.9490483912264231, "grad_norm": 0.4570001055507728, "learning_rate": 3.587151945511707e-05, "loss": 0.4299, "num_tokens": 1089486597.0, "step": 1428 }, { "epoch": 1.9504139284799864, "grad_norm": 0.3584214778879992, "learning_rate": 3.586518175363868e-05, "loss": 0.4188, "num_tokens": 1090271761.0, "step": 1429 }, { "epoch": 1.9517794657335497, "grad_norm": 0.41443384492871144, "learning_rate": 3.585883982249378e-05, "loss": 0.4423, "num_tokens": 1091088504.0, "step": 1430 }, { "epoch": 1.9531450029871127, "grad_norm": 0.4202402819536376, "learning_rate": 3.585249366361703e-05, "loss": 0.4332, "num_tokens": 1091844749.0, "step": 1431 }, { "epoch": 1.954510540240676, "grad_norm": 0.41916429737114397, "learning_rate": 3.5846143278944375e-05, "loss": 0.4606, "num_tokens": 1092633853.0, "step": 1432 }, { "epoch": 1.955876077494239, "grad_norm": 0.37659204399501206, "learning_rate": 3.583978867041302e-05, "loss": 0.4413, "num_tokens": 1093469460.0, "step": 1433 }, { "epoch": 1.9572416147478022, "grad_norm": 0.45541917565815565, "learning_rate": 3.583342983996149e-05, "loss": 0.4337, "num_tokens": 1094146535.0, "step": 1434 }, { "epoch": 1.9586071520013655, "grad_norm": 0.40368074881954585, "learning_rate": 3.5827066789529596e-05, "loss": 0.4262, "num_tokens": 1094903981.0, "step": 1435 }, { "epoch": 1.9599726892549287, "grad_norm": 0.41741392467377303, "learning_rate": 3.5820699521058414e-05, "loss": 0.4127, "num_tokens": 1095626160.0, "step": 1436 }, { "epoch": 1.961338226508492, "grad_norm": 0.4043033791738134, "learning_rate": 3.5814328036490346e-05, "loss": 0.433, "num_tokens": 1096460621.0, "step": 1437 }, { "epoch": 1.9627037637620552, "grad_norm": 0.41418377100905435, "learning_rate": 3.580795233776903e-05, "loss": 0.4264, "num_tokens": 1097258051.0, "step": 1438 }, { "epoch": 1.9640693010156185, "grad_norm": 0.3460186118751466, "learning_rate": 3.580157242683944e-05, "loss": 0.4304, "num_tokens": 1098056109.0, "step": 1439 }, { "epoch": 1.9654348382691815, "grad_norm": 0.40229210717903346, "learning_rate": 3.5795188305647786e-05, "loss": 0.4334, "num_tokens": 1098865627.0, "step": 1440 }, { "epoch": 1.9668003755227448, "grad_norm": 0.4345395443876319, "learning_rate": 3.578879997614161e-05, "loss": 0.4324, "num_tokens": 1099577923.0, "step": 1441 }, { "epoch": 1.9681659127763078, "grad_norm": 0.40787001966049763, "learning_rate": 3.578240744026969e-05, "loss": 0.4181, "num_tokens": 1100345883.0, "step": 1442 }, { "epoch": 1.969531450029871, "grad_norm": 0.4734332448410739, "learning_rate": 3.577601069998214e-05, "loss": 0.4221, "num_tokens": 1101023823.0, "step": 1443 }, { "epoch": 1.9708969872834343, "grad_norm": 0.4280163941970784, "learning_rate": 3.5769609757230306e-05, "loss": 0.4453, "num_tokens": 1101822081.0, "step": 1444 }, { "epoch": 1.9722625245369976, "grad_norm": 0.3982790865554422, "learning_rate": 3.5763204613966846e-05, "loss": 0.4145, "num_tokens": 1102662227.0, "step": 1445 }, { "epoch": 1.9736280617905608, "grad_norm": 0.4170203906245631, "learning_rate": 3.575679527214569e-05, "loss": 0.4322, "num_tokens": 1103413966.0, "step": 1446 }, { "epoch": 1.974993599044124, "grad_norm": 0.4382290343293651, "learning_rate": 3.5750381733722054e-05, "loss": 0.4126, "num_tokens": 1104141719.0, "step": 1447 }, { "epoch": 1.9763591362976871, "grad_norm": 0.40116459855999687, "learning_rate": 3.574396400065243e-05, "loss": 0.4268, "num_tokens": 1104907981.0, "step": 1448 }, { "epoch": 1.9777246735512504, "grad_norm": 0.459081137218158, "learning_rate": 3.5737542074894595e-05, "loss": 0.4368, "num_tokens": 1105658789.0, "step": 1449 }, { "epoch": 1.9790902108048134, "grad_norm": 0.38240548049437945, "learning_rate": 3.573111595840759e-05, "loss": 0.4044, "num_tokens": 1106411334.0, "step": 1450 }, { "epoch": 1.9804557480583767, "grad_norm": 0.410544692978027, "learning_rate": 3.5724685653151753e-05, "loss": 0.4413, "num_tokens": 1107196407.0, "step": 1451 }, { "epoch": 1.98182128531194, "grad_norm": 0.3749848194832759, "learning_rate": 3.571825116108868e-05, "loss": 0.4399, "num_tokens": 1107970681.0, "step": 1452 }, { "epoch": 1.9831868225655032, "grad_norm": 0.39748584708767243, "learning_rate": 3.571181248418128e-05, "loss": 0.428, "num_tokens": 1108752574.0, "step": 1453 }, { "epoch": 1.9845523598190664, "grad_norm": 0.37453974004696317, "learning_rate": 3.5705369624393685e-05, "loss": 0.428, "num_tokens": 1109515949.0, "step": 1454 }, { "epoch": 1.9859178970726297, "grad_norm": 0.37364693326015874, "learning_rate": 3.569892258369135e-05, "loss": 0.4394, "num_tokens": 1110352057.0, "step": 1455 }, { "epoch": 1.9872834343261927, "grad_norm": 0.38562328996075007, "learning_rate": 3.569247136404099e-05, "loss": 0.4407, "num_tokens": 1111038880.0, "step": 1456 }, { "epoch": 1.988648971579756, "grad_norm": 0.3916878390706284, "learning_rate": 3.5686015967410584e-05, "loss": 0.4371, "num_tokens": 1111785922.0, "step": 1457 }, { "epoch": 1.990014508833319, "grad_norm": 0.4103315252532406, "learning_rate": 3.567955639576939e-05, "loss": 0.4275, "num_tokens": 1112590900.0, "step": 1458 }, { "epoch": 1.9913800460868822, "grad_norm": 0.40433075725606166, "learning_rate": 3.567309265108796e-05, "loss": 0.4301, "num_tokens": 1113357660.0, "step": 1459 }, { "epoch": 1.9927455833404455, "grad_norm": 0.4289215136886791, "learning_rate": 3.5666624735338095e-05, "loss": 0.4274, "num_tokens": 1114126554.0, "step": 1460 }, { "epoch": 1.9941111205940087, "grad_norm": 0.37645228699899647, "learning_rate": 3.5660152650492874e-05, "loss": 0.4205, "num_tokens": 1114877291.0, "step": 1461 }, { "epoch": 1.995476657847572, "grad_norm": 0.3643490820727832, "learning_rate": 3.5653676398526645e-05, "loss": 0.4106, "num_tokens": 1115672422.0, "step": 1462 }, { "epoch": 1.9968421951011353, "grad_norm": 0.4507669767261551, "learning_rate": 3.564719598141504e-05, "loss": 0.4302, "num_tokens": 1116443311.0, "step": 1463 }, { "epoch": 1.9982077323546983, "grad_norm": 0.3938021781122735, "learning_rate": 3.564071140113495e-05, "loss": 0.4338, "num_tokens": 1117179618.0, "step": 1464 }, { "epoch": 1.9995732696082615, "grad_norm": 0.45262177589331853, "learning_rate": 3.563422265966454e-05, "loss": 0.422, "num_tokens": 1117875132.0, "step": 1465 }, { "epoch": 2.0, "grad_norm": 0.45262177589331853, "learning_rate": 3.5627729758983246e-05, "loss": 0.4457, "num_tokens": 1118084272.0, "step": 1466 }, { "epoch": 2.0013655372535633, "grad_norm": 0.7962068004263575, "learning_rate": 3.562123270107177e-05, "loss": 0.4028, "num_tokens": 1118868163.0, "step": 1467 }, { "epoch": 2.0027310745071265, "grad_norm": 0.5275603051215744, "learning_rate": 3.561473148791208e-05, "loss": 0.3892, "num_tokens": 1119643637.0, "step": 1468 }, { "epoch": 2.0040966117606898, "grad_norm": 0.5008568152972795, "learning_rate": 3.560822612148742e-05, "loss": 0.3933, "num_tokens": 1120381599.0, "step": 1469 }, { "epoch": 2.005462149014253, "grad_norm": 0.4185468315820307, "learning_rate": 3.5601716603782295e-05, "loss": 0.3992, "num_tokens": 1121151170.0, "step": 1470 }, { "epoch": 2.006827686267816, "grad_norm": 0.4553767411447847, "learning_rate": 3.559520293678247e-05, "loss": 0.398, "num_tokens": 1121915108.0, "step": 1471 }, { "epoch": 2.008193223521379, "grad_norm": 0.4377442578052042, "learning_rate": 3.558868512247498e-05, "loss": 0.3812, "num_tokens": 1122662888.0, "step": 1472 }, { "epoch": 2.0095587607749423, "grad_norm": 0.4304390300683712, "learning_rate": 3.558216316284815e-05, "loss": 0.3899, "num_tokens": 1123450898.0, "step": 1473 }, { "epoch": 2.0109242980285056, "grad_norm": 0.4484335095774548, "learning_rate": 3.557563705989152e-05, "loss": 0.4135, "num_tokens": 1124287336.0, "step": 1474 }, { "epoch": 2.012289835282069, "grad_norm": 0.3849139351981153, "learning_rate": 3.556910681559594e-05, "loss": 0.3813, "num_tokens": 1125036585.0, "step": 1475 }, { "epoch": 2.013655372535632, "grad_norm": 0.41262712762976744, "learning_rate": 3.556257243195349e-05, "loss": 0.415, "num_tokens": 1125847140.0, "step": 1476 }, { "epoch": 2.0150209097891953, "grad_norm": 0.4047363815722634, "learning_rate": 3.555603391095755e-05, "loss": 0.4046, "num_tokens": 1126599912.0, "step": 1477 }, { "epoch": 2.0163864470427586, "grad_norm": 0.40400246571145493, "learning_rate": 3.554949125460272e-05, "loss": 0.4014, "num_tokens": 1127431146.0, "step": 1478 }, { "epoch": 2.0177519842963214, "grad_norm": 0.4201828560702374, "learning_rate": 3.554294446488488e-05, "loss": 0.3885, "num_tokens": 1128139535.0, "step": 1479 }, { "epoch": 2.0191175215498847, "grad_norm": 0.3734379016376752, "learning_rate": 3.553639354380119e-05, "loss": 0.3935, "num_tokens": 1128817824.0, "step": 1480 }, { "epoch": 2.020483058803448, "grad_norm": 0.40324728117544545, "learning_rate": 3.5529838493350034e-05, "loss": 0.3858, "num_tokens": 1129519989.0, "step": 1481 }, { "epoch": 2.021848596057011, "grad_norm": 0.3891334905817608, "learning_rate": 3.5523279315531084e-05, "loss": 0.4032, "num_tokens": 1130352398.0, "step": 1482 }, { "epoch": 2.0232141333105744, "grad_norm": 0.37403526996136655, "learning_rate": 3.5516716012345257e-05, "loss": 0.4112, "num_tokens": 1131122937.0, "step": 1483 }, { "epoch": 2.0245796705641377, "grad_norm": 0.3674999209277368, "learning_rate": 3.5510148585794734e-05, "loss": 0.3874, "num_tokens": 1131797648.0, "step": 1484 }, { "epoch": 2.025945207817701, "grad_norm": 0.344779527192772, "learning_rate": 3.550357703788295e-05, "loss": 0.3884, "num_tokens": 1132611499.0, "step": 1485 }, { "epoch": 2.027310745071264, "grad_norm": 0.3488850827387411, "learning_rate": 3.549700137061461e-05, "loss": 0.4017, "num_tokens": 1133394765.0, "step": 1486 }, { "epoch": 2.028676282324827, "grad_norm": 0.42090127174469005, "learning_rate": 3.549042158599565e-05, "loss": 0.4139, "num_tokens": 1134180473.0, "step": 1487 }, { "epoch": 2.0300418195783902, "grad_norm": 0.35382251051241986, "learning_rate": 3.5483837686033284e-05, "loss": 0.4148, "num_tokens": 1134952514.0, "step": 1488 }, { "epoch": 2.0314073568319535, "grad_norm": 0.4067284090267863, "learning_rate": 3.5477249672735976e-05, "loss": 0.4192, "num_tokens": 1135780919.0, "step": 1489 }, { "epoch": 2.0327728940855168, "grad_norm": 0.4060471634013766, "learning_rate": 3.547065754811345e-05, "loss": 0.3879, "num_tokens": 1136597576.0, "step": 1490 }, { "epoch": 2.03413843133908, "grad_norm": 0.3769602736542382, "learning_rate": 3.546406131417666e-05, "loss": 0.395, "num_tokens": 1137372645.0, "step": 1491 }, { "epoch": 2.0355039685926433, "grad_norm": 0.3943789424665568, "learning_rate": 3.545746097293786e-05, "loss": 0.394, "num_tokens": 1138161263.0, "step": 1492 }, { "epoch": 2.0368695058462065, "grad_norm": 0.4327627128588982, "learning_rate": 3.545085652641049e-05, "loss": 0.4167, "num_tokens": 1138946272.0, "step": 1493 }, { "epoch": 2.0382350430997698, "grad_norm": 0.3925202107711022, "learning_rate": 3.544424797660931e-05, "loss": 0.3927, "num_tokens": 1139664557.0, "step": 1494 }, { "epoch": 2.0396005803533326, "grad_norm": 0.4021749280306289, "learning_rate": 3.543763532555029e-05, "loss": 0.3911, "num_tokens": 1140406072.0, "step": 1495 }, { "epoch": 2.040966117606896, "grad_norm": 0.36685702539119847, "learning_rate": 3.5431018575250665e-05, "loss": 0.3822, "num_tokens": 1141156031.0, "step": 1496 }, { "epoch": 2.042331654860459, "grad_norm": 0.36009221632965327, "learning_rate": 3.542439772772893e-05, "loss": 0.3793, "num_tokens": 1141864061.0, "step": 1497 }, { "epoch": 2.0436971921140223, "grad_norm": 0.40466825502625525, "learning_rate": 3.54177727850048e-05, "loss": 0.399, "num_tokens": 1142600217.0, "step": 1498 }, { "epoch": 2.0450627293675856, "grad_norm": 0.40183014040773885, "learning_rate": 3.541114374909927e-05, "loss": 0.3926, "num_tokens": 1143355489.0, "step": 1499 }, { "epoch": 2.046428266621149, "grad_norm": 0.37718649986307434, "learning_rate": 3.540451062203457e-05, "loss": 0.3983, "num_tokens": 1144092999.0, "step": 1500 }, { "epoch": 2.047793803874712, "grad_norm": 0.4641196932144461, "learning_rate": 3.5397873405834174e-05, "loss": 0.4051, "num_tokens": 1144978396.0, "step": 1501 }, { "epoch": 2.0491593411282754, "grad_norm": 0.3533533092029966, "learning_rate": 3.539123210252282e-05, "loss": 0.4077, "num_tokens": 1145779524.0, "step": 1502 }, { "epoch": 2.050524878381838, "grad_norm": 0.38936983072396597, "learning_rate": 3.5384586714126486e-05, "loss": 0.4209, "num_tokens": 1146581057.0, "step": 1503 }, { "epoch": 2.0518904156354014, "grad_norm": 0.37506009789445066, "learning_rate": 3.537793724267237e-05, "loss": 0.3915, "num_tokens": 1147314748.0, "step": 1504 }, { "epoch": 2.0532559528889647, "grad_norm": 0.38434421109129724, "learning_rate": 3.537128369018895e-05, "loss": 0.393, "num_tokens": 1148057925.0, "step": 1505 }, { "epoch": 2.054621490142528, "grad_norm": 0.3426629797407092, "learning_rate": 3.5364626058705944e-05, "loss": 0.3837, "num_tokens": 1148809488.0, "step": 1506 }, { "epoch": 2.055987027396091, "grad_norm": 0.3864706003769476, "learning_rate": 3.53579643502543e-05, "loss": 0.3875, "num_tokens": 1149529398.0, "step": 1507 }, { "epoch": 2.0573525646496544, "grad_norm": 0.4308499975065107, "learning_rate": 3.535129856686621e-05, "loss": 0.4063, "num_tokens": 1150279434.0, "step": 1508 }, { "epoch": 2.0587181019032177, "grad_norm": 0.345390060874938, "learning_rate": 3.534462871057514e-05, "loss": 0.4039, "num_tokens": 1151052985.0, "step": 1509 }, { "epoch": 2.060083639156781, "grad_norm": 0.4085602710929797, "learning_rate": 3.533795478341574e-05, "loss": 0.3965, "num_tokens": 1151792037.0, "step": 1510 }, { "epoch": 2.0614491764103438, "grad_norm": 0.355129185159014, "learning_rate": 3.533127678742395e-05, "loss": 0.4187, "num_tokens": 1152561644.0, "step": 1511 }, { "epoch": 2.062814713663907, "grad_norm": 0.40338192527344646, "learning_rate": 3.532459472463695e-05, "loss": 0.4101, "num_tokens": 1153487062.0, "step": 1512 }, { "epoch": 2.0641802509174703, "grad_norm": 0.365659941509853, "learning_rate": 3.531790859709313e-05, "loss": 0.3864, "num_tokens": 1154270702.0, "step": 1513 }, { "epoch": 2.0655457881710335, "grad_norm": 0.42195099902437905, "learning_rate": 3.531121840683216e-05, "loss": 0.3889, "num_tokens": 1155034975.0, "step": 1514 }, { "epoch": 2.0669113254245968, "grad_norm": 0.37580048937708327, "learning_rate": 3.530452415589491e-05, "loss": 0.3951, "num_tokens": 1155797178.0, "step": 1515 }, { "epoch": 2.06827686267816, "grad_norm": 0.3833107023541771, "learning_rate": 3.52978258463235e-05, "loss": 0.4076, "num_tokens": 1156566344.0, "step": 1516 }, { "epoch": 2.0696423999317233, "grad_norm": 0.41380730691177986, "learning_rate": 3.5291123480161315e-05, "loss": 0.3994, "num_tokens": 1157328685.0, "step": 1517 }, { "epoch": 2.0710079371852865, "grad_norm": 0.3667965344988366, "learning_rate": 3.528441705945294e-05, "loss": 0.4077, "num_tokens": 1158098621.0, "step": 1518 }, { "epoch": 2.0723734744388493, "grad_norm": 0.4244770381166427, "learning_rate": 3.527770658624422e-05, "loss": 0.3908, "num_tokens": 1158761210.0, "step": 1519 }, { "epoch": 2.0737390116924126, "grad_norm": 0.3720115340508333, "learning_rate": 3.5270992062582236e-05, "loss": 0.4106, "num_tokens": 1159550811.0, "step": 1520 }, { "epoch": 2.075104548945976, "grad_norm": 0.40072467326993977, "learning_rate": 3.526427349051528e-05, "loss": 0.4043, "num_tokens": 1160326563.0, "step": 1521 }, { "epoch": 2.076470086199539, "grad_norm": 0.39003927973058133, "learning_rate": 3.525755087209292e-05, "loss": 0.3882, "num_tokens": 1161088340.0, "step": 1522 }, { "epoch": 2.0778356234531024, "grad_norm": 0.35919870256948233, "learning_rate": 3.525082420936592e-05, "loss": 0.4284, "num_tokens": 1161847490.0, "step": 1523 }, { "epoch": 2.0792011607066656, "grad_norm": 0.42156977499088766, "learning_rate": 3.52440935043863e-05, "loss": 0.3877, "num_tokens": 1162546306.0, "step": 1524 }, { "epoch": 2.080566697960229, "grad_norm": 0.38581884091241864, "learning_rate": 3.52373587592073e-05, "loss": 0.4091, "num_tokens": 1163336518.0, "step": 1525 }, { "epoch": 2.081932235213792, "grad_norm": 0.3633713449245009, "learning_rate": 3.523061997588341e-05, "loss": 0.3896, "num_tokens": 1164100514.0, "step": 1526 }, { "epoch": 2.083297772467355, "grad_norm": 0.3689886541384313, "learning_rate": 3.5223877156470333e-05, "loss": 0.3945, "num_tokens": 1164823377.0, "step": 1527 }, { "epoch": 2.084663309720918, "grad_norm": 0.37360608750611285, "learning_rate": 3.521713030302501e-05, "loss": 0.3932, "num_tokens": 1165628898.0, "step": 1528 }, { "epoch": 2.0860288469744814, "grad_norm": 0.4329359068841464, "learning_rate": 3.521037941760562e-05, "loss": 0.411, "num_tokens": 1166381108.0, "step": 1529 }, { "epoch": 2.0873943842280447, "grad_norm": 0.39843508468996325, "learning_rate": 3.520362450227155e-05, "loss": 0.3937, "num_tokens": 1167159948.0, "step": 1530 }, { "epoch": 2.088759921481608, "grad_norm": 0.44078989547592196, "learning_rate": 3.5196865559083466e-05, "loss": 0.3984, "num_tokens": 1167897464.0, "step": 1531 }, { "epoch": 2.090125458735171, "grad_norm": 0.3372925687033934, "learning_rate": 3.519010259010319e-05, "loss": 0.3957, "num_tokens": 1168608133.0, "step": 1532 }, { "epoch": 2.0914909959887344, "grad_norm": 0.4537648314745353, "learning_rate": 3.518333559739384e-05, "loss": 0.4046, "num_tokens": 1169426566.0, "step": 1533 }, { "epoch": 2.0928565332422977, "grad_norm": 0.43503997750062634, "learning_rate": 3.5176564583019716e-05, "loss": 0.397, "num_tokens": 1170136846.0, "step": 1534 }, { "epoch": 2.0942220704958605, "grad_norm": 0.38668772248319166, "learning_rate": 3.516978954904637e-05, "loss": 0.4153, "num_tokens": 1170893379.0, "step": 1535 }, { "epoch": 2.0955876077494238, "grad_norm": 0.4382955872303597, "learning_rate": 3.5163010497540556e-05, "loss": 0.4067, "num_tokens": 1171636982.0, "step": 1536 }, { "epoch": 2.096953145002987, "grad_norm": 0.40157413804326775, "learning_rate": 3.5156227430570294e-05, "loss": 0.4302, "num_tokens": 1172436991.0, "step": 1537 }, { "epoch": 2.0983186822565503, "grad_norm": 0.40335261220474133, "learning_rate": 3.5149440350204785e-05, "loss": 0.3898, "num_tokens": 1173197965.0, "step": 1538 }, { "epoch": 2.0996842195101135, "grad_norm": 0.3930979835628002, "learning_rate": 3.514264925851448e-05, "loss": 0.4043, "num_tokens": 1174004921.0, "step": 1539 }, { "epoch": 2.101049756763677, "grad_norm": 0.38352001615451414, "learning_rate": 3.513585415757104e-05, "loss": 0.4054, "num_tokens": 1174717386.0, "step": 1540 }, { "epoch": 2.10241529401724, "grad_norm": 0.4022101390807964, "learning_rate": 3.512905504944737e-05, "loss": 0.3962, "num_tokens": 1175500615.0, "step": 1541 }, { "epoch": 2.1037808312708033, "grad_norm": 0.39807709197318614, "learning_rate": 3.5122251936217575e-05, "loss": 0.3925, "num_tokens": 1176259304.0, "step": 1542 }, { "epoch": 2.105146368524366, "grad_norm": 0.4098287583541047, "learning_rate": 3.511544481995699e-05, "loss": 0.4016, "num_tokens": 1177018979.0, "step": 1543 }, { "epoch": 2.1065119057779293, "grad_norm": 0.40505851664976417, "learning_rate": 3.510863370274217e-05, "loss": 0.3926, "num_tokens": 1177743745.0, "step": 1544 }, { "epoch": 2.1078774430314926, "grad_norm": 0.4093121481642884, "learning_rate": 3.51018185866509e-05, "loss": 0.3945, "num_tokens": 1178499475.0, "step": 1545 }, { "epoch": 2.109242980285056, "grad_norm": 0.3377756936292702, "learning_rate": 3.509499947376216e-05, "loss": 0.413, "num_tokens": 1179308225.0, "step": 1546 }, { "epoch": 2.110608517538619, "grad_norm": 0.4203597946936166, "learning_rate": 3.508817636615618e-05, "loss": 0.3886, "num_tokens": 1180078786.0, "step": 1547 }, { "epoch": 2.1119740547921824, "grad_norm": 0.4052442470823492, "learning_rate": 3.50813492659144e-05, "loss": 0.4217, "num_tokens": 1180893609.0, "step": 1548 }, { "epoch": 2.1133395920457456, "grad_norm": 0.3668224444433922, "learning_rate": 3.5074518175119454e-05, "loss": 0.4118, "num_tokens": 1181647251.0, "step": 1549 }, { "epoch": 2.114705129299309, "grad_norm": 0.42479170991803983, "learning_rate": 3.506768309585523e-05, "loss": 0.3912, "num_tokens": 1182372341.0, "step": 1550 }, { "epoch": 2.1160706665528717, "grad_norm": 0.3998827769932243, "learning_rate": 3.50608440302068e-05, "loss": 0.4012, "num_tokens": 1183165313.0, "step": 1551 }, { "epoch": 2.117436203806435, "grad_norm": 0.39381801426913443, "learning_rate": 3.5054000980260476e-05, "loss": 0.4192, "num_tokens": 1183867519.0, "step": 1552 }, { "epoch": 2.118801741059998, "grad_norm": 0.434552399453608, "learning_rate": 3.5047153948103775e-05, "loss": 0.4239, "num_tokens": 1184646914.0, "step": 1553 }, { "epoch": 2.1201672783135614, "grad_norm": 0.41648480942009464, "learning_rate": 3.504030293582543e-05, "loss": 0.4, "num_tokens": 1185371483.0, "step": 1554 }, { "epoch": 2.1215328155671247, "grad_norm": 0.44817928372680177, "learning_rate": 3.503344794551538e-05, "loss": 0.4202, "num_tokens": 1186092583.0, "step": 1555 }, { "epoch": 2.122898352820688, "grad_norm": 0.3885425093671181, "learning_rate": 3.50265889792648e-05, "loss": 0.4166, "num_tokens": 1186919447.0, "step": 1556 }, { "epoch": 2.124263890074251, "grad_norm": 0.4631357852009637, "learning_rate": 3.5019726039166055e-05, "loss": 0.3938, "num_tokens": 1187619757.0, "step": 1557 }, { "epoch": 2.1256294273278145, "grad_norm": 0.3749990785084671, "learning_rate": 3.501285912731273e-05, "loss": 0.3819, "num_tokens": 1188367092.0, "step": 1558 }, { "epoch": 2.1269949645813773, "grad_norm": 0.35612599295810016, "learning_rate": 3.500598824579962e-05, "loss": 0.3751, "num_tokens": 1189046025.0, "step": 1559 }, { "epoch": 2.1283605018349405, "grad_norm": 0.42013638244920276, "learning_rate": 3.499911339672274e-05, "loss": 0.398, "num_tokens": 1189811291.0, "step": 1560 }, { "epoch": 2.1297260390885038, "grad_norm": 0.3845496620374964, "learning_rate": 3.499223458217931e-05, "loss": 0.4264, "num_tokens": 1190641692.0, "step": 1561 }, { "epoch": 2.131091576342067, "grad_norm": 0.36868200785051847, "learning_rate": 3.498535180426775e-05, "loss": 0.3937, "num_tokens": 1191389977.0, "step": 1562 }, { "epoch": 2.1324571135956303, "grad_norm": 0.40487988856270196, "learning_rate": 3.497846506508771e-05, "loss": 0.397, "num_tokens": 1192157715.0, "step": 1563 }, { "epoch": 2.1338226508491935, "grad_norm": 0.41915681730065646, "learning_rate": 3.4971574366740016e-05, "loss": 0.3778, "num_tokens": 1192861461.0, "step": 1564 }, { "epoch": 2.135188188102757, "grad_norm": 0.3849289669225865, "learning_rate": 3.4964679711326746e-05, "loss": 0.4023, "num_tokens": 1193585101.0, "step": 1565 }, { "epoch": 2.13655372535632, "grad_norm": 0.35732419368623214, "learning_rate": 3.495778110095114e-05, "loss": 0.3909, "num_tokens": 1194398715.0, "step": 1566 }, { "epoch": 2.1379192626098833, "grad_norm": 0.41033485527844743, "learning_rate": 3.495087853771768e-05, "loss": 0.3932, "num_tokens": 1195098929.0, "step": 1567 }, { "epoch": 2.139284799863446, "grad_norm": 0.3886756716730733, "learning_rate": 3.494397202373203e-05, "loss": 0.4024, "num_tokens": 1195892171.0, "step": 1568 }, { "epoch": 2.1406503371170094, "grad_norm": 0.39294432279376, "learning_rate": 3.493706156110107e-05, "loss": 0.3915, "num_tokens": 1196738073.0, "step": 1569 }, { "epoch": 2.1420158743705726, "grad_norm": 0.417195776381746, "learning_rate": 3.493014715193289e-05, "loss": 0.395, "num_tokens": 1197411356.0, "step": 1570 }, { "epoch": 2.143381411624136, "grad_norm": 0.43975551286268216, "learning_rate": 3.492322879833677e-05, "loss": 0.4124, "num_tokens": 1198278619.0, "step": 1571 }, { "epoch": 2.144746948877699, "grad_norm": 0.4403660695444377, "learning_rate": 3.491630650242319e-05, "loss": 0.4039, "num_tokens": 1199040956.0, "step": 1572 }, { "epoch": 2.1461124861312624, "grad_norm": 0.37869927279632376, "learning_rate": 3.490938026630386e-05, "loss": 0.3861, "num_tokens": 1199756148.0, "step": 1573 }, { "epoch": 2.1474780233848256, "grad_norm": 0.38205677248754283, "learning_rate": 3.4902450092091675e-05, "loss": 0.4108, "num_tokens": 1200538061.0, "step": 1574 }, { "epoch": 2.1488435606383884, "grad_norm": 0.39928058172441966, "learning_rate": 3.489551598190072e-05, "loss": 0.3956, "num_tokens": 1201316915.0, "step": 1575 }, { "epoch": 2.1502090978919517, "grad_norm": 0.34509429339457964, "learning_rate": 3.48885779378463e-05, "loss": 0.3931, "num_tokens": 1202098701.0, "step": 1576 }, { "epoch": 2.151574635145515, "grad_norm": 0.3905034464427744, "learning_rate": 3.4881635962044894e-05, "loss": 0.413, "num_tokens": 1202893733.0, "step": 1577 }, { "epoch": 2.152940172399078, "grad_norm": 0.40881229926275414, "learning_rate": 3.4874690056614225e-05, "loss": 0.4066, "num_tokens": 1203657787.0, "step": 1578 }, { "epoch": 2.1543057096526415, "grad_norm": 0.3829432047009685, "learning_rate": 3.486774022367318e-05, "loss": 0.4075, "num_tokens": 1204430801.0, "step": 1579 }, { "epoch": 2.1556712469062047, "grad_norm": 0.4055813780286584, "learning_rate": 3.4860786465341834e-05, "loss": 0.4046, "num_tokens": 1205279288.0, "step": 1580 }, { "epoch": 2.157036784159768, "grad_norm": 0.3602157657968452, "learning_rate": 3.48538287837415e-05, "loss": 0.3792, "num_tokens": 1206006243.0, "step": 1581 }, { "epoch": 2.158402321413331, "grad_norm": 0.3806856796446483, "learning_rate": 3.484686718099465e-05, "loss": 0.3868, "num_tokens": 1206698774.0, "step": 1582 }, { "epoch": 2.1597678586668945, "grad_norm": 0.3097390932226843, "learning_rate": 3.4839901659224975e-05, "loss": 0.4091, "num_tokens": 1207492312.0, "step": 1583 }, { "epoch": 2.1611333959204573, "grad_norm": 0.3796672759912363, "learning_rate": 3.4832932220557355e-05, "loss": 0.4101, "num_tokens": 1208340905.0, "step": 1584 }, { "epoch": 2.1624989331740205, "grad_norm": 0.38776321189336865, "learning_rate": 3.482595886711786e-05, "loss": 0.3923, "num_tokens": 1209060695.0, "step": 1585 }, { "epoch": 2.163864470427584, "grad_norm": 0.4153058799587803, "learning_rate": 3.4818981601033764e-05, "loss": 0.3878, "num_tokens": 1209811137.0, "step": 1586 }, { "epoch": 2.165230007681147, "grad_norm": 0.3845094293675754, "learning_rate": 3.481200042443352e-05, "loss": 0.4127, "num_tokens": 1210503848.0, "step": 1587 }, { "epoch": 2.1665955449347103, "grad_norm": 0.3934125060049197, "learning_rate": 3.4805015339446784e-05, "loss": 0.4007, "num_tokens": 1211333338.0, "step": 1588 }, { "epoch": 2.1679610821882735, "grad_norm": 0.37963841653771524, "learning_rate": 3.4798026348204415e-05, "loss": 0.4066, "num_tokens": 1212059086.0, "step": 1589 }, { "epoch": 2.169326619441837, "grad_norm": 0.40041842355047524, "learning_rate": 3.479103345283843e-05, "loss": 0.4053, "num_tokens": 1212804267.0, "step": 1590 }, { "epoch": 2.1706921566953996, "grad_norm": 0.45766812132249185, "learning_rate": 3.478403665548208e-05, "loss": 0.4309, "num_tokens": 1213615914.0, "step": 1591 }, { "epoch": 2.172057693948963, "grad_norm": 0.35238273058499753, "learning_rate": 3.477703595826978e-05, "loss": 0.4054, "num_tokens": 1214404658.0, "step": 1592 }, { "epoch": 2.173423231202526, "grad_norm": 0.3982720638602209, "learning_rate": 3.4770031363337126e-05, "loss": 0.42, "num_tokens": 1215201452.0, "step": 1593 }, { "epoch": 2.1747887684560894, "grad_norm": 0.40897944777927797, "learning_rate": 3.4763022872820926e-05, "loss": 0.3933, "num_tokens": 1215897206.0, "step": 1594 }, { "epoch": 2.1761543057096526, "grad_norm": 0.39812153978934883, "learning_rate": 3.4756010488859175e-05, "loss": 0.3882, "num_tokens": 1216639890.0, "step": 1595 }, { "epoch": 2.177519842963216, "grad_norm": 0.40355651027061956, "learning_rate": 3.474899421359103e-05, "loss": 0.4101, "num_tokens": 1217393107.0, "step": 1596 }, { "epoch": 2.178885380216779, "grad_norm": 0.42345284334935956, "learning_rate": 3.4741974049156875e-05, "loss": 0.4145, "num_tokens": 1218117690.0, "step": 1597 }, { "epoch": 2.1802509174703424, "grad_norm": 0.46607391487165833, "learning_rate": 3.473494999769824e-05, "loss": 0.4271, "num_tokens": 1218954238.0, "step": 1598 }, { "epoch": 2.1816164547239056, "grad_norm": 0.3662969086722117, "learning_rate": 3.472792206135786e-05, "loss": 0.3775, "num_tokens": 1219750936.0, "step": 1599 }, { "epoch": 2.1829819919774685, "grad_norm": 0.41645754501656984, "learning_rate": 3.472089024227966e-05, "loss": 0.4061, "num_tokens": 1220509174.0, "step": 1600 }, { "epoch": 2.1843475292310317, "grad_norm": 0.3854742046994506, "learning_rate": 3.4713854542608744e-05, "loss": 0.3988, "num_tokens": 1221262257.0, "step": 1601 }, { "epoch": 2.185713066484595, "grad_norm": 0.4596233131652602, "learning_rate": 3.4706814964491395e-05, "loss": 0.3775, "num_tokens": 1221994116.0, "step": 1602 }, { "epoch": 2.187078603738158, "grad_norm": 0.40359382958035556, "learning_rate": 3.469977151007509e-05, "loss": 0.3804, "num_tokens": 1222697836.0, "step": 1603 }, { "epoch": 2.1884441409917215, "grad_norm": 0.414675359295637, "learning_rate": 3.4692724181508474e-05, "loss": 0.3839, "num_tokens": 1223480523.0, "step": 1604 }, { "epoch": 2.1898096782452847, "grad_norm": 0.43893369416778105, "learning_rate": 3.4685672980941384e-05, "loss": 0.3957, "num_tokens": 1224216506.0, "step": 1605 }, { "epoch": 2.191175215498848, "grad_norm": 0.40657909912657736, "learning_rate": 3.467861791052484e-05, "loss": 0.3889, "num_tokens": 1224956603.0, "step": 1606 }, { "epoch": 2.1925407527524112, "grad_norm": 0.3401138257070472, "learning_rate": 3.467155897241104e-05, "loss": 0.4078, "num_tokens": 1225770045.0, "step": 1607 }, { "epoch": 2.193906290005974, "grad_norm": 0.3753777526823173, "learning_rate": 3.466449616875335e-05, "loss": 0.3971, "num_tokens": 1226504346.0, "step": 1608 }, { "epoch": 2.1952718272595373, "grad_norm": 0.366254577554027, "learning_rate": 3.465742950170633e-05, "loss": 0.4065, "num_tokens": 1227269905.0, "step": 1609 }, { "epoch": 2.1966373645131005, "grad_norm": 0.34171985808134697, "learning_rate": 3.465035897342572e-05, "loss": 0.3863, "num_tokens": 1228014620.0, "step": 1610 }, { "epoch": 2.198002901766664, "grad_norm": 0.38240622044390454, "learning_rate": 3.464328458606843e-05, "loss": 0.4183, "num_tokens": 1228827411.0, "step": 1611 }, { "epoch": 2.199368439020227, "grad_norm": 0.36907727080253533, "learning_rate": 3.463620634179254e-05, "loss": 0.3927, "num_tokens": 1229550333.0, "step": 1612 }, { "epoch": 2.2007339762737903, "grad_norm": 0.37749543645084294, "learning_rate": 3.462912424275733e-05, "loss": 0.401, "num_tokens": 1230246583.0, "step": 1613 }, { "epoch": 2.2020995135273536, "grad_norm": 0.3979274268936437, "learning_rate": 3.462203829112323e-05, "loss": 0.3969, "num_tokens": 1231011477.0, "step": 1614 }, { "epoch": 2.203465050780917, "grad_norm": 0.4259200536380381, "learning_rate": 3.461494848905186e-05, "loss": 0.4186, "num_tokens": 1231789421.0, "step": 1615 }, { "epoch": 2.2048305880344796, "grad_norm": 0.452330097915782, "learning_rate": 3.4607854838706016e-05, "loss": 0.4051, "num_tokens": 1232553357.0, "step": 1616 }, { "epoch": 2.206196125288043, "grad_norm": 0.41186279140390764, "learning_rate": 3.460075734224966e-05, "loss": 0.4023, "num_tokens": 1233326954.0, "step": 1617 }, { "epoch": 2.207561662541606, "grad_norm": 0.46069068665114155, "learning_rate": 3.4593656001847935e-05, "loss": 0.4063, "num_tokens": 1234112353.0, "step": 1618 }, { "epoch": 2.2089271997951694, "grad_norm": 0.3787063289333942, "learning_rate": 3.4586550819667144e-05, "loss": 0.4085, "num_tokens": 1234852535.0, "step": 1619 }, { "epoch": 2.2102927370487326, "grad_norm": 0.4648818794765327, "learning_rate": 3.457944179787477e-05, "loss": 0.3941, "num_tokens": 1235656199.0, "step": 1620 }, { "epoch": 2.211658274302296, "grad_norm": 0.4093429467214122, "learning_rate": 3.457232893863948e-05, "loss": 0.3903, "num_tokens": 1236356066.0, "step": 1621 }, { "epoch": 2.213023811555859, "grad_norm": 0.4262212695768751, "learning_rate": 3.456521224413109e-05, "loss": 0.4198, "num_tokens": 1237138317.0, "step": 1622 }, { "epoch": 2.2143893488094224, "grad_norm": 0.4616944316511116, "learning_rate": 3.45580917165206e-05, "loss": 0.4168, "num_tokens": 1237961751.0, "step": 1623 }, { "epoch": 2.215754886062985, "grad_norm": 0.4545920747021152, "learning_rate": 3.455096735798017e-05, "loss": 0.4029, "num_tokens": 1238689402.0, "step": 1624 }, { "epoch": 2.2171204233165485, "grad_norm": 0.34567025122701883, "learning_rate": 3.4543839170683135e-05, "loss": 0.3836, "num_tokens": 1239455746.0, "step": 1625 }, { "epoch": 2.2184859605701117, "grad_norm": 0.42153068813141886, "learning_rate": 3.4536707156803996e-05, "loss": 0.4002, "num_tokens": 1240189374.0, "step": 1626 }, { "epoch": 2.219851497823675, "grad_norm": 0.37622211932754007, "learning_rate": 3.452957131851842e-05, "loss": 0.4069, "num_tokens": 1240951640.0, "step": 1627 }, { "epoch": 2.2212170350772382, "grad_norm": 0.49637091691247126, "learning_rate": 3.452243165800325e-05, "loss": 0.3908, "num_tokens": 1241720430.0, "step": 1628 }, { "epoch": 2.2225825723308015, "grad_norm": 0.44517595454186165, "learning_rate": 3.4515288177436474e-05, "loss": 0.4115, "num_tokens": 1242548477.0, "step": 1629 }, { "epoch": 2.2239481095843647, "grad_norm": 0.5061979462239993, "learning_rate": 3.450814087899728e-05, "loss": 0.3968, "num_tokens": 1243335787.0, "step": 1630 }, { "epoch": 2.225313646837928, "grad_norm": 0.4134524632580856, "learning_rate": 3.450098976486597e-05, "loss": 0.4139, "num_tokens": 1244055332.0, "step": 1631 }, { "epoch": 2.226679184091491, "grad_norm": 0.4202413477392713, "learning_rate": 3.4493834837224067e-05, "loss": 0.4197, "num_tokens": 1244831964.0, "step": 1632 }, { "epoch": 2.228044721345054, "grad_norm": 0.45720044552579053, "learning_rate": 3.4486676098254206e-05, "loss": 0.4105, "num_tokens": 1245681481.0, "step": 1633 }, { "epoch": 2.2294102585986173, "grad_norm": 0.41983310292462905, "learning_rate": 3.4479513550140225e-05, "loss": 0.4094, "num_tokens": 1246501306.0, "step": 1634 }, { "epoch": 2.2307757958521806, "grad_norm": 0.37882699092897965, "learning_rate": 3.44723471950671e-05, "loss": 0.3901, "num_tokens": 1247262390.0, "step": 1635 }, { "epoch": 2.232141333105744, "grad_norm": 0.3630531975987233, "learning_rate": 3.446517703522098e-05, "loss": 0.3963, "num_tokens": 1247979818.0, "step": 1636 }, { "epoch": 2.233506870359307, "grad_norm": 0.3828117400838086, "learning_rate": 3.445800307278916e-05, "loss": 0.4073, "num_tokens": 1248698103.0, "step": 1637 }, { "epoch": 2.2348724076128703, "grad_norm": 0.34932130413505014, "learning_rate": 3.445082530996012e-05, "loss": 0.3819, "num_tokens": 1249361491.0, "step": 1638 }, { "epoch": 2.2362379448664336, "grad_norm": 0.44092904483938333, "learning_rate": 3.444364374892347e-05, "loss": 0.4025, "num_tokens": 1250131505.0, "step": 1639 }, { "epoch": 2.2376034821199964, "grad_norm": 0.3713356933785866, "learning_rate": 3.443645839187001e-05, "loss": 0.4061, "num_tokens": 1250833679.0, "step": 1640 }, { "epoch": 2.2389690193735596, "grad_norm": 0.40009433977831355, "learning_rate": 3.442926924099167e-05, "loss": 0.4002, "num_tokens": 1251607518.0, "step": 1641 }, { "epoch": 2.240334556627123, "grad_norm": 0.3598374091359237, "learning_rate": 3.442207629848155e-05, "loss": 0.4042, "num_tokens": 1252444863.0, "step": 1642 }, { "epoch": 2.241700093880686, "grad_norm": 0.3607513745591958, "learning_rate": 3.4414879566533905e-05, "loss": 0.3976, "num_tokens": 1253205956.0, "step": 1643 }, { "epoch": 2.2430656311342494, "grad_norm": 0.4175215887935757, "learning_rate": 3.440767904734414e-05, "loss": 0.4167, "num_tokens": 1253956104.0, "step": 1644 }, { "epoch": 2.2444311683878126, "grad_norm": 0.36432084354862143, "learning_rate": 3.440047474310884e-05, "loss": 0.3995, "num_tokens": 1254688382.0, "step": 1645 }, { "epoch": 2.245796705641376, "grad_norm": 0.38188176256417855, "learning_rate": 3.439326665602571e-05, "loss": 0.379, "num_tokens": 1255526705.0, "step": 1646 }, { "epoch": 2.247162242894939, "grad_norm": 0.3632439030435451, "learning_rate": 3.4386054788293625e-05, "loss": 0.4136, "num_tokens": 1256340114.0, "step": 1647 }, { "epoch": 2.2485277801485024, "grad_norm": 0.38263227374517145, "learning_rate": 3.437883914211263e-05, "loss": 0.4093, "num_tokens": 1257082983.0, "step": 1648 }, { "epoch": 2.249893317402065, "grad_norm": 0.4307635243299965, "learning_rate": 3.437161971968388e-05, "loss": 0.4083, "num_tokens": 1257865306.0, "step": 1649 }, { "epoch": 2.2512588546556285, "grad_norm": 0.3586460928417406, "learning_rate": 3.436439652320973e-05, "loss": 0.4176, "num_tokens": 1258645258.0, "step": 1650 }, { "epoch": 2.2526243919091917, "grad_norm": 0.41217506472302073, "learning_rate": 3.435716955489367e-05, "loss": 0.3906, "num_tokens": 1259425592.0, "step": 1651 }, { "epoch": 2.253989929162755, "grad_norm": 0.3872612402914182, "learning_rate": 3.43499388169403e-05, "loss": 0.3899, "num_tokens": 1260204885.0, "step": 1652 }, { "epoch": 2.2553554664163182, "grad_norm": 0.4652735957519978, "learning_rate": 3.434270431155544e-05, "loss": 0.404, "num_tokens": 1260918033.0, "step": 1653 }, { "epoch": 2.2567210036698815, "grad_norm": 0.44251341640856984, "learning_rate": 3.4335466040946003e-05, "loss": 0.4119, "num_tokens": 1261661298.0, "step": 1654 }, { "epoch": 2.2580865409234447, "grad_norm": 0.42076484097176753, "learning_rate": 3.432822400732009e-05, "loss": 0.4129, "num_tokens": 1262460817.0, "step": 1655 }, { "epoch": 2.2594520781770076, "grad_norm": 0.41146163026446986, "learning_rate": 3.432097821288692e-05, "loss": 0.4174, "num_tokens": 1263196208.0, "step": 1656 }, { "epoch": 2.260817615430571, "grad_norm": 0.44875309546356906, "learning_rate": 3.4313728659856873e-05, "loss": 0.3794, "num_tokens": 1263958686.0, "step": 1657 }, { "epoch": 2.262183152684134, "grad_norm": 0.36843362324886514, "learning_rate": 3.430647535044148e-05, "loss": 0.3976, "num_tokens": 1264716705.0, "step": 1658 }, { "epoch": 2.2635486899376973, "grad_norm": 0.4278702925005499, "learning_rate": 3.4299218286853394e-05, "loss": 0.402, "num_tokens": 1265448868.0, "step": 1659 }, { "epoch": 2.2649142271912606, "grad_norm": 0.3713724952329078, "learning_rate": 3.429195747130645e-05, "loss": 0.3995, "num_tokens": 1266217065.0, "step": 1660 }, { "epoch": 2.266279764444824, "grad_norm": 0.38891671955350726, "learning_rate": 3.428469290601559e-05, "loss": 0.3956, "num_tokens": 1267028442.0, "step": 1661 }, { "epoch": 2.267645301698387, "grad_norm": 0.39357931592122114, "learning_rate": 3.427742459319694e-05, "loss": 0.392, "num_tokens": 1267765424.0, "step": 1662 }, { "epoch": 2.2690108389519503, "grad_norm": 0.4321643957257089, "learning_rate": 3.427015253506774e-05, "loss": 0.3841, "num_tokens": 1268514972.0, "step": 1663 }, { "epoch": 2.2703763762055136, "grad_norm": 0.3591999435847207, "learning_rate": 3.426287673384637e-05, "loss": 0.3821, "num_tokens": 1269174574.0, "step": 1664 }, { "epoch": 2.2717419134590764, "grad_norm": 0.3650113192047785, "learning_rate": 3.4255597191752365e-05, "loss": 0.3987, "num_tokens": 1269979292.0, "step": 1665 }, { "epoch": 2.2731074507126396, "grad_norm": 0.40394942321694094, "learning_rate": 3.4248313911006416e-05, "loss": 0.4027, "num_tokens": 1270739608.0, "step": 1666 }, { "epoch": 2.274472987966203, "grad_norm": 0.35554518826704423, "learning_rate": 3.424102689383031e-05, "loss": 0.3873, "num_tokens": 1271543309.0, "step": 1667 }, { "epoch": 2.275838525219766, "grad_norm": 0.32881818508224026, "learning_rate": 3.423373614244702e-05, "loss": 0.3896, "num_tokens": 1272295795.0, "step": 1668 }, { "epoch": 2.2772040624733294, "grad_norm": 0.3403528339054029, "learning_rate": 3.4226441659080626e-05, "loss": 0.4066, "num_tokens": 1273172358.0, "step": 1669 }, { "epoch": 2.2785695997268927, "grad_norm": 0.4258127244000837, "learning_rate": 3.421914344595636e-05, "loss": 0.4002, "num_tokens": 1273891520.0, "step": 1670 }, { "epoch": 2.279935136980456, "grad_norm": 0.3784334172242243, "learning_rate": 3.42118415053006e-05, "loss": 0.3861, "num_tokens": 1274649585.0, "step": 1671 }, { "epoch": 2.2813006742340187, "grad_norm": 0.38999878660450205, "learning_rate": 3.420453583934084e-05, "loss": 0.403, "num_tokens": 1275330787.0, "step": 1672 }, { "epoch": 2.282666211487582, "grad_norm": 0.33025706797267, "learning_rate": 3.419722645030574e-05, "loss": 0.4198, "num_tokens": 1276185055.0, "step": 1673 }, { "epoch": 2.2840317487411452, "grad_norm": 0.41005197911359836, "learning_rate": 3.4189913340425054e-05, "loss": 0.4083, "num_tokens": 1276967152.0, "step": 1674 }, { "epoch": 2.2853972859947085, "grad_norm": 0.3557210098752958, "learning_rate": 3.418259651192972e-05, "loss": 0.4048, "num_tokens": 1277682487.0, "step": 1675 }, { "epoch": 2.2867628232482717, "grad_norm": 0.3746616292600005, "learning_rate": 3.4175275967051766e-05, "loss": 0.3871, "num_tokens": 1278525929.0, "step": 1676 }, { "epoch": 2.288128360501835, "grad_norm": 0.3651301544704772, "learning_rate": 3.4167951708024386e-05, "loss": 0.4085, "num_tokens": 1279252968.0, "step": 1677 }, { "epoch": 2.2894938977553982, "grad_norm": 0.3825290852252251, "learning_rate": 3.4160623737081886e-05, "loss": 0.3994, "num_tokens": 1280017395.0, "step": 1678 }, { "epoch": 2.2908594350089615, "grad_norm": 0.4197101963867537, "learning_rate": 3.4153292056459715e-05, "loss": 0.4051, "num_tokens": 1280757734.0, "step": 1679 }, { "epoch": 2.2922249722625248, "grad_norm": 0.3980347879064363, "learning_rate": 3.414595666839446e-05, "loss": 0.4069, "num_tokens": 1281551240.0, "step": 1680 }, { "epoch": 2.2935905095160876, "grad_norm": 0.4574669275891515, "learning_rate": 3.413861757512381e-05, "loss": 0.3921, "num_tokens": 1282370339.0, "step": 1681 }, { "epoch": 2.294956046769651, "grad_norm": 0.37301272091518195, "learning_rate": 3.4131274778886624e-05, "loss": 0.415, "num_tokens": 1283142666.0, "step": 1682 }, { "epoch": 2.296321584023214, "grad_norm": 0.4662046818455047, "learning_rate": 3.4123928281922865e-05, "loss": 0.3952, "num_tokens": 1283883040.0, "step": 1683 }, { "epoch": 2.2976871212767773, "grad_norm": 0.386375780405343, "learning_rate": 3.4116578086473625e-05, "loss": 0.3997, "num_tokens": 1284677952.0, "step": 1684 }, { "epoch": 2.2990526585303406, "grad_norm": 0.40673330695954646, "learning_rate": 3.4109224194781136e-05, "loss": 0.3913, "num_tokens": 1285506489.0, "step": 1685 }, { "epoch": 2.300418195783904, "grad_norm": 0.41110502989629955, "learning_rate": 3.4101866609088755e-05, "loss": 0.4003, "num_tokens": 1286221804.0, "step": 1686 }, { "epoch": 2.301783733037467, "grad_norm": 0.49700927208336865, "learning_rate": 3.4094505331640955e-05, "loss": 0.3965, "num_tokens": 1286988171.0, "step": 1687 }, { "epoch": 2.30314927029103, "grad_norm": 0.43234191931425464, "learning_rate": 3.4087140364683344e-05, "loss": 0.398, "num_tokens": 1287741158.0, "step": 1688 }, { "epoch": 2.304514807544593, "grad_norm": 0.435059396679447, "learning_rate": 3.407977171046267e-05, "loss": 0.4125, "num_tokens": 1288566326.0, "step": 1689 }, { "epoch": 2.3058803447981564, "grad_norm": 0.41602528077443274, "learning_rate": 3.4072399371226765e-05, "loss": 0.4114, "num_tokens": 1289324608.0, "step": 1690 }, { "epoch": 2.3072458820517197, "grad_norm": 0.43121260986185095, "learning_rate": 3.4065023349224626e-05, "loss": 0.4134, "num_tokens": 1290115930.0, "step": 1691 }, { "epoch": 2.308611419305283, "grad_norm": 0.439780828497443, "learning_rate": 3.405764364670635e-05, "loss": 0.3946, "num_tokens": 1290858071.0, "step": 1692 }, { "epoch": 2.309976956558846, "grad_norm": 0.4080824488309746, "learning_rate": 3.405026026592318e-05, "loss": 0.3931, "num_tokens": 1291671597.0, "step": 1693 }, { "epoch": 2.3113424938124094, "grad_norm": 0.39493888865381366, "learning_rate": 3.4042873209127446e-05, "loss": 0.3878, "num_tokens": 1292397134.0, "step": 1694 }, { "epoch": 2.3127080310659727, "grad_norm": 0.3962993253256635, "learning_rate": 3.403548247857264e-05, "loss": 0.4045, "num_tokens": 1293140104.0, "step": 1695 }, { "epoch": 2.314073568319536, "grad_norm": 0.40623649514595345, "learning_rate": 3.402808807651333e-05, "loss": 0.4037, "num_tokens": 1293863467.0, "step": 1696 }, { "epoch": 2.3154391055730987, "grad_norm": 0.4241678592873414, "learning_rate": 3.4020690005205245e-05, "loss": 0.3821, "num_tokens": 1294677183.0, "step": 1697 }, { "epoch": 2.316804642826662, "grad_norm": 0.3584440590582081, "learning_rate": 3.4013288266905214e-05, "loss": 0.3988, "num_tokens": 1295434715.0, "step": 1698 }, { "epoch": 2.3181701800802252, "grad_norm": 0.39715936147679126, "learning_rate": 3.400588286387118e-05, "loss": 0.3811, "num_tokens": 1296182884.0, "step": 1699 }, { "epoch": 2.3195357173337885, "grad_norm": 0.3810332537865395, "learning_rate": 3.399847379836222e-05, "loss": 0.3971, "num_tokens": 1296858812.0, "step": 1700 }, { "epoch": 2.3209012545873517, "grad_norm": 0.45753312132920626, "learning_rate": 3.399106107263851e-05, "loss": 0.4027, "num_tokens": 1297653607.0, "step": 1701 }, { "epoch": 2.322266791840915, "grad_norm": 0.39073009019256044, "learning_rate": 3.398364468896135e-05, "loss": 0.4025, "num_tokens": 1298372030.0, "step": 1702 }, { "epoch": 2.3236323290944783, "grad_norm": 0.42219950872245415, "learning_rate": 3.397622464959317e-05, "loss": 0.4156, "num_tokens": 1299189694.0, "step": 1703 }, { "epoch": 2.324997866348041, "grad_norm": 0.37239392147162936, "learning_rate": 3.396880095679749e-05, "loss": 0.4161, "num_tokens": 1299875638.0, "step": 1704 }, { "epoch": 2.3263634036016043, "grad_norm": 0.4046301369085504, "learning_rate": 3.396137361283898e-05, "loss": 0.4006, "num_tokens": 1300720094.0, "step": 1705 }, { "epoch": 2.3277289408551676, "grad_norm": 0.3926096289147974, "learning_rate": 3.395394261998336e-05, "loss": 0.4247, "num_tokens": 1301507197.0, "step": 1706 }, { "epoch": 2.329094478108731, "grad_norm": 0.39783681184155256, "learning_rate": 3.394650798049755e-05, "loss": 0.3988, "num_tokens": 1302222817.0, "step": 1707 }, { "epoch": 2.330460015362294, "grad_norm": 0.35942582208117735, "learning_rate": 3.3939069696649495e-05, "loss": 0.4, "num_tokens": 1302923451.0, "step": 1708 }, { "epoch": 2.3318255526158573, "grad_norm": 0.34252859052322204, "learning_rate": 3.3931627770708326e-05, "loss": 0.3971, "num_tokens": 1303644790.0, "step": 1709 }, { "epoch": 2.3331910898694206, "grad_norm": 0.452875247516403, "learning_rate": 3.392418220494423e-05, "loss": 0.4127, "num_tokens": 1304429905.0, "step": 1710 }, { "epoch": 2.334556627122984, "grad_norm": 0.3901812695620557, "learning_rate": 3.3916733001628536e-05, "loss": 0.3955, "num_tokens": 1305203401.0, "step": 1711 }, { "epoch": 2.335922164376547, "grad_norm": 0.3949028621411126, "learning_rate": 3.390928016303368e-05, "loss": 0.3818, "num_tokens": 1305941214.0, "step": 1712 }, { "epoch": 2.33728770163011, "grad_norm": 0.35498223354274, "learning_rate": 3.390182369143319e-05, "loss": 0.3896, "num_tokens": 1306690777.0, "step": 1713 }, { "epoch": 2.338653238883673, "grad_norm": 0.40196980165863894, "learning_rate": 3.3894363589101715e-05, "loss": 0.4133, "num_tokens": 1307446705.0, "step": 1714 }, { "epoch": 2.3400187761372364, "grad_norm": 0.3579455354591373, "learning_rate": 3.388689985831501e-05, "loss": 0.3926, "num_tokens": 1308219842.0, "step": 1715 }, { "epoch": 2.3413843133907997, "grad_norm": 0.38480170182416223, "learning_rate": 3.3879432501349945e-05, "loss": 0.4058, "num_tokens": 1308912931.0, "step": 1716 }, { "epoch": 2.342749850644363, "grad_norm": 0.38504508679743144, "learning_rate": 3.387196152048447e-05, "loss": 0.4056, "num_tokens": 1309677959.0, "step": 1717 }, { "epoch": 2.344115387897926, "grad_norm": 0.4107866477512154, "learning_rate": 3.386448691799766e-05, "loss": 0.3981, "num_tokens": 1310405001.0, "step": 1718 }, { "epoch": 2.3454809251514894, "grad_norm": 0.43796075255607, "learning_rate": 3.3857008696169705e-05, "loss": 0.3901, "num_tokens": 1311159861.0, "step": 1719 }, { "epoch": 2.3468464624050527, "grad_norm": 0.36884456612775485, "learning_rate": 3.384952685728188e-05, "loss": 0.3894, "num_tokens": 1311827091.0, "step": 1720 }, { "epoch": 2.3482119996586155, "grad_norm": 0.3985331174359302, "learning_rate": 3.384204140361657e-05, "loss": 0.412, "num_tokens": 1312588648.0, "step": 1721 }, { "epoch": 2.3495775369121787, "grad_norm": 0.39584304403248666, "learning_rate": 3.383455233745727e-05, "loss": 0.3932, "num_tokens": 1313300228.0, "step": 1722 }, { "epoch": 2.350943074165742, "grad_norm": 0.35046801531615845, "learning_rate": 3.382705966108855e-05, "loss": 0.3792, "num_tokens": 1313981114.0, "step": 1723 }, { "epoch": 2.3523086114193053, "grad_norm": 0.3615221877405335, "learning_rate": 3.381956337679611e-05, "loss": 0.4072, "num_tokens": 1314805436.0, "step": 1724 }, { "epoch": 2.3536741486728685, "grad_norm": 0.36472847163878463, "learning_rate": 3.3812063486866766e-05, "loss": 0.39, "num_tokens": 1315615689.0, "step": 1725 }, { "epoch": 2.3550396859264318, "grad_norm": 0.34820568508426897, "learning_rate": 3.3804559993588376e-05, "loss": 0.3983, "num_tokens": 1316340770.0, "step": 1726 }, { "epoch": 2.356405223179995, "grad_norm": 0.3738665846192497, "learning_rate": 3.3797052899249944e-05, "loss": 0.4012, "num_tokens": 1317031406.0, "step": 1727 }, { "epoch": 2.3577707604335583, "grad_norm": 0.33573488658783174, "learning_rate": 3.378954220614156e-05, "loss": 0.3922, "num_tokens": 1317733282.0, "step": 1728 }, { "epoch": 2.3591362976871215, "grad_norm": 0.36829676993201127, "learning_rate": 3.378202791655441e-05, "loss": 0.3911, "num_tokens": 1318545253.0, "step": 1729 }, { "epoch": 2.3605018349406843, "grad_norm": 0.4023036850203353, "learning_rate": 3.377451003278079e-05, "loss": 0.4038, "num_tokens": 1319269525.0, "step": 1730 }, { "epoch": 2.3618673721942476, "grad_norm": 0.345577357910495, "learning_rate": 3.376698855711406e-05, "loss": 0.3854, "num_tokens": 1320073829.0, "step": 1731 }, { "epoch": 2.363232909447811, "grad_norm": 0.41689507157575234, "learning_rate": 3.375946349184871e-05, "loss": 0.3816, "num_tokens": 1320819598.0, "step": 1732 }, { "epoch": 2.364598446701374, "grad_norm": 0.4108003183040995, "learning_rate": 3.3751934839280296e-05, "loss": 0.4061, "num_tokens": 1321513625.0, "step": 1733 }, { "epoch": 2.3659639839549373, "grad_norm": 0.3800597098704019, "learning_rate": 3.374440260170551e-05, "loss": 0.4155, "num_tokens": 1322301532.0, "step": 1734 }, { "epoch": 2.3673295212085006, "grad_norm": 0.39037575551456366, "learning_rate": 3.373686678142209e-05, "loss": 0.4138, "num_tokens": 1323133432.0, "step": 1735 }, { "epoch": 2.368695058462064, "grad_norm": 0.3780077772824528, "learning_rate": 3.3729327380728905e-05, "loss": 0.3775, "num_tokens": 1323861399.0, "step": 1736 }, { "epoch": 2.3700605957156267, "grad_norm": 0.389284781978462, "learning_rate": 3.372178440192588e-05, "loss": 0.3963, "num_tokens": 1324562252.0, "step": 1737 }, { "epoch": 2.37142613296919, "grad_norm": 0.3714674522059091, "learning_rate": 3.371423784731406e-05, "loss": 0.4112, "num_tokens": 1325352214.0, "step": 1738 }, { "epoch": 2.372791670222753, "grad_norm": 0.3818312813453747, "learning_rate": 3.3706687719195575e-05, "loss": 0.3983, "num_tokens": 1326106083.0, "step": 1739 }, { "epoch": 2.3741572074763164, "grad_norm": 0.35860985401727025, "learning_rate": 3.369913401987364e-05, "loss": 0.3803, "num_tokens": 1326884407.0, "step": 1740 }, { "epoch": 2.3755227447298797, "grad_norm": 0.3227750950136082, "learning_rate": 3.369157675165256e-05, "loss": 0.4018, "num_tokens": 1327620400.0, "step": 1741 }, { "epoch": 2.376888281983443, "grad_norm": 0.44451776031416057, "learning_rate": 3.3684015916837745e-05, "loss": 0.4057, "num_tokens": 1328369028.0, "step": 1742 }, { "epoch": 2.378253819237006, "grad_norm": 0.35065516979937783, "learning_rate": 3.367645151773565e-05, "loss": 0.3844, "num_tokens": 1329110107.0, "step": 1743 }, { "epoch": 2.3796193564905694, "grad_norm": 0.36860330240568534, "learning_rate": 3.366888355665387e-05, "loss": 0.3786, "num_tokens": 1329864602.0, "step": 1744 }, { "epoch": 2.3809848937441327, "grad_norm": 0.34054558265833795, "learning_rate": 3.366131203590106e-05, "loss": 0.4003, "num_tokens": 1330707441.0, "step": 1745 }, { "epoch": 2.3823504309976955, "grad_norm": 0.35612685320709064, "learning_rate": 3.365373695778694e-05, "loss": 0.3887, "num_tokens": 1331401316.0, "step": 1746 }, { "epoch": 2.3837159682512588, "grad_norm": 0.4021315172892305, "learning_rate": 3.364615832462236e-05, "loss": 0.4051, "num_tokens": 1332189804.0, "step": 1747 }, { "epoch": 2.385081505504822, "grad_norm": 0.3794120858418387, "learning_rate": 3.363857613871923e-05, "loss": 0.3911, "num_tokens": 1332847679.0, "step": 1748 }, { "epoch": 2.3864470427583853, "grad_norm": 0.3883166464611228, "learning_rate": 3.363099040239055e-05, "loss": 0.4148, "num_tokens": 1333603702.0, "step": 1749 }, { "epoch": 2.3878125800119485, "grad_norm": 0.3457165855921822, "learning_rate": 3.362340111795038e-05, "loss": 0.4015, "num_tokens": 1334412987.0, "step": 1750 }, { "epoch": 2.3891781172655118, "grad_norm": 0.3526378221872903, "learning_rate": 3.36158082877139e-05, "loss": 0.4135, "num_tokens": 1335178156.0, "step": 1751 }, { "epoch": 2.390543654519075, "grad_norm": 0.373134270012426, "learning_rate": 3.360821191399736e-05, "loss": 0.4051, "num_tokens": 1335961601.0, "step": 1752 }, { "epoch": 2.391909191772638, "grad_norm": 0.4247224822480782, "learning_rate": 3.360061199911807e-05, "loss": 0.3834, "num_tokens": 1336703067.0, "step": 1753 }, { "epoch": 2.393274729026201, "grad_norm": 0.35125201105927006, "learning_rate": 3.359300854539443e-05, "loss": 0.4015, "num_tokens": 1337459233.0, "step": 1754 }, { "epoch": 2.3946402662797643, "grad_norm": 0.41352412469872835, "learning_rate": 3.358540155514594e-05, "loss": 0.4054, "num_tokens": 1338191424.0, "step": 1755 }, { "epoch": 2.3960058035333276, "grad_norm": 0.3384622872545055, "learning_rate": 3.357779103069315e-05, "loss": 0.4123, "num_tokens": 1338961879.0, "step": 1756 }, { "epoch": 2.397371340786891, "grad_norm": 0.34647423049117226, "learning_rate": 3.3570176974357714e-05, "loss": 0.4197, "num_tokens": 1339773054.0, "step": 1757 }, { "epoch": 2.398736878040454, "grad_norm": 0.39289749577298216, "learning_rate": 3.356255938846234e-05, "loss": 0.4033, "num_tokens": 1340539734.0, "step": 1758 }, { "epoch": 2.4001024152940174, "grad_norm": 0.36288157086816075, "learning_rate": 3.355493827533083e-05, "loss": 0.4218, "num_tokens": 1341291107.0, "step": 1759 }, { "epoch": 2.4014679525475806, "grad_norm": 0.37842690891991415, "learning_rate": 3.354731363728806e-05, "loss": 0.4053, "num_tokens": 1342082237.0, "step": 1760 }, { "epoch": 2.402833489801144, "grad_norm": 0.3717705000266866, "learning_rate": 3.3539685476659964e-05, "loss": 0.4026, "num_tokens": 1342771301.0, "step": 1761 }, { "epoch": 2.4041990270547067, "grad_norm": 0.3756986228286128, "learning_rate": 3.353205379577357e-05, "loss": 0.4046, "num_tokens": 1343585949.0, "step": 1762 }, { "epoch": 2.40556456430827, "grad_norm": 0.3401991675280335, "learning_rate": 3.352441859695698e-05, "loss": 0.4189, "num_tokens": 1344453263.0, "step": 1763 }, { "epoch": 2.406930101561833, "grad_norm": 0.3677862405600871, "learning_rate": 3.3516779882539355e-05, "loss": 0.4157, "num_tokens": 1345189836.0, "step": 1764 }, { "epoch": 2.4082956388153964, "grad_norm": 0.38819480086925023, "learning_rate": 3.350913765485094e-05, "loss": 0.4018, "num_tokens": 1345935629.0, "step": 1765 }, { "epoch": 2.4096611760689597, "grad_norm": 0.3506753818203337, "learning_rate": 3.3501491916223037e-05, "loss": 0.4205, "num_tokens": 1346704479.0, "step": 1766 }, { "epoch": 2.411026713322523, "grad_norm": 0.39106655467843177, "learning_rate": 3.349384266898804e-05, "loss": 0.4136, "num_tokens": 1347589680.0, "step": 1767 }, { "epoch": 2.412392250576086, "grad_norm": 0.4052675059032639, "learning_rate": 3.3486189915479404e-05, "loss": 0.3789, "num_tokens": 1348338987.0, "step": 1768 }, { "epoch": 2.413757787829649, "grad_norm": 0.3748346748969994, "learning_rate": 3.3478533658031645e-05, "loss": 0.398, "num_tokens": 1349106948.0, "step": 1769 }, { "epoch": 2.4151233250832123, "grad_norm": 0.3812552990084386, "learning_rate": 3.347087389898036e-05, "loss": 0.3934, "num_tokens": 1349835026.0, "step": 1770 }, { "epoch": 2.4164888623367755, "grad_norm": 0.41642281395179537, "learning_rate": 3.346321064066221e-05, "loss": 0.4012, "num_tokens": 1350601070.0, "step": 1771 }, { "epoch": 2.4178543995903388, "grad_norm": 0.3693643488778739, "learning_rate": 3.345554388541493e-05, "loss": 0.4016, "num_tokens": 1351296728.0, "step": 1772 }, { "epoch": 2.419219936843902, "grad_norm": 0.3818320985489402, "learning_rate": 3.34478736355773e-05, "loss": 0.4078, "num_tokens": 1352072637.0, "step": 1773 }, { "epoch": 2.4205854740974653, "grad_norm": 0.3533221777102373, "learning_rate": 3.344019989348919e-05, "loss": 0.4181, "num_tokens": 1352887029.0, "step": 1774 }, { "epoch": 2.4219510113510285, "grad_norm": 0.40895520966579335, "learning_rate": 3.343252266149153e-05, "loss": 0.3964, "num_tokens": 1353583699.0, "step": 1775 }, { "epoch": 2.423316548604592, "grad_norm": 0.3778862084638007, "learning_rate": 3.3424841941926304e-05, "loss": 0.4051, "num_tokens": 1354351110.0, "step": 1776 }, { "epoch": 2.424682085858155, "grad_norm": 0.3882801882974769, "learning_rate": 3.341715773713657e-05, "loss": 0.4179, "num_tokens": 1355114870.0, "step": 1777 }, { "epoch": 2.426047623111718, "grad_norm": 0.36370987189006754, "learning_rate": 3.3409470049466443e-05, "loss": 0.3972, "num_tokens": 1355839648.0, "step": 1778 }, { "epoch": 2.427413160365281, "grad_norm": 0.4011122815005503, "learning_rate": 3.3401778881261104e-05, "loss": 0.3902, "num_tokens": 1356605855.0, "step": 1779 }, { "epoch": 2.4287786976188444, "grad_norm": 0.4143660876776707, "learning_rate": 3.33940842348668e-05, "loss": 0.4218, "num_tokens": 1357427989.0, "step": 1780 }, { "epoch": 2.4301442348724076, "grad_norm": 0.4047240067445464, "learning_rate": 3.3386386112630836e-05, "loss": 0.3976, "num_tokens": 1358245928.0, "step": 1781 }, { "epoch": 2.431509772125971, "grad_norm": 0.39084805404360984, "learning_rate": 3.337868451690157e-05, "loss": 0.4149, "num_tokens": 1359027679.0, "step": 1782 }, { "epoch": 2.432875309379534, "grad_norm": 0.3665268579779999, "learning_rate": 3.337097945002843e-05, "loss": 0.3933, "num_tokens": 1359814448.0, "step": 1783 }, { "epoch": 2.4342408466330974, "grad_norm": 0.4181772561548066, "learning_rate": 3.336327091436188e-05, "loss": 0.381, "num_tokens": 1360540315.0, "step": 1784 }, { "epoch": 2.43560638388666, "grad_norm": 0.35505462028425666, "learning_rate": 3.335555891225349e-05, "loss": 0.4056, "num_tokens": 1361311581.0, "step": 1785 }, { "epoch": 2.4369719211402234, "grad_norm": 0.42017238941566076, "learning_rate": 3.334784344605584e-05, "loss": 0.4022, "num_tokens": 1362128968.0, "step": 1786 }, { "epoch": 2.4383374583937867, "grad_norm": 0.40849185111726566, "learning_rate": 3.334012451812258e-05, "loss": 0.3912, "num_tokens": 1362844500.0, "step": 1787 }, { "epoch": 2.43970299564735, "grad_norm": 0.3395433410761719, "learning_rate": 3.3332402130808435e-05, "loss": 0.414, "num_tokens": 1363665788.0, "step": 1788 }, { "epoch": 2.441068532900913, "grad_norm": 0.4097803076571032, "learning_rate": 3.332467628646917e-05, "loss": 0.4116, "num_tokens": 1364433671.0, "step": 1789 }, { "epoch": 2.4424340701544764, "grad_norm": 0.39664806227637656, "learning_rate": 3.331694698746159e-05, "loss": 0.411, "num_tokens": 1365282190.0, "step": 1790 }, { "epoch": 2.4437996074080397, "grad_norm": 0.40394376638309293, "learning_rate": 3.3309214236143585e-05, "loss": 0.4033, "num_tokens": 1366045094.0, "step": 1791 }, { "epoch": 2.445165144661603, "grad_norm": 0.4627280761097945, "learning_rate": 3.330147803487408e-05, "loss": 0.3996, "num_tokens": 1366763997.0, "step": 1792 }, { "epoch": 2.446530681915166, "grad_norm": 0.3339971449516977, "learning_rate": 3.3293738386013046e-05, "loss": 0.4055, "num_tokens": 1367565978.0, "step": 1793 }, { "epoch": 2.447896219168729, "grad_norm": 0.41956488830764566, "learning_rate": 3.3285995291921515e-05, "loss": 0.4064, "num_tokens": 1368338571.0, "step": 1794 }, { "epoch": 2.4492617564222923, "grad_norm": 0.35739547359871876, "learning_rate": 3.327824875496159e-05, "loss": 0.3963, "num_tokens": 1369081718.0, "step": 1795 }, { "epoch": 2.4506272936758555, "grad_norm": 0.3895064087817495, "learning_rate": 3.327049877749638e-05, "loss": 0.4234, "num_tokens": 1369841051.0, "step": 1796 }, { "epoch": 2.4519928309294188, "grad_norm": 0.3823739839351626, "learning_rate": 3.326274536189008e-05, "loss": 0.3723, "num_tokens": 1370607944.0, "step": 1797 }, { "epoch": 2.453358368182982, "grad_norm": 0.39487835021881385, "learning_rate": 3.325498851050792e-05, "loss": 0.4086, "num_tokens": 1371402221.0, "step": 1798 }, { "epoch": 2.4547239054365453, "grad_norm": 0.41594315195887493, "learning_rate": 3.324722822571617e-05, "loss": 0.4091, "num_tokens": 1372189550.0, "step": 1799 }, { "epoch": 2.4560894426901085, "grad_norm": 0.3877397732263848, "learning_rate": 3.323946450988217e-05, "loss": 0.4039, "num_tokens": 1372857974.0, "step": 1800 }, { "epoch": 2.4574549799436713, "grad_norm": 0.3616597574115198, "learning_rate": 3.323169736537429e-05, "loss": 0.4101, "num_tokens": 1373613075.0, "step": 1801 }, { "epoch": 2.4588205171972346, "grad_norm": 0.42563574968377976, "learning_rate": 3.322392679456194e-05, "loss": 0.3911, "num_tokens": 1374334754.0, "step": 1802 }, { "epoch": 2.460186054450798, "grad_norm": 0.3402528611260554, "learning_rate": 3.32161527998156e-05, "loss": 0.3877, "num_tokens": 1375035870.0, "step": 1803 }, { "epoch": 2.461551591704361, "grad_norm": 0.38570790682121864, "learning_rate": 3.320837538350677e-05, "loss": 0.3959, "num_tokens": 1375764284.0, "step": 1804 }, { "epoch": 2.4629171289579244, "grad_norm": 0.38715417715087325, "learning_rate": 3.3200594548007995e-05, "loss": 0.3923, "num_tokens": 1376508016.0, "step": 1805 }, { "epoch": 2.4642826662114876, "grad_norm": 0.3283146602027628, "learning_rate": 3.3192810295692885e-05, "loss": 0.3997, "num_tokens": 1377276417.0, "step": 1806 }, { "epoch": 2.465648203465051, "grad_norm": 0.4487485584256729, "learning_rate": 3.318502262893608e-05, "loss": 0.3953, "num_tokens": 1378034170.0, "step": 1807 }, { "epoch": 2.467013740718614, "grad_norm": 0.3650437749283035, "learning_rate": 3.317723155011325e-05, "loss": 0.3749, "num_tokens": 1378794263.0, "step": 1808 }, { "epoch": 2.4683792779721774, "grad_norm": 0.43845776782438134, "learning_rate": 3.316943706160111e-05, "loss": 0.4076, "num_tokens": 1379578390.0, "step": 1809 }, { "epoch": 2.46974481522574, "grad_norm": 0.39225999618513313, "learning_rate": 3.3161639165777444e-05, "loss": 0.4335, "num_tokens": 1380352357.0, "step": 1810 }, { "epoch": 2.4711103524793034, "grad_norm": 0.382677658028876, "learning_rate": 3.315383786502103e-05, "loss": 0.3819, "num_tokens": 1381028630.0, "step": 1811 }, { "epoch": 2.4724758897328667, "grad_norm": 0.4086575514871356, "learning_rate": 3.314603316171171e-05, "loss": 0.4064, "num_tokens": 1381810142.0, "step": 1812 }, { "epoch": 2.47384142698643, "grad_norm": 0.3377459505016925, "learning_rate": 3.3138225058230367e-05, "loss": 0.3958, "num_tokens": 1382589992.0, "step": 1813 }, { "epoch": 2.475206964239993, "grad_norm": 0.4488765004541103, "learning_rate": 3.3130413556958924e-05, "loss": 0.3813, "num_tokens": 1383367392.0, "step": 1814 }, { "epoch": 2.4765725014935565, "grad_norm": 0.361460065915858, "learning_rate": 3.312259866028032e-05, "loss": 0.399, "num_tokens": 1384131046.0, "step": 1815 }, { "epoch": 2.4779380387471197, "grad_norm": 0.4331193564452884, "learning_rate": 3.3114780370578536e-05, "loss": 0.4114, "num_tokens": 1384845362.0, "step": 1816 }, { "epoch": 2.479303576000683, "grad_norm": 0.43023654566720937, "learning_rate": 3.310695869023861e-05, "loss": 0.4164, "num_tokens": 1385634732.0, "step": 1817 }, { "epoch": 2.4806691132542458, "grad_norm": 0.3677684596617767, "learning_rate": 3.3099133621646574e-05, "loss": 0.3822, "num_tokens": 1386412843.0, "step": 1818 }, { "epoch": 2.482034650507809, "grad_norm": 0.3417380798337288, "learning_rate": 3.3091305167189545e-05, "loss": 0.3973, "num_tokens": 1387173246.0, "step": 1819 }, { "epoch": 2.4834001877613723, "grad_norm": 0.41711102863466215, "learning_rate": 3.3083473329255634e-05, "loss": 0.3975, "num_tokens": 1387923077.0, "step": 1820 }, { "epoch": 2.4847657250149355, "grad_norm": 0.37630241875534426, "learning_rate": 3.307563811023399e-05, "loss": 0.4308, "num_tokens": 1388725455.0, "step": 1821 }, { "epoch": 2.486131262268499, "grad_norm": 0.3900590757463629, "learning_rate": 3.306779951251481e-05, "loss": 0.386, "num_tokens": 1389420512.0, "step": 1822 }, { "epoch": 2.487496799522062, "grad_norm": 0.3929520973565133, "learning_rate": 3.30599575384893e-05, "loss": 0.3844, "num_tokens": 1390162276.0, "step": 1823 }, { "epoch": 2.4888623367756253, "grad_norm": 0.40712167642293673, "learning_rate": 3.3052112190549705e-05, "loss": 0.4133, "num_tokens": 1390922240.0, "step": 1824 }, { "epoch": 2.4902278740291885, "grad_norm": 0.3352095947361305, "learning_rate": 3.304426347108932e-05, "loss": 0.3854, "num_tokens": 1391659533.0, "step": 1825 }, { "epoch": 2.491593411282752, "grad_norm": 0.36012831480413593, "learning_rate": 3.3036411382502425e-05, "loss": 0.3878, "num_tokens": 1392344814.0, "step": 1826 }, { "epoch": 2.4929589485363146, "grad_norm": 0.46572770592712115, "learning_rate": 3.302855592718436e-05, "loss": 0.3985, "num_tokens": 1393149909.0, "step": 1827 }, { "epoch": 2.494324485789878, "grad_norm": 0.40853501169147866, "learning_rate": 3.3020697107531496e-05, "loss": 0.39, "num_tokens": 1393925870.0, "step": 1828 }, { "epoch": 2.495690023043441, "grad_norm": 0.36769020452722045, "learning_rate": 3.301283492594122e-05, "loss": 0.414, "num_tokens": 1394673408.0, "step": 1829 }, { "epoch": 2.4970555602970044, "grad_norm": 0.38013629265360926, "learning_rate": 3.3004969384811915e-05, "loss": 0.3889, "num_tokens": 1395469951.0, "step": 1830 }, { "epoch": 2.4984210975505676, "grad_norm": 0.43563461052722396, "learning_rate": 3.2997100486543046e-05, "loss": 0.3796, "num_tokens": 1396201746.0, "step": 1831 }, { "epoch": 2.499786634804131, "grad_norm": 0.38713828440486064, "learning_rate": 3.2989228233535057e-05, "loss": 0.4185, "num_tokens": 1397073615.0, "step": 1832 }, { "epoch": 2.5011521720576937, "grad_norm": 0.37342233567277816, "learning_rate": 3.298135262818944e-05, "loss": 0.4014, "num_tokens": 1397791284.0, "step": 1833 }, { "epoch": 2.502517709311257, "grad_norm": 0.4607753095806145, "learning_rate": 3.29734736729087e-05, "loss": 0.3914, "num_tokens": 1398530726.0, "step": 1834 }, { "epoch": 2.50388324656482, "grad_norm": 0.4023583359942118, "learning_rate": 3.296559137009636e-05, "loss": 0.4174, "num_tokens": 1399211721.0, "step": 1835 }, { "epoch": 2.5052487838183835, "grad_norm": 0.3963625518142885, "learning_rate": 3.295770572215697e-05, "loss": 0.4026, "num_tokens": 1400006125.0, "step": 1836 }, { "epoch": 2.5066143210719467, "grad_norm": 0.3396845178076891, "learning_rate": 3.2949816731496106e-05, "loss": 0.3969, "num_tokens": 1400804425.0, "step": 1837 }, { "epoch": 2.50797985832551, "grad_norm": 0.32553751323709607, "learning_rate": 3.294192440052036e-05, "loss": 0.3966, "num_tokens": 1401534459.0, "step": 1838 }, { "epoch": 2.509345395579073, "grad_norm": 0.39554363662699804, "learning_rate": 3.293402873163732e-05, "loss": 0.4157, "num_tokens": 1402304702.0, "step": 1839 }, { "epoch": 2.5107109328326365, "grad_norm": 0.36753372908426296, "learning_rate": 3.292612972725564e-05, "loss": 0.4098, "num_tokens": 1403089389.0, "step": 1840 }, { "epoch": 2.5120764700861997, "grad_norm": 0.36761327018463036, "learning_rate": 3.291822738978494e-05, "loss": 0.4078, "num_tokens": 1403876738.0, "step": 1841 }, { "epoch": 2.513442007339763, "grad_norm": 0.39496195286843866, "learning_rate": 3.29103217216359e-05, "loss": 0.415, "num_tokens": 1404604983.0, "step": 1842 }, { "epoch": 2.514807544593326, "grad_norm": 0.38695076924028377, "learning_rate": 3.290241272522018e-05, "loss": 0.3787, "num_tokens": 1405329463.0, "step": 1843 }, { "epoch": 2.516173081846889, "grad_norm": 0.3989690606481285, "learning_rate": 3.289450040295048e-05, "loss": 0.3866, "num_tokens": 1405990295.0, "step": 1844 }, { "epoch": 2.5175386191004523, "grad_norm": 0.357056597995354, "learning_rate": 3.288658475724052e-05, "loss": 0.3984, "num_tokens": 1406792919.0, "step": 1845 }, { "epoch": 2.5189041563540155, "grad_norm": 0.37906654357795616, "learning_rate": 3.2878665790505e-05, "loss": 0.4032, "num_tokens": 1407608999.0, "step": 1846 }, { "epoch": 2.520269693607579, "grad_norm": 0.3483978857453706, "learning_rate": 3.287074350515966e-05, "loss": 0.3986, "num_tokens": 1408395533.0, "step": 1847 }, { "epoch": 2.521635230861142, "grad_norm": 0.44539728253280786, "learning_rate": 3.286281790362125e-05, "loss": 0.4157, "num_tokens": 1409107723.0, "step": 1848 }, { "epoch": 2.523000768114705, "grad_norm": 0.3626056844712926, "learning_rate": 3.2854888988307536e-05, "loss": 0.4019, "num_tokens": 1410008464.0, "step": 1849 }, { "epoch": 2.524366305368268, "grad_norm": 0.3625523958789969, "learning_rate": 3.2846956761637266e-05, "loss": 0.417, "num_tokens": 1410732552.0, "step": 1850 }, { "epoch": 2.5257318426218314, "grad_norm": 0.3980301724395393, "learning_rate": 3.283902122603023e-05, "loss": 0.4214, "num_tokens": 1411446834.0, "step": 1851 }, { "epoch": 2.5270973798753946, "grad_norm": 0.3380589617376178, "learning_rate": 3.283108238390722e-05, "loss": 0.3996, "num_tokens": 1412188644.0, "step": 1852 }, { "epoch": 2.528462917128958, "grad_norm": 0.3704273447605162, "learning_rate": 3.282314023769003e-05, "loss": 0.4194, "num_tokens": 1412963986.0, "step": 1853 }, { "epoch": 2.529828454382521, "grad_norm": 0.34085711335747854, "learning_rate": 3.2815194789801466e-05, "loss": 0.3685, "num_tokens": 1413668828.0, "step": 1854 }, { "epoch": 2.5311939916360844, "grad_norm": 0.35261791674557547, "learning_rate": 3.280724604266533e-05, "loss": 0.4255, "num_tokens": 1414462053.0, "step": 1855 }, { "epoch": 2.5325595288896476, "grad_norm": 0.3906592138675309, "learning_rate": 3.279929399870645e-05, "loss": 0.388, "num_tokens": 1415171077.0, "step": 1856 }, { "epoch": 2.533925066143211, "grad_norm": 0.3622101001217704, "learning_rate": 3.279133866035065e-05, "loss": 0.3856, "num_tokens": 1415939956.0, "step": 1857 }, { "epoch": 2.535290603396774, "grad_norm": 0.3475184800044915, "learning_rate": 3.2783380030024764e-05, "loss": 0.3968, "num_tokens": 1416698042.0, "step": 1858 }, { "epoch": 2.536656140650337, "grad_norm": 0.31761004636432816, "learning_rate": 3.277541811015661e-05, "loss": 0.4068, "num_tokens": 1417580763.0, "step": 1859 }, { "epoch": 2.5380216779039, "grad_norm": 0.38796616259859273, "learning_rate": 3.276745290317504e-05, "loss": 0.4025, "num_tokens": 1418389376.0, "step": 1860 }, { "epoch": 2.5393872151574635, "grad_norm": 0.3507774165928244, "learning_rate": 3.275948441150989e-05, "loss": 0.4189, "num_tokens": 1419112005.0, "step": 1861 }, { "epoch": 2.5407527524110267, "grad_norm": 0.38259859784948047, "learning_rate": 3.2751512637591995e-05, "loss": 0.3905, "num_tokens": 1419933833.0, "step": 1862 }, { "epoch": 2.54211828966459, "grad_norm": 0.33866829842762974, "learning_rate": 3.27435375838532e-05, "loss": 0.4149, "num_tokens": 1420746843.0, "step": 1863 }, { "epoch": 2.5434838269181532, "grad_norm": 0.3843033931152206, "learning_rate": 3.2735559252726354e-05, "loss": 0.4058, "num_tokens": 1421480601.0, "step": 1864 }, { "epoch": 2.544849364171716, "grad_norm": 0.3695527107111469, "learning_rate": 3.27275776466453e-05, "loss": 0.3966, "num_tokens": 1422248596.0, "step": 1865 }, { "epoch": 2.5462149014252793, "grad_norm": 0.3502302790950411, "learning_rate": 3.271959276804488e-05, "loss": 0.3939, "num_tokens": 1423045325.0, "step": 1866 }, { "epoch": 2.5475804386788425, "grad_norm": 0.37335946625359745, "learning_rate": 3.271160461936093e-05, "loss": 0.3868, "num_tokens": 1423951072.0, "step": 1867 }, { "epoch": 2.548945975932406, "grad_norm": 0.3610762838834721, "learning_rate": 3.2703613203030286e-05, "loss": 0.3926, "num_tokens": 1424676843.0, "step": 1868 }, { "epoch": 2.550311513185969, "grad_norm": 0.3455546681459487, "learning_rate": 3.269561852149079e-05, "loss": 0.4043, "num_tokens": 1425512818.0, "step": 1869 }, { "epoch": 2.5516770504395323, "grad_norm": 0.4134996108237878, "learning_rate": 3.268762057718128e-05, "loss": 0.4101, "num_tokens": 1426314016.0, "step": 1870 }, { "epoch": 2.5530425876930956, "grad_norm": 0.35599641295001516, "learning_rate": 3.267961937254156e-05, "loss": 0.3863, "num_tokens": 1427062453.0, "step": 1871 }, { "epoch": 2.554408124946659, "grad_norm": 0.37402004202776656, "learning_rate": 3.2671614910012475e-05, "loss": 0.4072, "num_tokens": 1427781104.0, "step": 1872 }, { "epoch": 2.555773662200222, "grad_norm": 0.4118843069026922, "learning_rate": 3.266360719203582e-05, "loss": 0.3946, "num_tokens": 1428564427.0, "step": 1873 }, { "epoch": 2.5571391994537853, "grad_norm": 0.3128544375840903, "learning_rate": 3.265559622105442e-05, "loss": 0.3858, "num_tokens": 1429339515.0, "step": 1874 }, { "epoch": 2.5585047367073486, "grad_norm": 0.3975963762301497, "learning_rate": 3.264758199951206e-05, "loss": 0.4111, "num_tokens": 1430127707.0, "step": 1875 }, { "epoch": 2.5598702739609114, "grad_norm": 0.37624925648046637, "learning_rate": 3.2639564529853545e-05, "loss": 0.4001, "num_tokens": 1430970867.0, "step": 1876 }, { "epoch": 2.5612358112144746, "grad_norm": 0.35189135259481513, "learning_rate": 3.263154381452465e-05, "loss": 0.3971, "num_tokens": 1431685533.0, "step": 1877 }, { "epoch": 2.562601348468038, "grad_norm": 0.36038154872332895, "learning_rate": 3.2623519855972156e-05, "loss": 0.401, "num_tokens": 1432420498.0, "step": 1878 }, { "epoch": 2.563966885721601, "grad_norm": 0.3470545973490495, "learning_rate": 3.261549265664381e-05, "loss": 0.3954, "num_tokens": 1433236123.0, "step": 1879 }, { "epoch": 2.5653324229751644, "grad_norm": 0.3399397801955326, "learning_rate": 3.2607462218988387e-05, "loss": 0.4077, "num_tokens": 1434006479.0, "step": 1880 }, { "epoch": 2.5666979602287276, "grad_norm": 0.42826617313339127, "learning_rate": 3.259942854545559e-05, "loss": 0.4047, "num_tokens": 1434769628.0, "step": 1881 }, { "epoch": 2.5680634974822905, "grad_norm": 0.3630194861777675, "learning_rate": 3.2591391638496176e-05, "loss": 0.4091, "num_tokens": 1435552685.0, "step": 1882 }, { "epoch": 2.5694290347358537, "grad_norm": 0.3607480583196766, "learning_rate": 3.2583351500561855e-05, "loss": 0.3958, "num_tokens": 1436327722.0, "step": 1883 }, { "epoch": 2.570794571989417, "grad_norm": 0.39041570085092414, "learning_rate": 3.2575308134105314e-05, "loss": 0.424, "num_tokens": 1437057891.0, "step": 1884 }, { "epoch": 2.57216010924298, "grad_norm": 0.4097731218211943, "learning_rate": 3.256726154158024e-05, "loss": 0.3989, "num_tokens": 1437803906.0, "step": 1885 }, { "epoch": 2.5735256464965435, "grad_norm": 0.364542014630709, "learning_rate": 3.255921172544129e-05, "loss": 0.4015, "num_tokens": 1438553462.0, "step": 1886 }, { "epoch": 2.5748911837501067, "grad_norm": 0.3568113312844893, "learning_rate": 3.255115868814413e-05, "loss": 0.405, "num_tokens": 1439302157.0, "step": 1887 }, { "epoch": 2.57625672100367, "grad_norm": 0.3621553385096562, "learning_rate": 3.254310243214538e-05, "loss": 0.4134, "num_tokens": 1440092772.0, "step": 1888 }, { "epoch": 2.5776222582572332, "grad_norm": 0.40913330466001696, "learning_rate": 3.2535042959902663e-05, "loss": 0.3937, "num_tokens": 1440812008.0, "step": 1889 }, { "epoch": 2.5789877955107965, "grad_norm": 0.35499057683347407, "learning_rate": 3.252698027387458e-05, "loss": 0.3905, "num_tokens": 1441576985.0, "step": 1890 }, { "epoch": 2.5803533327643597, "grad_norm": 0.34831517147520213, "learning_rate": 3.251891437652069e-05, "loss": 0.4023, "num_tokens": 1442375915.0, "step": 1891 }, { "epoch": 2.5817188700179226, "grad_norm": 0.39683559666303175, "learning_rate": 3.2510845270301566e-05, "loss": 0.3828, "num_tokens": 1443115738.0, "step": 1892 }, { "epoch": 2.583084407271486, "grad_norm": 0.35406629281312174, "learning_rate": 3.250277295767873e-05, "loss": 0.4176, "num_tokens": 1443886074.0, "step": 1893 }, { "epoch": 2.584449944525049, "grad_norm": 0.3903675128771384, "learning_rate": 3.2494697441114705e-05, "loss": 0.396, "num_tokens": 1444634059.0, "step": 1894 }, { "epoch": 2.5858154817786123, "grad_norm": 0.3679534764287634, "learning_rate": 3.248661872307298e-05, "loss": 0.4231, "num_tokens": 1445454319.0, "step": 1895 }, { "epoch": 2.5871810190321756, "grad_norm": 0.35496021806016737, "learning_rate": 3.247853680601802e-05, "loss": 0.4143, "num_tokens": 1446236930.0, "step": 1896 }, { "epoch": 2.588546556285739, "grad_norm": 0.3538942348073179, "learning_rate": 3.247045169241526e-05, "loss": 0.406, "num_tokens": 1446990959.0, "step": 1897 }, { "epoch": 2.5899120935393016, "grad_norm": 0.37344754778145156, "learning_rate": 3.2462363384731135e-05, "loss": 0.4162, "num_tokens": 1447721442.0, "step": 1898 }, { "epoch": 2.591277630792865, "grad_norm": 0.4345171506680281, "learning_rate": 3.245427188543302e-05, "loss": 0.4063, "num_tokens": 1448455544.0, "step": 1899 }, { "epoch": 2.592643168046428, "grad_norm": 0.36928896940008443, "learning_rate": 3.2446177196989294e-05, "loss": 0.4097, "num_tokens": 1449272081.0, "step": 1900 }, { "epoch": 2.5940087052999914, "grad_norm": 0.3476677750657752, "learning_rate": 3.243807932186929e-05, "loss": 0.3899, "num_tokens": 1450028131.0, "step": 1901 }, { "epoch": 2.5953742425535546, "grad_norm": 0.3492910501708233, "learning_rate": 3.2429978262543316e-05, "loss": 0.4086, "num_tokens": 1450771886.0, "step": 1902 }, { "epoch": 2.596739779807118, "grad_norm": 0.3688137568551824, "learning_rate": 3.242187402148266e-05, "loss": 0.4069, "num_tokens": 1451523813.0, "step": 1903 }, { "epoch": 2.598105317060681, "grad_norm": 0.3567447656697406, "learning_rate": 3.241376660115957e-05, "loss": 0.3973, "num_tokens": 1452231344.0, "step": 1904 }, { "epoch": 2.5994708543142444, "grad_norm": 0.3476206066113737, "learning_rate": 3.240565600404727e-05, "loss": 0.4287, "num_tokens": 1452958199.0, "step": 1905 }, { "epoch": 2.6008363915678077, "grad_norm": 0.36683980232876257, "learning_rate": 3.2397542232619965e-05, "loss": 0.411, "num_tokens": 1453778010.0, "step": 1906 }, { "epoch": 2.602201928821371, "grad_norm": 0.4277075023528582, "learning_rate": 3.23894252893528e-05, "loss": 0.3872, "num_tokens": 1454491324.0, "step": 1907 }, { "epoch": 2.6035674660749337, "grad_norm": 0.3309470002994611, "learning_rate": 3.23813051767219e-05, "loss": 0.4069, "num_tokens": 1455341194.0, "step": 1908 }, { "epoch": 2.604933003328497, "grad_norm": 0.426225686115781, "learning_rate": 3.2373181897204374e-05, "loss": 0.4121, "num_tokens": 1456106730.0, "step": 1909 }, { "epoch": 2.6062985405820602, "grad_norm": 0.3995274591442336, "learning_rate": 3.236505545327827e-05, "loss": 0.4001, "num_tokens": 1456857613.0, "step": 1910 }, { "epoch": 2.6076640778356235, "grad_norm": 0.3633336783296631, "learning_rate": 3.235692584742262e-05, "loss": 0.3902, "num_tokens": 1457591432.0, "step": 1911 }, { "epoch": 2.6090296150891867, "grad_norm": 0.3741308182951071, "learning_rate": 3.234879308211742e-05, "loss": 0.3944, "num_tokens": 1458368779.0, "step": 1912 }, { "epoch": 2.61039515234275, "grad_norm": 0.39132262533474016, "learning_rate": 3.234065715984361e-05, "loss": 0.3879, "num_tokens": 1459066112.0, "step": 1913 }, { "epoch": 2.611760689596313, "grad_norm": 0.38837543094099003, "learning_rate": 3.233251808308312e-05, "loss": 0.3967, "num_tokens": 1459791952.0, "step": 1914 }, { "epoch": 2.613126226849876, "grad_norm": 0.3490725562300912, "learning_rate": 3.232437585431883e-05, "loss": 0.3968, "num_tokens": 1460541770.0, "step": 1915 }, { "epoch": 2.6144917641034393, "grad_norm": 0.3936477304331134, "learning_rate": 3.231623047603458e-05, "loss": 0.3895, "num_tokens": 1461356324.0, "step": 1916 }, { "epoch": 2.6158573013570026, "grad_norm": 0.31816771609569694, "learning_rate": 3.2308081950715165e-05, "loss": 0.4167, "num_tokens": 1462143492.0, "step": 1917 }, { "epoch": 2.617222838610566, "grad_norm": 0.36324239912938866, "learning_rate": 3.2299930280846345e-05, "loss": 0.3971, "num_tokens": 1462857213.0, "step": 1918 }, { "epoch": 2.618588375864129, "grad_norm": 0.3903807222673258, "learning_rate": 3.2291775468914864e-05, "loss": 0.3805, "num_tokens": 1463488127.0, "step": 1919 }, { "epoch": 2.6199539131176923, "grad_norm": 0.37832811136409317, "learning_rate": 3.228361751740837e-05, "loss": 0.3988, "num_tokens": 1464297199.0, "step": 1920 }, { "epoch": 2.6213194503712556, "grad_norm": 0.34440934990804606, "learning_rate": 3.227545642881553e-05, "loss": 0.4037, "num_tokens": 1465095846.0, "step": 1921 }, { "epoch": 2.622684987624819, "grad_norm": 0.34125368407345397, "learning_rate": 3.2267292205625915e-05, "loss": 0.3906, "num_tokens": 1465840495.0, "step": 1922 }, { "epoch": 2.624050524878382, "grad_norm": 0.3394328846719233, "learning_rate": 3.22591248503301e-05, "loss": 0.4121, "num_tokens": 1466624399.0, "step": 1923 }, { "epoch": 2.625416062131945, "grad_norm": 0.35078618174030585, "learning_rate": 3.225095436541957e-05, "loss": 0.3905, "num_tokens": 1467392149.0, "step": 1924 }, { "epoch": 2.626781599385508, "grad_norm": 0.35189010531623033, "learning_rate": 3.224278075338679e-05, "loss": 0.4037, "num_tokens": 1468194714.0, "step": 1925 }, { "epoch": 2.6281471366390714, "grad_norm": 0.35709613227663645, "learning_rate": 3.223460401672518e-05, "loss": 0.3919, "num_tokens": 1468921710.0, "step": 1926 }, { "epoch": 2.6295126738926347, "grad_norm": 0.3387622933259251, "learning_rate": 3.2226424157929116e-05, "loss": 0.3894, "num_tokens": 1469749002.0, "step": 1927 }, { "epoch": 2.630878211146198, "grad_norm": 0.3616958089096876, "learning_rate": 3.2218241179493906e-05, "loss": 0.4001, "num_tokens": 1470519522.0, "step": 1928 }, { "epoch": 2.632243748399761, "grad_norm": 0.355918904199072, "learning_rate": 3.221005508391583e-05, "loss": 0.3959, "num_tokens": 1471248108.0, "step": 1929 }, { "epoch": 2.633609285653324, "grad_norm": 0.3701051309886955, "learning_rate": 3.22018658736921e-05, "loss": 0.3975, "num_tokens": 1471937767.0, "step": 1930 }, { "epoch": 2.6349748229068872, "grad_norm": 0.3429430733184018, "learning_rate": 3.21936735513209e-05, "loss": 0.3928, "num_tokens": 1472682276.0, "step": 1931 }, { "epoch": 2.6363403601604505, "grad_norm": 0.3883216247000244, "learning_rate": 3.218547811930136e-05, "loss": 0.4195, "num_tokens": 1473508408.0, "step": 1932 }, { "epoch": 2.6377058974140137, "grad_norm": 0.3687708054608913, "learning_rate": 3.2177279580133527e-05, "loss": 0.3852, "num_tokens": 1474299012.0, "step": 1933 }, { "epoch": 2.639071434667577, "grad_norm": 0.31661966756594934, "learning_rate": 3.216907793631843e-05, "loss": 0.3977, "num_tokens": 1475060852.0, "step": 1934 }, { "epoch": 2.6404369719211402, "grad_norm": 0.396379777613998, "learning_rate": 3.2160873190358056e-05, "loss": 0.4047, "num_tokens": 1475791789.0, "step": 1935 }, { "epoch": 2.6418025091747035, "grad_norm": 0.4023286914562756, "learning_rate": 3.21526653447553e-05, "loss": 0.4354, "num_tokens": 1476557808.0, "step": 1936 }, { "epoch": 2.6431680464282667, "grad_norm": 0.36509469618621687, "learning_rate": 3.214445440201401e-05, "loss": 0.4032, "num_tokens": 1477365856.0, "step": 1937 }, { "epoch": 2.64453358368183, "grad_norm": 0.379939571216293, "learning_rate": 3.213624036463901e-05, "loss": 0.4094, "num_tokens": 1478097410.0, "step": 1938 }, { "epoch": 2.6458991209353933, "grad_norm": 0.3551671034702388, "learning_rate": 3.212802323513604e-05, "loss": 0.4079, "num_tokens": 1478863190.0, "step": 1939 }, { "epoch": 2.647264658188956, "grad_norm": 0.3624854024215106, "learning_rate": 3.211980301601179e-05, "loss": 0.4006, "num_tokens": 1479639316.0, "step": 1940 }, { "epoch": 2.6486301954425193, "grad_norm": 0.35208600368845083, "learning_rate": 3.211157970977388e-05, "loss": 0.4017, "num_tokens": 1480382633.0, "step": 1941 }, { "epoch": 2.6499957326960826, "grad_norm": 0.37896152838111846, "learning_rate": 3.2103353318930896e-05, "loss": 0.4151, "num_tokens": 1481048833.0, "step": 1942 }, { "epoch": 2.651361269949646, "grad_norm": 0.3769876323816541, "learning_rate": 3.2095123845992364e-05, "loss": 0.3951, "num_tokens": 1481764793.0, "step": 1943 }, { "epoch": 2.652726807203209, "grad_norm": 0.3155981060251194, "learning_rate": 3.208689129346872e-05, "loss": 0.4032, "num_tokens": 1482587905.0, "step": 1944 }, { "epoch": 2.6540923444567723, "grad_norm": 0.37325169548620646, "learning_rate": 3.207865566387138e-05, "loss": 0.4125, "num_tokens": 1483388092.0, "step": 1945 }, { "epoch": 2.655457881710335, "grad_norm": 0.3507806190294731, "learning_rate": 3.207041695971266e-05, "loss": 0.3699, "num_tokens": 1484154604.0, "step": 1946 }, { "epoch": 2.6568234189638984, "grad_norm": 0.3343008690151341, "learning_rate": 3.206217518350584e-05, "loss": 0.4014, "num_tokens": 1484914329.0, "step": 1947 }, { "epoch": 2.6581889562174617, "grad_norm": 0.37359573623823, "learning_rate": 3.205393033776512e-05, "loss": 0.4101, "num_tokens": 1485659032.0, "step": 1948 }, { "epoch": 2.659554493471025, "grad_norm": 0.3347462319937897, "learning_rate": 3.2045682425005654e-05, "loss": 0.3787, "num_tokens": 1486381380.0, "step": 1949 }, { "epoch": 2.660920030724588, "grad_norm": 0.3542907755234933, "learning_rate": 3.2037431447743524e-05, "loss": 0.3942, "num_tokens": 1487124409.0, "step": 1950 }, { "epoch": 2.6622855679781514, "grad_norm": 0.37133224425569117, "learning_rate": 3.2029177408495746e-05, "loss": 0.3888, "num_tokens": 1487912413.0, "step": 1951 }, { "epoch": 2.6636511052317147, "grad_norm": 0.3733379456410945, "learning_rate": 3.202092030978026e-05, "loss": 0.426, "num_tokens": 1488734754.0, "step": 1952 }, { "epoch": 2.665016642485278, "grad_norm": 0.36490465911664016, "learning_rate": 3.2012660154115974e-05, "loss": 0.4183, "num_tokens": 1489469100.0, "step": 1953 }, { "epoch": 2.666382179738841, "grad_norm": 0.36840899181529485, "learning_rate": 3.200439694402267e-05, "loss": 0.3819, "num_tokens": 1490173238.0, "step": 1954 }, { "epoch": 2.6677477169924044, "grad_norm": 0.3689507345597953, "learning_rate": 3.199613068202112e-05, "loss": 0.4071, "num_tokens": 1490952911.0, "step": 1955 }, { "epoch": 2.6691132542459672, "grad_norm": 0.3879364668517655, "learning_rate": 3.198786137063299e-05, "loss": 0.3828, "num_tokens": 1491672850.0, "step": 1956 }, { "epoch": 2.6704787914995305, "grad_norm": 0.32495308207797047, "learning_rate": 3.19795890123809e-05, "loss": 0.3883, "num_tokens": 1492470706.0, "step": 1957 }, { "epoch": 2.6718443287530937, "grad_norm": 0.3257037277666565, "learning_rate": 3.197131360978837e-05, "loss": 0.4077, "num_tokens": 1493229132.0, "step": 1958 }, { "epoch": 2.673209866006657, "grad_norm": 0.3523567532827415, "learning_rate": 3.1963035165379884e-05, "loss": 0.3812, "num_tokens": 1493955181.0, "step": 1959 }, { "epoch": 2.6745754032602203, "grad_norm": 0.32807544145065454, "learning_rate": 3.195475368168083e-05, "loss": 0.3988, "num_tokens": 1494681409.0, "step": 1960 }, { "epoch": 2.6759409405137835, "grad_norm": 0.33344234570646397, "learning_rate": 3.1946469161217534e-05, "loss": 0.39, "num_tokens": 1495394894.0, "step": 1961 }, { "epoch": 2.6773064777673463, "grad_norm": 0.39916025899478297, "learning_rate": 3.1938181606517244e-05, "loss": 0.399, "num_tokens": 1496151766.0, "step": 1962 }, { "epoch": 2.6786720150209096, "grad_norm": 0.36780514250704927, "learning_rate": 3.192989102010813e-05, "loss": 0.4289, "num_tokens": 1496898845.0, "step": 1963 }, { "epoch": 2.680037552274473, "grad_norm": 0.3758693176932925, "learning_rate": 3.1921597404519294e-05, "loss": 0.4052, "num_tokens": 1497706631.0, "step": 1964 }, { "epoch": 2.681403089528036, "grad_norm": 0.3342020637693274, "learning_rate": 3.191330076228076e-05, "loss": 0.4027, "num_tokens": 1498541856.0, "step": 1965 }, { "epoch": 2.6827686267815993, "grad_norm": 0.38625733583982585, "learning_rate": 3.190500109592346e-05, "loss": 0.4033, "num_tokens": 1499322830.0, "step": 1966 }, { "epoch": 2.6841341640351626, "grad_norm": 0.35295442872434063, "learning_rate": 3.1896698407979284e-05, "loss": 0.3946, "num_tokens": 1499995157.0, "step": 1967 }, { "epoch": 2.685499701288726, "grad_norm": 0.36404320854319877, "learning_rate": 3.188839270098101e-05, "loss": 0.4143, "num_tokens": 1500816621.0, "step": 1968 }, { "epoch": 2.686865238542289, "grad_norm": 0.32251802021692094, "learning_rate": 3.188008397746236e-05, "loss": 0.4078, "num_tokens": 1501661704.0, "step": 1969 }, { "epoch": 2.6882307757958523, "grad_norm": 0.3576169006149078, "learning_rate": 3.1871772239957955e-05, "loss": 0.3964, "num_tokens": 1502361364.0, "step": 1970 }, { "epoch": 2.6895963130494156, "grad_norm": 0.3653396260580772, "learning_rate": 3.186345749100335e-05, "loss": 0.3906, "num_tokens": 1503010898.0, "step": 1971 }, { "epoch": 2.690961850302979, "grad_norm": 0.4757474856416096, "learning_rate": 3.185513973313501e-05, "loss": 0.4128, "num_tokens": 1503749531.0, "step": 1972 }, { "epoch": 2.6923273875565417, "grad_norm": 0.3642316684215514, "learning_rate": 3.1846818968890337e-05, "loss": 0.3767, "num_tokens": 1504394244.0, "step": 1973 }, { "epoch": 2.693692924810105, "grad_norm": 0.3876498054844526, "learning_rate": 3.1838495200807615e-05, "loss": 0.4114, "num_tokens": 1505163958.0, "step": 1974 }, { "epoch": 2.695058462063668, "grad_norm": 0.39675530038243434, "learning_rate": 3.183016843142607e-05, "loss": 0.3899, "num_tokens": 1505977645.0, "step": 1975 }, { "epoch": 2.6964239993172314, "grad_norm": 0.3438309466690207, "learning_rate": 3.1821838663285864e-05, "loss": 0.3765, "num_tokens": 1506674391.0, "step": 1976 }, { "epoch": 2.6977895365707947, "grad_norm": 0.3767210290600603, "learning_rate": 3.1813505898928017e-05, "loss": 0.389, "num_tokens": 1507423484.0, "step": 1977 }, { "epoch": 2.699155073824358, "grad_norm": 0.3324527374793602, "learning_rate": 3.1805170140894515e-05, "loss": 0.4136, "num_tokens": 1508212358.0, "step": 1978 }, { "epoch": 2.7005206110779207, "grad_norm": 0.41621803570814797, "learning_rate": 3.179683139172823e-05, "loss": 0.3905, "num_tokens": 1508937222.0, "step": 1979 }, { "epoch": 2.701886148331484, "grad_norm": 0.32501140119590843, "learning_rate": 3.178848965397294e-05, "loss": 0.4186, "num_tokens": 1509691104.0, "step": 1980 }, { "epoch": 2.7032516855850472, "grad_norm": 0.42350060530828115, "learning_rate": 3.178014493017337e-05, "loss": 0.3886, "num_tokens": 1510510392.0, "step": 1981 }, { "epoch": 2.7046172228386105, "grad_norm": 0.36885059938077464, "learning_rate": 3.1771797222875115e-05, "loss": 0.3752, "num_tokens": 1511252490.0, "step": 1982 }, { "epoch": 2.7059827600921738, "grad_norm": 0.36146545700329585, "learning_rate": 3.176344653462472e-05, "loss": 0.4168, "num_tokens": 1512143817.0, "step": 1983 }, { "epoch": 2.707348297345737, "grad_norm": 0.3809497741797732, "learning_rate": 3.17550928679696e-05, "loss": 0.4115, "num_tokens": 1512873263.0, "step": 1984 }, { "epoch": 2.7087138345993003, "grad_norm": 0.37130811756874493, "learning_rate": 3.1746736225458104e-05, "loss": 0.4172, "num_tokens": 1513651686.0, "step": 1985 }, { "epoch": 2.7100793718528635, "grad_norm": 0.38676600795845717, "learning_rate": 3.173837660963948e-05, "loss": 0.376, "num_tokens": 1514385434.0, "step": 1986 }, { "epoch": 2.7114449091064268, "grad_norm": 0.3902592024956496, "learning_rate": 3.173001402306388e-05, "loss": 0.4073, "num_tokens": 1515164356.0, "step": 1987 }, { "epoch": 2.71281044635999, "grad_norm": 0.37506508047510523, "learning_rate": 3.172164846828238e-05, "loss": 0.3926, "num_tokens": 1515922639.0, "step": 1988 }, { "epoch": 2.714175983613553, "grad_norm": 0.3198094273976503, "learning_rate": 3.171327994784694e-05, "loss": 0.4022, "num_tokens": 1516724373.0, "step": 1989 }, { "epoch": 2.715541520867116, "grad_norm": 0.3899833382318777, "learning_rate": 3.170490846431043e-05, "loss": 0.4035, "num_tokens": 1517540615.0, "step": 1990 }, { "epoch": 2.7169070581206793, "grad_norm": 0.3421355717218307, "learning_rate": 3.1696534020226635e-05, "loss": 0.4012, "num_tokens": 1518259705.0, "step": 1991 }, { "epoch": 2.7182725953742426, "grad_norm": 0.3691361016458802, "learning_rate": 3.168815661815024e-05, "loss": 0.4009, "num_tokens": 1519056670.0, "step": 1992 }, { "epoch": 2.719638132627806, "grad_norm": 0.33592647947268206, "learning_rate": 3.16797762606368e-05, "loss": 0.4014, "num_tokens": 1519831618.0, "step": 1993 }, { "epoch": 2.721003669881369, "grad_norm": 0.377828568170052, "learning_rate": 3.1671392950242836e-05, "loss": 0.41, "num_tokens": 1520558945.0, "step": 1994 }, { "epoch": 2.722369207134932, "grad_norm": 0.3479010227559626, "learning_rate": 3.166300668952571e-05, "loss": 0.3989, "num_tokens": 1521319408.0, "step": 1995 }, { "epoch": 2.723734744388495, "grad_norm": 0.3429873806367233, "learning_rate": 3.1654617481043706e-05, "loss": 0.3963, "num_tokens": 1522131685.0, "step": 1996 }, { "epoch": 2.7251002816420584, "grad_norm": 0.3619100322145041, "learning_rate": 3.164622532735602e-05, "loss": 0.3797, "num_tokens": 1522796011.0, "step": 1997 }, { "epoch": 2.7264658188956217, "grad_norm": 0.37744685858983373, "learning_rate": 3.1637830231022727e-05, "loss": 0.3928, "num_tokens": 1523514448.0, "step": 1998 }, { "epoch": 2.727831356149185, "grad_norm": 0.3457735706130647, "learning_rate": 3.16294321946048e-05, "loss": 0.4147, "num_tokens": 1524244415.0, "step": 1999 }, { "epoch": 2.729196893402748, "grad_norm": 0.37399991010334116, "learning_rate": 3.1621031220664144e-05, "loss": 0.4127, "num_tokens": 1525073445.0, "step": 2000 }, { "epoch": 2.7305624306563114, "grad_norm": 0.333310198910779, "learning_rate": 3.161262731176351e-05, "loss": 0.3997, "num_tokens": 1525836411.0, "step": 2001 }, { "epoch": 2.7319279679098747, "grad_norm": 0.4136457686616289, "learning_rate": 3.160422047046657e-05, "loss": 0.4099, "num_tokens": 1526555017.0, "step": 2002 }, { "epoch": 2.733293505163438, "grad_norm": 0.4280196012470066, "learning_rate": 3.159581069933788e-05, "loss": 0.3972, "num_tokens": 1527290561.0, "step": 2003 }, { "epoch": 2.734659042417001, "grad_norm": 0.36426325243810936, "learning_rate": 3.1587398000942907e-05, "loss": 0.4175, "num_tokens": 1528144211.0, "step": 2004 }, { "epoch": 2.736024579670564, "grad_norm": 0.35526191507382127, "learning_rate": 3.1578982377848e-05, "loss": 0.4179, "num_tokens": 1528940233.0, "step": 2005 }, { "epoch": 2.7373901169241273, "grad_norm": 0.3538988774867061, "learning_rate": 3.15705638326204e-05, "loss": 0.4072, "num_tokens": 1529739089.0, "step": 2006 }, { "epoch": 2.7387556541776905, "grad_norm": 0.3405862711098866, "learning_rate": 3.156214236782825e-05, "loss": 0.4084, "num_tokens": 1530535202.0, "step": 2007 }, { "epoch": 2.7401211914312538, "grad_norm": 0.3160896235897266, "learning_rate": 3.155371798604055e-05, "loss": 0.417, "num_tokens": 1531366180.0, "step": 2008 }, { "epoch": 2.741486728684817, "grad_norm": 0.3477609574102779, "learning_rate": 3.154529068982724e-05, "loss": 0.384, "num_tokens": 1532130781.0, "step": 2009 }, { "epoch": 2.7428522659383803, "grad_norm": 0.35365498743893253, "learning_rate": 3.1536860481759106e-05, "loss": 0.3861, "num_tokens": 1532831553.0, "step": 2010 }, { "epoch": 2.744217803191943, "grad_norm": 0.33639582194016454, "learning_rate": 3.152842736440785e-05, "loss": 0.4009, "num_tokens": 1533554243.0, "step": 2011 }, { "epoch": 2.7455833404455063, "grad_norm": 0.3868152248007652, "learning_rate": 3.151999134034604e-05, "loss": 0.3963, "num_tokens": 1534274206.0, "step": 2012 }, { "epoch": 2.7469488776990696, "grad_norm": 0.3436303510915513, "learning_rate": 3.151155241214715e-05, "loss": 0.3815, "num_tokens": 1535003594.0, "step": 2013 }, { "epoch": 2.748314414952633, "grad_norm": 0.36466969365601676, "learning_rate": 3.1503110582385533e-05, "loss": 0.3845, "num_tokens": 1535742456.0, "step": 2014 }, { "epoch": 2.749679952206196, "grad_norm": 0.39295234483001173, "learning_rate": 3.149466585363642e-05, "loss": 0.3838, "num_tokens": 1536504692.0, "step": 2015 }, { "epoch": 2.7510454894597594, "grad_norm": 0.34405379802515107, "learning_rate": 3.1486218228475936e-05, "loss": 0.3948, "num_tokens": 1537413578.0, "step": 2016 }, { "epoch": 2.7524110267133226, "grad_norm": 0.3250942226408872, "learning_rate": 3.147776770948108e-05, "loss": 0.414, "num_tokens": 1538242925.0, "step": 2017 }, { "epoch": 2.753776563966886, "grad_norm": 0.35511404473351244, "learning_rate": 3.146931429922974e-05, "loss": 0.4136, "num_tokens": 1539119585.0, "step": 2018 }, { "epoch": 2.755142101220449, "grad_norm": 0.37045738850474247, "learning_rate": 3.14608580003007e-05, "loss": 0.4004, "num_tokens": 1539886400.0, "step": 2019 }, { "epoch": 2.7565076384740124, "grad_norm": 0.35785098106797275, "learning_rate": 3.145239881527359e-05, "loss": 0.4166, "num_tokens": 1540662660.0, "step": 2020 }, { "epoch": 2.757873175727575, "grad_norm": 0.3364133191272308, "learning_rate": 3.144393674672895e-05, "loss": 0.3924, "num_tokens": 1541425254.0, "step": 2021 }, { "epoch": 2.7592387129811384, "grad_norm": 0.3747663384110767, "learning_rate": 3.143547179724819e-05, "loss": 0.4027, "num_tokens": 1542138955.0, "step": 2022 }, { "epoch": 2.7606042502347017, "grad_norm": 0.31624040389608543, "learning_rate": 3.1427003969413606e-05, "loss": 0.415, "num_tokens": 1542946291.0, "step": 2023 }, { "epoch": 2.761969787488265, "grad_norm": 0.36787619994619897, "learning_rate": 3.141853326580835e-05, "loss": 0.4124, "num_tokens": 1543757717.0, "step": 2024 }, { "epoch": 2.763335324741828, "grad_norm": 0.3723030738368818, "learning_rate": 3.1410059689016485e-05, "loss": 0.3875, "num_tokens": 1544601415.0, "step": 2025 }, { "epoch": 2.7647008619953914, "grad_norm": 0.3766414733006091, "learning_rate": 3.1401583241622926e-05, "loss": 0.3977, "num_tokens": 1545311476.0, "step": 2026 }, { "epoch": 2.7660663992489543, "grad_norm": 0.3582398078777735, "learning_rate": 3.139310392621346e-05, "loss": 0.3941, "num_tokens": 1546033137.0, "step": 2027 }, { "epoch": 2.7674319365025175, "grad_norm": 0.34487589509786476, "learning_rate": 3.138462174537477e-05, "loss": 0.3993, "num_tokens": 1546851226.0, "step": 2028 }, { "epoch": 2.7687974737560808, "grad_norm": 0.31014619030339663, "learning_rate": 3.1376136701694404e-05, "loss": 0.391, "num_tokens": 1547692709.0, "step": 2029 }, { "epoch": 2.770163011009644, "grad_norm": 0.3525089050737881, "learning_rate": 3.1367648797760775e-05, "loss": 0.413, "num_tokens": 1548501157.0, "step": 2030 }, { "epoch": 2.7715285482632073, "grad_norm": 0.3053140501788561, "learning_rate": 3.1359158036163165e-05, "loss": 0.4061, "num_tokens": 1549293356.0, "step": 2031 }, { "epoch": 2.7728940855167705, "grad_norm": 0.3986401925115733, "learning_rate": 3.1350664419491765e-05, "loss": 0.4089, "num_tokens": 1550104515.0, "step": 2032 }, { "epoch": 2.7742596227703338, "grad_norm": 0.3138790280164195, "learning_rate": 3.134216795033758e-05, "loss": 0.4176, "num_tokens": 1550962342.0, "step": 2033 }, { "epoch": 2.775625160023897, "grad_norm": 0.40416496939605295, "learning_rate": 3.133366863129253e-05, "loss": 0.4082, "num_tokens": 1551761563.0, "step": 2034 }, { "epoch": 2.7769906972774603, "grad_norm": 0.321630229238573, "learning_rate": 3.13251664649494e-05, "loss": 0.3844, "num_tokens": 1552442608.0, "step": 2035 }, { "epoch": 2.7783562345310235, "grad_norm": 0.3745505635962941, "learning_rate": 3.1316661453901807e-05, "loss": 0.4026, "num_tokens": 1553169029.0, "step": 2036 }, { "epoch": 2.7797217717845863, "grad_norm": 0.3383025196763615, "learning_rate": 3.130815360074428e-05, "loss": 0.3847, "num_tokens": 1553935121.0, "step": 2037 }, { "epoch": 2.7810873090381496, "grad_norm": 0.3767395155304097, "learning_rate": 3.129964290807219e-05, "loss": 0.3931, "num_tokens": 1554728804.0, "step": 2038 }, { "epoch": 2.782452846291713, "grad_norm": 0.33776499108064795, "learning_rate": 3.129112937848178e-05, "loss": 0.3965, "num_tokens": 1555505128.0, "step": 2039 }, { "epoch": 2.783818383545276, "grad_norm": 0.4268000070650175, "learning_rate": 3.1282613014570166e-05, "loss": 0.3792, "num_tokens": 1556268399.0, "step": 2040 }, { "epoch": 2.7851839207988394, "grad_norm": 0.3776803340279711, "learning_rate": 3.127409381893531e-05, "loss": 0.4026, "num_tokens": 1557008983.0, "step": 2041 }, { "epoch": 2.7865494580524026, "grad_norm": 0.41095554862224615, "learning_rate": 3.126557179417607e-05, "loss": 0.4089, "num_tokens": 1557768226.0, "step": 2042 }, { "epoch": 2.7879149953059654, "grad_norm": 0.4103111594276284, "learning_rate": 3.125704694289212e-05, "loss": 0.4007, "num_tokens": 1558523087.0, "step": 2043 }, { "epoch": 2.7892805325595287, "grad_norm": 0.43028400626596425, "learning_rate": 3.1248519267684044e-05, "loss": 0.4082, "num_tokens": 1559323848.0, "step": 2044 }, { "epoch": 2.790646069813092, "grad_norm": 0.44650048719485175, "learning_rate": 3.1239988771153255e-05, "loss": 0.3958, "num_tokens": 1560120139.0, "step": 2045 }, { "epoch": 2.792011607066655, "grad_norm": 0.38053815914028305, "learning_rate": 3.123145545590204e-05, "loss": 0.408, "num_tokens": 1560893595.0, "step": 2046 }, { "epoch": 2.7933771443202184, "grad_norm": 0.4124956870883397, "learning_rate": 3.122291932453355e-05, "loss": 0.4129, "num_tokens": 1561691170.0, "step": 2047 }, { "epoch": 2.7947426815737817, "grad_norm": 0.4365186129569528, "learning_rate": 3.1214380379651784e-05, "loss": 0.4009, "num_tokens": 1562456890.0, "step": 2048 }, { "epoch": 2.796108218827345, "grad_norm": 0.4101285544547989, "learning_rate": 3.12058386238616e-05, "loss": 0.4057, "num_tokens": 1563192703.0, "step": 2049 }, { "epoch": 2.797473756080908, "grad_norm": 0.38437730331357106, "learning_rate": 3.119729405976873e-05, "loss": 0.4106, "num_tokens": 1563969623.0, "step": 2050 }, { "epoch": 2.7988392933344715, "grad_norm": 0.4107864706274665, "learning_rate": 3.118874668997974e-05, "loss": 0.4065, "num_tokens": 1564667276.0, "step": 2051 }, { "epoch": 2.8002048305880347, "grad_norm": 0.31747527850430235, "learning_rate": 3.118019651710207e-05, "loss": 0.4065, "num_tokens": 1565406796.0, "step": 2052 }, { "epoch": 2.8015703678415975, "grad_norm": 0.4228425120621424, "learning_rate": 3.1171643543743994e-05, "loss": 0.4296, "num_tokens": 1566213673.0, "step": 2053 }, { "epoch": 2.8029359050951608, "grad_norm": 0.36874190827580877, "learning_rate": 3.1163087772514666e-05, "loss": 0.4301, "num_tokens": 1566965418.0, "step": 2054 }, { "epoch": 2.804301442348724, "grad_norm": 0.37605506662504173, "learning_rate": 3.1154529206024075e-05, "loss": 0.3939, "num_tokens": 1567757793.0, "step": 2055 }, { "epoch": 2.8056669796022873, "grad_norm": 0.4022115245294908, "learning_rate": 3.114596784688308e-05, "loss": 0.3929, "num_tokens": 1568524984.0, "step": 2056 }, { "epoch": 2.8070325168558505, "grad_norm": 0.39222917928333095, "learning_rate": 3.1137403697703376e-05, "loss": 0.3841, "num_tokens": 1569374994.0, "step": 2057 }, { "epoch": 2.808398054109414, "grad_norm": 0.36745065029225427, "learning_rate": 3.112883676109751e-05, "loss": 0.3991, "num_tokens": 1570128408.0, "step": 2058 }, { "epoch": 2.8097635913629766, "grad_norm": 0.38769237256690925, "learning_rate": 3.1120267039678876e-05, "loss": 0.4046, "num_tokens": 1570914021.0, "step": 2059 }, { "epoch": 2.81112912861654, "grad_norm": 0.34878762601313373, "learning_rate": 3.1111694536061734e-05, "loss": 0.3917, "num_tokens": 1571679817.0, "step": 2060 }, { "epoch": 2.812494665870103, "grad_norm": 0.39592976442743094, "learning_rate": 3.110311925286119e-05, "loss": 0.3871, "num_tokens": 1572445465.0, "step": 2061 }, { "epoch": 2.8138602031236664, "grad_norm": 0.3885522574788008, "learning_rate": 3.1094541192693176e-05, "loss": 0.4051, "num_tokens": 1573239961.0, "step": 2062 }, { "epoch": 2.8152257403772296, "grad_norm": 0.3697840561269414, "learning_rate": 3.1085960358174506e-05, "loss": 0.4026, "num_tokens": 1574079768.0, "step": 2063 }, { "epoch": 2.816591277630793, "grad_norm": 0.3903628697928436, "learning_rate": 3.1077376751922805e-05, "loss": 0.3955, "num_tokens": 1574845718.0, "step": 2064 }, { "epoch": 2.817956814884356, "grad_norm": 0.3347475427955123, "learning_rate": 3.106879037655656e-05, "loss": 0.4148, "num_tokens": 1575600068.0, "step": 2065 }, { "epoch": 2.8193223521379194, "grad_norm": 0.38432209179994087, "learning_rate": 3.106020123469511e-05, "loss": 0.3915, "num_tokens": 1576348638.0, "step": 2066 }, { "epoch": 2.8206878893914826, "grad_norm": 0.3614862945937457, "learning_rate": 3.105160932895864e-05, "loss": 0.4022, "num_tokens": 1577105931.0, "step": 2067 }, { "epoch": 2.822053426645046, "grad_norm": 0.3819665966398395, "learning_rate": 3.104301466196813e-05, "loss": 0.395, "num_tokens": 1577823053.0, "step": 2068 }, { "epoch": 2.823418963898609, "grad_norm": 0.3887729936606231, "learning_rate": 3.1034417236345484e-05, "loss": 0.4183, "num_tokens": 1578623716.0, "step": 2069 }, { "epoch": 2.824784501152172, "grad_norm": 0.39959019173714333, "learning_rate": 3.1025817054713385e-05, "loss": 0.4155, "num_tokens": 1579394406.0, "step": 2070 }, { "epoch": 2.826150038405735, "grad_norm": 0.3568344897177389, "learning_rate": 3.101721411969538e-05, "loss": 0.397, "num_tokens": 1580055876.0, "step": 2071 }, { "epoch": 2.8275155756592985, "grad_norm": 0.41898176716083857, "learning_rate": 3.1008608433915845e-05, "loss": 0.3979, "num_tokens": 1580755449.0, "step": 2072 }, { "epoch": 2.8288811129128617, "grad_norm": 0.4029000839045327, "learning_rate": 3.1e-05, "loss": 0.3907, "num_tokens": 1581506354.0, "step": 2073 }, { "epoch": 2.830246650166425, "grad_norm": 0.34334946528363375, "learning_rate": 3.0991388820573925e-05, "loss": 0.3929, "num_tokens": 1582268051.0, "step": 2074 }, { "epoch": 2.831612187419988, "grad_norm": 0.3725817080090359, "learning_rate": 3.0982774898264495e-05, "loss": 0.3875, "num_tokens": 1583024561.0, "step": 2075 }, { "epoch": 2.832977724673551, "grad_norm": 0.4043450911850217, "learning_rate": 3.097415823569946e-05, "loss": 0.3708, "num_tokens": 1583785679.0, "step": 2076 }, { "epoch": 2.8343432619271143, "grad_norm": 0.3434253753833492, "learning_rate": 3.096553883550738e-05, "loss": 0.412, "num_tokens": 1584475065.0, "step": 2077 }, { "epoch": 2.8357087991806775, "grad_norm": 0.3592358010944223, "learning_rate": 3.0956916700317664e-05, "loss": 0.3933, "num_tokens": 1585276381.0, "step": 2078 }, { "epoch": 2.837074336434241, "grad_norm": 0.3383818759586345, "learning_rate": 3.094829183276056e-05, "loss": 0.3988, "num_tokens": 1586060990.0, "step": 2079 }, { "epoch": 2.838439873687804, "grad_norm": 0.350452441020068, "learning_rate": 3.0939664235467136e-05, "loss": 0.3774, "num_tokens": 1586766488.0, "step": 2080 }, { "epoch": 2.8398054109413673, "grad_norm": 0.33978340777966404, "learning_rate": 3.093103391106929e-05, "loss": 0.3976, "num_tokens": 1587427568.0, "step": 2081 }, { "epoch": 2.8411709481949305, "grad_norm": 0.3545239914948204, "learning_rate": 3.092240086219978e-05, "loss": 0.4019, "num_tokens": 1588132602.0, "step": 2082 }, { "epoch": 2.842536485448494, "grad_norm": 0.3583974413387045, "learning_rate": 3.0913765091492155e-05, "loss": 0.4066, "num_tokens": 1588926680.0, "step": 2083 }, { "epoch": 2.843902022702057, "grad_norm": 0.3431587847089474, "learning_rate": 3.090512660158082e-05, "loss": 0.4174, "num_tokens": 1589785120.0, "step": 2084 }, { "epoch": 2.8452675599556203, "grad_norm": 0.3134703813536585, "learning_rate": 3.0896485395101016e-05, "loss": 0.3943, "num_tokens": 1590536924.0, "step": 2085 }, { "epoch": 2.846633097209183, "grad_norm": 0.36197588242595174, "learning_rate": 3.088784147468878e-05, "loss": 0.4053, "num_tokens": 1591267859.0, "step": 2086 }, { "epoch": 2.8479986344627464, "grad_norm": 0.34138933529523907, "learning_rate": 3.0879194842981024e-05, "loss": 0.4071, "num_tokens": 1591984438.0, "step": 2087 }, { "epoch": 2.8493641717163096, "grad_norm": 0.3823535402228039, "learning_rate": 3.087054550261544e-05, "loss": 0.3871, "num_tokens": 1592693094.0, "step": 2088 }, { "epoch": 2.850729708969873, "grad_norm": 0.31996802729356844, "learning_rate": 3.086189345623058e-05, "loss": 0.3964, "num_tokens": 1593488758.0, "step": 2089 }, { "epoch": 2.852095246223436, "grad_norm": 0.4117883645209615, "learning_rate": 3.085323870646579e-05, "loss": 0.3933, "num_tokens": 1594261078.0, "step": 2090 }, { "epoch": 2.8534607834769994, "grad_norm": 0.39363614699549176, "learning_rate": 3.0844581255961275e-05, "loss": 0.4102, "num_tokens": 1594984957.0, "step": 2091 }, { "epoch": 2.854826320730562, "grad_norm": 0.36225394132681227, "learning_rate": 3.083592110735805e-05, "loss": 0.3934, "num_tokens": 1595755119.0, "step": 2092 }, { "epoch": 2.8561918579841254, "grad_norm": 0.3904027948062969, "learning_rate": 3.082725826329794e-05, "loss": 0.4179, "num_tokens": 1596549344.0, "step": 2093 }, { "epoch": 2.8575573952376887, "grad_norm": 0.33802417951516583, "learning_rate": 3.0818592726423616e-05, "loss": 0.4009, "num_tokens": 1597383512.0, "step": 2094 }, { "epoch": 2.858922932491252, "grad_norm": 0.37885063334898794, "learning_rate": 3.080992449937855e-05, "loss": 0.3851, "num_tokens": 1598205610.0, "step": 2095 }, { "epoch": 2.860288469744815, "grad_norm": 0.3022953588644794, "learning_rate": 3.080125358480705e-05, "loss": 0.4025, "num_tokens": 1599016746.0, "step": 2096 }, { "epoch": 2.8616540069983785, "grad_norm": 0.31815817896929, "learning_rate": 3.079257998535423e-05, "loss": 0.4219, "num_tokens": 1599903289.0, "step": 2097 }, { "epoch": 2.8630195442519417, "grad_norm": 0.3595781789925898, "learning_rate": 3.078390370366602e-05, "loss": 0.3989, "num_tokens": 1600666887.0, "step": 2098 }, { "epoch": 2.864385081505505, "grad_norm": 0.31757349952299097, "learning_rate": 3.07752247423892e-05, "loss": 0.4116, "num_tokens": 1601490092.0, "step": 2099 }, { "epoch": 2.8657506187590682, "grad_norm": 0.3762406603711272, "learning_rate": 3.0766543104171326e-05, "loss": 0.4019, "num_tokens": 1602203796.0, "step": 2100 }, { "epoch": 2.8671161560126315, "grad_norm": 0.36165190004778797, "learning_rate": 3.07578587916608e-05, "loss": 0.4077, "num_tokens": 1603040348.0, "step": 2101 }, { "epoch": 2.8684816932661943, "grad_norm": 0.3735018452692525, "learning_rate": 3.0749171807506827e-05, "loss": 0.4099, "num_tokens": 1603834290.0, "step": 2102 }, { "epoch": 2.8698472305197575, "grad_norm": 0.42251832824217156, "learning_rate": 3.0740482154359434e-05, "loss": 0.4041, "num_tokens": 1604588774.0, "step": 2103 }, { "epoch": 2.871212767773321, "grad_norm": 0.34968125838891323, "learning_rate": 3.073178983486945e-05, "loss": 0.4249, "num_tokens": 1605430958.0, "step": 2104 }, { "epoch": 2.872578305026884, "grad_norm": 0.420376053365873, "learning_rate": 3.072309485168853e-05, "loss": 0.4, "num_tokens": 1606190754.0, "step": 2105 }, { "epoch": 2.8739438422804473, "grad_norm": 0.361074892621977, "learning_rate": 3.071439720746913e-05, "loss": 0.3938, "num_tokens": 1606919157.0, "step": 2106 }, { "epoch": 2.8753093795340106, "grad_norm": 0.35576551458865674, "learning_rate": 3.070569690486454e-05, "loss": 0.4091, "num_tokens": 1607734009.0, "step": 2107 }, { "epoch": 2.8766749167875734, "grad_norm": 0.37832467373339934, "learning_rate": 3.069699394652883e-05, "loss": 0.4033, "num_tokens": 1608513231.0, "step": 2108 }, { "epoch": 2.8780404540411366, "grad_norm": 0.33064715856085775, "learning_rate": 3.0688288335116906e-05, "loss": 0.4146, "num_tokens": 1609290562.0, "step": 2109 }, { "epoch": 2.8794059912947, "grad_norm": 0.4023295029013234, "learning_rate": 3.0679580073284466e-05, "loss": 0.4035, "num_tokens": 1610051268.0, "step": 2110 }, { "epoch": 2.880771528548263, "grad_norm": 0.4235445098626155, "learning_rate": 3.067086916368803e-05, "loss": 0.4011, "num_tokens": 1610792903.0, "step": 2111 }, { "epoch": 2.8821370658018264, "grad_norm": 0.35521602537282937, "learning_rate": 3.066215560898491e-05, "loss": 0.3951, "num_tokens": 1611558246.0, "step": 2112 }, { "epoch": 2.8835026030553896, "grad_norm": 0.38578908518803184, "learning_rate": 3.065343941183324e-05, "loss": 0.39, "num_tokens": 1612286445.0, "step": 2113 }, { "epoch": 2.884868140308953, "grad_norm": 0.3927109696482439, "learning_rate": 3.064472057489196e-05, "loss": 0.3916, "num_tokens": 1613051462.0, "step": 2114 }, { "epoch": 2.886233677562516, "grad_norm": 0.3567588493564476, "learning_rate": 3.06359991008208e-05, "loss": 0.4004, "num_tokens": 1613820234.0, "step": 2115 }, { "epoch": 2.8875992148160794, "grad_norm": 0.38391204248861877, "learning_rate": 3.062727499228031e-05, "loss": 0.3923, "num_tokens": 1614584273.0, "step": 2116 }, { "epoch": 2.8889647520696426, "grad_norm": 0.35342155042115775, "learning_rate": 3.061854825193183e-05, "loss": 0.3969, "num_tokens": 1615355565.0, "step": 2117 }, { "epoch": 2.8903302893232055, "grad_norm": 0.35296333981621447, "learning_rate": 3.060981888243752e-05, "loss": 0.3957, "num_tokens": 1616110340.0, "step": 2118 }, { "epoch": 2.8916958265767687, "grad_norm": 0.3590431799547813, "learning_rate": 3.060108688646033e-05, "loss": 0.3878, "num_tokens": 1616832491.0, "step": 2119 }, { "epoch": 2.893061363830332, "grad_norm": 0.38412765174068103, "learning_rate": 3.059235226666401e-05, "loss": 0.3976, "num_tokens": 1617561953.0, "step": 2120 }, { "epoch": 2.894426901083895, "grad_norm": 0.33604244801138433, "learning_rate": 3.058361502571312e-05, "loss": 0.4077, "num_tokens": 1618327028.0, "step": 2121 }, { "epoch": 2.8957924383374585, "grad_norm": 0.3448456730070278, "learning_rate": 3.0574875166273e-05, "loss": 0.4054, "num_tokens": 1619058169.0, "step": 2122 }, { "epoch": 2.8971579755910217, "grad_norm": 0.3690328605031888, "learning_rate": 3.056613269100982e-05, "loss": 0.4233, "num_tokens": 1619756907.0, "step": 2123 }, { "epoch": 2.8985235128445845, "grad_norm": 0.37795306188031985, "learning_rate": 3.0557387602590526e-05, "loss": 0.4007, "num_tokens": 1620564265.0, "step": 2124 }, { "epoch": 2.899889050098148, "grad_norm": 0.39750787646968033, "learning_rate": 3.054863990368286e-05, "loss": 0.4024, "num_tokens": 1621323055.0, "step": 2125 }, { "epoch": 2.901254587351711, "grad_norm": 0.3958860981282077, "learning_rate": 3.0539889596955365e-05, "loss": 0.4138, "num_tokens": 1622043973.0, "step": 2126 }, { "epoch": 2.9026201246052743, "grad_norm": 0.3947813615469731, "learning_rate": 3.0531136685077397e-05, "loss": 0.3971, "num_tokens": 1622805818.0, "step": 2127 }, { "epoch": 2.9039856618588376, "grad_norm": 0.32374713558912493, "learning_rate": 3.052238117071907e-05, "loss": 0.3971, "num_tokens": 1623593197.0, "step": 2128 }, { "epoch": 2.905351199112401, "grad_norm": 0.36431128883539254, "learning_rate": 3.0513623056551323e-05, "loss": 0.4212, "num_tokens": 1624397773.0, "step": 2129 }, { "epoch": 2.906716736365964, "grad_norm": 0.297615671113813, "learning_rate": 3.0504862345245875e-05, "loss": 0.3909, "num_tokens": 1625200763.0, "step": 2130 }, { "epoch": 2.9080822736195273, "grad_norm": 0.3516273008650016, "learning_rate": 3.0496099039475237e-05, "loss": 0.4153, "num_tokens": 1625924357.0, "step": 2131 }, { "epoch": 2.9094478108730906, "grad_norm": 0.3283363777306661, "learning_rate": 3.048733314191272e-05, "loss": 0.3954, "num_tokens": 1626661744.0, "step": 2132 }, { "epoch": 2.910813348126654, "grad_norm": 0.33107467088519715, "learning_rate": 3.0478564655232414e-05, "loss": 0.3893, "num_tokens": 1627388872.0, "step": 2133 }, { "epoch": 2.9121788853802166, "grad_norm": 0.3624759664420798, "learning_rate": 3.0469793582109205e-05, "loss": 0.3895, "num_tokens": 1628169555.0, "step": 2134 }, { "epoch": 2.91354442263378, "grad_norm": 0.3587675891573947, "learning_rate": 3.046101992521878e-05, "loss": 0.4045, "num_tokens": 1628909147.0, "step": 2135 }, { "epoch": 2.914909959887343, "grad_norm": 0.38253452214948047, "learning_rate": 3.045224368723758e-05, "loss": 0.4162, "num_tokens": 1629736045.0, "step": 2136 }, { "epoch": 2.9162754971409064, "grad_norm": 0.40117608959319623, "learning_rate": 3.0443464870842873e-05, "loss": 0.4022, "num_tokens": 1630540779.0, "step": 2137 }, { "epoch": 2.9176410343944696, "grad_norm": 0.35843454106367206, "learning_rate": 3.0434683478712686e-05, "loss": 0.409, "num_tokens": 1631334054.0, "step": 2138 }, { "epoch": 2.919006571648033, "grad_norm": 0.3913758565834566, "learning_rate": 3.042589951352585e-05, "loss": 0.4131, "num_tokens": 1632106899.0, "step": 2139 }, { "epoch": 2.9203721089015957, "grad_norm": 0.3804388911901337, "learning_rate": 3.0417112977961967e-05, "loss": 0.3659, "num_tokens": 1632859433.0, "step": 2140 }, { "epoch": 2.921737646155159, "grad_norm": 0.35101278143930054, "learning_rate": 3.0408323874701425e-05, "loss": 0.4134, "num_tokens": 1633574311.0, "step": 2141 }, { "epoch": 2.923103183408722, "grad_norm": 0.39975598439971344, "learning_rate": 3.0399532206425402e-05, "loss": 0.3795, "num_tokens": 1634431326.0, "step": 2142 }, { "epoch": 2.9244687206622855, "grad_norm": 0.38124180406975544, "learning_rate": 3.0390737975815862e-05, "loss": 0.3963, "num_tokens": 1635163965.0, "step": 2143 }, { "epoch": 2.9258342579158487, "grad_norm": 0.41539698888284454, "learning_rate": 3.0381941185555527e-05, "loss": 0.3908, "num_tokens": 1635900716.0, "step": 2144 }, { "epoch": 2.927199795169412, "grad_norm": 0.47123439608873036, "learning_rate": 3.037314183832793e-05, "loss": 0.3972, "num_tokens": 1636621730.0, "step": 2145 }, { "epoch": 2.9285653324229752, "grad_norm": 0.3674717951210242, "learning_rate": 3.0364339936817372e-05, "loss": 0.4032, "num_tokens": 1637412984.0, "step": 2146 }, { "epoch": 2.9299308696765385, "grad_norm": 0.4023519112856007, "learning_rate": 3.0355535483708922e-05, "loss": 0.383, "num_tokens": 1638112779.0, "step": 2147 }, { "epoch": 2.9312964069301017, "grad_norm": 0.38607279942834977, "learning_rate": 3.0346728481688452e-05, "loss": 0.4197, "num_tokens": 1638911250.0, "step": 2148 }, { "epoch": 2.932661944183665, "grad_norm": 0.40305479653623205, "learning_rate": 3.0337918933442582e-05, "loss": 0.3939, "num_tokens": 1639667027.0, "step": 2149 }, { "epoch": 2.934027481437228, "grad_norm": 0.4224344979107423, "learning_rate": 3.0329106841658734e-05, "loss": 0.4138, "num_tokens": 1640437047.0, "step": 2150 }, { "epoch": 2.935393018690791, "grad_norm": 0.43567365012498865, "learning_rate": 3.0320292209025092e-05, "loss": 0.4011, "num_tokens": 1641213566.0, "step": 2151 }, { "epoch": 2.9367585559443543, "grad_norm": 0.3874139242732168, "learning_rate": 3.0311475038230616e-05, "loss": 0.396, "num_tokens": 1641946172.0, "step": 2152 }, { "epoch": 2.9381240931979176, "grad_norm": 0.3800906777724426, "learning_rate": 3.0302655331965044e-05, "loss": 0.388, "num_tokens": 1642609631.0, "step": 2153 }, { "epoch": 2.939489630451481, "grad_norm": 0.366203387135177, "learning_rate": 3.0293833092918886e-05, "loss": 0.3928, "num_tokens": 1643321452.0, "step": 2154 }, { "epoch": 2.940855167705044, "grad_norm": 0.36825605823591007, "learning_rate": 3.028500832378343e-05, "loss": 0.4014, "num_tokens": 1644123007.0, "step": 2155 }, { "epoch": 2.942220704958607, "grad_norm": 0.3251365755254636, "learning_rate": 3.0276181027250725e-05, "loss": 0.3932, "num_tokens": 1644928081.0, "step": 2156 }, { "epoch": 2.94358624221217, "grad_norm": 0.3208645284548645, "learning_rate": 3.0267351206013602e-05, "loss": 0.385, "num_tokens": 1645636129.0, "step": 2157 }, { "epoch": 2.9449517794657334, "grad_norm": 0.3556774776406417, "learning_rate": 3.025851886276566e-05, "loss": 0.3964, "num_tokens": 1646411454.0, "step": 2158 }, { "epoch": 2.9463173167192966, "grad_norm": 0.3442145507256238, "learning_rate": 3.024968400020125e-05, "loss": 0.4006, "num_tokens": 1647244853.0, "step": 2159 }, { "epoch": 2.94768285397286, "grad_norm": 0.3466554097779838, "learning_rate": 3.0240846621015523e-05, "loss": 0.4008, "num_tokens": 1648000197.0, "step": 2160 }, { "epoch": 2.949048391226423, "grad_norm": 0.39557494869844095, "learning_rate": 3.0232006727904368e-05, "loss": 0.4125, "num_tokens": 1648780753.0, "step": 2161 }, { "epoch": 2.9504139284799864, "grad_norm": 0.33415005334202175, "learning_rate": 3.0223164323564455e-05, "loss": 0.3928, "num_tokens": 1649501775.0, "step": 2162 }, { "epoch": 2.9517794657335497, "grad_norm": 0.39429296884334775, "learning_rate": 3.0214319410693223e-05, "loss": 0.3965, "num_tokens": 1650238419.0, "step": 2163 }, { "epoch": 2.953145002987113, "grad_norm": 0.3756295279892369, "learning_rate": 3.0205471991988875e-05, "loss": 0.3971, "num_tokens": 1650995298.0, "step": 2164 }, { "epoch": 2.954510540240676, "grad_norm": 0.3490115463064656, "learning_rate": 3.019662207015037e-05, "loss": 0.4097, "num_tokens": 1651729942.0, "step": 2165 }, { "epoch": 2.955876077494239, "grad_norm": 0.33800200625626925, "learning_rate": 3.018776964787743e-05, "loss": 0.3963, "num_tokens": 1652519149.0, "step": 2166 }, { "epoch": 2.9572416147478022, "grad_norm": 0.40265620413164294, "learning_rate": 3.017891472787056e-05, "loss": 0.4224, "num_tokens": 1653227951.0, "step": 2167 }, { "epoch": 2.9586071520013655, "grad_norm": 0.368104478883704, "learning_rate": 3.0170057312831006e-05, "loss": 0.4007, "num_tokens": 1654014993.0, "step": 2168 }, { "epoch": 2.9599726892549287, "grad_norm": 0.34664801681232627, "learning_rate": 3.0161197405460774e-05, "loss": 0.3806, "num_tokens": 1654756241.0, "step": 2169 }, { "epoch": 2.961338226508492, "grad_norm": 0.3037936691231534, "learning_rate": 3.0152335008462654e-05, "loss": 0.3793, "num_tokens": 1655450021.0, "step": 2170 }, { "epoch": 2.9627037637620552, "grad_norm": 0.3786168063922685, "learning_rate": 3.0143470124540167e-05, "loss": 0.4063, "num_tokens": 1656181156.0, "step": 2171 }, { "epoch": 2.9640693010156185, "grad_norm": 0.35659419218535887, "learning_rate": 3.0134602756397603e-05, "loss": 0.395, "num_tokens": 1656924363.0, "step": 2172 }, { "epoch": 2.9654348382691813, "grad_norm": 0.40104793099806735, "learning_rate": 3.0125732906740025e-05, "loss": 0.4067, "num_tokens": 1657669419.0, "step": 2173 }, { "epoch": 2.9668003755227446, "grad_norm": 0.38350062205811003, "learning_rate": 3.0116860578273232e-05, "loss": 0.3795, "num_tokens": 1658374370.0, "step": 2174 }, { "epoch": 2.968165912776308, "grad_norm": 0.3941324794547933, "learning_rate": 3.0107985773703783e-05, "loss": 0.4198, "num_tokens": 1659198922.0, "step": 2175 }, { "epoch": 2.969531450029871, "grad_norm": 0.3457990153288125, "learning_rate": 3.0099108495739005e-05, "loss": 0.3984, "num_tokens": 1659993027.0, "step": 2176 }, { "epoch": 2.9708969872834343, "grad_norm": 0.3516992296578042, "learning_rate": 3.0090228747086968e-05, "loss": 0.3922, "num_tokens": 1660723056.0, "step": 2177 }, { "epoch": 2.9722625245369976, "grad_norm": 0.3621348978930631, "learning_rate": 3.0081346530456496e-05, "loss": 0.4094, "num_tokens": 1661458478.0, "step": 2178 }, { "epoch": 2.973628061790561, "grad_norm": 0.375470751589552, "learning_rate": 3.007246184855717e-05, "loss": 0.4, "num_tokens": 1662256130.0, "step": 2179 }, { "epoch": 2.974993599044124, "grad_norm": 0.3310301591439453, "learning_rate": 3.006357470409933e-05, "loss": 0.377, "num_tokens": 1663002835.0, "step": 2180 }, { "epoch": 2.9763591362976873, "grad_norm": 0.36954371487595056, "learning_rate": 3.005468509979404e-05, "loss": 0.4019, "num_tokens": 1663769802.0, "step": 2181 }, { "epoch": 2.9777246735512506, "grad_norm": 0.3465256644986584, "learning_rate": 3.0045793038353154e-05, "loss": 0.4106, "num_tokens": 1664645047.0, "step": 2182 }, { "epoch": 2.9790902108048134, "grad_norm": 0.30899821928520693, "learning_rate": 3.0036898522489246e-05, "loss": 0.3918, "num_tokens": 1665403249.0, "step": 2183 }, { "epoch": 2.9804557480583767, "grad_norm": 0.3614297179468087, "learning_rate": 3.002800155491564e-05, "loss": 0.4055, "num_tokens": 1666182797.0, "step": 2184 }, { "epoch": 2.98182128531194, "grad_norm": 0.35962615026632855, "learning_rate": 3.0019102138346427e-05, "loss": 0.4002, "num_tokens": 1666911464.0, "step": 2185 }, { "epoch": 2.983186822565503, "grad_norm": 0.32485153838827674, "learning_rate": 3.001020027549643e-05, "loss": 0.3964, "num_tokens": 1667753519.0, "step": 2186 }, { "epoch": 2.9845523598190664, "grad_norm": 0.39587248369762856, "learning_rate": 3.000129596908122e-05, "loss": 0.4084, "num_tokens": 1668592779.0, "step": 2187 }, { "epoch": 2.9859178970726297, "grad_norm": 0.321805611937802, "learning_rate": 2.9992389221817113e-05, "loss": 0.4013, "num_tokens": 1669353988.0, "step": 2188 }, { "epoch": 2.9872834343261925, "grad_norm": 0.3409887205626239, "learning_rate": 2.9983480036421187e-05, "loss": 0.3882, "num_tokens": 1670090523.0, "step": 2189 }, { "epoch": 2.9886489715797557, "grad_norm": 0.3823004178085291, "learning_rate": 2.997456841561123e-05, "loss": 0.4039, "num_tokens": 1670884394.0, "step": 2190 }, { "epoch": 2.990014508833319, "grad_norm": 0.3433018365892015, "learning_rate": 2.99656543621058e-05, "loss": 0.4165, "num_tokens": 1671655906.0, "step": 2191 }, { "epoch": 2.9913800460868822, "grad_norm": 0.35123602017374433, "learning_rate": 2.995673787862418e-05, "loss": 0.4064, "num_tokens": 1672458813.0, "step": 2192 }, { "epoch": 2.9927455833404455, "grad_norm": 0.3457513692963249, "learning_rate": 2.9947818967886416e-05, "loss": 0.3929, "num_tokens": 1673210726.0, "step": 2193 }, { "epoch": 2.9941111205940087, "grad_norm": 0.32961429226574585, "learning_rate": 2.9938897632613276e-05, "loss": 0.4025, "num_tokens": 1673992648.0, "step": 2194 }, { "epoch": 2.995476657847572, "grad_norm": 0.32465876446400516, "learning_rate": 2.992997387552627e-05, "loss": 0.3876, "num_tokens": 1674753102.0, "step": 2195 }, { "epoch": 2.9968421951011353, "grad_norm": 0.33538850504773854, "learning_rate": 2.9921047699347653e-05, "loss": 0.3918, "num_tokens": 1675506607.0, "step": 2196 }, { "epoch": 2.9982077323546985, "grad_norm": 0.3485645916740732, "learning_rate": 2.9912119106800422e-05, "loss": 0.4034, "num_tokens": 1676226838.0, "step": 2197 }, { "epoch": 2.9995732696082618, "grad_norm": 0.32559023753329613, "learning_rate": 2.9903188100608292e-05, "loss": 0.395, "num_tokens": 1676898951.0, "step": 2198 }, { "epoch": 3.0, "grad_norm": 0.32559023753329613, "learning_rate": 2.9894254683495724e-05, "loss": 0.39, "num_tokens": 1677126408.0, "step": 2199 }, { "epoch": 3.0013655372535633, "grad_norm": 0.7003343817086201, "learning_rate": 2.9885318858187923e-05, "loss": 0.3692, "num_tokens": 1677916942.0, "step": 2200 }, { "epoch": 3.0027310745071265, "grad_norm": 0.4659049979500174, "learning_rate": 2.9876380627410832e-05, "loss": 0.3558, "num_tokens": 1678717166.0, "step": 2201 }, { "epoch": 3.0040966117606898, "grad_norm": 0.3779644471588353, "learning_rate": 2.9867439993891104e-05, "loss": 0.3638, "num_tokens": 1679458334.0, "step": 2202 }, { "epoch": 3.005462149014253, "grad_norm": 0.36743507462412506, "learning_rate": 2.985849696035614e-05, "loss": 0.3688, "num_tokens": 1680236898.0, "step": 2203 }, { "epoch": 3.006827686267816, "grad_norm": 0.4217468471132374, "learning_rate": 2.984955152953408e-05, "loss": 0.3944, "num_tokens": 1681041707.0, "step": 2204 }, { "epoch": 3.008193223521379, "grad_norm": 0.41924083855635697, "learning_rate": 2.9840603704153785e-05, "loss": 0.377, "num_tokens": 1681775995.0, "step": 2205 }, { "epoch": 3.0095587607749423, "grad_norm": 0.4168019497766701, "learning_rate": 2.9831653486944848e-05, "loss": 0.3703, "num_tokens": 1682594543.0, "step": 2206 }, { "epoch": 3.0109242980285056, "grad_norm": 0.3745208288868217, "learning_rate": 2.9822700880637585e-05, "loss": 0.3635, "num_tokens": 1683357198.0, "step": 2207 }, { "epoch": 3.012289835282069, "grad_norm": 0.34107741269613634, "learning_rate": 2.9813745887963065e-05, "loss": 0.3804, "num_tokens": 1684121225.0, "step": 2208 }, { "epoch": 3.013655372535632, "grad_norm": 0.4125779028413801, "learning_rate": 2.9804788511653054e-05, "loss": 0.3881, "num_tokens": 1684923547.0, "step": 2209 }, { "epoch": 3.0150209097891953, "grad_norm": 0.34104161410500544, "learning_rate": 2.9795828754440066e-05, "loss": 0.3527, "num_tokens": 1685699250.0, "step": 2210 }, { "epoch": 3.0163864470427586, "grad_norm": 0.3132307936963849, "learning_rate": 2.9786866619057328e-05, "loss": 0.3661, "num_tokens": 1686434028.0, "step": 2211 }, { "epoch": 3.0177519842963214, "grad_norm": 0.4119783722183827, "learning_rate": 2.977790210823881e-05, "loss": 0.3611, "num_tokens": 1687148227.0, "step": 2212 }, { "epoch": 3.0191175215498847, "grad_norm": 0.3614293582536125, "learning_rate": 2.9768935224719194e-05, "loss": 0.3478, "num_tokens": 1687929275.0, "step": 2213 }, { "epoch": 3.020483058803448, "grad_norm": 0.33845984791067873, "learning_rate": 2.975996597123388e-05, "loss": 0.3797, "num_tokens": 1688651922.0, "step": 2214 }, { "epoch": 3.021848596057011, "grad_norm": 0.4073677433275102, "learning_rate": 2.9750994350519005e-05, "loss": 0.3714, "num_tokens": 1689459178.0, "step": 2215 }, { "epoch": 3.0232141333105744, "grad_norm": 0.36306686241698777, "learning_rate": 2.9742020365311424e-05, "loss": 0.3703, "num_tokens": 1690235706.0, "step": 2216 }, { "epoch": 3.0245796705641377, "grad_norm": 0.36438364204498175, "learning_rate": 2.9733044018348707e-05, "loss": 0.3662, "num_tokens": 1690937730.0, "step": 2217 }, { "epoch": 3.025945207817701, "grad_norm": 0.3940046864798211, "learning_rate": 2.9724065312369146e-05, "loss": 0.3931, "num_tokens": 1691672712.0, "step": 2218 }, { "epoch": 3.027310745071264, "grad_norm": 0.3653653722118115, "learning_rate": 2.971508425011176e-05, "loss": 0.3562, "num_tokens": 1692360559.0, "step": 2219 }, { "epoch": 3.028676282324827, "grad_norm": 0.3772450503680852, "learning_rate": 2.9706100834316282e-05, "loss": 0.3641, "num_tokens": 1693097004.0, "step": 2220 }, { "epoch": 3.0300418195783902, "grad_norm": 0.3839025451315728, "learning_rate": 2.9697115067723166e-05, "loss": 0.3823, "num_tokens": 1693863860.0, "step": 2221 }, { "epoch": 3.0314073568319535, "grad_norm": 0.3538829695106774, "learning_rate": 2.968812695307357e-05, "loss": 0.3734, "num_tokens": 1694616497.0, "step": 2222 }, { "epoch": 3.0327728940855168, "grad_norm": 0.33948438551336313, "learning_rate": 2.9679136493109392e-05, "loss": 0.3633, "num_tokens": 1695335422.0, "step": 2223 }, { "epoch": 3.03413843133908, "grad_norm": 0.3856384121820822, "learning_rate": 2.9670143690573216e-05, "loss": 0.3488, "num_tokens": 1696106399.0, "step": 2224 }, { "epoch": 3.0355039685926433, "grad_norm": 0.3440961017036093, "learning_rate": 2.966114854820837e-05, "loss": 0.3752, "num_tokens": 1696901217.0, "step": 2225 }, { "epoch": 3.0368695058462065, "grad_norm": 0.3274362111444479, "learning_rate": 2.9652151068758882e-05, "loss": 0.3785, "num_tokens": 1697703322.0, "step": 2226 }, { "epoch": 3.0382350430997698, "grad_norm": 0.3987174761975748, "learning_rate": 2.9643151254969487e-05, "loss": 0.3641, "num_tokens": 1698451933.0, "step": 2227 }, { "epoch": 3.0396005803533326, "grad_norm": 0.3501630264743655, "learning_rate": 2.9634149109585642e-05, "loss": 0.3713, "num_tokens": 1699266887.0, "step": 2228 }, { "epoch": 3.040966117606896, "grad_norm": 0.3430006174349219, "learning_rate": 2.9625144635353514e-05, "loss": 0.3676, "num_tokens": 1700005537.0, "step": 2229 }, { "epoch": 3.042331654860459, "grad_norm": 0.42515163399946054, "learning_rate": 2.9616137835019967e-05, "loss": 0.3554, "num_tokens": 1700755292.0, "step": 2230 }, { "epoch": 3.0436971921140223, "grad_norm": 0.33940103592144844, "learning_rate": 2.960712871133259e-05, "loss": 0.3695, "num_tokens": 1701426341.0, "step": 2231 }, { "epoch": 3.0450627293675856, "grad_norm": 0.3949449649859696, "learning_rate": 2.959811726703969e-05, "loss": 0.3676, "num_tokens": 1702180580.0, "step": 2232 }, { "epoch": 3.046428266621149, "grad_norm": 0.3872700715229405, "learning_rate": 2.9589103504890254e-05, "loss": 0.3744, "num_tokens": 1702977186.0, "step": 2233 }, { "epoch": 3.047793803874712, "grad_norm": 0.3467192957456227, "learning_rate": 2.9580087427633995e-05, "loss": 0.3637, "num_tokens": 1703725763.0, "step": 2234 }, { "epoch": 3.0491593411282754, "grad_norm": 0.34098177629002524, "learning_rate": 2.9571069038021324e-05, "loss": 0.3709, "num_tokens": 1704490265.0, "step": 2235 }, { "epoch": 3.050524878381838, "grad_norm": 0.3242115122929884, "learning_rate": 2.9562048338803374e-05, "loss": 0.3687, "num_tokens": 1705216386.0, "step": 2236 }, { "epoch": 3.0518904156354014, "grad_norm": 0.35367968575157965, "learning_rate": 2.955302533273195e-05, "loss": 0.3743, "num_tokens": 1705989014.0, "step": 2237 }, { "epoch": 3.0532559528889647, "grad_norm": 0.3501866290131955, "learning_rate": 2.9544000022559598e-05, "loss": 0.3781, "num_tokens": 1706711821.0, "step": 2238 }, { "epoch": 3.054621490142528, "grad_norm": 0.3714580802435464, "learning_rate": 2.9534972411039534e-05, "loss": 0.3695, "num_tokens": 1707528930.0, "step": 2239 }, { "epoch": 3.055987027396091, "grad_norm": 0.3708377939067916, "learning_rate": 2.952594250092571e-05, "loss": 0.3836, "num_tokens": 1708288649.0, "step": 2240 }, { "epoch": 3.0573525646496544, "grad_norm": 0.3488667511914556, "learning_rate": 2.9516910294972745e-05, "loss": 0.3551, "num_tokens": 1708976971.0, "step": 2241 }, { "epoch": 3.0587181019032177, "grad_norm": 0.3340730304028382, "learning_rate": 2.9507875795935973e-05, "loss": 0.3457, "num_tokens": 1709658194.0, "step": 2242 }, { "epoch": 3.060083639156781, "grad_norm": 0.3505083726960831, "learning_rate": 2.9498839006571437e-05, "loss": 0.3913, "num_tokens": 1710497344.0, "step": 2243 }, { "epoch": 3.0614491764103438, "grad_norm": 0.3404027536617071, "learning_rate": 2.9489799929635875e-05, "loss": 0.3558, "num_tokens": 1711246475.0, "step": 2244 }, { "epoch": 3.062814713663907, "grad_norm": 0.36114610427695704, "learning_rate": 2.9480758567886706e-05, "loss": 0.4125, "num_tokens": 1712035162.0, "step": 2245 }, { "epoch": 3.0641802509174703, "grad_norm": 0.345045733956963, "learning_rate": 2.9471714924082055e-05, "loss": 0.3794, "num_tokens": 1712805969.0, "step": 2246 }, { "epoch": 3.0655457881710335, "grad_norm": 0.383704767933966, "learning_rate": 2.9462669000980763e-05, "loss": 0.3813, "num_tokens": 1713567437.0, "step": 2247 }, { "epoch": 3.0669113254245968, "grad_norm": 0.375221192498475, "learning_rate": 2.9453620801342337e-05, "loss": 0.3888, "num_tokens": 1714432834.0, "step": 2248 }, { "epoch": 3.06827686267816, "grad_norm": 0.35469072337625007, "learning_rate": 2.9444570327926983e-05, "loss": 0.3573, "num_tokens": 1715198984.0, "step": 2249 }, { "epoch": 3.0696423999317233, "grad_norm": 0.3692519981186295, "learning_rate": 2.9435517583495633e-05, "loss": 0.3821, "num_tokens": 1716030529.0, "step": 2250 }, { "epoch": 3.0710079371852865, "grad_norm": 0.31842878397221275, "learning_rate": 2.9426462570809868e-05, "loss": 0.342, "num_tokens": 1716773004.0, "step": 2251 }, { "epoch": 3.0723734744388493, "grad_norm": 0.37575629621820433, "learning_rate": 2.941740529263198e-05, "loss": 0.368, "num_tokens": 1717459740.0, "step": 2252 }, { "epoch": 3.0737390116924126, "grad_norm": 0.3555505581009345, "learning_rate": 2.9408345751724967e-05, "loss": 0.3558, "num_tokens": 1718233093.0, "step": 2253 }, { "epoch": 3.075104548945976, "grad_norm": 0.3488904839087391, "learning_rate": 2.939928395085249e-05, "loss": 0.3891, "num_tokens": 1718984808.0, "step": 2254 }, { "epoch": 3.076470086199539, "grad_norm": 0.39753791150571144, "learning_rate": 2.9390219892778914e-05, "loss": 0.3861, "num_tokens": 1719749428.0, "step": 2255 }, { "epoch": 3.0778356234531024, "grad_norm": 0.3251692984012596, "learning_rate": 2.938115358026929e-05, "loss": 0.3598, "num_tokens": 1720501981.0, "step": 2256 }, { "epoch": 3.0792011607066656, "grad_norm": 0.3455338104479709, "learning_rate": 2.937208501608936e-05, "loss": 0.3723, "num_tokens": 1721285224.0, "step": 2257 }, { "epoch": 3.080566697960229, "grad_norm": 0.333101190718842, "learning_rate": 2.9363014203005552e-05, "loss": 0.3851, "num_tokens": 1722057796.0, "step": 2258 }, { "epoch": 3.081932235213792, "grad_norm": 0.35260441864134545, "learning_rate": 2.9353941143784977e-05, "loss": 0.3884, "num_tokens": 1722793551.0, "step": 2259 }, { "epoch": 3.083297772467355, "grad_norm": 0.36766221412734185, "learning_rate": 2.934486584119543e-05, "loss": 0.3844, "num_tokens": 1723508822.0, "step": 2260 }, { "epoch": 3.084663309720918, "grad_norm": 0.38457006627179124, "learning_rate": 2.933578829800539e-05, "loss": 0.3463, "num_tokens": 1724308513.0, "step": 2261 }, { "epoch": 3.0860288469744814, "grad_norm": 0.35148320726727406, "learning_rate": 2.9326708516984032e-05, "loss": 0.3589, "num_tokens": 1725062416.0, "step": 2262 }, { "epoch": 3.0873943842280447, "grad_norm": 0.3759280012375198, "learning_rate": 2.9317626500901193e-05, "loss": 0.3863, "num_tokens": 1725880813.0, "step": 2263 }, { "epoch": 3.088759921481608, "grad_norm": 0.3561315991677913, "learning_rate": 2.9308542252527407e-05, "loss": 0.3795, "num_tokens": 1726610715.0, "step": 2264 }, { "epoch": 3.090125458735171, "grad_norm": 0.40735613433420226, "learning_rate": 2.9299455774633894e-05, "loss": 0.3707, "num_tokens": 1727451931.0, "step": 2265 }, { "epoch": 3.0914909959887344, "grad_norm": 0.4066865936597913, "learning_rate": 2.9290367069992528e-05, "loss": 0.3749, "num_tokens": 1728339002.0, "step": 2266 }, { "epoch": 3.0928565332422977, "grad_norm": 0.40090366859054977, "learning_rate": 2.9281276141375896e-05, "loss": 0.3694, "num_tokens": 1729082821.0, "step": 2267 }, { "epoch": 3.0942220704958605, "grad_norm": 0.384825370067939, "learning_rate": 2.927218299155723e-05, "loss": 0.3727, "num_tokens": 1729888792.0, "step": 2268 }, { "epoch": 3.0955876077494238, "grad_norm": 0.37484803108333614, "learning_rate": 2.926308762331047e-05, "loss": 0.3891, "num_tokens": 1730641414.0, "step": 2269 }, { "epoch": 3.096953145002987, "grad_norm": 0.3463079946794805, "learning_rate": 2.925399003941021e-05, "loss": 0.3634, "num_tokens": 1731357190.0, "step": 2270 }, { "epoch": 3.0983186822565503, "grad_norm": 0.33765124880494274, "learning_rate": 2.924489024263174e-05, "loss": 0.3715, "num_tokens": 1732135733.0, "step": 2271 }, { "epoch": 3.0996842195101135, "grad_norm": 0.34852911592116975, "learning_rate": 2.9235788235751003e-05, "loss": 0.3679, "num_tokens": 1732925347.0, "step": 2272 }, { "epoch": 3.101049756763677, "grad_norm": 0.35378577893705765, "learning_rate": 2.9226684021544627e-05, "loss": 0.3577, "num_tokens": 1733653877.0, "step": 2273 }, { "epoch": 3.10241529401724, "grad_norm": 0.34302633823916107, "learning_rate": 2.921757760278992e-05, "loss": 0.3644, "num_tokens": 1734409740.0, "step": 2274 }, { "epoch": 3.1037808312708033, "grad_norm": 0.36804076621325865, "learning_rate": 2.9208468982264856e-05, "loss": 0.3814, "num_tokens": 1735177761.0, "step": 2275 }, { "epoch": 3.105146368524366, "grad_norm": 0.3473193275510116, "learning_rate": 2.9199358162748072e-05, "loss": 0.3607, "num_tokens": 1735921648.0, "step": 2276 }, { "epoch": 3.1065119057779293, "grad_norm": 0.35052199543942875, "learning_rate": 2.9190245147018892e-05, "loss": 0.3461, "num_tokens": 1736695076.0, "step": 2277 }, { "epoch": 3.1078774430314926, "grad_norm": 0.3206955373058272, "learning_rate": 2.9181129937857302e-05, "loss": 0.3595, "num_tokens": 1737542493.0, "step": 2278 }, { "epoch": 3.109242980285056, "grad_norm": 0.3502525488800614, "learning_rate": 2.9172012538043955e-05, "loss": 0.3758, "num_tokens": 1738300508.0, "step": 2279 }, { "epoch": 3.110608517538619, "grad_norm": 0.35135753910253376, "learning_rate": 2.9162892950360175e-05, "loss": 0.3589, "num_tokens": 1739104154.0, "step": 2280 }, { "epoch": 3.1119740547921824, "grad_norm": 0.31019404086480107, "learning_rate": 2.915377117758796e-05, "loss": 0.3718, "num_tokens": 1739836600.0, "step": 2281 }, { "epoch": 3.1133395920457456, "grad_norm": 0.3748402897940227, "learning_rate": 2.9144647222509955e-05, "loss": 0.3617, "num_tokens": 1740594005.0, "step": 2282 }, { "epoch": 3.114705129299309, "grad_norm": 0.33809955313251294, "learning_rate": 2.9135521087909494e-05, "loss": 0.3711, "num_tokens": 1741404529.0, "step": 2283 }, { "epoch": 3.1160706665528717, "grad_norm": 0.3416937854453492, "learning_rate": 2.912639277657056e-05, "loss": 0.376, "num_tokens": 1742280480.0, "step": 2284 }, { "epoch": 3.117436203806435, "grad_norm": 0.35529368175844583, "learning_rate": 2.911726229127781e-05, "loss": 0.3448, "num_tokens": 1743055740.0, "step": 2285 }, { "epoch": 3.118801741059998, "grad_norm": 0.32640014898876013, "learning_rate": 2.910812963481656e-05, "loss": 0.3586, "num_tokens": 1743842841.0, "step": 2286 }, { "epoch": 3.1201672783135614, "grad_norm": 0.364073341607513, "learning_rate": 2.9098994809972783e-05, "loss": 0.3633, "num_tokens": 1744607456.0, "step": 2287 }, { "epoch": 3.1215328155671247, "grad_norm": 0.372324617525311, "learning_rate": 2.908985781953312e-05, "loss": 0.3627, "num_tokens": 1745325118.0, "step": 2288 }, { "epoch": 3.122898352820688, "grad_norm": 0.3528938842800424, "learning_rate": 2.908071866628487e-05, "loss": 0.3743, "num_tokens": 1746063993.0, "step": 2289 }, { "epoch": 3.124263890074251, "grad_norm": 0.37188295900961826, "learning_rate": 2.9071577353016002e-05, "loss": 0.3749, "num_tokens": 1746854453.0, "step": 2290 }, { "epoch": 3.1256294273278145, "grad_norm": 0.3372856689524085, "learning_rate": 2.9062433882515125e-05, "loss": 0.3899, "num_tokens": 1747702955.0, "step": 2291 }, { "epoch": 3.1269949645813773, "grad_norm": 0.38654646286368183, "learning_rate": 2.9053288257571514e-05, "loss": 0.3658, "num_tokens": 1748497979.0, "step": 2292 }, { "epoch": 3.1283605018349405, "grad_norm": 0.33070772537539217, "learning_rate": 2.9044140480975107e-05, "loss": 0.3647, "num_tokens": 1749194255.0, "step": 2293 }, { "epoch": 3.1297260390885038, "grad_norm": 0.37331692942133876, "learning_rate": 2.9034990555516497e-05, "loss": 0.3652, "num_tokens": 1749893265.0, "step": 2294 }, { "epoch": 3.131091576342067, "grad_norm": 0.3936331697837498, "learning_rate": 2.9025838483986928e-05, "loss": 0.3813, "num_tokens": 1750646794.0, "step": 2295 }, { "epoch": 3.1324571135956303, "grad_norm": 0.33188257786989306, "learning_rate": 2.90166842691783e-05, "loss": 0.3483, "num_tokens": 1751373646.0, "step": 2296 }, { "epoch": 3.1338226508491935, "grad_norm": 0.33505550559687497, "learning_rate": 2.9007527913883176e-05, "loss": 0.3698, "num_tokens": 1752135850.0, "step": 2297 }, { "epoch": 3.135188188102757, "grad_norm": 0.34172738819618126, "learning_rate": 2.8998369420894746e-05, "loss": 0.3765, "num_tokens": 1752925287.0, "step": 2298 }, { "epoch": 3.13655372535632, "grad_norm": 0.3363018670832331, "learning_rate": 2.8989208793006884e-05, "loss": 0.3717, "num_tokens": 1753659189.0, "step": 2299 }, { "epoch": 3.1379192626098833, "grad_norm": 0.3594134747827139, "learning_rate": 2.8980046033014095e-05, "loss": 0.3913, "num_tokens": 1754553281.0, "step": 2300 }, { "epoch": 3.139284799863446, "grad_norm": 0.3499913692356287, "learning_rate": 2.8970881143711542e-05, "loss": 0.3447, "num_tokens": 1755263921.0, "step": 2301 }, { "epoch": 3.1406503371170094, "grad_norm": 0.32179369622551, "learning_rate": 2.8961714127895032e-05, "loss": 0.3725, "num_tokens": 1755995142.0, "step": 2302 }, { "epoch": 3.1420158743705726, "grad_norm": 0.36885280029800627, "learning_rate": 2.8952544988361033e-05, "loss": 0.3783, "num_tokens": 1756821472.0, "step": 2303 }, { "epoch": 3.143381411624136, "grad_norm": 0.32805774892355083, "learning_rate": 2.894337372790664e-05, "loss": 0.3597, "num_tokens": 1757594663.0, "step": 2304 }, { "epoch": 3.144746948877699, "grad_norm": 0.36923768787363853, "learning_rate": 2.8934200349329623e-05, "loss": 0.3598, "num_tokens": 1758386255.0, "step": 2305 }, { "epoch": 3.1461124861312624, "grad_norm": 0.3288202215249992, "learning_rate": 2.8925024855428366e-05, "loss": 0.3544, "num_tokens": 1759156714.0, "step": 2306 }, { "epoch": 3.1474780233848256, "grad_norm": 0.37339239653184864, "learning_rate": 2.8915847249001934e-05, "loss": 0.3596, "num_tokens": 1759930511.0, "step": 2307 }, { "epoch": 3.1488435606383884, "grad_norm": 0.3285897747845346, "learning_rate": 2.8906667532849998e-05, "loss": 0.3859, "num_tokens": 1760701781.0, "step": 2308 }, { "epoch": 3.1502090978919517, "grad_norm": 0.349083656875489, "learning_rate": 2.8897485709772906e-05, "loss": 0.3733, "num_tokens": 1761461085.0, "step": 2309 }, { "epoch": 3.151574635145515, "grad_norm": 0.3553022243750017, "learning_rate": 2.8888301782571618e-05, "loss": 0.3858, "num_tokens": 1762219826.0, "step": 2310 }, { "epoch": 3.152940172399078, "grad_norm": 0.38931254076785554, "learning_rate": 2.8879115754047768e-05, "loss": 0.3599, "num_tokens": 1762969198.0, "step": 2311 }, { "epoch": 3.1543057096526415, "grad_norm": 0.39413770112190666, "learning_rate": 2.886992762700361e-05, "loss": 0.3833, "num_tokens": 1763798499.0, "step": 2312 }, { "epoch": 3.1556712469062047, "grad_norm": 0.34962766400733697, "learning_rate": 2.886073740424205e-05, "loss": 0.3717, "num_tokens": 1764512846.0, "step": 2313 }, { "epoch": 3.157036784159768, "grad_norm": 0.4014081320350829, "learning_rate": 2.885154508856662e-05, "loss": 0.3742, "num_tokens": 1765280950.0, "step": 2314 }, { "epoch": 3.158402321413331, "grad_norm": 0.34795515293291435, "learning_rate": 2.8842350682781495e-05, "loss": 0.3725, "num_tokens": 1766006894.0, "step": 2315 }, { "epoch": 3.1597678586668945, "grad_norm": 0.44902964811635954, "learning_rate": 2.8833154189691495e-05, "loss": 0.3916, "num_tokens": 1766756459.0, "step": 2316 }, { "epoch": 3.1611333959204573, "grad_norm": 0.38089942606867566, "learning_rate": 2.882395561210207e-05, "loss": 0.3722, "num_tokens": 1767519194.0, "step": 2317 }, { "epoch": 3.1624989331740205, "grad_norm": 0.4027068165919514, "learning_rate": 2.8814754952819307e-05, "loss": 0.3805, "num_tokens": 1768285423.0, "step": 2318 }, { "epoch": 3.163864470427584, "grad_norm": 0.39264183942564257, "learning_rate": 2.880555221464993e-05, "loss": 0.3751, "num_tokens": 1769003459.0, "step": 2319 }, { "epoch": 3.165230007681147, "grad_norm": 0.3830078252654279, "learning_rate": 2.8796347400401293e-05, "loss": 0.3629, "num_tokens": 1769763774.0, "step": 2320 }, { "epoch": 3.1665955449347103, "grad_norm": 0.389736499546156, "learning_rate": 2.8787140512881398e-05, "loss": 0.3836, "num_tokens": 1770502224.0, "step": 2321 }, { "epoch": 3.1679610821882735, "grad_norm": 0.3755797088859478, "learning_rate": 2.8777931554898855e-05, "loss": 0.3659, "num_tokens": 1771285274.0, "step": 2322 }, { "epoch": 3.169326619441837, "grad_norm": 0.36856852780417293, "learning_rate": 2.8768720529262915e-05, "loss": 0.3944, "num_tokens": 1772115136.0, "step": 2323 }, { "epoch": 3.1706921566953996, "grad_norm": 0.3830106103127483, "learning_rate": 2.875950743878348e-05, "loss": 0.3535, "num_tokens": 1772873775.0, "step": 2324 }, { "epoch": 3.172057693948963, "grad_norm": 0.32760533113341134, "learning_rate": 2.8750292286271047e-05, "loss": 0.3698, "num_tokens": 1773591314.0, "step": 2325 }, { "epoch": 3.173423231202526, "grad_norm": 0.3922308166177202, "learning_rate": 2.874107507453677e-05, "loss": 0.3695, "num_tokens": 1774349412.0, "step": 2326 }, { "epoch": 3.1747887684560894, "grad_norm": 0.38375889892703535, "learning_rate": 2.873185580639242e-05, "loss": 0.3773, "num_tokens": 1775162092.0, "step": 2327 }, { "epoch": 3.1761543057096526, "grad_norm": 0.35085827199236774, "learning_rate": 2.8722634484650397e-05, "loss": 0.3745, "num_tokens": 1775965500.0, "step": 2328 }, { "epoch": 3.177519842963216, "grad_norm": 0.3820140068532594, "learning_rate": 2.8713411112123722e-05, "loss": 0.3817, "num_tokens": 1776761987.0, "step": 2329 }, { "epoch": 3.178885380216779, "grad_norm": 0.3850236129671546, "learning_rate": 2.870418569162605e-05, "loss": 0.3832, "num_tokens": 1777572641.0, "step": 2330 }, { "epoch": 3.1802509174703424, "grad_norm": 0.38284314482310194, "learning_rate": 2.8694958225971664e-05, "loss": 0.3704, "num_tokens": 1778306091.0, "step": 2331 }, { "epoch": 3.1816164547239056, "grad_norm": 0.39412216562082186, "learning_rate": 2.868572871797545e-05, "loss": 0.3797, "num_tokens": 1779088127.0, "step": 2332 }, { "epoch": 3.1829819919774685, "grad_norm": 0.37356627747080734, "learning_rate": 2.8676497170452937e-05, "loss": 0.3821, "num_tokens": 1779844476.0, "step": 2333 }, { "epoch": 3.1843475292310317, "grad_norm": 0.337525557418741, "learning_rate": 2.866726358622028e-05, "loss": 0.3645, "num_tokens": 1780636469.0, "step": 2334 }, { "epoch": 3.185713066484595, "grad_norm": 0.3584951879207729, "learning_rate": 2.8658027968094226e-05, "loss": 0.3709, "num_tokens": 1781442926.0, "step": 2335 }, { "epoch": 3.187078603738158, "grad_norm": 0.3484432453469202, "learning_rate": 2.8648790318892182e-05, "loss": 0.37, "num_tokens": 1782217502.0, "step": 2336 }, { "epoch": 3.1884441409917215, "grad_norm": 0.34169379827128493, "learning_rate": 2.8639550641432145e-05, "loss": 0.3709, "num_tokens": 1782994920.0, "step": 2337 }, { "epoch": 3.1898096782452847, "grad_norm": 0.38487626032647004, "learning_rate": 2.863030893853274e-05, "loss": 0.3827, "num_tokens": 1783820902.0, "step": 2338 }, { "epoch": 3.191175215498848, "grad_norm": 0.3338262984186947, "learning_rate": 2.862106521301321e-05, "loss": 0.3632, "num_tokens": 1784466390.0, "step": 2339 }, { "epoch": 3.1925407527524112, "grad_norm": 0.3908757921995659, "learning_rate": 2.8611819467693424e-05, "loss": 0.3712, "num_tokens": 1785193712.0, "step": 2340 }, { "epoch": 3.193906290005974, "grad_norm": 0.3309734986581914, "learning_rate": 2.860257170539385e-05, "loss": 0.3887, "num_tokens": 1785983454.0, "step": 2341 }, { "epoch": 3.1952718272595373, "grad_norm": 0.3720955395020591, "learning_rate": 2.859332192893558e-05, "loss": 0.3977, "num_tokens": 1786769124.0, "step": 2342 }, { "epoch": 3.1966373645131005, "grad_norm": 0.3590098793037313, "learning_rate": 2.858407014114032e-05, "loss": 0.3788, "num_tokens": 1787578782.0, "step": 2343 }, { "epoch": 3.198002901766664, "grad_norm": 0.3290831176457925, "learning_rate": 2.8574816344830393e-05, "loss": 0.3598, "num_tokens": 1788348886.0, "step": 2344 }, { "epoch": 3.199368439020227, "grad_norm": 0.3611072303257949, "learning_rate": 2.856556054282873e-05, "loss": 0.3721, "num_tokens": 1789173291.0, "step": 2345 }, { "epoch": 3.2007339762737903, "grad_norm": 0.34297562218944283, "learning_rate": 2.855630273795888e-05, "loss": 0.3753, "num_tokens": 1789902962.0, "step": 2346 }, { "epoch": 3.2020995135273536, "grad_norm": 0.3619613831404743, "learning_rate": 2.854704293304499e-05, "loss": 0.3609, "num_tokens": 1790634271.0, "step": 2347 }, { "epoch": 3.203465050780917, "grad_norm": 0.35486487332766375, "learning_rate": 2.853778113091183e-05, "loss": 0.3648, "num_tokens": 1791383433.0, "step": 2348 }, { "epoch": 3.2048305880344796, "grad_norm": 0.3618323091646361, "learning_rate": 2.8528517334384775e-05, "loss": 0.3802, "num_tokens": 1792093988.0, "step": 2349 }, { "epoch": 3.206196125288043, "grad_norm": 0.3927878601192379, "learning_rate": 2.8519251546289806e-05, "loss": 0.3578, "num_tokens": 1792863344.0, "step": 2350 }, { "epoch": 3.207561662541606, "grad_norm": 0.35354858103145353, "learning_rate": 2.8509983769453515e-05, "loss": 0.3875, "num_tokens": 1793618958.0, "step": 2351 }, { "epoch": 3.2089271997951694, "grad_norm": 0.3682923927872569, "learning_rate": 2.85007140067031e-05, "loss": 0.3593, "num_tokens": 1794413563.0, "step": 2352 }, { "epoch": 3.2102927370487326, "grad_norm": 0.35905155747505346, "learning_rate": 2.8491442260866372e-05, "loss": 0.393, "num_tokens": 1795183676.0, "step": 2353 }, { "epoch": 3.211658274302296, "grad_norm": 0.3509714474263038, "learning_rate": 2.8482168534771716e-05, "loss": 0.3497, "num_tokens": 1795919763.0, "step": 2354 }, { "epoch": 3.213023811555859, "grad_norm": 0.3494374060046293, "learning_rate": 2.847289283124817e-05, "loss": 0.344, "num_tokens": 1796757219.0, "step": 2355 }, { "epoch": 3.2143893488094224, "grad_norm": 0.34065146747502334, "learning_rate": 2.8463615153125333e-05, "loss": 0.3645, "num_tokens": 1797511233.0, "step": 2356 }, { "epoch": 3.215754886062985, "grad_norm": 0.3576875573804394, "learning_rate": 2.8454335503233426e-05, "loss": 0.357, "num_tokens": 1798315183.0, "step": 2357 }, { "epoch": 3.2171204233165485, "grad_norm": 0.38195868000321725, "learning_rate": 2.8445053884403276e-05, "loss": 0.3569, "num_tokens": 1799033399.0, "step": 2358 }, { "epoch": 3.2184859605701117, "grad_norm": 0.372942331291824, "learning_rate": 2.8435770299466287e-05, "loss": 0.3893, "num_tokens": 1799850743.0, "step": 2359 }, { "epoch": 3.219851497823675, "grad_norm": 0.39812993366088373, "learning_rate": 2.84264847512545e-05, "loss": 0.3749, "num_tokens": 1800662498.0, "step": 2360 }, { "epoch": 3.2212170350772382, "grad_norm": 0.3488747816750978, "learning_rate": 2.841719724260052e-05, "loss": 0.3725, "num_tokens": 1801470029.0, "step": 2361 }, { "epoch": 3.2225825723308015, "grad_norm": 0.37032281059620015, "learning_rate": 2.840790777633757e-05, "loss": 0.3575, "num_tokens": 1802182497.0, "step": 2362 }, { "epoch": 3.2239481095843647, "grad_norm": 0.36345476397317383, "learning_rate": 2.8398616355299452e-05, "loss": 0.3738, "num_tokens": 1802946743.0, "step": 2363 }, { "epoch": 3.225313646837928, "grad_norm": 0.3460109313647308, "learning_rate": 2.8389322982320592e-05, "loss": 0.3647, "num_tokens": 1803698167.0, "step": 2364 }, { "epoch": 3.226679184091491, "grad_norm": 0.3652820300005888, "learning_rate": 2.8380027660235982e-05, "loss": 0.3724, "num_tokens": 1804442908.0, "step": 2365 }, { "epoch": 3.228044721345054, "grad_norm": 0.36386594603026134, "learning_rate": 2.837073039188123e-05, "loss": 0.3621, "num_tokens": 1805209741.0, "step": 2366 }, { "epoch": 3.2294102585986173, "grad_norm": 0.368568920001338, "learning_rate": 2.8361431180092534e-05, "loss": 0.3503, "num_tokens": 1805997003.0, "step": 2367 }, { "epoch": 3.2307757958521806, "grad_norm": 0.35745516418728385, "learning_rate": 2.8352130027706666e-05, "loss": 0.3461, "num_tokens": 1806710104.0, "step": 2368 }, { "epoch": 3.232141333105744, "grad_norm": 0.37026975737159895, "learning_rate": 2.834282693756102e-05, "loss": 0.3681, "num_tokens": 1807514842.0, "step": 2369 }, { "epoch": 3.233506870359307, "grad_norm": 0.37354611695856194, "learning_rate": 2.8333521912493557e-05, "loss": 0.3701, "num_tokens": 1808263080.0, "step": 2370 }, { "epoch": 3.2348724076128703, "grad_norm": 0.36243850665675476, "learning_rate": 2.8324214955342844e-05, "loss": 0.3709, "num_tokens": 1808983353.0, "step": 2371 }, { "epoch": 3.2362379448664336, "grad_norm": 0.3560566176107003, "learning_rate": 2.8314906068948027e-05, "loss": 0.37, "num_tokens": 1809756018.0, "step": 2372 }, { "epoch": 3.2376034821199964, "grad_norm": 0.3679970116178821, "learning_rate": 2.830559525614884e-05, "loss": 0.3727, "num_tokens": 1810533372.0, "step": 2373 }, { "epoch": 3.2389690193735596, "grad_norm": 0.36311349268035326, "learning_rate": 2.8296282519785613e-05, "loss": 0.3554, "num_tokens": 1811294632.0, "step": 2374 }, { "epoch": 3.240334556627123, "grad_norm": 0.36482287834168736, "learning_rate": 2.8286967862699257e-05, "loss": 0.3742, "num_tokens": 1812064769.0, "step": 2375 }, { "epoch": 3.241700093880686, "grad_norm": 0.3450244676345877, "learning_rate": 2.8277651287731273e-05, "loss": 0.3609, "num_tokens": 1812762179.0, "step": 2376 }, { "epoch": 3.2430656311342494, "grad_norm": 0.3995667117492943, "learning_rate": 2.826833279772375e-05, "loss": 0.3675, "num_tokens": 1813498263.0, "step": 2377 }, { "epoch": 3.2444311683878126, "grad_norm": 0.3596746185177499, "learning_rate": 2.8259012395519336e-05, "loss": 0.3754, "num_tokens": 1814303267.0, "step": 2378 }, { "epoch": 3.245796705641376, "grad_norm": 0.36180010886851105, "learning_rate": 2.82496900839613e-05, "loss": 0.3639, "num_tokens": 1815100521.0, "step": 2379 }, { "epoch": 3.247162242894939, "grad_norm": 0.39892653506491316, "learning_rate": 2.824036586589347e-05, "loss": 0.3802, "num_tokens": 1815859424.0, "step": 2380 }, { "epoch": 3.2485277801485024, "grad_norm": 0.3476700356456365, "learning_rate": 2.8231039744160265e-05, "loss": 0.3812, "num_tokens": 1816711193.0, "step": 2381 }, { "epoch": 3.249893317402065, "grad_norm": 0.3448162727035289, "learning_rate": 2.822171172160667e-05, "loss": 0.3696, "num_tokens": 1817520515.0, "step": 2382 }, { "epoch": 3.2512588546556285, "grad_norm": 0.341449889227421, "learning_rate": 2.8212381801078282e-05, "loss": 0.3671, "num_tokens": 1818251730.0, "step": 2383 }, { "epoch": 3.2526243919091917, "grad_norm": 0.3541331787995689, "learning_rate": 2.8203049985421232e-05, "loss": 0.3598, "num_tokens": 1819001705.0, "step": 2384 }, { "epoch": 3.253989929162755, "grad_norm": 0.3585746533270743, "learning_rate": 2.819371627748226e-05, "loss": 0.3825, "num_tokens": 1819754800.0, "step": 2385 }, { "epoch": 3.2553554664163182, "grad_norm": 0.4256721110877668, "learning_rate": 2.818438068010869e-05, "loss": 0.3708, "num_tokens": 1820516845.0, "step": 2386 }, { "epoch": 3.2567210036698815, "grad_norm": 0.362043403056554, "learning_rate": 2.817504319614839e-05, "loss": 0.3865, "num_tokens": 1821280716.0, "step": 2387 }, { "epoch": 3.2580865409234447, "grad_norm": 0.3891460219273839, "learning_rate": 2.8165703828449826e-05, "loss": 0.382, "num_tokens": 1822024802.0, "step": 2388 }, { "epoch": 3.2594520781770076, "grad_norm": 0.3830832291911251, "learning_rate": 2.8156362579862042e-05, "loss": 0.3899, "num_tokens": 1822815644.0, "step": 2389 }, { "epoch": 3.260817615430571, "grad_norm": 0.4012890769210563, "learning_rate": 2.814701945323464e-05, "loss": 0.3535, "num_tokens": 1823567588.0, "step": 2390 }, { "epoch": 3.262183152684134, "grad_norm": 0.3758800216135332, "learning_rate": 2.813767445141781e-05, "loss": 0.3696, "num_tokens": 1824278408.0, "step": 2391 }, { "epoch": 3.2635486899376973, "grad_norm": 0.36817280559661136, "learning_rate": 2.8128327577262305e-05, "loss": 0.375, "num_tokens": 1825024934.0, "step": 2392 }, { "epoch": 3.2649142271912606, "grad_norm": 0.3184743788801983, "learning_rate": 2.8118978833619458e-05, "loss": 0.3917, "num_tokens": 1825789539.0, "step": 2393 }, { "epoch": 3.266279764444824, "grad_norm": 0.3596616720016449, "learning_rate": 2.810962822334115e-05, "loss": 0.3672, "num_tokens": 1826550378.0, "step": 2394 }, { "epoch": 3.267645301698387, "grad_norm": 0.33378974095814984, "learning_rate": 2.810027574927986e-05, "loss": 0.3446, "num_tokens": 1827299610.0, "step": 2395 }, { "epoch": 3.2690108389519503, "grad_norm": 0.3100257688382129, "learning_rate": 2.8090921414288622e-05, "loss": 0.3625, "num_tokens": 1828107717.0, "step": 2396 }, { "epoch": 3.2703763762055136, "grad_norm": 0.32448872907347126, "learning_rate": 2.808156522122103e-05, "loss": 0.3747, "num_tokens": 1828902636.0, "step": 2397 }, { "epoch": 3.2717419134590764, "grad_norm": 0.3339808905915838, "learning_rate": 2.8072207172931264e-05, "loss": 0.3784, "num_tokens": 1829627314.0, "step": 2398 }, { "epoch": 3.2731074507126396, "grad_norm": 0.3588874816712281, "learning_rate": 2.8062847272274055e-05, "loss": 0.3702, "num_tokens": 1830274481.0, "step": 2399 }, { "epoch": 3.274472987966203, "grad_norm": 0.4245357523197985, "learning_rate": 2.8053485522104706e-05, "loss": 0.3789, "num_tokens": 1831045697.0, "step": 2400 }, { "epoch": 3.275838525219766, "grad_norm": 0.39631766320362766, "learning_rate": 2.8044121925279068e-05, "loss": 0.3543, "num_tokens": 1831762032.0, "step": 2401 }, { "epoch": 3.2772040624733294, "grad_norm": 0.3554989250050189, "learning_rate": 2.8034756484653588e-05, "loss": 0.3614, "num_tokens": 1832574196.0, "step": 2402 }, { "epoch": 3.2785695997268927, "grad_norm": 0.32326311884325476, "learning_rate": 2.802538920308524e-05, "loss": 0.3768, "num_tokens": 1833308461.0, "step": 2403 }, { "epoch": 3.279935136980456, "grad_norm": 0.34515254263975015, "learning_rate": 2.8016020083431592e-05, "loss": 0.3569, "num_tokens": 1834044395.0, "step": 2404 }, { "epoch": 3.2813006742340187, "grad_norm": 0.3633302020430495, "learning_rate": 2.8006649128550743e-05, "loss": 0.3691, "num_tokens": 1834811463.0, "step": 2405 }, { "epoch": 3.282666211487582, "grad_norm": 0.3447755195453164, "learning_rate": 2.799727634130137e-05, "loss": 0.3839, "num_tokens": 1835535399.0, "step": 2406 }, { "epoch": 3.2840317487411452, "grad_norm": 0.35594366658905463, "learning_rate": 2.798790172454271e-05, "loss": 0.3908, "num_tokens": 1836314807.0, "step": 2407 }, { "epoch": 3.2853972859947085, "grad_norm": 0.3707651424456984, "learning_rate": 2.7978525281134547e-05, "loss": 0.3959, "num_tokens": 1837137603.0, "step": 2408 }, { "epoch": 3.2867628232482717, "grad_norm": 0.36378496434387114, "learning_rate": 2.7969147013937227e-05, "loss": 0.3666, "num_tokens": 1837951755.0, "step": 2409 }, { "epoch": 3.288128360501835, "grad_norm": 0.35647427722236313, "learning_rate": 2.795976692581165e-05, "loss": 0.3574, "num_tokens": 1838738364.0, "step": 2410 }, { "epoch": 3.2894938977553982, "grad_norm": 0.3612249222070131, "learning_rate": 2.7950385019619287e-05, "loss": 0.3784, "num_tokens": 1839543067.0, "step": 2411 }, { "epoch": 3.2908594350089615, "grad_norm": 0.3884714101702294, "learning_rate": 2.794100129822214e-05, "loss": 0.3757, "num_tokens": 1840324419.0, "step": 2412 }, { "epoch": 3.2922249722625248, "grad_norm": 0.34203281505074257, "learning_rate": 2.793161576448278e-05, "loss": 0.3668, "num_tokens": 1841045468.0, "step": 2413 }, { "epoch": 3.2935905095160876, "grad_norm": 0.3187113605859681, "learning_rate": 2.7922228421264327e-05, "loss": 0.3704, "num_tokens": 1841771222.0, "step": 2414 }, { "epoch": 3.294956046769651, "grad_norm": 0.3481643154550305, "learning_rate": 2.791283927143045e-05, "loss": 0.3642, "num_tokens": 1842507358.0, "step": 2415 }, { "epoch": 3.296321584023214, "grad_norm": 0.34365144652883867, "learning_rate": 2.7903448317845382e-05, "loss": 0.3698, "num_tokens": 1843281092.0, "step": 2416 }, { "epoch": 3.2976871212767773, "grad_norm": 0.31116571953086336, "learning_rate": 2.7894055563373883e-05, "loss": 0.3714, "num_tokens": 1844043217.0, "step": 2417 }, { "epoch": 3.2990526585303406, "grad_norm": 0.3445158299020969, "learning_rate": 2.7884661010881284e-05, "loss": 0.3812, "num_tokens": 1844747286.0, "step": 2418 }, { "epoch": 3.300418195783904, "grad_norm": 0.34344489377685766, "learning_rate": 2.787526466323345e-05, "loss": 0.3789, "num_tokens": 1845555121.0, "step": 2419 }, { "epoch": 3.301783733037467, "grad_norm": 0.3278537832133581, "learning_rate": 2.7865866523296806e-05, "loss": 0.3792, "num_tokens": 1846294143.0, "step": 2420 }, { "epoch": 3.30314927029103, "grad_norm": 0.32264909335386616, "learning_rate": 2.785646659393831e-05, "loss": 0.3589, "num_tokens": 1847059883.0, "step": 2421 }, { "epoch": 3.304514807544593, "grad_norm": 0.32180018787089193, "learning_rate": 2.7847064878025484e-05, "loss": 0.3554, "num_tokens": 1847836713.0, "step": 2422 }, { "epoch": 3.3058803447981564, "grad_norm": 0.33871031379591454, "learning_rate": 2.783766137842638e-05, "loss": 0.3808, "num_tokens": 1848613558.0, "step": 2423 }, { "epoch": 3.3072458820517197, "grad_norm": 0.3412918874459012, "learning_rate": 2.7828256098009595e-05, "loss": 0.3877, "num_tokens": 1849411909.0, "step": 2424 }, { "epoch": 3.308611419305283, "grad_norm": 0.39038719156202945, "learning_rate": 2.7818849039644272e-05, "loss": 0.3606, "num_tokens": 1850210919.0, "step": 2425 }, { "epoch": 3.309976956558846, "grad_norm": 0.3409252545377657, "learning_rate": 2.7809440206200103e-05, "loss": 0.3513, "num_tokens": 1850908224.0, "step": 2426 }, { "epoch": 3.3113424938124094, "grad_norm": 0.3747083280661681, "learning_rate": 2.7800029600547312e-05, "loss": 0.3491, "num_tokens": 1851682845.0, "step": 2427 }, { "epoch": 3.3127080310659727, "grad_norm": 0.36282891554067753, "learning_rate": 2.7790617225556665e-05, "loss": 0.3831, "num_tokens": 1852463298.0, "step": 2428 }, { "epoch": 3.314073568319536, "grad_norm": 0.38781380314121294, "learning_rate": 2.7781203084099475e-05, "loss": 0.3682, "num_tokens": 1853227973.0, "step": 2429 }, { "epoch": 3.3154391055730987, "grad_norm": 0.31924840230524965, "learning_rate": 2.7771787179047588e-05, "loss": 0.3693, "num_tokens": 1853999112.0, "step": 2430 }, { "epoch": 3.316804642826662, "grad_norm": 0.40186288975189904, "learning_rate": 2.7762369513273383e-05, "loss": 0.3393, "num_tokens": 1854747751.0, "step": 2431 }, { "epoch": 3.3181701800802252, "grad_norm": 0.335776725870635, "learning_rate": 2.7752950089649794e-05, "loss": 0.379, "num_tokens": 1855490157.0, "step": 2432 }, { "epoch": 3.3195357173337885, "grad_norm": 0.39390246071654733, "learning_rate": 2.7743528911050267e-05, "loss": 0.3634, "num_tokens": 1856274547.0, "step": 2433 }, { "epoch": 3.3209012545873517, "grad_norm": 0.37888416110149714, "learning_rate": 2.7734105980348797e-05, "loss": 0.3691, "num_tokens": 1856990190.0, "step": 2434 }, { "epoch": 3.322266791840915, "grad_norm": 0.38853317197730586, "learning_rate": 2.7724681300419924e-05, "loss": 0.363, "num_tokens": 1857781952.0, "step": 2435 }, { "epoch": 3.3236323290944783, "grad_norm": 0.3543454500016159, "learning_rate": 2.7715254874138696e-05, "loss": 0.3992, "num_tokens": 1858632728.0, "step": 2436 }, { "epoch": 3.324997866348041, "grad_norm": 0.37552586321259196, "learning_rate": 2.7705826704380713e-05, "loss": 0.3725, "num_tokens": 1859407217.0, "step": 2437 }, { "epoch": 3.3263634036016043, "grad_norm": 0.36542390918116413, "learning_rate": 2.76963967940221e-05, "loss": 0.3697, "num_tokens": 1860132185.0, "step": 2438 }, { "epoch": 3.3277289408551676, "grad_norm": 0.36808906457601753, "learning_rate": 2.768696514593952e-05, "loss": 0.3858, "num_tokens": 1860871687.0, "step": 2439 }, { "epoch": 3.329094478108731, "grad_norm": 0.38838960935404454, "learning_rate": 2.7677531763010147e-05, "loss": 0.3845, "num_tokens": 1861721303.0, "step": 2440 }, { "epoch": 3.330460015362294, "grad_norm": 0.35676912466550065, "learning_rate": 2.7668096648111714e-05, "loss": 0.3824, "num_tokens": 1862521097.0, "step": 2441 }, { "epoch": 3.3318255526158573, "grad_norm": 0.35544813389112395, "learning_rate": 2.7658659804122452e-05, "loss": 0.3673, "num_tokens": 1863338232.0, "step": 2442 }, { "epoch": 3.3331910898694206, "grad_norm": 0.3869469224899018, "learning_rate": 2.7649221233921146e-05, "loss": 0.3541, "num_tokens": 1864055409.0, "step": 2443 }, { "epoch": 3.334556627122984, "grad_norm": 0.315955949463619, "learning_rate": 2.7639780940387086e-05, "loss": 0.3625, "num_tokens": 1864806238.0, "step": 2444 }, { "epoch": 3.335922164376547, "grad_norm": 0.41022023193364604, "learning_rate": 2.7630338926400093e-05, "loss": 0.3679, "num_tokens": 1865484777.0, "step": 2445 }, { "epoch": 3.33728770163011, "grad_norm": 0.35587118562066694, "learning_rate": 2.7620895194840524e-05, "loss": 0.3647, "num_tokens": 1866297124.0, "step": 2446 }, { "epoch": 3.338653238883673, "grad_norm": 0.3804624611029951, "learning_rate": 2.7611449748589255e-05, "loss": 0.3651, "num_tokens": 1867014965.0, "step": 2447 }, { "epoch": 3.3400187761372364, "grad_norm": 0.36368041760566666, "learning_rate": 2.7602002590527684e-05, "loss": 0.375, "num_tokens": 1867800592.0, "step": 2448 }, { "epoch": 3.3413843133907997, "grad_norm": 0.38082227185509465, "learning_rate": 2.7592553723537718e-05, "loss": 0.368, "num_tokens": 1868551776.0, "step": 2449 }, { "epoch": 3.342749850644363, "grad_norm": 0.35674459966252353, "learning_rate": 2.7583103150501802e-05, "loss": 0.3714, "num_tokens": 1869333305.0, "step": 2450 }, { "epoch": 3.344115387897926, "grad_norm": 0.34537221932622697, "learning_rate": 2.7573650874302905e-05, "loss": 0.3719, "num_tokens": 1870111426.0, "step": 2451 }, { "epoch": 3.3454809251514894, "grad_norm": 0.37943407574331356, "learning_rate": 2.7564196897824494e-05, "loss": 0.3796, "num_tokens": 1870907351.0, "step": 2452 }, { "epoch": 3.3468464624050527, "grad_norm": 0.41583766840725683, "learning_rate": 2.7554741223950583e-05, "loss": 0.3688, "num_tokens": 1871697360.0, "step": 2453 }, { "epoch": 3.3482119996586155, "grad_norm": 0.4005393037051117, "learning_rate": 2.754528385556567e-05, "loss": 0.3681, "num_tokens": 1872447283.0, "step": 2454 }, { "epoch": 3.3495775369121787, "grad_norm": 0.39326284901532516, "learning_rate": 2.7535824795554805e-05, "loss": 0.3682, "num_tokens": 1873155344.0, "step": 2455 }, { "epoch": 3.350943074165742, "grad_norm": 0.4061845509907826, "learning_rate": 2.752636404680353e-05, "loss": 0.3761, "num_tokens": 1873997082.0, "step": 2456 }, { "epoch": 3.3523086114193053, "grad_norm": 0.36931610750146343, "learning_rate": 2.751690161219791e-05, "loss": 0.3822, "num_tokens": 1874759460.0, "step": 2457 }, { "epoch": 3.3536741486728685, "grad_norm": 0.3679107089173877, "learning_rate": 2.7507437494624522e-05, "loss": 0.3877, "num_tokens": 1875534961.0, "step": 2458 }, { "epoch": 3.3550396859264318, "grad_norm": 0.3881688506385901, "learning_rate": 2.7497971696970463e-05, "loss": 0.3785, "num_tokens": 1876285425.0, "step": 2459 }, { "epoch": 3.356405223179995, "grad_norm": 0.34412075548934895, "learning_rate": 2.7488504222123342e-05, "loss": 0.3631, "num_tokens": 1876996711.0, "step": 2460 }, { "epoch": 3.3577707604335583, "grad_norm": 0.34268685008265837, "learning_rate": 2.747903507297126e-05, "loss": 0.3727, "num_tokens": 1877840940.0, "step": 2461 }, { "epoch": 3.3591362976871215, "grad_norm": 0.36482222283933763, "learning_rate": 2.7469564252402862e-05, "loss": 0.3742, "num_tokens": 1878639306.0, "step": 2462 }, { "epoch": 3.3605018349406843, "grad_norm": 0.30613909234310266, "learning_rate": 2.7460091763307275e-05, "loss": 0.3715, "num_tokens": 1879304301.0, "step": 2463 }, { "epoch": 3.3618673721942476, "grad_norm": 0.33165712618167253, "learning_rate": 2.745061760857415e-05, "loss": 0.3786, "num_tokens": 1880167411.0, "step": 2464 }, { "epoch": 3.363232909447811, "grad_norm": 0.3498579559322409, "learning_rate": 2.7441141791093627e-05, "loss": 0.366, "num_tokens": 1880951109.0, "step": 2465 }, { "epoch": 3.364598446701374, "grad_norm": 0.3228736896212171, "learning_rate": 2.7431664313756394e-05, "loss": 0.372, "num_tokens": 1881693312.0, "step": 2466 }, { "epoch": 3.3659639839549373, "grad_norm": 0.3612786294914911, "learning_rate": 2.74221851794536e-05, "loss": 0.3636, "num_tokens": 1882440139.0, "step": 2467 }, { "epoch": 3.3673295212085006, "grad_norm": 0.3339534236692447, "learning_rate": 2.7412704391076914e-05, "loss": 0.3774, "num_tokens": 1883271173.0, "step": 2468 }, { "epoch": 3.368695058462064, "grad_norm": 0.33717472761367095, "learning_rate": 2.7403221951518533e-05, "loss": 0.3645, "num_tokens": 1884033818.0, "step": 2469 }, { "epoch": 3.3700605957156267, "grad_norm": 0.33046171523806445, "learning_rate": 2.7393737863671127e-05, "loss": 0.3752, "num_tokens": 1884807066.0, "step": 2470 }, { "epoch": 3.37142613296919, "grad_norm": 0.348764113371738, "learning_rate": 2.7384252130427878e-05, "loss": 0.3622, "num_tokens": 1885592805.0, "step": 2471 }, { "epoch": 3.372791670222753, "grad_norm": 0.3168163085919159, "learning_rate": 2.7374764754682482e-05, "loss": 0.3902, "num_tokens": 1886496945.0, "step": 2472 }, { "epoch": 3.3741572074763164, "grad_norm": 0.32747643057656844, "learning_rate": 2.7365275739329112e-05, "loss": 0.3734, "num_tokens": 1887261885.0, "step": 2473 }, { "epoch": 3.3755227447298797, "grad_norm": 0.33804071607744596, "learning_rate": 2.7355785087262468e-05, "loss": 0.3618, "num_tokens": 1887965663.0, "step": 2474 }, { "epoch": 3.376888281983443, "grad_norm": 0.3418980632688172, "learning_rate": 2.7346292801377737e-05, "loss": 0.3704, "num_tokens": 1888744626.0, "step": 2475 }, { "epoch": 3.378253819237006, "grad_norm": 0.34942428687715493, "learning_rate": 2.7336798884570598e-05, "loss": 0.3669, "num_tokens": 1889487824.0, "step": 2476 }, { "epoch": 3.3796193564905694, "grad_norm": 0.3659434765498868, "learning_rate": 2.7327303339737227e-05, "loss": 0.3649, "num_tokens": 1890221171.0, "step": 2477 }, { "epoch": 3.3809848937441327, "grad_norm": 0.31024362136411615, "learning_rate": 2.7317806169774325e-05, "loss": 0.3894, "num_tokens": 1890986458.0, "step": 2478 }, { "epoch": 3.3823504309976955, "grad_norm": 0.37105897396184384, "learning_rate": 2.7308307377579058e-05, "loss": 0.3622, "num_tokens": 1891726386.0, "step": 2479 }, { "epoch": 3.3837159682512588, "grad_norm": 0.28678430310145564, "learning_rate": 2.7298806966049083e-05, "loss": 0.3801, "num_tokens": 1892403062.0, "step": 2480 }, { "epoch": 3.385081505504822, "grad_norm": 0.3503658440318897, "learning_rate": 2.7289304938082583e-05, "loss": 0.3868, "num_tokens": 1893140884.0, "step": 2481 }, { "epoch": 3.3864470427583853, "grad_norm": 0.33714523183075507, "learning_rate": 2.72798012965782e-05, "loss": 0.3727, "num_tokens": 1893883172.0, "step": 2482 }, { "epoch": 3.3878125800119485, "grad_norm": 0.34737343823320044, "learning_rate": 2.727029604443509e-05, "loss": 0.3679, "num_tokens": 1894656447.0, "step": 2483 }, { "epoch": 3.3891781172655118, "grad_norm": 0.3374297417063293, "learning_rate": 2.7260789184552902e-05, "loss": 0.3649, "num_tokens": 1895475016.0, "step": 2484 }, { "epoch": 3.390543654519075, "grad_norm": 0.3442742156486751, "learning_rate": 2.7251280719831746e-05, "loss": 0.3748, "num_tokens": 1896285167.0, "step": 2485 }, { "epoch": 3.391909191772638, "grad_norm": 0.313199939763217, "learning_rate": 2.724177065317227e-05, "loss": 0.3585, "num_tokens": 1897060389.0, "step": 2486 }, { "epoch": 3.393274729026201, "grad_norm": 0.348404522579981, "learning_rate": 2.723225898747556e-05, "loss": 0.3918, "num_tokens": 1897907424.0, "step": 2487 }, { "epoch": 3.3946402662797643, "grad_norm": 0.30832779935421195, "learning_rate": 2.722274572564323e-05, "loss": 0.3736, "num_tokens": 1898709519.0, "step": 2488 }, { "epoch": 3.3960058035333276, "grad_norm": 0.34724804491287564, "learning_rate": 2.721323087057735e-05, "loss": 0.3828, "num_tokens": 1899368170.0, "step": 2489 }, { "epoch": 3.397371340786891, "grad_norm": 0.33370322301688987, "learning_rate": 2.7203714425180497e-05, "loss": 0.3666, "num_tokens": 1900133953.0, "step": 2490 }, { "epoch": 3.398736878040454, "grad_norm": 0.3616609161324752, "learning_rate": 2.719419639235573e-05, "loss": 0.3639, "num_tokens": 1900916871.0, "step": 2491 }, { "epoch": 3.4001024152940174, "grad_norm": 0.3332252075787499, "learning_rate": 2.718467677500659e-05, "loss": 0.3862, "num_tokens": 1901743286.0, "step": 2492 }, { "epoch": 3.4014679525475806, "grad_norm": 0.32362999513837104, "learning_rate": 2.717515557603709e-05, "loss": 0.3787, "num_tokens": 1902545155.0, "step": 2493 }, { "epoch": 3.402833489801144, "grad_norm": 0.35919479751951994, "learning_rate": 2.716563279835174e-05, "loss": 0.3553, "num_tokens": 1903277404.0, "step": 2494 }, { "epoch": 3.4041990270547067, "grad_norm": 0.34335296426263584, "learning_rate": 2.715610844485553e-05, "loss": 0.3396, "num_tokens": 1904004212.0, "step": 2495 }, { "epoch": 3.40556456430827, "grad_norm": 0.3128775673039261, "learning_rate": 2.7146582518453918e-05, "loss": 0.3631, "num_tokens": 1904686245.0, "step": 2496 }, { "epoch": 3.406930101561833, "grad_norm": 0.37485307132475143, "learning_rate": 2.713705502205287e-05, "loss": 0.3553, "num_tokens": 1905402694.0, "step": 2497 }, { "epoch": 3.4082956388153964, "grad_norm": 0.3382542999990022, "learning_rate": 2.7127525958558796e-05, "loss": 0.3496, "num_tokens": 1906137761.0, "step": 2498 }, { "epoch": 3.4096611760689597, "grad_norm": 0.3415033251265348, "learning_rate": 2.7117995330878604e-05, "loss": 0.39, "num_tokens": 1906983650.0, "step": 2499 }, { "epoch": 3.411026713322523, "grad_norm": 0.4054344485135832, "learning_rate": 2.7108463141919684e-05, "loss": 0.3709, "num_tokens": 1907718910.0, "step": 2500 }, { "epoch": 3.412392250576086, "grad_norm": 0.37203140450946787, "learning_rate": 2.709892939458988e-05, "loss": 0.3936, "num_tokens": 1908476952.0, "step": 2501 }, { "epoch": 3.413757787829649, "grad_norm": 0.44947928810314575, "learning_rate": 2.7089394091797547e-05, "loss": 0.375, "num_tokens": 1909207694.0, "step": 2502 }, { "epoch": 3.4151233250832123, "grad_norm": 0.3866635845808224, "learning_rate": 2.7079857236451474e-05, "loss": 0.369, "num_tokens": 1909966790.0, "step": 2503 }, { "epoch": 3.4164888623367755, "grad_norm": 0.404922570531254, "learning_rate": 2.7070318831460944e-05, "loss": 0.3567, "num_tokens": 1910756122.0, "step": 2504 }, { "epoch": 3.4178543995903388, "grad_norm": 0.3327851969121927, "learning_rate": 2.7060778879735714e-05, "loss": 0.3838, "num_tokens": 1911550831.0, "step": 2505 }, { "epoch": 3.419219936843902, "grad_norm": 0.36319879301601554, "learning_rate": 2.705123738418602e-05, "loss": 0.366, "num_tokens": 1912321032.0, "step": 2506 }, { "epoch": 3.4205854740974653, "grad_norm": 0.35327598260816234, "learning_rate": 2.704169434772254e-05, "loss": 0.3687, "num_tokens": 1913045546.0, "step": 2507 }, { "epoch": 3.4219510113510285, "grad_norm": 0.36409694656422265, "learning_rate": 2.7032149773256454e-05, "loss": 0.3691, "num_tokens": 1913843770.0, "step": 2508 }, { "epoch": 3.423316548604592, "grad_norm": 0.315699989673097, "learning_rate": 2.7022603663699404e-05, "loss": 0.3686, "num_tokens": 1914663045.0, "step": 2509 }, { "epoch": 3.424682085858155, "grad_norm": 0.31954853716335285, "learning_rate": 2.7013056021963483e-05, "loss": 0.3789, "num_tokens": 1915374848.0, "step": 2510 }, { "epoch": 3.426047623111718, "grad_norm": 0.3901564380496453, "learning_rate": 2.7003506850961262e-05, "loss": 0.3766, "num_tokens": 1916150049.0, "step": 2511 }, { "epoch": 3.427413160365281, "grad_norm": 0.3170326956823238, "learning_rate": 2.6993956153605792e-05, "loss": 0.3642, "num_tokens": 1916878480.0, "step": 2512 }, { "epoch": 3.4287786976188444, "grad_norm": 0.33706236528729777, "learning_rate": 2.698440393281056e-05, "loss": 0.3644, "num_tokens": 1917643468.0, "step": 2513 }, { "epoch": 3.4301442348724076, "grad_norm": 0.35284593528478086, "learning_rate": 2.697485019148955e-05, "loss": 0.3626, "num_tokens": 1918402807.0, "step": 2514 }, { "epoch": 3.431509772125971, "grad_norm": 0.3182415109736056, "learning_rate": 2.696529493255719e-05, "loss": 0.3841, "num_tokens": 1919108193.0, "step": 2515 }, { "epoch": 3.432875309379534, "grad_norm": 0.39035478355068937, "learning_rate": 2.695573815892837e-05, "loss": 0.382, "num_tokens": 1919920785.0, "step": 2516 }, { "epoch": 3.4342408466330974, "grad_norm": 0.33753312877018565, "learning_rate": 2.6946179873518453e-05, "loss": 0.3712, "num_tokens": 1920675676.0, "step": 2517 }, { "epoch": 3.43560638388666, "grad_norm": 0.34619523876171643, "learning_rate": 2.6936620079243266e-05, "loss": 0.3779, "num_tokens": 1921393551.0, "step": 2518 }, { "epoch": 3.4369719211402234, "grad_norm": 0.354663560763182, "learning_rate": 2.6927058779019075e-05, "loss": 0.3729, "num_tokens": 1922151515.0, "step": 2519 }, { "epoch": 3.4383374583937867, "grad_norm": 0.34331659212027016, "learning_rate": 2.6917495975762623e-05, "loss": 0.3581, "num_tokens": 1922938495.0, "step": 2520 }, { "epoch": 3.43970299564735, "grad_norm": 0.3456606242537144, "learning_rate": 2.6907931672391106e-05, "loss": 0.343, "num_tokens": 1923678478.0, "step": 2521 }, { "epoch": 3.441068532900913, "grad_norm": 0.3113286930393603, "learning_rate": 2.6898365871822177e-05, "loss": 0.3594, "num_tokens": 1924441622.0, "step": 2522 }, { "epoch": 3.4424340701544764, "grad_norm": 0.3510925774100995, "learning_rate": 2.6888798576973955e-05, "loss": 0.3636, "num_tokens": 1925285595.0, "step": 2523 }, { "epoch": 3.4437996074080397, "grad_norm": 0.3411398463631866, "learning_rate": 2.6879229790765004e-05, "loss": 0.3878, "num_tokens": 1926034125.0, "step": 2524 }, { "epoch": 3.445165144661603, "grad_norm": 0.33536299599857833, "learning_rate": 2.6869659516114345e-05, "loss": 0.3754, "num_tokens": 1926784034.0, "step": 2525 }, { "epoch": 3.446530681915166, "grad_norm": 0.3685970946855561, "learning_rate": 2.6860087755941454e-05, "loss": 0.3652, "num_tokens": 1927584954.0, "step": 2526 }, { "epoch": 3.447896219168729, "grad_norm": 0.34890739302290624, "learning_rate": 2.6850514513166256e-05, "loss": 0.3642, "num_tokens": 1928355876.0, "step": 2527 }, { "epoch": 3.4492617564222923, "grad_norm": 0.37545908778856174, "learning_rate": 2.6840939790709145e-05, "loss": 0.3682, "num_tokens": 1929099433.0, "step": 2528 }, { "epoch": 3.4506272936758555, "grad_norm": 0.3228370454136142, "learning_rate": 2.6831363591490945e-05, "loss": 0.3465, "num_tokens": 1929793860.0, "step": 2529 }, { "epoch": 3.4519928309294188, "grad_norm": 0.3892567694069945, "learning_rate": 2.6821785918432943e-05, "loss": 0.3741, "num_tokens": 1930543005.0, "step": 2530 }, { "epoch": 3.453358368182982, "grad_norm": 0.36777927583406256, "learning_rate": 2.6812206774456867e-05, "loss": 0.3778, "num_tokens": 1931272603.0, "step": 2531 }, { "epoch": 3.4547239054365453, "grad_norm": 0.3140541516036068, "learning_rate": 2.68026261624849e-05, "loss": 0.3608, "num_tokens": 1932043592.0, "step": 2532 }, { "epoch": 3.4560894426901085, "grad_norm": 0.3455105133877621, "learning_rate": 2.6793044085439688e-05, "loss": 0.3907, "num_tokens": 1932771613.0, "step": 2533 }, { "epoch": 3.4574549799436713, "grad_norm": 0.3474000947847991, "learning_rate": 2.6783460546244286e-05, "loss": 0.3637, "num_tokens": 1933545792.0, "step": 2534 }, { "epoch": 3.4588205171972346, "grad_norm": 0.3380242239535042, "learning_rate": 2.677387554782222e-05, "loss": 0.3596, "num_tokens": 1934247075.0, "step": 2535 }, { "epoch": 3.460186054450798, "grad_norm": 0.3315259074590797, "learning_rate": 2.6764289093097473e-05, "loss": 0.352, "num_tokens": 1935038575.0, "step": 2536 }, { "epoch": 3.461551591704361, "grad_norm": 0.3481148456119935, "learning_rate": 2.6754701184994445e-05, "loss": 0.3647, "num_tokens": 1935828880.0, "step": 2537 }, { "epoch": 3.4629171289579244, "grad_norm": 0.31988516685874124, "learning_rate": 2.6745111826437987e-05, "loss": 0.3634, "num_tokens": 1936600590.0, "step": 2538 }, { "epoch": 3.4642826662114876, "grad_norm": 0.3449180390420779, "learning_rate": 2.673552102035341e-05, "loss": 0.3537, "num_tokens": 1937386674.0, "step": 2539 }, { "epoch": 3.465648203465051, "grad_norm": 0.3712071148954322, "learning_rate": 2.6725928769666453e-05, "loss": 0.3634, "num_tokens": 1938199391.0, "step": 2540 }, { "epoch": 3.467013740718614, "grad_norm": 0.3488253539374482, "learning_rate": 2.671633507730329e-05, "loss": 0.3799, "num_tokens": 1938919703.0, "step": 2541 }, { "epoch": 3.4683792779721774, "grad_norm": 0.39432796234361056, "learning_rate": 2.6706739946190537e-05, "loss": 0.359, "num_tokens": 1939589122.0, "step": 2542 }, { "epoch": 3.46974481522574, "grad_norm": 0.44004669743527663, "learning_rate": 2.6697143379255258e-05, "loss": 0.3546, "num_tokens": 1940293135.0, "step": 2543 }, { "epoch": 3.4711103524793034, "grad_norm": 0.379385209768258, "learning_rate": 2.6687545379424954e-05, "loss": 0.395, "num_tokens": 1941056699.0, "step": 2544 }, { "epoch": 3.4724758897328667, "grad_norm": 0.41553386034193013, "learning_rate": 2.6677945949627547e-05, "loss": 0.3618, "num_tokens": 1941771437.0, "step": 2545 }, { "epoch": 3.47384142698643, "grad_norm": 0.37093936893462587, "learning_rate": 2.6668345092791422e-05, "loss": 0.3753, "num_tokens": 1942524165.0, "step": 2546 }, { "epoch": 3.475206964239993, "grad_norm": 0.3430958338890727, "learning_rate": 2.6658742811845377e-05, "loss": 0.3584, "num_tokens": 1943256919.0, "step": 2547 }, { "epoch": 3.4765725014935565, "grad_norm": 0.4051671298214809, "learning_rate": 2.664913910971865e-05, "loss": 0.365, "num_tokens": 1943962410.0, "step": 2548 }, { "epoch": 3.4779380387471197, "grad_norm": 0.37243271316326154, "learning_rate": 2.6639533989340925e-05, "loss": 0.3816, "num_tokens": 1944748848.0, "step": 2549 }, { "epoch": 3.479303576000683, "grad_norm": 0.37398972357714805, "learning_rate": 2.6629927453642293e-05, "loss": 0.3754, "num_tokens": 1945517537.0, "step": 2550 }, { "epoch": 3.4806691132542458, "grad_norm": 0.35515413990017186, "learning_rate": 2.6620319505553304e-05, "loss": 0.3732, "num_tokens": 1946276483.0, "step": 2551 }, { "epoch": 3.482034650507809, "grad_norm": 0.343707495465078, "learning_rate": 2.6610710148004923e-05, "loss": 0.3683, "num_tokens": 1947018662.0, "step": 2552 }, { "epoch": 3.4834001877613723, "grad_norm": 0.3598334836597367, "learning_rate": 2.6601099383928545e-05, "loss": 0.3509, "num_tokens": 1947804065.0, "step": 2553 }, { "epoch": 3.4847657250149355, "grad_norm": 0.3187471566171324, "learning_rate": 2.6591487216256004e-05, "loss": 0.3638, "num_tokens": 1948582658.0, "step": 2554 }, { "epoch": 3.486131262268499, "grad_norm": 0.3559167023271652, "learning_rate": 2.6581873647919557e-05, "loss": 0.3541, "num_tokens": 1949378355.0, "step": 2555 }, { "epoch": 3.487496799522062, "grad_norm": 0.32941840112506365, "learning_rate": 2.6572258681851878e-05, "loss": 0.3789, "num_tokens": 1950131762.0, "step": 2556 }, { "epoch": 3.4888623367756253, "grad_norm": 0.349873412745789, "learning_rate": 2.6562642320986087e-05, "loss": 0.3597, "num_tokens": 1950896103.0, "step": 2557 }, { "epoch": 3.4902278740291885, "grad_norm": 0.3401681336669432, "learning_rate": 2.655302456825572e-05, "loss": 0.3773, "num_tokens": 1951643190.0, "step": 2558 }, { "epoch": 3.491593411282752, "grad_norm": 0.35928955658239603, "learning_rate": 2.6543405426594725e-05, "loss": 0.3663, "num_tokens": 1952365984.0, "step": 2559 }, { "epoch": 3.4929589485363146, "grad_norm": 0.3818394119295786, "learning_rate": 2.653378489893749e-05, "loss": 0.3639, "num_tokens": 1953134885.0, "step": 2560 }, { "epoch": 3.494324485789878, "grad_norm": 0.31617249922081064, "learning_rate": 2.652416298821883e-05, "loss": 0.374, "num_tokens": 1953934754.0, "step": 2561 }, { "epoch": 3.495690023043441, "grad_norm": 0.35915268791135707, "learning_rate": 2.6514539697373964e-05, "loss": 0.377, "num_tokens": 1954617801.0, "step": 2562 }, { "epoch": 3.4970555602970044, "grad_norm": 0.3411398613472808, "learning_rate": 2.6504915029338543e-05, "loss": 0.3648, "num_tokens": 1955332262.0, "step": 2563 }, { "epoch": 3.4984210975505676, "grad_norm": 0.33518086621191484, "learning_rate": 2.6495288987048646e-05, "loss": 0.354, "num_tokens": 1956079591.0, "step": 2564 }, { "epoch": 3.499786634804131, "grad_norm": 0.3340500378584759, "learning_rate": 2.6485661573440746e-05, "loss": 0.3567, "num_tokens": 1956785519.0, "step": 2565 }, { "epoch": 3.5011521720576937, "grad_norm": 0.3177590181166809, "learning_rate": 2.647603279145176e-05, "loss": 0.3726, "num_tokens": 1957605451.0, "step": 2566 }, { "epoch": 3.502517709311257, "grad_norm": 0.36465073589449803, "learning_rate": 2.6466402644019008e-05, "loss": 0.3643, "num_tokens": 1958488516.0, "step": 2567 }, { "epoch": 3.50388324656482, "grad_norm": 0.33677129445060844, "learning_rate": 2.645677113408023e-05, "loss": 0.3767, "num_tokens": 1959308605.0, "step": 2568 }, { "epoch": 3.5052487838183835, "grad_norm": 0.32463823834632427, "learning_rate": 2.644713826457359e-05, "loss": 0.3797, "num_tokens": 1960109856.0, "step": 2569 }, { "epoch": 3.5066143210719467, "grad_norm": 0.37214440558143214, "learning_rate": 2.6437504038437657e-05, "loss": 0.3706, "num_tokens": 1960831380.0, "step": 2570 }, { "epoch": 3.50797985832551, "grad_norm": 0.33908284969570796, "learning_rate": 2.6427868458611404e-05, "loss": 0.3578, "num_tokens": 1961549095.0, "step": 2571 }, { "epoch": 3.509345395579073, "grad_norm": 0.3401070423398514, "learning_rate": 2.641823152803425e-05, "loss": 0.3754, "num_tokens": 1962328061.0, "step": 2572 }, { "epoch": 3.5107109328326365, "grad_norm": 0.32300446070107597, "learning_rate": 2.6408593249645987e-05, "loss": 0.3798, "num_tokens": 1963089803.0, "step": 2573 }, { "epoch": 3.5120764700861997, "grad_norm": 0.33766614476442736, "learning_rate": 2.6398953626386852e-05, "loss": 0.3651, "num_tokens": 1963873791.0, "step": 2574 }, { "epoch": 3.513442007339763, "grad_norm": 0.333185635994923, "learning_rate": 2.638931266119746e-05, "loss": 0.3706, "num_tokens": 1964710938.0, "step": 2575 }, { "epoch": 3.514807544593326, "grad_norm": 0.32975240308650455, "learning_rate": 2.6379670357018853e-05, "loss": 0.3756, "num_tokens": 1965517489.0, "step": 2576 }, { "epoch": 3.516173081846889, "grad_norm": 0.3529264361503596, "learning_rate": 2.6370026716792505e-05, "loss": 0.3803, "num_tokens": 1966268072.0, "step": 2577 }, { "epoch": 3.5175386191004523, "grad_norm": 0.32795143684690053, "learning_rate": 2.6360381743460243e-05, "loss": 0.3832, "num_tokens": 1967054925.0, "step": 2578 }, { "epoch": 3.5189041563540155, "grad_norm": 0.3202838012699601, "learning_rate": 2.6350735439964346e-05, "loss": 0.3704, "num_tokens": 1967832618.0, "step": 2579 }, { "epoch": 3.520269693607579, "grad_norm": 0.31579153552139394, "learning_rate": 2.634108780924749e-05, "loss": 0.3654, "num_tokens": 1968520499.0, "step": 2580 }, { "epoch": 3.521635230861142, "grad_norm": 0.3405165983959088, "learning_rate": 2.6331438854252728e-05, "loss": 0.3866, "num_tokens": 1969224492.0, "step": 2581 }, { "epoch": 3.523000768114705, "grad_norm": 0.36819010896625926, "learning_rate": 2.6321788577923553e-05, "loss": 0.3525, "num_tokens": 1969983578.0, "step": 2582 }, { "epoch": 3.524366305368268, "grad_norm": 0.3151023833427885, "learning_rate": 2.6312136983203848e-05, "loss": 0.3755, "num_tokens": 1970780398.0, "step": 2583 }, { "epoch": 3.5257318426218314, "grad_norm": 0.34289064220846066, "learning_rate": 2.6302484073037886e-05, "loss": 0.389, "num_tokens": 1971546284.0, "step": 2584 }, { "epoch": 3.5270973798753946, "grad_norm": 0.34226210410651164, "learning_rate": 2.6292829850370355e-05, "loss": 0.3602, "num_tokens": 1972321640.0, "step": 2585 }, { "epoch": 3.528462917128958, "grad_norm": 0.3282294192036814, "learning_rate": 2.6283174318146352e-05, "loss": 0.3681, "num_tokens": 1973092596.0, "step": 2586 }, { "epoch": 3.529828454382521, "grad_norm": 0.38254856285866795, "learning_rate": 2.627351747931135e-05, "loss": 0.3897, "num_tokens": 1973788094.0, "step": 2587 }, { "epoch": 3.5311939916360844, "grad_norm": 0.3389289812671832, "learning_rate": 2.6263859336811236e-05, "loss": 0.3646, "num_tokens": 1974611603.0, "step": 2588 }, { "epoch": 3.5325595288896476, "grad_norm": 0.3899201987366583, "learning_rate": 2.6254199893592293e-05, "loss": 0.3871, "num_tokens": 1975392803.0, "step": 2589 }, { "epoch": 3.533925066143211, "grad_norm": 0.36496712359172073, "learning_rate": 2.6244539152601194e-05, "loss": 0.3736, "num_tokens": 1976171281.0, "step": 2590 }, { "epoch": 3.535290603396774, "grad_norm": 0.3782755907562132, "learning_rate": 2.6234877116785005e-05, "loss": 0.3966, "num_tokens": 1976960959.0, "step": 2591 }, { "epoch": 3.536656140650337, "grad_norm": 0.4063456768962934, "learning_rate": 2.6225213789091218e-05, "loss": 0.3542, "num_tokens": 1977733352.0, "step": 2592 }, { "epoch": 3.5380216779039, "grad_norm": 0.3483857464865555, "learning_rate": 2.621554917246768e-05, "loss": 0.3704, "num_tokens": 1978468671.0, "step": 2593 }, { "epoch": 3.5393872151574635, "grad_norm": 0.40626691757246775, "learning_rate": 2.6205883269862647e-05, "loss": 0.3842, "num_tokens": 1979266086.0, "step": 2594 }, { "epoch": 3.5407527524110267, "grad_norm": 0.36404011979046497, "learning_rate": 2.619621608422477e-05, "loss": 0.3861, "num_tokens": 1980066232.0, "step": 2595 }, { "epoch": 3.54211828966459, "grad_norm": 0.3408504696534381, "learning_rate": 2.618654761850309e-05, "loss": 0.3715, "num_tokens": 1980925828.0, "step": 2596 }, { "epoch": 3.5434838269181532, "grad_norm": 0.3639464308896066, "learning_rate": 2.6176877875647035e-05, "loss": 0.3713, "num_tokens": 1981656027.0, "step": 2597 }, { "epoch": 3.544849364171716, "grad_norm": 0.36685529928890837, "learning_rate": 2.616720685860642e-05, "loss": 0.3627, "num_tokens": 1982459508.0, "step": 2598 }, { "epoch": 3.5462149014252793, "grad_norm": 0.3238318009314116, "learning_rate": 2.6157534570331466e-05, "loss": 0.3674, "num_tokens": 1983192509.0, "step": 2599 }, { "epoch": 3.5475804386788425, "grad_norm": 0.33642851109026756, "learning_rate": 2.6147861013772757e-05, "loss": 0.3784, "num_tokens": 1983951615.0, "step": 2600 }, { "epoch": 3.548945975932406, "grad_norm": 0.38587144900536874, "learning_rate": 2.613818619188129e-05, "loss": 0.3793, "num_tokens": 1984726049.0, "step": 2601 }, { "epoch": 3.550311513185969, "grad_norm": 0.35695800342025813, "learning_rate": 2.6128510107608414e-05, "loss": 0.3817, "num_tokens": 1985466794.0, "step": 2602 }, { "epoch": 3.5516770504395323, "grad_norm": 0.37038182106971257, "learning_rate": 2.6118832763905905e-05, "loss": 0.3758, "num_tokens": 1986181342.0, "step": 2603 }, { "epoch": 3.5530425876930956, "grad_norm": 0.3655662784857562, "learning_rate": 2.610915416372588e-05, "loss": 0.3568, "num_tokens": 1987024765.0, "step": 2604 }, { "epoch": 3.554408124946659, "grad_norm": 0.34829471913949406, "learning_rate": 2.609947431002088e-05, "loss": 0.3474, "num_tokens": 1987886251.0, "step": 2605 }, { "epoch": 3.555773662200222, "grad_norm": 0.33881242280428225, "learning_rate": 2.6089793205743798e-05, "loss": 0.3473, "num_tokens": 1988601355.0, "step": 2606 }, { "epoch": 3.5571391994537853, "grad_norm": 0.3606206771829116, "learning_rate": 2.6080110853847922e-05, "loss": 0.3757, "num_tokens": 1989338032.0, "step": 2607 }, { "epoch": 3.5585047367073486, "grad_norm": 0.3599052419625901, "learning_rate": 2.6070427257286923e-05, "loss": 0.3753, "num_tokens": 1990166913.0, "step": 2608 }, { "epoch": 3.5598702739609114, "grad_norm": 0.3391222611008789, "learning_rate": 2.606074241901484e-05, "loss": 0.3752, "num_tokens": 1990941252.0, "step": 2609 }, { "epoch": 3.5612358112144746, "grad_norm": 0.3467295511043517, "learning_rate": 2.60510563419861e-05, "loss": 0.3711, "num_tokens": 1991721991.0, "step": 2610 }, { "epoch": 3.562601348468038, "grad_norm": 0.32117136491499004, "learning_rate": 2.6041369029155514e-05, "loss": 0.3711, "num_tokens": 1992559224.0, "step": 2611 }, { "epoch": 3.563966885721601, "grad_norm": 0.31826851903520004, "learning_rate": 2.6031680483478243e-05, "loss": 0.3544, "num_tokens": 1993262365.0, "step": 2612 }, { "epoch": 3.5653324229751644, "grad_norm": 0.3237148776952608, "learning_rate": 2.6021990707909858e-05, "loss": 0.3728, "num_tokens": 1994020396.0, "step": 2613 }, { "epoch": 3.5666979602287276, "grad_norm": 0.3409066864169854, "learning_rate": 2.601229970540629e-05, "loss": 0.3738, "num_tokens": 1994809654.0, "step": 2614 }, { "epoch": 3.5680634974822905, "grad_norm": 0.34094420238624723, "learning_rate": 2.600260747892384e-05, "loss": 0.3728, "num_tokens": 1995599209.0, "step": 2615 }, { "epoch": 3.5694290347358537, "grad_norm": 0.3280019167971311, "learning_rate": 2.5992914031419174e-05, "loss": 0.3664, "num_tokens": 1996275936.0, "step": 2616 }, { "epoch": 3.570794571989417, "grad_norm": 0.35371155443701074, "learning_rate": 2.5983219365849367e-05, "loss": 0.3623, "num_tokens": 1997049058.0, "step": 2617 }, { "epoch": 3.57216010924298, "grad_norm": 0.3246231288552929, "learning_rate": 2.5973523485171827e-05, "loss": 0.3801, "num_tokens": 1997825402.0, "step": 2618 }, { "epoch": 3.5735256464965435, "grad_norm": 0.369090905683245, "learning_rate": 2.596382639234435e-05, "loss": 0.3536, "num_tokens": 1998600108.0, "step": 2619 }, { "epoch": 3.5748911837501067, "grad_norm": 0.3179366458492704, "learning_rate": 2.5954128090325106e-05, "loss": 0.3603, "num_tokens": 1999312863.0, "step": 2620 }, { "epoch": 3.57625672100367, "grad_norm": 0.3425517716345217, "learning_rate": 2.5944428582072613e-05, "loss": 0.3709, "num_tokens": 2000076698.0, "step": 2621 }, { "epoch": 3.5776222582572332, "grad_norm": 0.34473922812975993, "learning_rate": 2.593472787054578e-05, "loss": 0.3735, "num_tokens": 2000862634.0, "step": 2622 }, { "epoch": 3.5789877955107965, "grad_norm": 0.3170753607896768, "learning_rate": 2.5925025958703872e-05, "loss": 0.402, "num_tokens": 2001649940.0, "step": 2623 }, { "epoch": 3.5803533327643597, "grad_norm": 0.3585472100314437, "learning_rate": 2.591532284950652e-05, "loss": 0.3689, "num_tokens": 2002480706.0, "step": 2624 }, { "epoch": 3.5817188700179226, "grad_norm": 0.3457533529813338, "learning_rate": 2.5905618545913728e-05, "loss": 0.3695, "num_tokens": 2003238777.0, "step": 2625 }, { "epoch": 3.583084407271486, "grad_norm": 0.3472949854151614, "learning_rate": 2.5895913050885853e-05, "loss": 0.3708, "num_tokens": 2003940199.0, "step": 2626 }, { "epoch": 3.584449944525049, "grad_norm": 0.35253476928935124, "learning_rate": 2.588620636738363e-05, "loss": 0.3858, "num_tokens": 2004695868.0, "step": 2627 }, { "epoch": 3.5858154817786123, "grad_norm": 0.322672802081977, "learning_rate": 2.5876498498368135e-05, "loss": 0.3739, "num_tokens": 2005484205.0, "step": 2628 }, { "epoch": 3.5871810190321756, "grad_norm": 0.34879741598004754, "learning_rate": 2.5866789446800828e-05, "loss": 0.3735, "num_tokens": 2006295966.0, "step": 2629 }, { "epoch": 3.588546556285739, "grad_norm": 0.33070769718572574, "learning_rate": 2.5857079215643515e-05, "loss": 0.3693, "num_tokens": 2007121558.0, "step": 2630 }, { "epoch": 3.5899120935393016, "grad_norm": 0.33206462931002717, "learning_rate": 2.584736780785837e-05, "loss": 0.3657, "num_tokens": 2007926470.0, "step": 2631 }, { "epoch": 3.591277630792865, "grad_norm": 0.3376507391047847, "learning_rate": 2.5837655226407928e-05, "loss": 0.3776, "num_tokens": 2008778588.0, "step": 2632 }, { "epoch": 3.592643168046428, "grad_norm": 0.332843422252104, "learning_rate": 2.582794147425506e-05, "loss": 0.395, "num_tokens": 2009540181.0, "step": 2633 }, { "epoch": 3.5940087052999914, "grad_norm": 0.3429077228170153, "learning_rate": 2.5818226554363027e-05, "loss": 0.376, "num_tokens": 2010296763.0, "step": 2634 }, { "epoch": 3.5953742425535546, "grad_norm": 0.3570236169016782, "learning_rate": 2.5808510469695435e-05, "loss": 0.3807, "num_tokens": 2011100490.0, "step": 2635 }, { "epoch": 3.596739779807118, "grad_norm": 0.33818065708167505, "learning_rate": 2.5798793223216216e-05, "loss": 0.374, "num_tokens": 2011863073.0, "step": 2636 }, { "epoch": 3.598105317060681, "grad_norm": 0.39895187990165, "learning_rate": 2.5789074817889704e-05, "loss": 0.3761, "num_tokens": 2012680225.0, "step": 2637 }, { "epoch": 3.5994708543142444, "grad_norm": 0.34659095995394706, "learning_rate": 2.577935525668056e-05, "loss": 0.3894, "num_tokens": 2013466051.0, "step": 2638 }, { "epoch": 3.6008363915678077, "grad_norm": 0.3473478751121266, "learning_rate": 2.576963454255379e-05, "loss": 0.3819, "num_tokens": 2014236841.0, "step": 2639 }, { "epoch": 3.602201928821371, "grad_norm": 0.3752395000259899, "learning_rate": 2.575991267847477e-05, "loss": 0.3795, "num_tokens": 2014999108.0, "step": 2640 }, { "epoch": 3.6035674660749337, "grad_norm": 0.3091395051094046, "learning_rate": 2.5750189667409222e-05, "loss": 0.3928, "num_tokens": 2015847716.0, "step": 2641 }, { "epoch": 3.604933003328497, "grad_norm": 0.3521590416823056, "learning_rate": 2.5740465512323212e-05, "loss": 0.3591, "num_tokens": 2016575341.0, "step": 2642 }, { "epoch": 3.6062985405820602, "grad_norm": 0.3223238603910961, "learning_rate": 2.5730740216183168e-05, "loss": 0.3847, "num_tokens": 2017308005.0, "step": 2643 }, { "epoch": 3.6076640778356235, "grad_norm": 0.31608426357319885, "learning_rate": 2.572101378195584e-05, "loss": 0.3909, "num_tokens": 2018126576.0, "step": 2644 }, { "epoch": 3.6090296150891867, "grad_norm": 0.37705423628504114, "learning_rate": 2.5711286212608354e-05, "loss": 0.3708, "num_tokens": 2018877119.0, "step": 2645 }, { "epoch": 3.61039515234275, "grad_norm": 0.3132609762905505, "learning_rate": 2.570155751110816e-05, "loss": 0.3754, "num_tokens": 2019642747.0, "step": 2646 }, { "epoch": 3.611760689596313, "grad_norm": 0.37740993661923916, "learning_rate": 2.5691827680423082e-05, "loss": 0.3765, "num_tokens": 2020352591.0, "step": 2647 }, { "epoch": 3.613126226849876, "grad_norm": 0.328911210312702, "learning_rate": 2.5682096723521247e-05, "loss": 0.3803, "num_tokens": 2021142134.0, "step": 2648 }, { "epoch": 3.6144917641034393, "grad_norm": 0.3369201041084526, "learning_rate": 2.567236464337117e-05, "loss": 0.3591, "num_tokens": 2021845019.0, "step": 2649 }, { "epoch": 3.6158573013570026, "grad_norm": 0.329860530386698, "learning_rate": 2.566263144294167e-05, "loss": 0.3682, "num_tokens": 2022542438.0, "step": 2650 }, { "epoch": 3.617222838610566, "grad_norm": 0.32558673390935516, "learning_rate": 2.5652897125201937e-05, "loss": 0.3778, "num_tokens": 2023340615.0, "step": 2651 }, { "epoch": 3.618588375864129, "grad_norm": 0.3341407844867449, "learning_rate": 2.5643161693121476e-05, "loss": 0.3637, "num_tokens": 2024042940.0, "step": 2652 }, { "epoch": 3.6199539131176923, "grad_norm": 0.326104600862659, "learning_rate": 2.563342514967016e-05, "loss": 0.3842, "num_tokens": 2024840946.0, "step": 2653 }, { "epoch": 3.6213194503712556, "grad_norm": 0.3281223045496674, "learning_rate": 2.5623687497818185e-05, "loss": 0.3747, "num_tokens": 2025551275.0, "step": 2654 }, { "epoch": 3.622684987624819, "grad_norm": 0.3368432126097624, "learning_rate": 2.561394874053607e-05, "loss": 0.3657, "num_tokens": 2026300509.0, "step": 2655 }, { "epoch": 3.624050524878382, "grad_norm": 0.37141772464525097, "learning_rate": 2.5604208880794708e-05, "loss": 0.3684, "num_tokens": 2027123993.0, "step": 2656 }, { "epoch": 3.625416062131945, "grad_norm": 0.35446347538602785, "learning_rate": 2.5594467921565308e-05, "loss": 0.3681, "num_tokens": 2027882375.0, "step": 2657 }, { "epoch": 3.626781599385508, "grad_norm": 0.3507531568263404, "learning_rate": 2.55847258658194e-05, "loss": 0.3882, "num_tokens": 2028646558.0, "step": 2658 }, { "epoch": 3.6281471366390714, "grad_norm": 0.3373681679476027, "learning_rate": 2.557498271652888e-05, "loss": 0.3732, "num_tokens": 2029375579.0, "step": 2659 }, { "epoch": 3.6295126738926347, "grad_norm": 0.33578506297461713, "learning_rate": 2.556523847666595e-05, "loss": 0.3815, "num_tokens": 2030206674.0, "step": 2660 }, { "epoch": 3.630878211146198, "grad_norm": 0.3351510194406023, "learning_rate": 2.555549314920315e-05, "loss": 0.3777, "num_tokens": 2030931719.0, "step": 2661 }, { "epoch": 3.632243748399761, "grad_norm": 0.33380562266498526, "learning_rate": 2.5545746737113374e-05, "loss": 0.3835, "num_tokens": 2031646398.0, "step": 2662 }, { "epoch": 3.633609285653324, "grad_norm": 0.3550086897481268, "learning_rate": 2.553599924336982e-05, "loss": 0.3715, "num_tokens": 2032355625.0, "step": 2663 }, { "epoch": 3.6349748229068872, "grad_norm": 0.35027042201370523, "learning_rate": 2.5526250670946026e-05, "loss": 0.3631, "num_tokens": 2033085957.0, "step": 2664 }, { "epoch": 3.6363403601604505, "grad_norm": 0.349094624573188, "learning_rate": 2.5516501022815857e-05, "loss": 0.3747, "num_tokens": 2033901005.0, "step": 2665 }, { "epoch": 3.6377058974140137, "grad_norm": 0.34170518960966284, "learning_rate": 2.550675030195352e-05, "loss": 0.3777, "num_tokens": 2034712876.0, "step": 2666 }, { "epoch": 3.639071434667577, "grad_norm": 0.3285757226957739, "learning_rate": 2.5496998511333527e-05, "loss": 0.3746, "num_tokens": 2035502788.0, "step": 2667 }, { "epoch": 3.6404369719211402, "grad_norm": 0.34488221782845835, "learning_rate": 2.5487245653930735e-05, "loss": 0.372, "num_tokens": 2036298910.0, "step": 2668 }, { "epoch": 3.6418025091747035, "grad_norm": 0.3055326503608925, "learning_rate": 2.5477491732720308e-05, "loss": 0.3692, "num_tokens": 2037066288.0, "step": 2669 }, { "epoch": 3.6431680464282667, "grad_norm": 0.3015694798261038, "learning_rate": 2.546773675067775e-05, "loss": 0.3561, "num_tokens": 2037799088.0, "step": 2670 }, { "epoch": 3.64453358368183, "grad_norm": 0.32896436131079365, "learning_rate": 2.5457980710778886e-05, "loss": 0.3527, "num_tokens": 2038649924.0, "step": 2671 }, { "epoch": 3.6458991209353933, "grad_norm": 0.3272086972631099, "learning_rate": 2.5448223615999867e-05, "loss": 0.3861, "num_tokens": 2039486132.0, "step": 2672 }, { "epoch": 3.647264658188956, "grad_norm": 0.35049658540963247, "learning_rate": 2.5438465469317142e-05, "loss": 0.3649, "num_tokens": 2040273686.0, "step": 2673 }, { "epoch": 3.6486301954425193, "grad_norm": 0.3327909000557524, "learning_rate": 2.542870627370752e-05, "loss": 0.3439, "num_tokens": 2040919930.0, "step": 2674 }, { "epoch": 3.6499957326960826, "grad_norm": 0.34199719730830913, "learning_rate": 2.541894603214809e-05, "loss": 0.3601, "num_tokens": 2041672532.0, "step": 2675 }, { "epoch": 3.651361269949646, "grad_norm": 0.34153500333188896, "learning_rate": 2.5409184747616284e-05, "loss": 0.3597, "num_tokens": 2042391154.0, "step": 2676 }, { "epoch": 3.652726807203209, "grad_norm": 0.34259137008771356, "learning_rate": 2.539942242308986e-05, "loss": 0.3742, "num_tokens": 2043094001.0, "step": 2677 }, { "epoch": 3.6540923444567723, "grad_norm": 0.33458596514322997, "learning_rate": 2.5389659061546867e-05, "loss": 0.3994, "num_tokens": 2043924247.0, "step": 2678 }, { "epoch": 3.655457881710335, "grad_norm": 0.32430080346754386, "learning_rate": 2.5379894665965686e-05, "loss": 0.3695, "num_tokens": 2044639662.0, "step": 2679 }, { "epoch": 3.6568234189638984, "grad_norm": 0.32753972544161125, "learning_rate": 2.5370129239325018e-05, "loss": 0.3821, "num_tokens": 2045380574.0, "step": 2680 }, { "epoch": 3.6581889562174617, "grad_norm": 0.3077526794981843, "learning_rate": 2.5360362784603866e-05, "loss": 0.3739, "num_tokens": 2046148297.0, "step": 2681 }, { "epoch": 3.659554493471025, "grad_norm": 0.3224385006210752, "learning_rate": 2.5350595304781557e-05, "loss": 0.3756, "num_tokens": 2046869787.0, "step": 2682 }, { "epoch": 3.660920030724588, "grad_norm": 0.32025799789436266, "learning_rate": 2.5340826802837713e-05, "loss": 0.3608, "num_tokens": 2047625042.0, "step": 2683 }, { "epoch": 3.6622855679781514, "grad_norm": 0.3116423289873876, "learning_rate": 2.5331057281752297e-05, "loss": 0.3529, "num_tokens": 2048306904.0, "step": 2684 }, { "epoch": 3.6636511052317147, "grad_norm": 0.3141059028590751, "learning_rate": 2.5321286744505558e-05, "loss": 0.3536, "num_tokens": 2049055911.0, "step": 2685 }, { "epoch": 3.665016642485278, "grad_norm": 0.31680431502788514, "learning_rate": 2.531151519407806e-05, "loss": 0.3639, "num_tokens": 2049874221.0, "step": 2686 }, { "epoch": 3.666382179738841, "grad_norm": 0.32999557185639394, "learning_rate": 2.53017426334507e-05, "loss": 0.378, "num_tokens": 2050594653.0, "step": 2687 }, { "epoch": 3.6677477169924044, "grad_norm": 0.3329063487620821, "learning_rate": 2.5291969065604636e-05, "loss": 0.3514, "num_tokens": 2051268200.0, "step": 2688 }, { "epoch": 3.6691132542459672, "grad_norm": 0.352549649612148, "learning_rate": 2.528219449352138e-05, "loss": 0.3677, "num_tokens": 2052102523.0, "step": 2689 }, { "epoch": 3.6704787914995305, "grad_norm": 0.33823365930562344, "learning_rate": 2.527241892018272e-05, "loss": 0.3743, "num_tokens": 2052888796.0, "step": 2690 }, { "epoch": 3.6718443287530937, "grad_norm": 0.37336715863921516, "learning_rate": 2.526264234857077e-05, "loss": 0.3635, "num_tokens": 2053609407.0, "step": 2691 }, { "epoch": 3.673209866006657, "grad_norm": 0.3507286688788533, "learning_rate": 2.5252864781667925e-05, "loss": 0.3718, "num_tokens": 2054391530.0, "step": 2692 }, { "epoch": 3.6745754032602203, "grad_norm": 0.3212804611392325, "learning_rate": 2.5243086222456907e-05, "loss": 0.3833, "num_tokens": 2055192638.0, "step": 2693 }, { "epoch": 3.6759409405137835, "grad_norm": 0.37380863315461615, "learning_rate": 2.5233306673920735e-05, "loss": 0.3655, "num_tokens": 2055933861.0, "step": 2694 }, { "epoch": 3.6773064777673463, "grad_norm": 0.34068435994492924, "learning_rate": 2.522352613904271e-05, "loss": 0.3766, "num_tokens": 2056692053.0, "step": 2695 }, { "epoch": 3.6786720150209096, "grad_norm": 0.3358938500663993, "learning_rate": 2.521374462080646e-05, "loss": 0.3661, "num_tokens": 2057436737.0, "step": 2696 }, { "epoch": 3.680037552274473, "grad_norm": 0.35066222658990365, "learning_rate": 2.5203962122195915e-05, "loss": 0.3755, "num_tokens": 2058121427.0, "step": 2697 }, { "epoch": 3.681403089528036, "grad_norm": 0.4056302986106102, "learning_rate": 2.5194178646195262e-05, "loss": 0.356, "num_tokens": 2058847138.0, "step": 2698 }, { "epoch": 3.6827686267815993, "grad_norm": 0.3145023168707672, "learning_rate": 2.5184394195789036e-05, "loss": 0.3666, "num_tokens": 2059574393.0, "step": 2699 }, { "epoch": 3.6841341640351626, "grad_norm": 0.3367983752832853, "learning_rate": 2.517460877396205e-05, "loss": 0.3452, "num_tokens": 2060297424.0, "step": 2700 }, { "epoch": 3.685499701288726, "grad_norm": 0.33461492811840715, "learning_rate": 2.5164822383699403e-05, "loss": 0.3743, "num_tokens": 2061085934.0, "step": 2701 }, { "epoch": 3.686865238542289, "grad_norm": 0.31687722209813163, "learning_rate": 2.5155035027986502e-05, "loss": 0.3782, "num_tokens": 2061849310.0, "step": 2702 }, { "epoch": 3.6882307757958523, "grad_norm": 0.32637759778037734, "learning_rate": 2.514524670980905e-05, "loss": 0.3803, "num_tokens": 2062601441.0, "step": 2703 }, { "epoch": 3.6895963130494156, "grad_norm": 0.36641605792589255, "learning_rate": 2.5135457432153038e-05, "loss": 0.3859, "num_tokens": 2063379133.0, "step": 2704 }, { "epoch": 3.690961850302979, "grad_norm": 0.3423907691721463, "learning_rate": 2.512566719800475e-05, "loss": 0.3787, "num_tokens": 2064167002.0, "step": 2705 }, { "epoch": 3.6923273875565417, "grad_norm": 0.3363422554860873, "learning_rate": 2.5115876010350762e-05, "loss": 0.3603, "num_tokens": 2064910953.0, "step": 2706 }, { "epoch": 3.693692924810105, "grad_norm": 0.36398714376617497, "learning_rate": 2.510608387217793e-05, "loss": 0.3724, "num_tokens": 2065690666.0, "step": 2707 }, { "epoch": 3.695058462063668, "grad_norm": 0.34351590482905997, "learning_rate": 2.509629078647343e-05, "loss": 0.3967, "num_tokens": 2066501502.0, "step": 2708 }, { "epoch": 3.6964239993172314, "grad_norm": 0.3476867747239757, "learning_rate": 2.50864967562247e-05, "loss": 0.3633, "num_tokens": 2067216923.0, "step": 2709 }, { "epoch": 3.6977895365707947, "grad_norm": 0.3482997313928131, "learning_rate": 2.5076701784419468e-05, "loss": 0.3675, "num_tokens": 2068030916.0, "step": 2710 }, { "epoch": 3.699155073824358, "grad_norm": 0.34745062723911985, "learning_rate": 2.5066905874045766e-05, "loss": 0.3722, "num_tokens": 2068880764.0, "step": 2711 }, { "epoch": 3.7005206110779207, "grad_norm": 0.3834712317069338, "learning_rate": 2.50571090280919e-05, "loss": 0.3718, "num_tokens": 2069671861.0, "step": 2712 }, { "epoch": 3.701886148331484, "grad_norm": 0.33821524185231827, "learning_rate": 2.5047311249546464e-05, "loss": 0.3724, "num_tokens": 2070450799.0, "step": 2713 }, { "epoch": 3.7032516855850472, "grad_norm": 0.35457788764085924, "learning_rate": 2.503751254139833e-05, "loss": 0.3691, "num_tokens": 2071242957.0, "step": 2714 }, { "epoch": 3.7046172228386105, "grad_norm": 0.33561562808720996, "learning_rate": 2.5027712906636664e-05, "loss": 0.386, "num_tokens": 2072099839.0, "step": 2715 }, { "epoch": 3.7059827600921738, "grad_norm": 0.3409697361067954, "learning_rate": 2.5017912348250904e-05, "loss": 0.3838, "num_tokens": 2072865067.0, "step": 2716 }, { "epoch": 3.707348297345737, "grad_norm": 0.38057842207138287, "learning_rate": 2.5008110869230787e-05, "loss": 0.3821, "num_tokens": 2073667708.0, "step": 2717 }, { "epoch": 3.7087138345993003, "grad_norm": 0.3607516669742086, "learning_rate": 2.4998308472566315e-05, "loss": 0.3858, "num_tokens": 2074422608.0, "step": 2718 }, { "epoch": 3.7100793718528635, "grad_norm": 0.35230766212032655, "learning_rate": 2.4988505161247768e-05, "loss": 0.3598, "num_tokens": 2075228491.0, "step": 2719 }, { "epoch": 3.7114449091064268, "grad_norm": 0.3227029049298409, "learning_rate": 2.497870093826572e-05, "loss": 0.3926, "num_tokens": 2075963678.0, "step": 2720 }, { "epoch": 3.71281044635999, "grad_norm": 0.3812914929257911, "learning_rate": 2.4968895806611014e-05, "loss": 0.369, "num_tokens": 2076709381.0, "step": 2721 }, { "epoch": 3.714175983613553, "grad_norm": 0.3562109172687739, "learning_rate": 2.4959089769274773e-05, "loss": 0.3541, "num_tokens": 2077410213.0, "step": 2722 }, { "epoch": 3.715541520867116, "grad_norm": 0.33858668758860105, "learning_rate": 2.4949282829248382e-05, "loss": 0.375, "num_tokens": 2078195231.0, "step": 2723 }, { "epoch": 3.7169070581206793, "grad_norm": 0.347068139414361, "learning_rate": 2.493947498952353e-05, "loss": 0.3856, "num_tokens": 2079025859.0, "step": 2724 }, { "epoch": 3.7182725953742426, "grad_norm": 0.3638541443546003, "learning_rate": 2.4929666253092147e-05, "loss": 0.3646, "num_tokens": 2079723610.0, "step": 2725 }, { "epoch": 3.719638132627806, "grad_norm": 0.39893925295512034, "learning_rate": 2.4919856622946476e-05, "loss": 0.3573, "num_tokens": 2080443951.0, "step": 2726 }, { "epoch": 3.721003669881369, "grad_norm": 0.3762229363541838, "learning_rate": 2.4910046102078993e-05, "loss": 0.3933, "num_tokens": 2081248047.0, "step": 2727 }, { "epoch": 3.722369207134932, "grad_norm": 0.3444743597363369, "learning_rate": 2.490023469348246e-05, "loss": 0.3606, "num_tokens": 2081990849.0, "step": 2728 }, { "epoch": 3.723734744388495, "grad_norm": 0.361020908024277, "learning_rate": 2.489042240014993e-05, "loss": 0.3598, "num_tokens": 2082732321.0, "step": 2729 }, { "epoch": 3.7251002816420584, "grad_norm": 0.34349318265714834, "learning_rate": 2.488060922507469e-05, "loss": 0.3543, "num_tokens": 2083419807.0, "step": 2730 }, { "epoch": 3.7264658188956217, "grad_norm": 0.3539938658800011, "learning_rate": 2.487079517125034e-05, "loss": 0.3665, "num_tokens": 2084122527.0, "step": 2731 }, { "epoch": 3.727831356149185, "grad_norm": 0.3457560390671397, "learning_rate": 2.4860980241670687e-05, "loss": 0.3794, "num_tokens": 2084893016.0, "step": 2732 }, { "epoch": 3.729196893402748, "grad_norm": 0.3387804626820002, "learning_rate": 2.4851164439329872e-05, "loss": 0.366, "num_tokens": 2085649748.0, "step": 2733 }, { "epoch": 3.7305624306563114, "grad_norm": 0.3900698521867167, "learning_rate": 2.4841347767222254e-05, "loss": 0.3854, "num_tokens": 2086398509.0, "step": 2734 }, { "epoch": 3.7319279679098747, "grad_norm": 0.3469515912635092, "learning_rate": 2.4831530228342488e-05, "loss": 0.3865, "num_tokens": 2087238268.0, "step": 2735 }, { "epoch": 3.733293505163438, "grad_norm": 0.32464775903752235, "learning_rate": 2.4821711825685467e-05, "loss": 0.3824, "num_tokens": 2087998936.0, "step": 2736 }, { "epoch": 3.734659042417001, "grad_norm": 0.34156071510819846, "learning_rate": 2.4811892562246373e-05, "loss": 0.382, "num_tokens": 2088770875.0, "step": 2737 }, { "epoch": 3.736024579670564, "grad_norm": 0.33550465956215214, "learning_rate": 2.4802072441020624e-05, "loss": 0.3867, "num_tokens": 2089559813.0, "step": 2738 }, { "epoch": 3.7373901169241273, "grad_norm": 0.374706988753562, "learning_rate": 2.479225146500392e-05, "loss": 0.3544, "num_tokens": 2090266449.0, "step": 2739 }, { "epoch": 3.7387556541776905, "grad_norm": 0.33154571401419014, "learning_rate": 2.478242963719222e-05, "loss": 0.3767, "num_tokens": 2090959476.0, "step": 2740 }, { "epoch": 3.7401211914312538, "grad_norm": 0.37684996815326566, "learning_rate": 2.4772606960581733e-05, "loss": 0.3691, "num_tokens": 2091747617.0, "step": 2741 }, { "epoch": 3.741486728684817, "grad_norm": 0.3796532781910524, "learning_rate": 2.476278343816893e-05, "loss": 0.3571, "num_tokens": 2092555125.0, "step": 2742 }, { "epoch": 3.7428522659383803, "grad_norm": 0.31648157724166975, "learning_rate": 2.4752959072950553e-05, "loss": 0.3494, "num_tokens": 2093277052.0, "step": 2743 }, { "epoch": 3.744217803191943, "grad_norm": 0.33963474562247087, "learning_rate": 2.4743133867923582e-05, "loss": 0.3781, "num_tokens": 2093993271.0, "step": 2744 }, { "epoch": 3.7455833404455063, "grad_norm": 0.38722366288460774, "learning_rate": 2.4733307826085265e-05, "loss": 0.3715, "num_tokens": 2094738564.0, "step": 2745 }, { "epoch": 3.7469488776990696, "grad_norm": 0.40226732856402586, "learning_rate": 2.4723480950433095e-05, "loss": 0.362, "num_tokens": 2095489229.0, "step": 2746 }, { "epoch": 3.748314414952633, "grad_norm": 0.3310450705692678, "learning_rate": 2.4713653243964833e-05, "loss": 0.3831, "num_tokens": 2096289641.0, "step": 2747 }, { "epoch": 3.749679952206196, "grad_norm": 0.4106306148337517, "learning_rate": 2.470382470967848e-05, "loss": 0.3735, "num_tokens": 2097060714.0, "step": 2748 }, { "epoch": 3.7510454894597594, "grad_norm": 0.3646963270888365, "learning_rate": 2.4693995350572304e-05, "loss": 0.3661, "num_tokens": 2097783057.0, "step": 2749 }, { "epoch": 3.7524110267133226, "grad_norm": 0.37132126179420816, "learning_rate": 2.468416516964481e-05, "loss": 0.3773, "num_tokens": 2098559100.0, "step": 2750 }, { "epoch": 3.753776563966886, "grad_norm": 0.3308660735598623, "learning_rate": 2.467433416989477e-05, "loss": 0.3726, "num_tokens": 2099299098.0, "step": 2751 }, { "epoch": 3.755142101220449, "grad_norm": 0.3573158822342822, "learning_rate": 2.4664502354321193e-05, "loss": 0.3616, "num_tokens": 2100012292.0, "step": 2752 }, { "epoch": 3.7565076384740124, "grad_norm": 0.36174732352173383, "learning_rate": 2.465466972592333e-05, "loss": 0.3859, "num_tokens": 2100781243.0, "step": 2753 }, { "epoch": 3.757873175727575, "grad_norm": 0.3581914620404961, "learning_rate": 2.4644836287700704e-05, "loss": 0.3719, "num_tokens": 2101515045.0, "step": 2754 }, { "epoch": 3.7592387129811384, "grad_norm": 0.3568866323324211, "learning_rate": 2.463500204265306e-05, "loss": 0.3627, "num_tokens": 2102278399.0, "step": 2755 }, { "epoch": 3.7606042502347017, "grad_norm": 0.2912293668190274, "learning_rate": 2.4625166993780405e-05, "loss": 0.3717, "num_tokens": 2103039788.0, "step": 2756 }, { "epoch": 3.761969787488265, "grad_norm": 0.3536917028682892, "learning_rate": 2.4615331144082984e-05, "loss": 0.3803, "num_tokens": 2103772826.0, "step": 2757 }, { "epoch": 3.763335324741828, "grad_norm": 0.3536578598841026, "learning_rate": 2.4605494496561302e-05, "loss": 0.3828, "num_tokens": 2104529421.0, "step": 2758 }, { "epoch": 3.7647008619953914, "grad_norm": 0.33905166293858635, "learning_rate": 2.459565705421607e-05, "loss": 0.3757, "num_tokens": 2105241715.0, "step": 2759 }, { "epoch": 3.7660663992489543, "grad_norm": 0.3616180311056316, "learning_rate": 2.458581882004829e-05, "loss": 0.3712, "num_tokens": 2106076525.0, "step": 2760 }, { "epoch": 3.7674319365025175, "grad_norm": 0.3364745352875065, "learning_rate": 2.4575979797059165e-05, "loss": 0.395, "num_tokens": 2106873302.0, "step": 2761 }, { "epoch": 3.7687974737560808, "grad_norm": 0.35645032173193536, "learning_rate": 2.456613998825016e-05, "loss": 0.3842, "num_tokens": 2107671218.0, "step": 2762 }, { "epoch": 3.770163011009644, "grad_norm": 0.3449759073686294, "learning_rate": 2.4556299396622976e-05, "loss": 0.3622, "num_tokens": 2108421521.0, "step": 2763 }, { "epoch": 3.7715285482632073, "grad_norm": 0.3509210381631342, "learning_rate": 2.454645802517955e-05, "loss": 0.3844, "num_tokens": 2109174248.0, "step": 2764 }, { "epoch": 3.7728940855167705, "grad_norm": 0.36587887449657647, "learning_rate": 2.4536615876922056e-05, "loss": 0.3617, "num_tokens": 2109856941.0, "step": 2765 }, { "epoch": 3.7742596227703338, "grad_norm": 0.33081738383868203, "learning_rate": 2.452677295485291e-05, "loss": 0.3701, "num_tokens": 2110656486.0, "step": 2766 }, { "epoch": 3.775625160023897, "grad_norm": 0.3392282837475415, "learning_rate": 2.4516929261974763e-05, "loss": 0.3781, "num_tokens": 2111457511.0, "step": 2767 }, { "epoch": 3.7769906972774603, "grad_norm": 0.3195509396040135, "learning_rate": 2.4507084801290502e-05, "loss": 0.3705, "num_tokens": 2112183886.0, "step": 2768 }, { "epoch": 3.7783562345310235, "grad_norm": 0.31992897989058366, "learning_rate": 2.4497239575803233e-05, "loss": 0.3727, "num_tokens": 2112963378.0, "step": 2769 }, { "epoch": 3.7797217717845863, "grad_norm": 0.333424015605469, "learning_rate": 2.4487393588516316e-05, "loss": 0.357, "num_tokens": 2113739620.0, "step": 2770 }, { "epoch": 3.7810873090381496, "grad_norm": 0.3120811984340342, "learning_rate": 2.4477546842433338e-05, "loss": 0.404, "num_tokens": 2114482553.0, "step": 2771 }, { "epoch": 3.782452846291713, "grad_norm": 0.3757004671446502, "learning_rate": 2.4467699340558108e-05, "loss": 0.3516, "num_tokens": 2115305947.0, "step": 2772 }, { "epoch": 3.783818383545276, "grad_norm": 0.2949637376472853, "learning_rate": 2.445785108589467e-05, "loss": 0.3828, "num_tokens": 2116055659.0, "step": 2773 }, { "epoch": 3.7851839207988394, "grad_norm": 0.380211266610594, "learning_rate": 2.4448002081447323e-05, "loss": 0.3859, "num_tokens": 2116843763.0, "step": 2774 }, { "epoch": 3.7865494580524026, "grad_norm": 0.3572702810128483, "learning_rate": 2.4438152330220546e-05, "loss": 0.3726, "num_tokens": 2117637942.0, "step": 2775 }, { "epoch": 3.7879149953059654, "grad_norm": 0.3339614287364625, "learning_rate": 2.4428301835219085e-05, "loss": 0.3592, "num_tokens": 2118347747.0, "step": 2776 }, { "epoch": 3.7892805325595287, "grad_norm": 0.3564000483865899, "learning_rate": 2.441845059944789e-05, "loss": 0.3597, "num_tokens": 2119104242.0, "step": 2777 }, { "epoch": 3.790646069813092, "grad_norm": 0.3378141244774779, "learning_rate": 2.4408598625912154e-05, "loss": 0.3897, "num_tokens": 2119900035.0, "step": 2778 }, { "epoch": 3.792011607066655, "grad_norm": 0.36541174179179275, "learning_rate": 2.439874591761728e-05, "loss": 0.3794, "num_tokens": 2120604547.0, "step": 2779 }, { "epoch": 3.7933771443202184, "grad_norm": 0.32126338088466627, "learning_rate": 2.438889247756891e-05, "loss": 0.3812, "num_tokens": 2121377425.0, "step": 2780 }, { "epoch": 3.7947426815737817, "grad_norm": 0.37137715165662, "learning_rate": 2.4379038308772897e-05, "loss": 0.3632, "num_tokens": 2122167477.0, "step": 2781 }, { "epoch": 3.796108218827345, "grad_norm": 0.31469920245027927, "learning_rate": 2.4369183414235328e-05, "loss": 0.3604, "num_tokens": 2122968575.0, "step": 2782 }, { "epoch": 3.797473756080908, "grad_norm": 0.3887959313677145, "learning_rate": 2.43593277969625e-05, "loss": 0.3638, "num_tokens": 2123699037.0, "step": 2783 }, { "epoch": 3.7988392933344715, "grad_norm": 0.34994311667086414, "learning_rate": 2.4349471459960935e-05, "loss": 0.3671, "num_tokens": 2124458783.0, "step": 2784 }, { "epoch": 3.8002048305880347, "grad_norm": 0.345123448411354, "learning_rate": 2.433961440623737e-05, "loss": 0.3786, "num_tokens": 2125297712.0, "step": 2785 }, { "epoch": 3.8015703678415975, "grad_norm": 0.32452204259732237, "learning_rate": 2.432975663879877e-05, "loss": 0.3679, "num_tokens": 2126064413.0, "step": 2786 }, { "epoch": 3.8029359050951608, "grad_norm": 0.34153539668850136, "learning_rate": 2.4319898160652313e-05, "loss": 0.3816, "num_tokens": 2126828111.0, "step": 2787 }, { "epoch": 3.804301442348724, "grad_norm": 0.3169320146267306, "learning_rate": 2.4310038974805394e-05, "loss": 0.3638, "num_tokens": 2127641641.0, "step": 2788 }, { "epoch": 3.8056669796022873, "grad_norm": 0.3335351089051143, "learning_rate": 2.430017908426563e-05, "loss": 0.3492, "num_tokens": 2128350185.0, "step": 2789 }, { "epoch": 3.8070325168558505, "grad_norm": 0.32562671522463954, "learning_rate": 2.4290318492040835e-05, "loss": 0.3825, "num_tokens": 2129111241.0, "step": 2790 }, { "epoch": 3.808398054109414, "grad_norm": 0.3385970208667263, "learning_rate": 2.428045720113905e-05, "loss": 0.3673, "num_tokens": 2129806659.0, "step": 2791 }, { "epoch": 3.8097635913629766, "grad_norm": 0.33573365111908854, "learning_rate": 2.4270595214568537e-05, "loss": 0.3733, "num_tokens": 2130571628.0, "step": 2792 }, { "epoch": 3.81112912861654, "grad_norm": 0.3319231492288081, "learning_rate": 2.426073253533775e-05, "loss": 0.3886, "num_tokens": 2131389267.0, "step": 2793 }, { "epoch": 3.812494665870103, "grad_norm": 0.3342162489400526, "learning_rate": 2.4250869166455368e-05, "loss": 0.3913, "num_tokens": 2132215214.0, "step": 2794 }, { "epoch": 3.8138602031236664, "grad_norm": 0.35228188799655585, "learning_rate": 2.424100511093029e-05, "loss": 0.3463, "num_tokens": 2132958581.0, "step": 2795 }, { "epoch": 3.8152257403772296, "grad_norm": 0.33047558658743303, "learning_rate": 2.4231140371771592e-05, "loss": 0.3851, "num_tokens": 2133691840.0, "step": 2796 }, { "epoch": 3.816591277630793, "grad_norm": 0.33164498354964167, "learning_rate": 2.422127495198859e-05, "loss": 0.359, "num_tokens": 2134449953.0, "step": 2797 }, { "epoch": 3.817956814884356, "grad_norm": 0.32523610786249296, "learning_rate": 2.4211408854590804e-05, "loss": 0.3703, "num_tokens": 2135283520.0, "step": 2798 }, { "epoch": 3.8193223521379194, "grad_norm": 0.32266800278857766, "learning_rate": 2.4201542082587942e-05, "loss": 0.365, "num_tokens": 2136064065.0, "step": 2799 }, { "epoch": 3.8206878893914826, "grad_norm": 0.34539668296406795, "learning_rate": 2.4191674638989925e-05, "loss": 0.3783, "num_tokens": 2136834562.0, "step": 2800 }, { "epoch": 3.822053426645046, "grad_norm": 0.3501107420596937, "learning_rate": 2.4181806526806893e-05, "loss": 0.3633, "num_tokens": 2137572153.0, "step": 2801 }, { "epoch": 3.823418963898609, "grad_norm": 0.3421261286755689, "learning_rate": 2.4171937749049175e-05, "loss": 0.3721, "num_tokens": 2138372245.0, "step": 2802 }, { "epoch": 3.824784501152172, "grad_norm": 0.34068991037524454, "learning_rate": 2.4162068308727307e-05, "loss": 0.3743, "num_tokens": 2139168394.0, "step": 2803 }, { "epoch": 3.826150038405735, "grad_norm": 0.3277886613736438, "learning_rate": 2.4152198208852036e-05, "loss": 0.3712, "num_tokens": 2139866388.0, "step": 2804 }, { "epoch": 3.8275155756592985, "grad_norm": 0.3664254637165826, "learning_rate": 2.4142327452434292e-05, "loss": 0.3669, "num_tokens": 2140640628.0, "step": 2805 }, { "epoch": 3.8288811129128617, "grad_norm": 0.3429303659219578, "learning_rate": 2.413245604248521e-05, "loss": 0.3651, "num_tokens": 2141444463.0, "step": 2806 }, { "epoch": 3.830246650166425, "grad_norm": 0.36483090135482693, "learning_rate": 2.4122583982016147e-05, "loss": 0.3568, "num_tokens": 2142203551.0, "step": 2807 }, { "epoch": 3.831612187419988, "grad_norm": 0.34406289422119374, "learning_rate": 2.411271127403864e-05, "loss": 0.355, "num_tokens": 2142951167.0, "step": 2808 }, { "epoch": 3.832977724673551, "grad_norm": 0.30553792811472463, "learning_rate": 2.41028379215644e-05, "loss": 0.3799, "num_tokens": 2143700954.0, "step": 2809 }, { "epoch": 3.8343432619271143, "grad_norm": 0.35832949890368715, "learning_rate": 2.409296392760538e-05, "loss": 0.3771, "num_tokens": 2144482898.0, "step": 2810 }, { "epoch": 3.8357087991806775, "grad_norm": 0.33607270766270486, "learning_rate": 2.4083089295173716e-05, "loss": 0.3776, "num_tokens": 2145292145.0, "step": 2811 }, { "epoch": 3.837074336434241, "grad_norm": 0.340818077582055, "learning_rate": 2.4073214027281705e-05, "loss": 0.3825, "num_tokens": 2146005386.0, "step": 2812 }, { "epoch": 3.838439873687804, "grad_norm": 0.3250044016743973, "learning_rate": 2.4063338126941888e-05, "loss": 0.3791, "num_tokens": 2146727011.0, "step": 2813 }, { "epoch": 3.8398054109413673, "grad_norm": 0.37961015368371775, "learning_rate": 2.405346159716696e-05, "loss": 0.3759, "num_tokens": 2147449349.0, "step": 2814 }, { "epoch": 3.8411709481949305, "grad_norm": 0.344935216043625, "learning_rate": 2.4043584440969822e-05, "loss": 0.3541, "num_tokens": 2148238855.0, "step": 2815 }, { "epoch": 3.842536485448494, "grad_norm": 0.36448077534133644, "learning_rate": 2.4033706661363576e-05, "loss": 0.3813, "num_tokens": 2149026579.0, "step": 2816 }, { "epoch": 3.843902022702057, "grad_norm": 0.3175113490972638, "learning_rate": 2.40238282613615e-05, "loss": 0.3625, "num_tokens": 2149752579.0, "step": 2817 }, { "epoch": 3.8452675599556203, "grad_norm": 0.352088868650712, "learning_rate": 2.4013949243977064e-05, "loss": 0.374, "num_tokens": 2150432996.0, "step": 2818 }, { "epoch": 3.846633097209183, "grad_norm": 0.35636701160702833, "learning_rate": 2.4004069612223933e-05, "loss": 0.3577, "num_tokens": 2151121894.0, "step": 2819 }, { "epoch": 3.8479986344627464, "grad_norm": 0.3188106431821294, "learning_rate": 2.3994189369115953e-05, "loss": 0.363, "num_tokens": 2151894627.0, "step": 2820 }, { "epoch": 3.8493641717163096, "grad_norm": 0.32339147078849045, "learning_rate": 2.3984308517667156e-05, "loss": 0.3571, "num_tokens": 2152610488.0, "step": 2821 }, { "epoch": 3.850729708969873, "grad_norm": 0.3415864658790018, "learning_rate": 2.3974427060891765e-05, "loss": 0.3832, "num_tokens": 2153407328.0, "step": 2822 }, { "epoch": 3.852095246223436, "grad_norm": 0.3734130691398444, "learning_rate": 2.396454500180418e-05, "loss": 0.3807, "num_tokens": 2154180150.0, "step": 2823 }, { "epoch": 3.8534607834769994, "grad_norm": 0.3243810144886617, "learning_rate": 2.3954662343418994e-05, "loss": 0.3664, "num_tokens": 2154897339.0, "step": 2824 }, { "epoch": 3.854826320730562, "grad_norm": 0.32026978268278944, "learning_rate": 2.3944779088750975e-05, "loss": 0.3741, "num_tokens": 2155695471.0, "step": 2825 }, { "epoch": 3.8561918579841254, "grad_norm": 0.35436134162987565, "learning_rate": 2.3934895240815075e-05, "loss": 0.3747, "num_tokens": 2156512906.0, "step": 2826 }, { "epoch": 3.8575573952376887, "grad_norm": 0.3429988069925846, "learning_rate": 2.392501080262643e-05, "loss": 0.3624, "num_tokens": 2157319399.0, "step": 2827 }, { "epoch": 3.858922932491252, "grad_norm": 0.3184808450787832, "learning_rate": 2.3915125777200355e-05, "loss": 0.3639, "num_tokens": 2158040814.0, "step": 2828 }, { "epoch": 3.860288469744815, "grad_norm": 0.32857513480065415, "learning_rate": 2.390524016755235e-05, "loss": 0.3571, "num_tokens": 2158781074.0, "step": 2829 }, { "epoch": 3.8616540069983785, "grad_norm": 0.32256600280042624, "learning_rate": 2.3895353976698072e-05, "loss": 0.3871, "num_tokens": 2159516726.0, "step": 2830 }, { "epoch": 3.8630195442519417, "grad_norm": 0.3680390420358965, "learning_rate": 2.3885467207653376e-05, "loss": 0.3823, "num_tokens": 2160199172.0, "step": 2831 }, { "epoch": 3.864385081505505, "grad_norm": 0.3562133196762885, "learning_rate": 2.3875579863434295e-05, "loss": 0.3859, "num_tokens": 2160921798.0, "step": 2832 }, { "epoch": 3.8657506187590682, "grad_norm": 0.32420047900832344, "learning_rate": 2.386569194705701e-05, "loss": 0.3762, "num_tokens": 2161608658.0, "step": 2833 }, { "epoch": 3.8671161560126315, "grad_norm": 0.37175450824999223, "learning_rate": 2.3855803461537917e-05, "loss": 0.3772, "num_tokens": 2162449314.0, "step": 2834 }, { "epoch": 3.8684816932661943, "grad_norm": 0.3258489631359511, "learning_rate": 2.3845914409893554e-05, "loss": 0.3784, "num_tokens": 2163216093.0, "step": 2835 }, { "epoch": 3.8698472305197575, "grad_norm": 0.3155743390851654, "learning_rate": 2.383602479514064e-05, "loss": 0.3782, "num_tokens": 2164044896.0, "step": 2836 }, { "epoch": 3.871212767773321, "grad_norm": 0.3510262992809789, "learning_rate": 2.3826134620296076e-05, "loss": 0.3775, "num_tokens": 2164853966.0, "step": 2837 }, { "epoch": 3.872578305026884, "grad_norm": 0.33587892511847867, "learning_rate": 2.3816243888376925e-05, "loss": 0.3745, "num_tokens": 2165651610.0, "step": 2838 }, { "epoch": 3.8739438422804473, "grad_norm": 0.3029748299634413, "learning_rate": 2.3806352602400415e-05, "loss": 0.3818, "num_tokens": 2166371873.0, "step": 2839 }, { "epoch": 3.8753093795340106, "grad_norm": 0.34343141088271784, "learning_rate": 2.3796460765383954e-05, "loss": 0.3712, "num_tokens": 2167075399.0, "step": 2840 }, { "epoch": 3.8766749167875734, "grad_norm": 0.3052580584760564, "learning_rate": 2.378656838034511e-05, "loss": 0.3637, "num_tokens": 2167773417.0, "step": 2841 }, { "epoch": 3.8780404540411366, "grad_norm": 0.32468430211897825, "learning_rate": 2.3776675450301628e-05, "loss": 0.3706, "num_tokens": 2168528681.0, "step": 2842 }, { "epoch": 3.8794059912947, "grad_norm": 0.3427880439103751, "learning_rate": 2.3766781978271404e-05, "loss": 0.3776, "num_tokens": 2169247210.0, "step": 2843 }, { "epoch": 3.880771528548263, "grad_norm": 0.341575528363674, "learning_rate": 2.3756887967272514e-05, "loss": 0.3891, "num_tokens": 2170036730.0, "step": 2844 }, { "epoch": 3.8821370658018264, "grad_norm": 0.3365479538941513, "learning_rate": 2.3746993420323182e-05, "loss": 0.3767, "num_tokens": 2170830379.0, "step": 2845 }, { "epoch": 3.8835026030553896, "grad_norm": 0.3247900037794101, "learning_rate": 2.3737098340441818e-05, "loss": 0.3666, "num_tokens": 2171494407.0, "step": 2846 }, { "epoch": 3.884868140308953, "grad_norm": 0.3184654118537673, "learning_rate": 2.3727202730646976e-05, "loss": 0.403, "num_tokens": 2172275877.0, "step": 2847 }, { "epoch": 3.886233677562516, "grad_norm": 0.32301412201046265, "learning_rate": 2.3717306593957382e-05, "loss": 0.3646, "num_tokens": 2173008333.0, "step": 2848 }, { "epoch": 3.8875992148160794, "grad_norm": 0.33023357067413306, "learning_rate": 2.3707409933391918e-05, "loss": 0.3815, "num_tokens": 2173782623.0, "step": 2849 }, { "epoch": 3.8889647520696426, "grad_norm": 0.34133266782723765, "learning_rate": 2.3697512751969617e-05, "loss": 0.3561, "num_tokens": 2174539187.0, "step": 2850 }, { "epoch": 3.8903302893232055, "grad_norm": 0.3391953488332205, "learning_rate": 2.3687615052709692e-05, "loss": 0.3753, "num_tokens": 2175233857.0, "step": 2851 }, { "epoch": 3.8916958265767687, "grad_norm": 0.39349856088897794, "learning_rate": 2.36777168386315e-05, "loss": 0.3698, "num_tokens": 2175985301.0, "step": 2852 }, { "epoch": 3.893061363830332, "grad_norm": 0.3158172021878948, "learning_rate": 2.3667818112754563e-05, "loss": 0.384, "num_tokens": 2176673538.0, "step": 2853 }, { "epoch": 3.894426901083895, "grad_norm": 0.3695989639923421, "learning_rate": 2.365791887809854e-05, "loss": 0.3624, "num_tokens": 2177477467.0, "step": 2854 }, { "epoch": 3.8957924383374585, "grad_norm": 0.31647855973440003, "learning_rate": 2.3648019137683264e-05, "loss": 0.3628, "num_tokens": 2178287413.0, "step": 2855 }, { "epoch": 3.8971579755910217, "grad_norm": 0.3139656656721149, "learning_rate": 2.363811889452872e-05, "loss": 0.3765, "num_tokens": 2179088898.0, "step": 2856 }, { "epoch": 3.8985235128445845, "grad_norm": 0.33856709315890904, "learning_rate": 2.3628218151655043e-05, "loss": 0.3482, "num_tokens": 2179824483.0, "step": 2857 }, { "epoch": 3.899889050098148, "grad_norm": 0.3513118891199948, "learning_rate": 2.361831691208252e-05, "loss": 0.393, "num_tokens": 2180643390.0, "step": 2858 }, { "epoch": 3.901254587351711, "grad_norm": 0.3127465999564405, "learning_rate": 2.360841517883159e-05, "loss": 0.3701, "num_tokens": 2181382910.0, "step": 2859 }, { "epoch": 3.9026201246052743, "grad_norm": 0.3181417916532469, "learning_rate": 2.3598512954922845e-05, "loss": 0.3584, "num_tokens": 2182167083.0, "step": 2860 }, { "epoch": 3.9039856618588376, "grad_norm": 0.31776780392301096, "learning_rate": 2.358861024337703e-05, "loss": 0.3696, "num_tokens": 2182941310.0, "step": 2861 }, { "epoch": 3.905351199112401, "grad_norm": 0.30971113053727817, "learning_rate": 2.3578707047215013e-05, "loss": 0.3701, "num_tokens": 2183721088.0, "step": 2862 }, { "epoch": 3.906716736365964, "grad_norm": 0.33714249573616906, "learning_rate": 2.356880336945785e-05, "loss": 0.3792, "num_tokens": 2184502423.0, "step": 2863 }, { "epoch": 3.9080822736195273, "grad_norm": 0.30409239725878084, "learning_rate": 2.355889921312672e-05, "loss": 0.3444, "num_tokens": 2185174272.0, "step": 2864 }, { "epoch": 3.9094478108730906, "grad_norm": 0.3130683192437444, "learning_rate": 2.3548994581242948e-05, "loss": 0.3763, "num_tokens": 2185831387.0, "step": 2865 }, { "epoch": 3.910813348126654, "grad_norm": 0.33832383265263405, "learning_rate": 2.353908947682801e-05, "loss": 0.3727, "num_tokens": 2186565981.0, "step": 2866 }, { "epoch": 3.9121788853802166, "grad_norm": 0.31512295385016975, "learning_rate": 2.3529183902903523e-05, "loss": 0.3639, "num_tokens": 2187260143.0, "step": 2867 }, { "epoch": 3.91354442263378, "grad_norm": 0.33133761914603865, "learning_rate": 2.3519277862491255e-05, "loss": 0.3721, "num_tokens": 2188040523.0, "step": 2868 }, { "epoch": 3.914909959887343, "grad_norm": 0.31581833043396224, "learning_rate": 2.350937135861311e-05, "loss": 0.3871, "num_tokens": 2188818887.0, "step": 2869 }, { "epoch": 3.9162754971409064, "grad_norm": 0.28744932673260776, "learning_rate": 2.3499464394291115e-05, "loss": 0.3876, "num_tokens": 2189578574.0, "step": 2870 }, { "epoch": 3.9176410343944696, "grad_norm": 0.3537129070145767, "learning_rate": 2.348955697254748e-05, "loss": 0.3822, "num_tokens": 2190320830.0, "step": 2871 }, { "epoch": 3.919006571648033, "grad_norm": 0.3307575696614546, "learning_rate": 2.3479649096404518e-05, "loss": 0.3735, "num_tokens": 2191045672.0, "step": 2872 }, { "epoch": 3.9203721089015957, "grad_norm": 0.3181066108520781, "learning_rate": 2.346974076888469e-05, "loss": 0.3877, "num_tokens": 2191844236.0, "step": 2873 }, { "epoch": 3.921737646155159, "grad_norm": 0.3278875344682696, "learning_rate": 2.3459831993010605e-05, "loss": 0.3549, "num_tokens": 2192534480.0, "step": 2874 }, { "epoch": 3.923103183408722, "grad_norm": 0.3207529368505076, "learning_rate": 2.3449922771805e-05, "loss": 0.3825, "num_tokens": 2193224003.0, "step": 2875 }, { "epoch": 3.9244687206622855, "grad_norm": 0.3503342079335991, "learning_rate": 2.3440013108290748e-05, "loss": 0.3756, "num_tokens": 2194045349.0, "step": 2876 }, { "epoch": 3.9258342579158487, "grad_norm": 0.31921574769981986, "learning_rate": 2.3430103005490863e-05, "loss": 0.3659, "num_tokens": 2194860407.0, "step": 2877 }, { "epoch": 3.927199795169412, "grad_norm": 0.3298272763837014, "learning_rate": 2.342019246642848e-05, "loss": 0.3788, "num_tokens": 2195668267.0, "step": 2878 }, { "epoch": 3.9285653324229752, "grad_norm": 0.3078959944435541, "learning_rate": 2.3410281494126878e-05, "loss": 0.36, "num_tokens": 2196456925.0, "step": 2879 }, { "epoch": 3.9299308696765385, "grad_norm": 0.31880690859558997, "learning_rate": 2.3400370091609464e-05, "loss": 0.3769, "num_tokens": 2197266440.0, "step": 2880 }, { "epoch": 3.9312964069301017, "grad_norm": 0.32207304978991846, "learning_rate": 2.3390458261899783e-05, "loss": 0.3555, "num_tokens": 2197904976.0, "step": 2881 }, { "epoch": 3.932661944183665, "grad_norm": 0.3279082228887125, "learning_rate": 2.33805460080215e-05, "loss": 0.3745, "num_tokens": 2198629232.0, "step": 2882 }, { "epoch": 3.934027481437228, "grad_norm": 0.34228639017523976, "learning_rate": 2.3370633332998413e-05, "loss": 0.3852, "num_tokens": 2199372275.0, "step": 2883 }, { "epoch": 3.935393018690791, "grad_norm": 0.3643024879956086, "learning_rate": 2.3360720239854465e-05, "loss": 0.3632, "num_tokens": 2200148857.0, "step": 2884 }, { "epoch": 3.9367585559443543, "grad_norm": 0.3183795286991628, "learning_rate": 2.3350806731613697e-05, "loss": 0.366, "num_tokens": 2200889604.0, "step": 2885 }, { "epoch": 3.9381240931979176, "grad_norm": 0.35251468355618704, "learning_rate": 2.3340892811300283e-05, "loss": 0.3826, "num_tokens": 2201713357.0, "step": 2886 }, { "epoch": 3.939489630451481, "grad_norm": 0.33277115550800224, "learning_rate": 2.3330978481938546e-05, "loss": 0.3709, "num_tokens": 2202495196.0, "step": 2887 }, { "epoch": 3.940855167705044, "grad_norm": 0.32219006529446725, "learning_rate": 2.3321063746552913e-05, "loss": 0.3608, "num_tokens": 2203258383.0, "step": 2888 }, { "epoch": 3.942220704958607, "grad_norm": 0.351066305328942, "learning_rate": 2.3311148608167944e-05, "loss": 0.3933, "num_tokens": 2204029846.0, "step": 2889 }, { "epoch": 3.94358624221217, "grad_norm": 0.3702142598633054, "learning_rate": 2.3301233069808315e-05, "loss": 0.402, "num_tokens": 2204786928.0, "step": 2890 }, { "epoch": 3.9449517794657334, "grad_norm": 0.35361880454910466, "learning_rate": 2.329131713449883e-05, "loss": 0.3698, "num_tokens": 2205531132.0, "step": 2891 }, { "epoch": 3.9463173167192966, "grad_norm": 0.34471519826927427, "learning_rate": 2.3281400805264408e-05, "loss": 0.3561, "num_tokens": 2206351932.0, "step": 2892 }, { "epoch": 3.94768285397286, "grad_norm": 0.32628868254651944, "learning_rate": 2.3271484085130093e-05, "loss": 0.3925, "num_tokens": 2207155947.0, "step": 2893 }, { "epoch": 3.949048391226423, "grad_norm": 0.362632049108661, "learning_rate": 2.3261566977121048e-05, "loss": 0.3689, "num_tokens": 2207996712.0, "step": 2894 }, { "epoch": 3.9504139284799864, "grad_norm": 0.3115941565117092, "learning_rate": 2.3251649484262546e-05, "loss": 0.3823, "num_tokens": 2208785219.0, "step": 2895 }, { "epoch": 3.9517794657335497, "grad_norm": 0.34028662632771023, "learning_rate": 2.3241731609579996e-05, "loss": 0.3624, "num_tokens": 2209527451.0, "step": 2896 }, { "epoch": 3.953145002987113, "grad_norm": 0.3456068362188313, "learning_rate": 2.323181335609891e-05, "loss": 0.3437, "num_tokens": 2210272063.0, "step": 2897 }, { "epoch": 3.954510540240676, "grad_norm": 0.3206267536499308, "learning_rate": 2.322189472684491e-05, "loss": 0.3717, "num_tokens": 2211077885.0, "step": 2898 }, { "epoch": 3.955876077494239, "grad_norm": 0.3377658019462627, "learning_rate": 2.321197572484374e-05, "loss": 0.3772, "num_tokens": 2211845544.0, "step": 2899 }, { "epoch": 3.9572416147478022, "grad_norm": 0.32998580634343555, "learning_rate": 2.3202056353121275e-05, "loss": 0.367, "num_tokens": 2212599513.0, "step": 2900 }, { "epoch": 3.9586071520013655, "grad_norm": 0.3355842509870003, "learning_rate": 2.319213661470346e-05, "loss": 0.3603, "num_tokens": 2213334775.0, "step": 2901 }, { "epoch": 3.9599726892549287, "grad_norm": 0.3238082609808336, "learning_rate": 2.3182216512616398e-05, "loss": 0.3777, "num_tokens": 2214186527.0, "step": 2902 }, { "epoch": 3.961338226508492, "grad_norm": 0.3370701082600817, "learning_rate": 2.3172296049886277e-05, "loss": 0.3707, "num_tokens": 2214913266.0, "step": 2903 }, { "epoch": 3.9627037637620552, "grad_norm": 0.3422721122791187, "learning_rate": 2.3162375229539395e-05, "loss": 0.3737, "num_tokens": 2215657562.0, "step": 2904 }, { "epoch": 3.9640693010156185, "grad_norm": 0.33086239623119373, "learning_rate": 2.3152454054602166e-05, "loss": 0.3641, "num_tokens": 2216420065.0, "step": 2905 }, { "epoch": 3.9654348382691813, "grad_norm": 0.31788797091068816, "learning_rate": 2.314253252810112e-05, "loss": 0.3509, "num_tokens": 2217179107.0, "step": 2906 }, { "epoch": 3.9668003755227446, "grad_norm": 0.3040655648137008, "learning_rate": 2.3132610653062873e-05, "loss": 0.355, "num_tokens": 2217875360.0, "step": 2907 }, { "epoch": 3.968165912776308, "grad_norm": 0.34875273650162314, "learning_rate": 2.3122688432514175e-05, "loss": 0.3544, "num_tokens": 2218615164.0, "step": 2908 }, { "epoch": 3.969531450029871, "grad_norm": 0.3508674584583984, "learning_rate": 2.3112765869481852e-05, "loss": 0.3617, "num_tokens": 2219426841.0, "step": 2909 }, { "epoch": 3.9708969872834343, "grad_norm": 0.3223898765563192, "learning_rate": 2.3102842966992847e-05, "loss": 0.3742, "num_tokens": 2220217734.0, "step": 2910 }, { "epoch": 3.9722625245369976, "grad_norm": 0.37270268399447676, "learning_rate": 2.309291972807421e-05, "loss": 0.3517, "num_tokens": 2220952925.0, "step": 2911 }, { "epoch": 3.973628061790561, "grad_norm": 0.303034101493459, "learning_rate": 2.3082996155753104e-05, "loss": 0.3597, "num_tokens": 2221740519.0, "step": 2912 }, { "epoch": 3.974993599044124, "grad_norm": 0.33509393025573714, "learning_rate": 2.307307225305676e-05, "loss": 0.3611, "num_tokens": 2222435362.0, "step": 2913 }, { "epoch": 3.9763591362976873, "grad_norm": 0.34579947662754934, "learning_rate": 2.3063148023012548e-05, "loss": 0.3657, "num_tokens": 2223221232.0, "step": 2914 }, { "epoch": 3.9777246735512506, "grad_norm": 0.3208673724250611, "learning_rate": 2.3053223468647915e-05, "loss": 0.3891, "num_tokens": 2224014841.0, "step": 2915 }, { "epoch": 3.9790902108048134, "grad_norm": 0.34049887513215216, "learning_rate": 2.304329859299042e-05, "loss": 0.3577, "num_tokens": 2224704990.0, "step": 2916 }, { "epoch": 3.9804557480583767, "grad_norm": 0.3115404449067152, "learning_rate": 2.3033373399067696e-05, "loss": 0.3835, "num_tokens": 2225370203.0, "step": 2917 }, { "epoch": 3.98182128531194, "grad_norm": 0.36392434143970964, "learning_rate": 2.3023447889907504e-05, "loss": 0.3777, "num_tokens": 2226114677.0, "step": 2918 }, { "epoch": 3.983186822565503, "grad_norm": 0.3296580937286341, "learning_rate": 2.301352206853768e-05, "loss": 0.3562, "num_tokens": 2226873356.0, "step": 2919 }, { "epoch": 3.9845523598190664, "grad_norm": 0.3050079349612666, "learning_rate": 2.3003595937986163e-05, "loss": 0.3698, "num_tokens": 2227558989.0, "step": 2920 }, { "epoch": 3.9859178970726297, "grad_norm": 0.3466758606215819, "learning_rate": 2.2993669501280995e-05, "loss": 0.3795, "num_tokens": 2228345563.0, "step": 2921 }, { "epoch": 3.9872834343261925, "grad_norm": 0.3459286824931879, "learning_rate": 2.2983742761450286e-05, "loss": 0.3766, "num_tokens": 2229074785.0, "step": 2922 }, { "epoch": 3.9886489715797557, "grad_norm": 0.33962973712900885, "learning_rate": 2.2973815721522273e-05, "loss": 0.3767, "num_tokens": 2229803139.0, "step": 2923 }, { "epoch": 3.990014508833319, "grad_norm": 0.36216576501966047, "learning_rate": 2.296388838452525e-05, "loss": 0.3621, "num_tokens": 2230574186.0, "step": 2924 }, { "epoch": 3.9913800460868822, "grad_norm": 0.3185786953731498, "learning_rate": 2.2953960753487623e-05, "loss": 0.3857, "num_tokens": 2231362494.0, "step": 2925 }, { "epoch": 3.9927455833404455, "grad_norm": 0.3429206977245957, "learning_rate": 2.294403283143788e-05, "loss": 0.3609, "num_tokens": 2232149136.0, "step": 2926 }, { "epoch": 3.9941111205940087, "grad_norm": 0.3320456266802594, "learning_rate": 2.2934104621404606e-05, "loss": 0.3616, "num_tokens": 2232837205.0, "step": 2927 }, { "epoch": 3.995476657847572, "grad_norm": 0.32806940908261245, "learning_rate": 2.2924176126416463e-05, "loss": 0.4005, "num_tokens": 2233653939.0, "step": 2928 }, { "epoch": 3.9968421951011353, "grad_norm": 0.3813633564535158, "learning_rate": 2.2914247349502206e-05, "loss": 0.3653, "num_tokens": 2234432943.0, "step": 2929 }, { "epoch": 3.9982077323546985, "grad_norm": 0.3091298635185265, "learning_rate": 2.2904318293690683e-05, "loss": 0.3772, "num_tokens": 2235176833.0, "step": 2930 }, { "epoch": 3.9995732696082618, "grad_norm": 0.35020840707859685, "learning_rate": 2.2894388962010798e-05, "loss": 0.3755, "num_tokens": 2235940096.0, "step": 2931 }, { "epoch": 4.0, "grad_norm": 0.3951734882554561, "learning_rate": 2.2884459357491586e-05, "loss": 0.3281, "num_tokens": 2236168544.0, "step": 2932 }, { "epoch": 4.001365537253563, "grad_norm": 0.5578045778932325, "learning_rate": 2.287452948316212e-05, "loss": 0.3526, "num_tokens": 2236987360.0, "step": 2933 }, { "epoch": 4.0027310745071265, "grad_norm": 0.44561037819136123, "learning_rate": 2.2864599342051584e-05, "loss": 0.354, "num_tokens": 2237758713.0, "step": 2934 }, { "epoch": 4.00409661176069, "grad_norm": 0.3378264294685049, "learning_rate": 2.2854668937189226e-05, "loss": 0.3513, "num_tokens": 2238587284.0, "step": 2935 }, { "epoch": 4.005462149014253, "grad_norm": 0.33004197470132207, "learning_rate": 2.2844738271604393e-05, "loss": 0.3484, "num_tokens": 2239322064.0, "step": 2936 }, { "epoch": 4.006827686267816, "grad_norm": 0.35699249978693104, "learning_rate": 2.2834807348326497e-05, "loss": 0.336, "num_tokens": 2240110907.0, "step": 2937 }, { "epoch": 4.0081932235213795, "grad_norm": 0.40992631248035155, "learning_rate": 2.2824876170385024e-05, "loss": 0.3482, "num_tokens": 2240870632.0, "step": 2938 }, { "epoch": 4.009558760774943, "grad_norm": 0.36441832964046195, "learning_rate": 2.2814944740809565e-05, "loss": 0.3416, "num_tokens": 2241638141.0, "step": 2939 }, { "epoch": 4.010924298028506, "grad_norm": 0.3786951181177968, "learning_rate": 2.280501306262975e-05, "loss": 0.3416, "num_tokens": 2242379438.0, "step": 2940 }, { "epoch": 4.012289835282068, "grad_norm": 0.3536208523041886, "learning_rate": 2.2795081138875313e-05, "loss": 0.3298, "num_tokens": 2243077339.0, "step": 2941 }, { "epoch": 4.013655372535632, "grad_norm": 0.38413558517316204, "learning_rate": 2.2785148972576052e-05, "loss": 0.3396, "num_tokens": 2243833947.0, "step": 2942 }, { "epoch": 4.015020909789195, "grad_norm": 0.32034821984020984, "learning_rate": 2.2775216566761843e-05, "loss": 0.3311, "num_tokens": 2244580544.0, "step": 2943 }, { "epoch": 4.016386447042758, "grad_norm": 0.3291370034145609, "learning_rate": 2.2765283924462632e-05, "loss": 0.3335, "num_tokens": 2245321912.0, "step": 2944 }, { "epoch": 4.017751984296321, "grad_norm": 0.3673195456792331, "learning_rate": 2.2755351048708428e-05, "loss": 0.3526, "num_tokens": 2246131359.0, "step": 2945 }, { "epoch": 4.019117521549885, "grad_norm": 0.36679038157241056, "learning_rate": 2.2745417942529343e-05, "loss": 0.3567, "num_tokens": 2246933675.0, "step": 2946 }, { "epoch": 4.020483058803448, "grad_norm": 0.36219182641687714, "learning_rate": 2.273548460895552e-05, "loss": 0.3396, "num_tokens": 2247689412.0, "step": 2947 }, { "epoch": 4.021848596057011, "grad_norm": 0.34212860552175944, "learning_rate": 2.2725551051017195e-05, "loss": 0.3313, "num_tokens": 2248483329.0, "step": 2948 }, { "epoch": 4.023214133310574, "grad_norm": 0.3137607544583463, "learning_rate": 2.271561727174467e-05, "loss": 0.3531, "num_tokens": 2249242772.0, "step": 2949 }, { "epoch": 4.024579670564138, "grad_norm": 0.3328301431732795, "learning_rate": 2.2705683274168297e-05, "loss": 0.329, "num_tokens": 2250015676.0, "step": 2950 }, { "epoch": 4.025945207817701, "grad_norm": 0.32665011918110565, "learning_rate": 2.269574906131852e-05, "loss": 0.3396, "num_tokens": 2250748202.0, "step": 2951 }, { "epoch": 4.027310745071264, "grad_norm": 0.31916583845784047, "learning_rate": 2.2685814636225837e-05, "loss": 0.3382, "num_tokens": 2251483866.0, "step": 2952 }, { "epoch": 4.028676282324827, "grad_norm": 0.3335654361243536, "learning_rate": 2.2675880001920806e-05, "loss": 0.3303, "num_tokens": 2252274994.0, "step": 2953 }, { "epoch": 4.030041819578391, "grad_norm": 0.32887350120290665, "learning_rate": 2.2665945161434057e-05, "loss": 0.3223, "num_tokens": 2252950740.0, "step": 2954 }, { "epoch": 4.031407356831954, "grad_norm": 0.34267688543004154, "learning_rate": 2.2656010117796278e-05, "loss": 0.3431, "num_tokens": 2253725540.0, "step": 2955 }, { "epoch": 4.032772894085517, "grad_norm": 0.30415296734923036, "learning_rate": 2.2646074874038227e-05, "loss": 0.3276, "num_tokens": 2254562951.0, "step": 2956 }, { "epoch": 4.03413843133908, "grad_norm": 0.3426308128890641, "learning_rate": 2.2636139433190706e-05, "loss": 0.3314, "num_tokens": 2255328853.0, "step": 2957 }, { "epoch": 4.035503968592643, "grad_norm": 0.36218930518754855, "learning_rate": 2.26262037982846e-05, "loss": 0.3444, "num_tokens": 2256029792.0, "step": 2958 }, { "epoch": 4.036869505846206, "grad_norm": 0.404089325235941, "learning_rate": 2.2616267972350823e-05, "loss": 0.3256, "num_tokens": 2256690837.0, "step": 2959 }, { "epoch": 4.038235043099769, "grad_norm": 0.3387980414724349, "learning_rate": 2.2606331958420377e-05, "loss": 0.3272, "num_tokens": 2257404593.0, "step": 2960 }, { "epoch": 4.039600580353333, "grad_norm": 0.37332131481295355, "learning_rate": 2.2596395759524316e-05, "loss": 0.3299, "num_tokens": 2258142808.0, "step": 2961 }, { "epoch": 4.040966117606896, "grad_norm": 0.3562910873123263, "learning_rate": 2.2586459378693736e-05, "loss": 0.3414, "num_tokens": 2258926808.0, "step": 2962 }, { "epoch": 4.042331654860459, "grad_norm": 0.3852884397145638, "learning_rate": 2.2576522818959795e-05, "loss": 0.336, "num_tokens": 2259768969.0, "step": 2963 }, { "epoch": 4.043697192114022, "grad_norm": 0.328347740444287, "learning_rate": 2.25665860833537e-05, "loss": 0.3351, "num_tokens": 2260539230.0, "step": 2964 }, { "epoch": 4.045062729367586, "grad_norm": 0.3726910344777043, "learning_rate": 2.2556649174906738e-05, "loss": 0.3473, "num_tokens": 2261278397.0, "step": 2965 }, { "epoch": 4.046428266621149, "grad_norm": 0.37312216828143924, "learning_rate": 2.2546712096650215e-05, "loss": 0.3306, "num_tokens": 2262055951.0, "step": 2966 }, { "epoch": 4.047793803874712, "grad_norm": 0.3367018921934393, "learning_rate": 2.2536774851615507e-05, "loss": 0.3363, "num_tokens": 2262797673.0, "step": 2967 }, { "epoch": 4.049159341128275, "grad_norm": 0.3978900669286773, "learning_rate": 2.2526837442834046e-05, "loss": 0.3387, "num_tokens": 2263620425.0, "step": 2968 }, { "epoch": 4.050524878381839, "grad_norm": 0.35945230999981587, "learning_rate": 2.2516899873337286e-05, "loss": 0.3454, "num_tokens": 2264328474.0, "step": 2969 }, { "epoch": 4.051890415635402, "grad_norm": 0.3655046697861434, "learning_rate": 2.2506962146156764e-05, "loss": 0.3242, "num_tokens": 2265066284.0, "step": 2970 }, { "epoch": 4.053255952888965, "grad_norm": 0.3246458783930462, "learning_rate": 2.2497024264324052e-05, "loss": 0.3269, "num_tokens": 2265837009.0, "step": 2971 }, { "epoch": 4.054621490142528, "grad_norm": 0.32064170811991666, "learning_rate": 2.248708623087075e-05, "loss": 0.3239, "num_tokens": 2266590447.0, "step": 2972 }, { "epoch": 4.055987027396091, "grad_norm": 0.347707993109019, "learning_rate": 2.2477148048828536e-05, "loss": 0.3441, "num_tokens": 2267347558.0, "step": 2973 }, { "epoch": 4.057352564649654, "grad_norm": 0.33659691542165726, "learning_rate": 2.246720972122912e-05, "loss": 0.3357, "num_tokens": 2268090185.0, "step": 2974 }, { "epoch": 4.058718101903217, "grad_norm": 0.34041401056593434, "learning_rate": 2.245727125110425e-05, "loss": 0.3529, "num_tokens": 2268829092.0, "step": 2975 }, { "epoch": 4.0600836391567805, "grad_norm": 0.3155347004407772, "learning_rate": 2.2447332641485717e-05, "loss": 0.3326, "num_tokens": 2269535727.0, "step": 2976 }, { "epoch": 4.061449176410344, "grad_norm": 0.39086594098621125, "learning_rate": 2.2437393895405377e-05, "loss": 0.3412, "num_tokens": 2270270478.0, "step": 2977 }, { "epoch": 4.062814713663907, "grad_norm": 0.3396731452935562, "learning_rate": 2.2427455015895094e-05, "loss": 0.3408, "num_tokens": 2271085462.0, "step": 2978 }, { "epoch": 4.06418025091747, "grad_norm": 0.32512822216937937, "learning_rate": 2.2417516005986806e-05, "loss": 0.3318, "num_tokens": 2271772482.0, "step": 2979 }, { "epoch": 4.0655457881710335, "grad_norm": 0.3489761589086455, "learning_rate": 2.240757686871246e-05, "loss": 0.3466, "num_tokens": 2272494755.0, "step": 2980 }, { "epoch": 4.066911325424597, "grad_norm": 0.36231681335972077, "learning_rate": 2.239763760710406e-05, "loss": 0.3521, "num_tokens": 2273323102.0, "step": 2981 }, { "epoch": 4.06827686267816, "grad_norm": 0.3372385574793856, "learning_rate": 2.2387698224193653e-05, "loss": 0.3322, "num_tokens": 2274073193.0, "step": 2982 }, { "epoch": 4.069642399931723, "grad_norm": 0.30247942365308333, "learning_rate": 2.23777587230133e-05, "loss": 0.3417, "num_tokens": 2274874460.0, "step": 2983 }, { "epoch": 4.0710079371852865, "grad_norm": 0.34678657607760727, "learning_rate": 2.2367819106595127e-05, "loss": 0.3464, "num_tokens": 2275574966.0, "step": 2984 }, { "epoch": 4.07237347443885, "grad_norm": 0.3275464372584132, "learning_rate": 2.2357879377971265e-05, "loss": 0.3542, "num_tokens": 2276343888.0, "step": 2985 }, { "epoch": 4.073739011692413, "grad_norm": 0.3356563486451717, "learning_rate": 2.2347939540173913e-05, "loss": 0.3184, "num_tokens": 2277085124.0, "step": 2986 }, { "epoch": 4.075104548945976, "grad_norm": 0.3052229290261764, "learning_rate": 2.233799959623527e-05, "loss": 0.3476, "num_tokens": 2277906360.0, "step": 2987 }, { "epoch": 4.0764700861995395, "grad_norm": 0.3475255621303882, "learning_rate": 2.2328059549187582e-05, "loss": 0.3409, "num_tokens": 2278676149.0, "step": 2988 }, { "epoch": 4.077835623453102, "grad_norm": 0.34794278114907645, "learning_rate": 2.2318119402063143e-05, "loss": 0.3502, "num_tokens": 2279536253.0, "step": 2989 }, { "epoch": 4.079201160706665, "grad_norm": 0.31470577153924617, "learning_rate": 2.2308179157894246e-05, "loss": 0.3361, "num_tokens": 2280345870.0, "step": 2990 }, { "epoch": 4.080566697960228, "grad_norm": 0.32430576722953297, "learning_rate": 2.2298238819713234e-05, "loss": 0.3305, "num_tokens": 2281166375.0, "step": 2991 }, { "epoch": 4.081932235213792, "grad_norm": 0.3306535221340919, "learning_rate": 2.2288298390552475e-05, "loss": 0.3413, "num_tokens": 2281955896.0, "step": 2992 }, { "epoch": 4.083297772467355, "grad_norm": 0.3261833512392815, "learning_rate": 2.227835787344436e-05, "loss": 0.3353, "num_tokens": 2282819016.0, "step": 2993 }, { "epoch": 4.084663309720918, "grad_norm": 0.3309782028576178, "learning_rate": 2.2268417271421315e-05, "loss": 0.3405, "num_tokens": 2283607744.0, "step": 2994 }, { "epoch": 4.086028846974481, "grad_norm": 0.3570139257579028, "learning_rate": 2.2258476587515782e-05, "loss": 0.3337, "num_tokens": 2284358032.0, "step": 2995 }, { "epoch": 4.087394384228045, "grad_norm": 0.3487293208890538, "learning_rate": 2.224853582476024e-05, "loss": 0.3328, "num_tokens": 2285100283.0, "step": 2996 }, { "epoch": 4.088759921481608, "grad_norm": 0.33345008230484857, "learning_rate": 2.2238594986187177e-05, "loss": 0.3352, "num_tokens": 2285846988.0, "step": 2997 }, { "epoch": 4.090125458735171, "grad_norm": 0.37108685190851287, "learning_rate": 2.222865407482912e-05, "loss": 0.3371, "num_tokens": 2286620684.0, "step": 2998 }, { "epoch": 4.0914909959887344, "grad_norm": 0.29464549080179847, "learning_rate": 2.22187130937186e-05, "loss": 0.3302, "num_tokens": 2287384151.0, "step": 2999 }, { "epoch": 4.092856533242298, "grad_norm": 0.3786828520749586, "learning_rate": 2.2208772045888193e-05, "loss": 0.3342, "num_tokens": 2288137806.0, "step": 3000 }, { "epoch": 4.094222070495861, "grad_norm": 0.3559578261605771, "learning_rate": 2.2198830934370474e-05, "loss": 0.3539, "num_tokens": 2288919886.0, "step": 3001 }, { "epoch": 4.095587607749424, "grad_norm": 0.3349879960073407, "learning_rate": 2.218888976219805e-05, "loss": 0.3329, "num_tokens": 2289640952.0, "step": 3002 }, { "epoch": 4.0969531450029875, "grad_norm": 0.30261975513634454, "learning_rate": 2.2178948532403538e-05, "loss": 0.3284, "num_tokens": 2290344618.0, "step": 3003 }, { "epoch": 4.098318682256551, "grad_norm": 0.37498151626215764, "learning_rate": 2.2169007248019575e-05, "loss": 0.351, "num_tokens": 2291000956.0, "step": 3004 }, { "epoch": 4.099684219510113, "grad_norm": 0.3453075242844875, "learning_rate": 2.2159065912078827e-05, "loss": 0.3308, "num_tokens": 2291746004.0, "step": 3005 }, { "epoch": 4.101049756763676, "grad_norm": 0.38541372265467755, "learning_rate": 2.2149124527613953e-05, "loss": 0.3472, "num_tokens": 2292480613.0, "step": 3006 }, { "epoch": 4.10241529401724, "grad_norm": 0.6411850458014885, "learning_rate": 2.213918309765764e-05, "loss": 0.3627, "num_tokens": 2293296004.0, "step": 3007 }, { "epoch": 4.103780831270803, "grad_norm": 0.33629141341849716, "learning_rate": 2.2129241625242598e-05, "loss": 0.3281, "num_tokens": 2294045155.0, "step": 3008 }, { "epoch": 4.105146368524366, "grad_norm": 0.3430258758715723, "learning_rate": 2.2119300113401527e-05, "loss": 0.3416, "num_tokens": 2294773432.0, "step": 3009 }, { "epoch": 4.106511905777929, "grad_norm": 0.351113654499235, "learning_rate": 2.2109358565167166e-05, "loss": 0.3531, "num_tokens": 2295464793.0, "step": 3010 }, { "epoch": 4.107877443031493, "grad_norm": 0.3545472025293191, "learning_rate": 2.209941698357224e-05, "loss": 0.338, "num_tokens": 2296242292.0, "step": 3011 }, { "epoch": 4.109242980285056, "grad_norm": 0.3144412297517756, "learning_rate": 2.2089475371649493e-05, "loss": 0.3387, "num_tokens": 2297010438.0, "step": 3012 }, { "epoch": 4.110608517538619, "grad_norm": 0.3309747557552062, "learning_rate": 2.2079533732431688e-05, "loss": 0.3542, "num_tokens": 2297848831.0, "step": 3013 }, { "epoch": 4.111974054792182, "grad_norm": 0.32668935544496047, "learning_rate": 2.206959206895159e-05, "loss": 0.3465, "num_tokens": 2298581441.0, "step": 3014 }, { "epoch": 4.113339592045746, "grad_norm": 0.33959429962537613, "learning_rate": 2.2059650384241956e-05, "loss": 0.347, "num_tokens": 2299342586.0, "step": 3015 }, { "epoch": 4.114705129299309, "grad_norm": 0.36023488701935613, "learning_rate": 2.204970868133558e-05, "loss": 0.3408, "num_tokens": 2300023907.0, "step": 3016 }, { "epoch": 4.116070666552872, "grad_norm": 0.336143258001932, "learning_rate": 2.2039766963265237e-05, "loss": 0.3598, "num_tokens": 2300749921.0, "step": 3017 }, { "epoch": 4.117436203806435, "grad_norm": 0.3608728172388074, "learning_rate": 2.2029825233063716e-05, "loss": 0.3436, "num_tokens": 2301634068.0, "step": 3018 }, { "epoch": 4.118801741059999, "grad_norm": 0.29932027152277446, "learning_rate": 2.2019883493763805e-05, "loss": 0.3326, "num_tokens": 2302386679.0, "step": 3019 }, { "epoch": 4.120167278313562, "grad_norm": 0.3159261241565509, "learning_rate": 2.2009941748398305e-05, "loss": 0.3408, "num_tokens": 2303181031.0, "step": 3020 }, { "epoch": 4.121532815567125, "grad_norm": 0.3362247346604816, "learning_rate": 2.2000000000000003e-05, "loss": 0.3497, "num_tokens": 2303965214.0, "step": 3021 }, { "epoch": 4.1228983528206875, "grad_norm": 0.3305564167638304, "learning_rate": 2.1990058251601708e-05, "loss": 0.3447, "num_tokens": 2304706226.0, "step": 3022 }, { "epoch": 4.124263890074251, "grad_norm": 0.3260986409377396, "learning_rate": 2.19801165062362e-05, "loss": 0.3213, "num_tokens": 2305449469.0, "step": 3023 }, { "epoch": 4.125629427327814, "grad_norm": 0.3453735106313123, "learning_rate": 2.19701747669363e-05, "loss": 0.3402, "num_tokens": 2306175759.0, "step": 3024 }, { "epoch": 4.126994964581377, "grad_norm": 0.326186528660417, "learning_rate": 2.1960233036734766e-05, "loss": 0.3281, "num_tokens": 2306857275.0, "step": 3025 }, { "epoch": 4.1283605018349405, "grad_norm": 0.34249635592296834, "learning_rate": 2.195029131866443e-05, "loss": 0.348, "num_tokens": 2307698780.0, "step": 3026 }, { "epoch": 4.129726039088504, "grad_norm": 0.3155627055121278, "learning_rate": 2.1940349615758046e-05, "loss": 0.3428, "num_tokens": 2308387770.0, "step": 3027 }, { "epoch": 4.131091576342067, "grad_norm": 0.29847020182663775, "learning_rate": 2.193040793104842e-05, "loss": 0.3351, "num_tokens": 2309184442.0, "step": 3028 }, { "epoch": 4.13245711359563, "grad_norm": 0.3380972910027082, "learning_rate": 2.1920466267568318e-05, "loss": 0.3398, "num_tokens": 2309934760.0, "step": 3029 }, { "epoch": 4.1338226508491935, "grad_norm": 0.32835562188326883, "learning_rate": 2.1910524628350513e-05, "loss": 0.3518, "num_tokens": 2310770840.0, "step": 3030 }, { "epoch": 4.135188188102757, "grad_norm": 0.3078916717946885, "learning_rate": 2.1900583016427772e-05, "loss": 0.3335, "num_tokens": 2311483471.0, "step": 3031 }, { "epoch": 4.13655372535632, "grad_norm": 0.3351286488428006, "learning_rate": 2.1890641434832843e-05, "loss": 0.3478, "num_tokens": 2312280668.0, "step": 3032 }, { "epoch": 4.137919262609883, "grad_norm": 0.30655032145775185, "learning_rate": 2.188069988659848e-05, "loss": 0.3397, "num_tokens": 2313026099.0, "step": 3033 }, { "epoch": 4.1392847998634466, "grad_norm": 0.3356676533968665, "learning_rate": 2.1870758374757407e-05, "loss": 0.3455, "num_tokens": 2313777789.0, "step": 3034 }, { "epoch": 4.14065033711701, "grad_norm": 0.3086809921199934, "learning_rate": 2.186081690234237e-05, "loss": 0.34, "num_tokens": 2314593680.0, "step": 3035 }, { "epoch": 4.142015874370573, "grad_norm": 0.368858525268747, "learning_rate": 2.1850875472386056e-05, "loss": 0.3537, "num_tokens": 2315338516.0, "step": 3036 }, { "epoch": 4.143381411624135, "grad_norm": 0.31350037082090104, "learning_rate": 2.184093408792118e-05, "loss": 0.3372, "num_tokens": 2316062639.0, "step": 3037 }, { "epoch": 4.144746948877699, "grad_norm": 0.308519539617394, "learning_rate": 2.183099275198043e-05, "loss": 0.3306, "num_tokens": 2316855557.0, "step": 3038 }, { "epoch": 4.146112486131262, "grad_norm": 0.3281090602991828, "learning_rate": 2.1821051467596467e-05, "loss": 0.339, "num_tokens": 2317637883.0, "step": 3039 }, { "epoch": 4.147478023384825, "grad_norm": 0.30546435600428834, "learning_rate": 2.1811110237801958e-05, "loss": 0.3452, "num_tokens": 2318407581.0, "step": 3040 }, { "epoch": 4.148843560638388, "grad_norm": 0.34955418313721404, "learning_rate": 2.180116906562953e-05, "loss": 0.3525, "num_tokens": 2319176345.0, "step": 3041 }, { "epoch": 4.150209097891952, "grad_norm": 0.3218464141129682, "learning_rate": 2.1791227954111816e-05, "loss": 0.3449, "num_tokens": 2319902039.0, "step": 3042 }, { "epoch": 4.151574635145515, "grad_norm": 0.32949996356064654, "learning_rate": 2.1781286906281404e-05, "loss": 0.3422, "num_tokens": 2320649678.0, "step": 3043 }, { "epoch": 4.152940172399078, "grad_norm": 0.30790185006222937, "learning_rate": 2.1771345925170885e-05, "loss": 0.3428, "num_tokens": 2321393010.0, "step": 3044 }, { "epoch": 4.1543057096526415, "grad_norm": 0.3082680251566606, "learning_rate": 2.1761405013812836e-05, "loss": 0.3468, "num_tokens": 2322189304.0, "step": 3045 }, { "epoch": 4.155671246906205, "grad_norm": 0.32752487236321803, "learning_rate": 2.1751464175239768e-05, "loss": 0.3528, "num_tokens": 2322994037.0, "step": 3046 }, { "epoch": 4.157036784159768, "grad_norm": 0.3156497099870555, "learning_rate": 2.1741523412484223e-05, "loss": 0.3212, "num_tokens": 2323715960.0, "step": 3047 }, { "epoch": 4.158402321413331, "grad_norm": 0.29847386934754966, "learning_rate": 2.1731582728578694e-05, "loss": 0.3345, "num_tokens": 2324404936.0, "step": 3048 }, { "epoch": 4.1597678586668945, "grad_norm": 0.3412927459143315, "learning_rate": 2.1721642126555647e-05, "loss": 0.3464, "num_tokens": 2325143633.0, "step": 3049 }, { "epoch": 4.161133395920458, "grad_norm": 0.3222592663859158, "learning_rate": 2.1711701609447534e-05, "loss": 0.3332, "num_tokens": 2325939768.0, "step": 3050 }, { "epoch": 4.162498933174021, "grad_norm": 0.3279333216019383, "learning_rate": 2.1701761180286775e-05, "loss": 0.3416, "num_tokens": 2326707300.0, "step": 3051 }, { "epoch": 4.163864470427584, "grad_norm": 0.3067962598894123, "learning_rate": 2.1691820842105756e-05, "loss": 0.3509, "num_tokens": 2327559022.0, "step": 3052 }, { "epoch": 4.1652300076811475, "grad_norm": 0.33856325445132074, "learning_rate": 2.1681880597936863e-05, "loss": 0.342, "num_tokens": 2328305326.0, "step": 3053 }, { "epoch": 4.16659554493471, "grad_norm": 0.35464395203686233, "learning_rate": 2.1671940450812427e-05, "loss": 0.3474, "num_tokens": 2329065882.0, "step": 3054 }, { "epoch": 4.167961082188273, "grad_norm": 0.334479252207438, "learning_rate": 2.166200040376474e-05, "loss": 0.3468, "num_tokens": 2329833080.0, "step": 3055 }, { "epoch": 4.169326619441836, "grad_norm": 0.3340870358120222, "learning_rate": 2.16520604598261e-05, "loss": 0.3472, "num_tokens": 2330593100.0, "step": 3056 }, { "epoch": 4.1706921566954, "grad_norm": 0.33742031320285776, "learning_rate": 2.1642120622028737e-05, "loss": 0.3575, "num_tokens": 2331345023.0, "step": 3057 }, { "epoch": 4.172057693948963, "grad_norm": 0.360430114138079, "learning_rate": 2.1632180893404885e-05, "loss": 0.3223, "num_tokens": 2332059904.0, "step": 3058 }, { "epoch": 4.173423231202526, "grad_norm": 0.3558591080947815, "learning_rate": 2.1622241276986706e-05, "loss": 0.3188, "num_tokens": 2332774494.0, "step": 3059 }, { "epoch": 4.174788768456089, "grad_norm": 0.3330847746115555, "learning_rate": 2.161230177580636e-05, "loss": 0.3308, "num_tokens": 2333508237.0, "step": 3060 }, { "epoch": 4.176154305709653, "grad_norm": 0.31296756667989356, "learning_rate": 2.160236239289595e-05, "loss": 0.3395, "num_tokens": 2334343237.0, "step": 3061 }, { "epoch": 4.177519842963216, "grad_norm": 0.33325789244726617, "learning_rate": 2.1592423131287545e-05, "loss": 0.3341, "num_tokens": 2335059906.0, "step": 3062 }, { "epoch": 4.178885380216779, "grad_norm": 0.3903607499189792, "learning_rate": 2.1582483994013207e-05, "loss": 0.3572, "num_tokens": 2335900304.0, "step": 3063 }, { "epoch": 4.180250917470342, "grad_norm": 0.3283439951635686, "learning_rate": 2.157254498410491e-05, "loss": 0.3315, "num_tokens": 2336629569.0, "step": 3064 }, { "epoch": 4.181616454723906, "grad_norm": 0.3746897461796416, "learning_rate": 2.1562606104594633e-05, "loss": 0.3318, "num_tokens": 2337330186.0, "step": 3065 }, { "epoch": 4.182981991977469, "grad_norm": 0.3273729033966583, "learning_rate": 2.1552667358514285e-05, "loss": 0.3386, "num_tokens": 2338140663.0, "step": 3066 }, { "epoch": 4.184347529231032, "grad_norm": 0.3427039278505857, "learning_rate": 2.1542728748895757e-05, "loss": 0.3403, "num_tokens": 2338861753.0, "step": 3067 }, { "epoch": 4.185713066484595, "grad_norm": 0.3474590684255807, "learning_rate": 2.1532790278770887e-05, "loss": 0.3399, "num_tokens": 2339582314.0, "step": 3068 }, { "epoch": 4.187078603738159, "grad_norm": 0.3224467831142782, "learning_rate": 2.152285195117147e-05, "loss": 0.3332, "num_tokens": 2340273598.0, "step": 3069 }, { "epoch": 4.188444140991721, "grad_norm": 0.35531336295397714, "learning_rate": 2.151291376912926e-05, "loss": 0.3312, "num_tokens": 2341005832.0, "step": 3070 }, { "epoch": 4.189809678245284, "grad_norm": 0.32281071432901065, "learning_rate": 2.150297573567596e-05, "loss": 0.3299, "num_tokens": 2341756683.0, "step": 3071 }, { "epoch": 4.1911752154988475, "grad_norm": 0.3309943627319559, "learning_rate": 2.149303785384324e-05, "loss": 0.3495, "num_tokens": 2342522329.0, "step": 3072 }, { "epoch": 4.192540752752411, "grad_norm": 0.32618756720643904, "learning_rate": 2.1483100126662716e-05, "loss": 0.3546, "num_tokens": 2343333443.0, "step": 3073 }, { "epoch": 4.193906290005974, "grad_norm": 0.3537476641082998, "learning_rate": 2.1473162557165966e-05, "loss": 0.3389, "num_tokens": 2344085941.0, "step": 3074 }, { "epoch": 4.195271827259537, "grad_norm": 0.32477152603732984, "learning_rate": 2.1463225148384495e-05, "loss": 0.3541, "num_tokens": 2344867882.0, "step": 3075 }, { "epoch": 4.1966373645131005, "grad_norm": 0.3079189519173251, "learning_rate": 2.145328790334979e-05, "loss": 0.3517, "num_tokens": 2345677941.0, "step": 3076 }, { "epoch": 4.198002901766664, "grad_norm": 0.32143829059853773, "learning_rate": 2.144335082509327e-05, "loss": 0.3462, "num_tokens": 2346378536.0, "step": 3077 }, { "epoch": 4.199368439020227, "grad_norm": 0.3214890216847812, "learning_rate": 2.1433413916646305e-05, "loss": 0.3436, "num_tokens": 2347149882.0, "step": 3078 }, { "epoch": 4.20073397627379, "grad_norm": 0.3151367910116307, "learning_rate": 2.1423477181040217e-05, "loss": 0.3533, "num_tokens": 2347844545.0, "step": 3079 }, { "epoch": 4.202099513527354, "grad_norm": 0.3246896076338491, "learning_rate": 2.1413540621306273e-05, "loss": 0.339, "num_tokens": 2348620408.0, "step": 3080 }, { "epoch": 4.203465050780917, "grad_norm": 0.3559672062582907, "learning_rate": 2.140360424047569e-05, "loss": 0.3469, "num_tokens": 2349333715.0, "step": 3081 }, { "epoch": 4.20483058803448, "grad_norm": 0.3153410814638213, "learning_rate": 2.1393668041579625e-05, "loss": 0.3462, "num_tokens": 2350105779.0, "step": 3082 }, { "epoch": 4.206196125288043, "grad_norm": 0.33823629512888914, "learning_rate": 2.1383732027649186e-05, "loss": 0.3468, "num_tokens": 2350825441.0, "step": 3083 }, { "epoch": 4.207561662541607, "grad_norm": 0.3289739460502576, "learning_rate": 2.1373796201715415e-05, "loss": 0.3541, "num_tokens": 2351638437.0, "step": 3084 }, { "epoch": 4.20892719979517, "grad_norm": 0.34666821915610113, "learning_rate": 2.13638605668093e-05, "loss": 0.3346, "num_tokens": 2352401818.0, "step": 3085 }, { "epoch": 4.210292737048732, "grad_norm": 0.31047796921915494, "learning_rate": 2.1353925125961786e-05, "loss": 0.3331, "num_tokens": 2353076321.0, "step": 3086 }, { "epoch": 4.2116582743022954, "grad_norm": 0.3395331019760271, "learning_rate": 2.1343989882203728e-05, "loss": 0.3447, "num_tokens": 2353869317.0, "step": 3087 }, { "epoch": 4.213023811555859, "grad_norm": 0.33168212525329704, "learning_rate": 2.1334054838565952e-05, "loss": 0.3342, "num_tokens": 2354641343.0, "step": 3088 }, { "epoch": 4.214389348809422, "grad_norm": 0.3057069378352252, "learning_rate": 2.1324119998079196e-05, "loss": 0.3628, "num_tokens": 2355491971.0, "step": 3089 }, { "epoch": 4.215754886062985, "grad_norm": 0.33949538655177003, "learning_rate": 2.1314185363774168e-05, "loss": 0.3432, "num_tokens": 2356264724.0, "step": 3090 }, { "epoch": 4.2171204233165485, "grad_norm": 0.34060499092414387, "learning_rate": 2.1304250938681484e-05, "loss": 0.3493, "num_tokens": 2357088088.0, "step": 3091 }, { "epoch": 4.218485960570112, "grad_norm": 0.33504818952854254, "learning_rate": 2.129431672583171e-05, "loss": 0.3609, "num_tokens": 2357854793.0, "step": 3092 }, { "epoch": 4.219851497823675, "grad_norm": 0.33002237130503337, "learning_rate": 2.1284382728255343e-05, "loss": 0.3501, "num_tokens": 2358669950.0, "step": 3093 }, { "epoch": 4.221217035077238, "grad_norm": 0.3230397779140308, "learning_rate": 2.127444894898281e-05, "loss": 0.3295, "num_tokens": 2359433702.0, "step": 3094 }, { "epoch": 4.2225825723308015, "grad_norm": 0.3209494699205609, "learning_rate": 2.1264515391044487e-05, "loss": 0.3439, "num_tokens": 2360259523.0, "step": 3095 }, { "epoch": 4.223948109584365, "grad_norm": 0.3504266482304807, "learning_rate": 2.125458205747066e-05, "loss": 0.3294, "num_tokens": 2361010442.0, "step": 3096 }, { "epoch": 4.225313646837928, "grad_norm": 0.32849372824727824, "learning_rate": 2.1244648951291578e-05, "loss": 0.3484, "num_tokens": 2361781135.0, "step": 3097 }, { "epoch": 4.226679184091491, "grad_norm": 0.3428816958954133, "learning_rate": 2.1234716075537377e-05, "loss": 0.3462, "num_tokens": 2362599399.0, "step": 3098 }, { "epoch": 4.2280447213450545, "grad_norm": 0.3268772873740545, "learning_rate": 2.1224783433238163e-05, "loss": 0.3373, "num_tokens": 2363302966.0, "step": 3099 }, { "epoch": 4.229410258598618, "grad_norm": 0.3275004432945464, "learning_rate": 2.1214851027423954e-05, "loss": 0.3371, "num_tokens": 2364123972.0, "step": 3100 }, { "epoch": 4.230775795852181, "grad_norm": 0.3141387694215126, "learning_rate": 2.1204918861124696e-05, "loss": 0.323, "num_tokens": 2364879335.0, "step": 3101 }, { "epoch": 4.232141333105743, "grad_norm": 0.3158858538293015, "learning_rate": 2.119498693737026e-05, "loss": 0.3345, "num_tokens": 2365698147.0, "step": 3102 }, { "epoch": 4.233506870359307, "grad_norm": 0.40772185296469327, "learning_rate": 2.1185055259190444e-05, "loss": 0.3478, "num_tokens": 2366446852.0, "step": 3103 }, { "epoch": 4.23487240761287, "grad_norm": 0.32992919935223686, "learning_rate": 2.117512382961498e-05, "loss": 0.3377, "num_tokens": 2367199993.0, "step": 3104 }, { "epoch": 4.236237944866433, "grad_norm": 0.3283057873340491, "learning_rate": 2.116519265167351e-05, "loss": 0.3582, "num_tokens": 2367975227.0, "step": 3105 }, { "epoch": 4.237603482119996, "grad_norm": 0.3398675132919286, "learning_rate": 2.1155261728395616e-05, "loss": 0.3384, "num_tokens": 2368717223.0, "step": 3106 }, { "epoch": 4.23896901937356, "grad_norm": 0.33716026491776246, "learning_rate": 2.1145331062810787e-05, "loss": 0.3488, "num_tokens": 2369437957.0, "step": 3107 }, { "epoch": 4.240334556627123, "grad_norm": 0.32169820081642425, "learning_rate": 2.113540065794842e-05, "loss": 0.345, "num_tokens": 2370315977.0, "step": 3108 }, { "epoch": 4.241700093880686, "grad_norm": 0.33539500286264906, "learning_rate": 2.1125470516837886e-05, "loss": 0.3592, "num_tokens": 2371059578.0, "step": 3109 }, { "epoch": 4.243065631134249, "grad_norm": 0.3260102573478667, "learning_rate": 2.111554064250842e-05, "loss": 0.3472, "num_tokens": 2371881213.0, "step": 3110 }, { "epoch": 4.244431168387813, "grad_norm": 0.3342514517610275, "learning_rate": 2.1105611037989208e-05, "loss": 0.3546, "num_tokens": 2372727248.0, "step": 3111 }, { "epoch": 4.245796705641376, "grad_norm": 0.35706128991352204, "learning_rate": 2.1095681706309326e-05, "loss": 0.3403, "num_tokens": 2373487194.0, "step": 3112 }, { "epoch": 4.247162242894939, "grad_norm": 0.3796569197201268, "learning_rate": 2.1085752650497796e-05, "loss": 0.3524, "num_tokens": 2374239350.0, "step": 3113 }, { "epoch": 4.248527780148502, "grad_norm": 0.3169374059967053, "learning_rate": 2.107582387358354e-05, "loss": 0.3557, "num_tokens": 2375006854.0, "step": 3114 }, { "epoch": 4.249893317402066, "grad_norm": 0.3464431239149836, "learning_rate": 2.1065895378595403e-05, "loss": 0.3472, "num_tokens": 2375811675.0, "step": 3115 }, { "epoch": 4.251258854655629, "grad_norm": 0.3681969375625811, "learning_rate": 2.105596716856213e-05, "loss": 0.3327, "num_tokens": 2376578361.0, "step": 3116 }, { "epoch": 4.252624391909192, "grad_norm": 0.30968961220126745, "learning_rate": 2.1046039246512383e-05, "loss": 0.3301, "num_tokens": 2377266865.0, "step": 3117 }, { "epoch": 4.2539899291627545, "grad_norm": 0.3996198370712123, "learning_rate": 2.103611161547476e-05, "loss": 0.3457, "num_tokens": 2378006051.0, "step": 3118 }, { "epoch": 4.255355466416318, "grad_norm": 0.35750311726398804, "learning_rate": 2.1026184278477736e-05, "loss": 0.3304, "num_tokens": 2378773515.0, "step": 3119 }, { "epoch": 4.256721003669881, "grad_norm": 0.3259204041011961, "learning_rate": 2.1016257238549716e-05, "loss": 0.3515, "num_tokens": 2379504325.0, "step": 3120 }, { "epoch": 4.258086540923444, "grad_norm": 0.3335496655080297, "learning_rate": 2.1006330498719008e-05, "loss": 0.3387, "num_tokens": 2380268033.0, "step": 3121 }, { "epoch": 4.2594520781770076, "grad_norm": 0.34976755643604146, "learning_rate": 2.099640406201384e-05, "loss": 0.3168, "num_tokens": 2380969674.0, "step": 3122 }, { "epoch": 4.260817615430571, "grad_norm": 0.2940829480892624, "learning_rate": 2.098647793146233e-05, "loss": 0.3508, "num_tokens": 2381724214.0, "step": 3123 }, { "epoch": 4.262183152684134, "grad_norm": 0.38537078831797295, "learning_rate": 2.09765521100925e-05, "loss": 0.3412, "num_tokens": 2382479549.0, "step": 3124 }, { "epoch": 4.263548689937697, "grad_norm": 0.32588790177830784, "learning_rate": 2.0966626600932313e-05, "loss": 0.3478, "num_tokens": 2383230172.0, "step": 3125 }, { "epoch": 4.264914227191261, "grad_norm": 0.3247895577541028, "learning_rate": 2.095670140700959e-05, "loss": 0.3354, "num_tokens": 2383940168.0, "step": 3126 }, { "epoch": 4.266279764444824, "grad_norm": 0.33621159018789143, "learning_rate": 2.0946776531352087e-05, "loss": 0.3329, "num_tokens": 2384682361.0, "step": 3127 }, { "epoch": 4.267645301698387, "grad_norm": 0.35688756104612407, "learning_rate": 2.0936851976987455e-05, "loss": 0.3309, "num_tokens": 2385467555.0, "step": 3128 }, { "epoch": 4.26901083895195, "grad_norm": 0.3412856099336451, "learning_rate": 2.0926927746943243e-05, "loss": 0.348, "num_tokens": 2386168634.0, "step": 3129 }, { "epoch": 4.270376376205514, "grad_norm": 0.3221225394778789, "learning_rate": 2.0917003844246908e-05, "loss": 0.3331, "num_tokens": 2386923642.0, "step": 3130 }, { "epoch": 4.271741913459077, "grad_norm": 0.33833744438027985, "learning_rate": 2.0907080271925796e-05, "loss": 0.3472, "num_tokens": 2387756070.0, "step": 3131 }, { "epoch": 4.27310745071264, "grad_norm": 0.2873067589822927, "learning_rate": 2.0897157033007166e-05, "loss": 0.3197, "num_tokens": 2388485437.0, "step": 3132 }, { "epoch": 4.274472987966203, "grad_norm": 0.3231230859605487, "learning_rate": 2.088723413051816e-05, "loss": 0.3433, "num_tokens": 2389268384.0, "step": 3133 }, { "epoch": 4.275838525219767, "grad_norm": 0.29804539473957026, "learning_rate": 2.0877311567485838e-05, "loss": 0.3441, "num_tokens": 2389988167.0, "step": 3134 }, { "epoch": 4.277204062473329, "grad_norm": 0.340638690983036, "learning_rate": 2.086738934693713e-05, "loss": 0.3448, "num_tokens": 2390723404.0, "step": 3135 }, { "epoch": 4.278569599726892, "grad_norm": 0.3483968071937646, "learning_rate": 2.085746747189889e-05, "loss": 0.3458, "num_tokens": 2391423418.0, "step": 3136 }, { "epoch": 4.2799351369804555, "grad_norm": 0.34290623613453003, "learning_rate": 2.0847545945397836e-05, "loss": 0.3452, "num_tokens": 2392204783.0, "step": 3137 }, { "epoch": 4.281300674234019, "grad_norm": 0.31966994630026047, "learning_rate": 2.083762477046061e-05, "loss": 0.337, "num_tokens": 2392995363.0, "step": 3138 }, { "epoch": 4.282666211487582, "grad_norm": 0.2837959162766392, "learning_rate": 2.0827703950113735e-05, "loss": 0.336, "num_tokens": 2393823741.0, "step": 3139 }, { "epoch": 4.284031748741145, "grad_norm": 0.3569315222806783, "learning_rate": 2.081778348738361e-05, "loss": 0.3427, "num_tokens": 2394588195.0, "step": 3140 }, { "epoch": 4.2853972859947085, "grad_norm": 0.32061107300994074, "learning_rate": 2.0807863385296544e-05, "loss": 0.3459, "num_tokens": 2395293177.0, "step": 3141 }, { "epoch": 4.286762823248272, "grad_norm": 0.33430903983510984, "learning_rate": 2.079794364687873e-05, "loss": 0.3673, "num_tokens": 2396163039.0, "step": 3142 }, { "epoch": 4.288128360501835, "grad_norm": 0.31202229232956497, "learning_rate": 2.0788024275156268e-05, "loss": 0.3383, "num_tokens": 2396958148.0, "step": 3143 }, { "epoch": 4.289493897755398, "grad_norm": 0.3130707726635102, "learning_rate": 2.0778105273155095e-05, "loss": 0.3446, "num_tokens": 2397696994.0, "step": 3144 }, { "epoch": 4.2908594350089615, "grad_norm": 0.33944431921567864, "learning_rate": 2.0768186643901094e-05, "loss": 0.3358, "num_tokens": 2398439653.0, "step": 3145 }, { "epoch": 4.292224972262525, "grad_norm": 0.3254858091074745, "learning_rate": 2.0758268390420006e-05, "loss": 0.3533, "num_tokens": 2399220416.0, "step": 3146 }, { "epoch": 4.293590509516088, "grad_norm": 0.3385607149575727, "learning_rate": 2.0748350515737456e-05, "loss": 0.3424, "num_tokens": 2399974725.0, "step": 3147 }, { "epoch": 4.294956046769651, "grad_norm": 0.34669084649614296, "learning_rate": 2.0738433022878964e-05, "loss": 0.3469, "num_tokens": 2400766156.0, "step": 3148 }, { "epoch": 4.2963215840232145, "grad_norm": 0.337962054042391, "learning_rate": 2.0728515914869916e-05, "loss": 0.3499, "num_tokens": 2401474912.0, "step": 3149 }, { "epoch": 4.297687121276777, "grad_norm": 0.3355408252992432, "learning_rate": 2.07185991947356e-05, "loss": 0.3378, "num_tokens": 2402232037.0, "step": 3150 }, { "epoch": 4.29905265853034, "grad_norm": 0.3377602794192836, "learning_rate": 2.0708682865501174e-05, "loss": 0.3347, "num_tokens": 2403054864.0, "step": 3151 }, { "epoch": 4.300418195783903, "grad_norm": 0.322672004558342, "learning_rate": 2.069876693019169e-05, "loss": 0.3376, "num_tokens": 2403761055.0, "step": 3152 }, { "epoch": 4.301783733037467, "grad_norm": 0.3471442090868715, "learning_rate": 2.0688851391832058e-05, "loss": 0.3489, "num_tokens": 2404526195.0, "step": 3153 }, { "epoch": 4.30314927029103, "grad_norm": 0.3301130098464835, "learning_rate": 2.0678936253447092e-05, "loss": 0.352, "num_tokens": 2405328612.0, "step": 3154 }, { "epoch": 4.304514807544593, "grad_norm": 0.32984076662424905, "learning_rate": 2.0669021518061463e-05, "loss": 0.3377, "num_tokens": 2406075822.0, "step": 3155 }, { "epoch": 4.305880344798156, "grad_norm": 0.3333996514482176, "learning_rate": 2.0659107188699722e-05, "loss": 0.3468, "num_tokens": 2406934728.0, "step": 3156 }, { "epoch": 4.30724588205172, "grad_norm": 0.3165084246629771, "learning_rate": 2.064919326838632e-05, "loss": 0.3373, "num_tokens": 2407688977.0, "step": 3157 }, { "epoch": 4.308611419305283, "grad_norm": 0.30995064526350896, "learning_rate": 2.0639279760145544e-05, "loss": 0.3321, "num_tokens": 2408497714.0, "step": 3158 }, { "epoch": 4.309976956558846, "grad_norm": 0.3201601425351169, "learning_rate": 2.062936666700159e-05, "loss": 0.3444, "num_tokens": 2409221552.0, "step": 3159 }, { "epoch": 4.311342493812409, "grad_norm": 0.3310449067194394, "learning_rate": 2.0619453991978502e-05, "loss": 0.3319, "num_tokens": 2410012151.0, "step": 3160 }, { "epoch": 4.312708031065973, "grad_norm": 0.34089557772812384, "learning_rate": 2.0609541738100223e-05, "loss": 0.357, "num_tokens": 2410855500.0, "step": 3161 }, { "epoch": 4.314073568319536, "grad_norm": 0.3178443655231617, "learning_rate": 2.0599629908390545e-05, "loss": 0.3372, "num_tokens": 2411571588.0, "step": 3162 }, { "epoch": 4.315439105573099, "grad_norm": 0.30951933222637545, "learning_rate": 2.0589718505873128e-05, "loss": 0.3602, "num_tokens": 2412345733.0, "step": 3163 }, { "epoch": 4.316804642826662, "grad_norm": 0.3473927794745894, "learning_rate": 2.057980753357153e-05, "loss": 0.3345, "num_tokens": 2413037575.0, "step": 3164 }, { "epoch": 4.318170180080226, "grad_norm": 0.32349186463811314, "learning_rate": 2.0569896994509146e-05, "loss": 0.3572, "num_tokens": 2413823654.0, "step": 3165 }, { "epoch": 4.319535717333789, "grad_norm": 0.3114070527893276, "learning_rate": 2.0559986891709258e-05, "loss": 0.3486, "num_tokens": 2414512563.0, "step": 3166 }, { "epoch": 4.320901254587351, "grad_norm": 0.3512081033208699, "learning_rate": 2.0550077228195e-05, "loss": 0.3461, "num_tokens": 2415234866.0, "step": 3167 }, { "epoch": 4.322266791840915, "grad_norm": 0.3327062497374276, "learning_rate": 2.05401680069894e-05, "loss": 0.3544, "num_tokens": 2415958555.0, "step": 3168 }, { "epoch": 4.323632329094478, "grad_norm": 0.3199175667533422, "learning_rate": 2.0530259231115308e-05, "loss": 0.3469, "num_tokens": 2416748290.0, "step": 3169 }, { "epoch": 4.324997866348041, "grad_norm": 0.3178657379239043, "learning_rate": 2.052035090359549e-05, "loss": 0.346, "num_tokens": 2417495153.0, "step": 3170 }, { "epoch": 4.326363403601604, "grad_norm": 0.30385913390645997, "learning_rate": 2.051044302745253e-05, "loss": 0.3469, "num_tokens": 2418280297.0, "step": 3171 }, { "epoch": 4.327728940855168, "grad_norm": 0.3062779297128936, "learning_rate": 2.0500535605708887e-05, "loss": 0.3212, "num_tokens": 2418994602.0, "step": 3172 }, { "epoch": 4.329094478108731, "grad_norm": 0.3339553933028119, "learning_rate": 2.0490628641386903e-05, "loss": 0.3342, "num_tokens": 2419688727.0, "step": 3173 }, { "epoch": 4.330460015362294, "grad_norm": 0.3042337248318485, "learning_rate": 2.0480722137508754e-05, "loss": 0.3193, "num_tokens": 2420412511.0, "step": 3174 }, { "epoch": 4.331825552615857, "grad_norm": 0.39448277519097524, "learning_rate": 2.0470816097096482e-05, "loss": 0.3477, "num_tokens": 2421153131.0, "step": 3175 }, { "epoch": 4.333191089869421, "grad_norm": 0.3229995544942035, "learning_rate": 2.0460910523171997e-05, "loss": 0.3369, "num_tokens": 2421864895.0, "step": 3176 }, { "epoch": 4.334556627122984, "grad_norm": 0.3435071103358658, "learning_rate": 2.0451005418757058e-05, "loss": 0.3433, "num_tokens": 2422672677.0, "step": 3177 }, { "epoch": 4.335922164376547, "grad_norm": 0.35689810911114395, "learning_rate": 2.0441100786873293e-05, "loss": 0.3364, "num_tokens": 2423380998.0, "step": 3178 }, { "epoch": 4.33728770163011, "grad_norm": 0.27453840378459715, "learning_rate": 2.0431196630542152e-05, "loss": 0.3326, "num_tokens": 2424129863.0, "step": 3179 }, { "epoch": 4.338653238883674, "grad_norm": 0.3718086945733191, "learning_rate": 2.0421292952784996e-05, "loss": 0.3415, "num_tokens": 2424915607.0, "step": 3180 }, { "epoch": 4.340018776137237, "grad_norm": 0.3037875868070979, "learning_rate": 2.041138975662298e-05, "loss": 0.3529, "num_tokens": 2425783830.0, "step": 3181 }, { "epoch": 4.341384313390799, "grad_norm": 0.3088022923903192, "learning_rate": 2.0401487045077158e-05, "loss": 0.3416, "num_tokens": 2426582468.0, "step": 3182 }, { "epoch": 4.342749850644363, "grad_norm": 0.3309558683071714, "learning_rate": 2.0391584821168413e-05, "loss": 0.3489, "num_tokens": 2427393030.0, "step": 3183 }, { "epoch": 4.344115387897926, "grad_norm": 0.28328180574445894, "learning_rate": 2.0381683087917485e-05, "loss": 0.3468, "num_tokens": 2428214398.0, "step": 3184 }, { "epoch": 4.345480925151489, "grad_norm": 0.31729219769997025, "learning_rate": 2.037178184834496e-05, "loss": 0.3726, "num_tokens": 2429001351.0, "step": 3185 }, { "epoch": 4.346846462405052, "grad_norm": 0.32226912427242016, "learning_rate": 2.0361881105471286e-05, "loss": 0.3307, "num_tokens": 2429752578.0, "step": 3186 }, { "epoch": 4.3482119996586155, "grad_norm": 0.3236162598645486, "learning_rate": 2.035198086231675e-05, "loss": 0.3473, "num_tokens": 2430512562.0, "step": 3187 }, { "epoch": 4.349577536912179, "grad_norm": 0.3484702062355987, "learning_rate": 2.0342081121901466e-05, "loss": 0.352, "num_tokens": 2431256168.0, "step": 3188 }, { "epoch": 4.350943074165742, "grad_norm": 0.3061091430472571, "learning_rate": 2.0332181887245443e-05, "loss": 0.3423, "num_tokens": 2432042331.0, "step": 3189 }, { "epoch": 4.352308611419305, "grad_norm": 0.32269954030180376, "learning_rate": 2.03222831613685e-05, "loss": 0.3357, "num_tokens": 2432800248.0, "step": 3190 }, { "epoch": 4.3536741486728685, "grad_norm": 0.33981364950724086, "learning_rate": 2.031238494729031e-05, "loss": 0.3306, "num_tokens": 2433530159.0, "step": 3191 }, { "epoch": 4.355039685926432, "grad_norm": 0.31152092770802575, "learning_rate": 2.0302487248030385e-05, "loss": 0.3436, "num_tokens": 2434315894.0, "step": 3192 }, { "epoch": 4.356405223179995, "grad_norm": 0.357050749437103, "learning_rate": 2.029259006660809e-05, "loss": 0.3477, "num_tokens": 2435099043.0, "step": 3193 }, { "epoch": 4.357770760433558, "grad_norm": 0.3481721036454911, "learning_rate": 2.028269340604263e-05, "loss": 0.3474, "num_tokens": 2435958479.0, "step": 3194 }, { "epoch": 4.3591362976871215, "grad_norm": 0.3032131049161953, "learning_rate": 2.0272797269353033e-05, "loss": 0.3445, "num_tokens": 2436683495.0, "step": 3195 }, { "epoch": 4.360501834940685, "grad_norm": 0.3178846062740119, "learning_rate": 2.0262901659558194e-05, "loss": 0.3555, "num_tokens": 2437404356.0, "step": 3196 }, { "epoch": 4.361867372194248, "grad_norm": 0.30514280937194954, "learning_rate": 2.0253006579676824e-05, "loss": 0.3193, "num_tokens": 2438129504.0, "step": 3197 }, { "epoch": 4.363232909447811, "grad_norm": 0.3253046399938944, "learning_rate": 2.02431120327275e-05, "loss": 0.3412, "num_tokens": 2438825462.0, "step": 3198 }, { "epoch": 4.364598446701374, "grad_norm": 0.32023939126558126, "learning_rate": 2.02332180217286e-05, "loss": 0.3508, "num_tokens": 2439576051.0, "step": 3199 }, { "epoch": 4.365963983954937, "grad_norm": 0.3057516604758355, "learning_rate": 2.022332454969838e-05, "loss": 0.3625, "num_tokens": 2440383564.0, "step": 3200 }, { "epoch": 4.3673295212085, "grad_norm": 0.30339514090577396, "learning_rate": 2.0213431619654894e-05, "loss": 0.3317, "num_tokens": 2441081604.0, "step": 3201 }, { "epoch": 4.368695058462063, "grad_norm": 0.3312876326258409, "learning_rate": 2.0203539234616052e-05, "loss": 0.3386, "num_tokens": 2441844908.0, "step": 3202 }, { "epoch": 4.370060595715627, "grad_norm": 0.3059064687371581, "learning_rate": 2.019364739759959e-05, "loss": 0.3411, "num_tokens": 2442576566.0, "step": 3203 }, { "epoch": 4.37142613296919, "grad_norm": 0.3152070342759736, "learning_rate": 2.0183756111623084e-05, "loss": 0.3321, "num_tokens": 2443362562.0, "step": 3204 }, { "epoch": 4.372791670222753, "grad_norm": 0.28530026249419543, "learning_rate": 2.017386537970393e-05, "loss": 0.351, "num_tokens": 2444184204.0, "step": 3205 }, { "epoch": 4.374157207476316, "grad_norm": 0.34149023493646846, "learning_rate": 2.0163975204859364e-05, "loss": 0.3534, "num_tokens": 2444981261.0, "step": 3206 }, { "epoch": 4.37552274472988, "grad_norm": 0.30400844645306163, "learning_rate": 2.0154085590106455e-05, "loss": 0.3398, "num_tokens": 2445796301.0, "step": 3207 }, { "epoch": 4.376888281983443, "grad_norm": 0.30556043234462016, "learning_rate": 2.0144196538462085e-05, "loss": 0.3263, "num_tokens": 2446613119.0, "step": 3208 }, { "epoch": 4.378253819237006, "grad_norm": 0.343404501752241, "learning_rate": 2.0134308052942995e-05, "loss": 0.3571, "num_tokens": 2447398254.0, "step": 3209 }, { "epoch": 4.379619356490569, "grad_norm": 0.3376630300031014, "learning_rate": 2.0124420136565717e-05, "loss": 0.3294, "num_tokens": 2448110862.0, "step": 3210 }, { "epoch": 4.380984893744133, "grad_norm": 0.2904407244812554, "learning_rate": 2.011453279234663e-05, "loss": 0.3255, "num_tokens": 2448794908.0, "step": 3211 }, { "epoch": 4.382350430997696, "grad_norm": 0.3225975123934585, "learning_rate": 2.0104646023301937e-05, "loss": 0.337, "num_tokens": 2449577799.0, "step": 3212 }, { "epoch": 4.383715968251259, "grad_norm": 0.32206581651822125, "learning_rate": 2.009475983244766e-05, "loss": 0.3533, "num_tokens": 2450407875.0, "step": 3213 }, { "epoch": 4.3850815055048225, "grad_norm": 0.36885463543340946, "learning_rate": 2.008487422279965e-05, "loss": 0.3565, "num_tokens": 2451142631.0, "step": 3214 }, { "epoch": 4.386447042758386, "grad_norm": 0.3340857766051986, "learning_rate": 2.007498919737357e-05, "loss": 0.3503, "num_tokens": 2451907223.0, "step": 3215 }, { "epoch": 4.387812580011948, "grad_norm": 0.3254439469532262, "learning_rate": 2.0065104759184934e-05, "loss": 0.3414, "num_tokens": 2452625317.0, "step": 3216 }, { "epoch": 4.389178117265511, "grad_norm": 0.3104159286723605, "learning_rate": 2.0055220911249034e-05, "loss": 0.3333, "num_tokens": 2453378767.0, "step": 3217 }, { "epoch": 4.390543654519075, "grad_norm": 0.3718208560788279, "learning_rate": 2.0045337656581015e-05, "loss": 0.3419, "num_tokens": 2454101567.0, "step": 3218 }, { "epoch": 4.391909191772638, "grad_norm": 0.3472458453989601, "learning_rate": 2.0035454998195832e-05, "loss": 0.3486, "num_tokens": 2454837984.0, "step": 3219 }, { "epoch": 4.393274729026201, "grad_norm": 0.2821832148320457, "learning_rate": 2.002557293910824e-05, "loss": 0.3467, "num_tokens": 2455643241.0, "step": 3220 }, { "epoch": 4.394640266279764, "grad_norm": 0.36370507932902585, "learning_rate": 2.0015691482332853e-05, "loss": 0.3522, "num_tokens": 2456412463.0, "step": 3221 }, { "epoch": 4.396005803533328, "grad_norm": 0.38314174431925907, "learning_rate": 2.000581063088405e-05, "loss": 0.3575, "num_tokens": 2457147773.0, "step": 3222 }, { "epoch": 4.397371340786891, "grad_norm": 0.30335841872117486, "learning_rate": 1.9995930387776076e-05, "loss": 0.3305, "num_tokens": 2457841609.0, "step": 3223 }, { "epoch": 4.398736878040454, "grad_norm": 0.3495317628155296, "learning_rate": 1.9986050756022935e-05, "loss": 0.3437, "num_tokens": 2458671666.0, "step": 3224 }, { "epoch": 4.400102415294017, "grad_norm": 0.3382556323607734, "learning_rate": 1.9976171738638504e-05, "loss": 0.3409, "num_tokens": 2459428955.0, "step": 3225 }, { "epoch": 4.401467952547581, "grad_norm": 0.3400431063361749, "learning_rate": 1.996629333863643e-05, "loss": 0.3406, "num_tokens": 2460115392.0, "step": 3226 }, { "epoch": 4.402833489801144, "grad_norm": 0.3663184237539081, "learning_rate": 1.995641555903018e-05, "loss": 0.3534, "num_tokens": 2460839201.0, "step": 3227 }, { "epoch": 4.404199027054707, "grad_norm": 0.32209608073278734, "learning_rate": 1.9946538402833047e-05, "loss": 0.3399, "num_tokens": 2461582259.0, "step": 3228 }, { "epoch": 4.40556456430827, "grad_norm": 0.36276561179483924, "learning_rate": 1.993666187305812e-05, "loss": 0.3621, "num_tokens": 2462362976.0, "step": 3229 }, { "epoch": 4.406930101561834, "grad_norm": 0.30537509891475173, "learning_rate": 1.99267859727183e-05, "loss": 0.3378, "num_tokens": 2463196713.0, "step": 3230 }, { "epoch": 4.408295638815396, "grad_norm": 0.3143356717415248, "learning_rate": 1.991691070482629e-05, "loss": 0.3563, "num_tokens": 2463916038.0, "step": 3231 }, { "epoch": 4.409661176068959, "grad_norm": 0.3332047273524137, "learning_rate": 1.9907036072394625e-05, "loss": 0.3403, "num_tokens": 2464656861.0, "step": 3232 }, { "epoch": 4.4110267133225225, "grad_norm": 0.3582028932837768, "learning_rate": 1.9897162078435613e-05, "loss": 0.335, "num_tokens": 2465406953.0, "step": 3233 }, { "epoch": 4.412392250576086, "grad_norm": 0.3348544980291843, "learning_rate": 1.9887288725961374e-05, "loss": 0.3239, "num_tokens": 2466173926.0, "step": 3234 }, { "epoch": 4.413757787829649, "grad_norm": 0.34803717278313445, "learning_rate": 1.9877416017983855e-05, "loss": 0.3407, "num_tokens": 2466924657.0, "step": 3235 }, { "epoch": 4.415123325083212, "grad_norm": 0.37806680313307656, "learning_rate": 1.9867543957514792e-05, "loss": 0.3384, "num_tokens": 2467719516.0, "step": 3236 }, { "epoch": 4.4164888623367755, "grad_norm": 0.283459492384044, "learning_rate": 1.985767254756572e-05, "loss": 0.35, "num_tokens": 2468461885.0, "step": 3237 }, { "epoch": 4.417854399590339, "grad_norm": 0.40350269425751367, "learning_rate": 1.9847801791147973e-05, "loss": 0.3464, "num_tokens": 2469170003.0, "step": 3238 }, { "epoch": 4.419219936843902, "grad_norm": 0.34916304417904354, "learning_rate": 1.9837931691272696e-05, "loss": 0.3337, "num_tokens": 2469924858.0, "step": 3239 }, { "epoch": 4.420585474097465, "grad_norm": 0.310550344445638, "learning_rate": 1.982806225095083e-05, "loss": 0.337, "num_tokens": 2470662621.0, "step": 3240 }, { "epoch": 4.4219510113510285, "grad_norm": 0.3389202103688059, "learning_rate": 1.9818193473193113e-05, "loss": 0.3389, "num_tokens": 2471440349.0, "step": 3241 }, { "epoch": 4.423316548604592, "grad_norm": 0.36924479632901175, "learning_rate": 1.9808325361010087e-05, "loss": 0.3342, "num_tokens": 2472177764.0, "step": 3242 }, { "epoch": 4.424682085858155, "grad_norm": 0.32639447859422466, "learning_rate": 1.979845791741206e-05, "loss": 0.3445, "num_tokens": 2472907886.0, "step": 3243 }, { "epoch": 4.426047623111718, "grad_norm": 0.33589722378555226, "learning_rate": 1.9788591145409205e-05, "loss": 0.3491, "num_tokens": 2473720043.0, "step": 3244 }, { "epoch": 4.4274131603652815, "grad_norm": 0.29093352331599615, "learning_rate": 1.9778725048011412e-05, "loss": 0.3295, "num_tokens": 2474470385.0, "step": 3245 }, { "epoch": 4.428778697618845, "grad_norm": 0.3625636780678995, "learning_rate": 1.9768859628228414e-05, "loss": 0.3633, "num_tokens": 2475282151.0, "step": 3246 }, { "epoch": 4.430144234872408, "grad_norm": 0.3403993067396194, "learning_rate": 1.975899488906972e-05, "loss": 0.3635, "num_tokens": 2476063741.0, "step": 3247 }, { "epoch": 4.43150977212597, "grad_norm": 0.3315073817417819, "learning_rate": 1.9749130833544638e-05, "loss": 0.3447, "num_tokens": 2476809266.0, "step": 3248 }, { "epoch": 4.432875309379534, "grad_norm": 0.34398750657120675, "learning_rate": 1.973926746466226e-05, "loss": 0.3489, "num_tokens": 2477520149.0, "step": 3249 }, { "epoch": 4.434240846633097, "grad_norm": 0.3069156129073853, "learning_rate": 1.972940478543147e-05, "loss": 0.3561, "num_tokens": 2478320412.0, "step": 3250 }, { "epoch": 4.43560638388666, "grad_norm": 0.39579758063442666, "learning_rate": 1.971954279886096e-05, "loss": 0.3509, "num_tokens": 2479062242.0, "step": 3251 }, { "epoch": 4.436971921140223, "grad_norm": 0.3274245311379461, "learning_rate": 1.9709681507959174e-05, "loss": 0.3497, "num_tokens": 2479839646.0, "step": 3252 }, { "epoch": 4.438337458393787, "grad_norm": 0.35526548749438464, "learning_rate": 1.969982091573438e-05, "loss": 0.3478, "num_tokens": 2480638959.0, "step": 3253 }, { "epoch": 4.43970299564735, "grad_norm": 0.33001197988341163, "learning_rate": 1.9689961025194605e-05, "loss": 0.3421, "num_tokens": 2481365238.0, "step": 3254 }, { "epoch": 4.441068532900913, "grad_norm": 0.35119450622135145, "learning_rate": 1.968010183934769e-05, "loss": 0.3463, "num_tokens": 2482107740.0, "step": 3255 }, { "epoch": 4.4424340701544764, "grad_norm": 0.334640971702914, "learning_rate": 1.9670243361201237e-05, "loss": 0.3464, "num_tokens": 2482836297.0, "step": 3256 }, { "epoch": 4.44379960740804, "grad_norm": 0.30872534257338424, "learning_rate": 1.966038559376264e-05, "loss": 0.354, "num_tokens": 2483649352.0, "step": 3257 }, { "epoch": 4.445165144661603, "grad_norm": 0.3576954757277562, "learning_rate": 1.9650528540039077e-05, "loss": 0.3423, "num_tokens": 2484338254.0, "step": 3258 }, { "epoch": 4.446530681915166, "grad_norm": 0.32106086077441404, "learning_rate": 1.9640672203037504e-05, "loss": 0.3454, "num_tokens": 2485088732.0, "step": 3259 }, { "epoch": 4.4478962191687295, "grad_norm": 0.3072386173931353, "learning_rate": 1.963081658576468e-05, "loss": 0.308, "num_tokens": 2485799066.0, "step": 3260 }, { "epoch": 4.449261756422293, "grad_norm": 0.28444062619034416, "learning_rate": 1.9620961691227106e-05, "loss": 0.3383, "num_tokens": 2486661791.0, "step": 3261 }, { "epoch": 4.450627293675856, "grad_norm": 0.31683427063869957, "learning_rate": 1.9611107522431096e-05, "loss": 0.3386, "num_tokens": 2487353802.0, "step": 3262 }, { "epoch": 4.451992830929418, "grad_norm": 0.3490412194990681, "learning_rate": 1.9601254082382725e-05, "loss": 0.338, "num_tokens": 2488128649.0, "step": 3263 }, { "epoch": 4.453358368182982, "grad_norm": 0.32806660855998115, "learning_rate": 1.9591401374087855e-05, "loss": 0.3443, "num_tokens": 2488933739.0, "step": 3264 }, { "epoch": 4.454723905436545, "grad_norm": 0.3293374407232254, "learning_rate": 1.958154940055212e-05, "loss": 0.3658, "num_tokens": 2489743130.0, "step": 3265 }, { "epoch": 4.456089442690108, "grad_norm": 0.3494087589896921, "learning_rate": 1.9571698164780924e-05, "loss": 0.3487, "num_tokens": 2490509967.0, "step": 3266 }, { "epoch": 4.457454979943671, "grad_norm": 0.2991995669442338, "learning_rate": 1.956184766977946e-05, "loss": 0.3358, "num_tokens": 2491287752.0, "step": 3267 }, { "epoch": 4.458820517197235, "grad_norm": 0.3266320425731176, "learning_rate": 1.955199791855268e-05, "loss": 0.3706, "num_tokens": 2492043005.0, "step": 3268 }, { "epoch": 4.460186054450798, "grad_norm": 0.34240105544470234, "learning_rate": 1.954214891410533e-05, "loss": 0.3322, "num_tokens": 2492739112.0, "step": 3269 }, { "epoch": 4.461551591704361, "grad_norm": 0.3333646331685171, "learning_rate": 1.9532300659441895e-05, "loss": 0.3536, "num_tokens": 2493533341.0, "step": 3270 }, { "epoch": 4.462917128957924, "grad_norm": 0.32935911078641555, "learning_rate": 1.952245315756667e-05, "loss": 0.3552, "num_tokens": 2494303289.0, "step": 3271 }, { "epoch": 4.464282666211488, "grad_norm": 0.3366189162468809, "learning_rate": 1.9512606411483693e-05, "loss": 0.3509, "num_tokens": 2495021527.0, "step": 3272 }, { "epoch": 4.465648203465051, "grad_norm": 0.3452470043977027, "learning_rate": 1.9502760424196773e-05, "loss": 0.3498, "num_tokens": 2495732215.0, "step": 3273 }, { "epoch": 4.467013740718614, "grad_norm": 0.3419509023649937, "learning_rate": 1.949291519870951e-05, "loss": 0.3491, "num_tokens": 2496477046.0, "step": 3274 }, { "epoch": 4.468379277972177, "grad_norm": 0.313255743651396, "learning_rate": 1.948307073802524e-05, "loss": 0.3299, "num_tokens": 2497271207.0, "step": 3275 }, { "epoch": 4.469744815225741, "grad_norm": 0.31305863246274723, "learning_rate": 1.9473227045147096e-05, "loss": 0.3365, "num_tokens": 2498050945.0, "step": 3276 }, { "epoch": 4.471110352479304, "grad_norm": 0.33057533846937753, "learning_rate": 1.9463384123077946e-05, "loss": 0.3349, "num_tokens": 2498787668.0, "step": 3277 }, { "epoch": 4.472475889732867, "grad_norm": 0.3186453159799632, "learning_rate": 1.9453541974820456e-05, "loss": 0.3523, "num_tokens": 2499536440.0, "step": 3278 }, { "epoch": 4.47384142698643, "grad_norm": 0.32723509583770033, "learning_rate": 1.9443700603377037e-05, "loss": 0.3419, "num_tokens": 2500317033.0, "step": 3279 }, { "epoch": 4.475206964239993, "grad_norm": 0.2984847784930232, "learning_rate": 1.9433860011749846e-05, "loss": 0.3247, "num_tokens": 2501050533.0, "step": 3280 }, { "epoch": 4.476572501493556, "grad_norm": 0.32005250977230926, "learning_rate": 1.9424020202940844e-05, "loss": 0.3357, "num_tokens": 2501802209.0, "step": 3281 }, { "epoch": 4.477938038747119, "grad_norm": 0.3214592471121298, "learning_rate": 1.9414181179951715e-05, "loss": 0.329, "num_tokens": 2502546403.0, "step": 3282 }, { "epoch": 4.4793035760006825, "grad_norm": 0.296906500106654, "learning_rate": 1.9404342945783936e-05, "loss": 0.3326, "num_tokens": 2503320996.0, "step": 3283 }, { "epoch": 4.480669113254246, "grad_norm": 0.32409349126189385, "learning_rate": 1.939450550343871e-05, "loss": 0.3421, "num_tokens": 2504110952.0, "step": 3284 }, { "epoch": 4.482034650507809, "grad_norm": 0.3078313535390476, "learning_rate": 1.938466885591702e-05, "loss": 0.3504, "num_tokens": 2504822869.0, "step": 3285 }, { "epoch": 4.483400187761372, "grad_norm": 0.31635228023905493, "learning_rate": 1.9374833006219597e-05, "loss": 0.3506, "num_tokens": 2505633146.0, "step": 3286 }, { "epoch": 4.4847657250149355, "grad_norm": 0.3099978125662998, "learning_rate": 1.9364997957346947e-05, "loss": 0.3426, "num_tokens": 2506383091.0, "step": 3287 }, { "epoch": 4.486131262268499, "grad_norm": 0.30438439109753707, "learning_rate": 1.935516371229931e-05, "loss": 0.3461, "num_tokens": 2507197599.0, "step": 3288 }, { "epoch": 4.487496799522062, "grad_norm": 0.32174125983566326, "learning_rate": 1.9345330274076675e-05, "loss": 0.3425, "num_tokens": 2507937058.0, "step": 3289 }, { "epoch": 4.488862336775625, "grad_norm": 0.32145774311751524, "learning_rate": 1.9335497645678813e-05, "loss": 0.3541, "num_tokens": 2508703781.0, "step": 3290 }, { "epoch": 4.4902278740291885, "grad_norm": 0.3327481772187328, "learning_rate": 1.9325665830105232e-05, "loss": 0.3419, "num_tokens": 2509409480.0, "step": 3291 }, { "epoch": 4.491593411282752, "grad_norm": 0.31614851050659315, "learning_rate": 1.9315834830355192e-05, "loss": 0.3398, "num_tokens": 2510154327.0, "step": 3292 }, { "epoch": 4.492958948536315, "grad_norm": 0.3361508246851732, "learning_rate": 1.9306004649427698e-05, "loss": 0.3446, "num_tokens": 2511004262.0, "step": 3293 }, { "epoch": 4.494324485789878, "grad_norm": 0.3393678176064268, "learning_rate": 1.9296175290321527e-05, "loss": 0.3474, "num_tokens": 2511782251.0, "step": 3294 }, { "epoch": 4.495690023043442, "grad_norm": 0.299363240361854, "learning_rate": 1.928634675603518e-05, "loss": 0.3257, "num_tokens": 2512435363.0, "step": 3295 }, { "epoch": 4.497055560297005, "grad_norm": 0.32688649987540447, "learning_rate": 1.927651904956691e-05, "loss": 0.3616, "num_tokens": 2513169168.0, "step": 3296 }, { "epoch": 4.498421097550567, "grad_norm": 0.34605147675637965, "learning_rate": 1.9266692173914747e-05, "loss": 0.3408, "num_tokens": 2513949184.0, "step": 3297 }, { "epoch": 4.49978663480413, "grad_norm": 0.29479271649382155, "learning_rate": 1.925686613207642e-05, "loss": 0.344, "num_tokens": 2514760142.0, "step": 3298 }, { "epoch": 4.501152172057694, "grad_norm": 0.30998996027900305, "learning_rate": 1.924704092704945e-05, "loss": 0.339, "num_tokens": 2515582299.0, "step": 3299 }, { "epoch": 4.502517709311257, "grad_norm": 0.33542163525024676, "learning_rate": 1.923721656183107e-05, "loss": 0.3173, "num_tokens": 2516262855.0, "step": 3300 }, { "epoch": 4.50388324656482, "grad_norm": 0.3072403355898189, "learning_rate": 1.9227393039418273e-05, "loss": 0.335, "num_tokens": 2517010244.0, "step": 3301 }, { "epoch": 4.5052487838183835, "grad_norm": 0.32120413794241454, "learning_rate": 1.9217570362807784e-05, "loss": 0.356, "num_tokens": 2517795363.0, "step": 3302 }, { "epoch": 4.506614321071947, "grad_norm": 0.34424803857882896, "learning_rate": 1.9207748534996086e-05, "loss": 0.3405, "num_tokens": 2518603689.0, "step": 3303 }, { "epoch": 4.50797985832551, "grad_norm": 0.33633639498959483, "learning_rate": 1.9197927558979385e-05, "loss": 0.34, "num_tokens": 2519352132.0, "step": 3304 }, { "epoch": 4.509345395579073, "grad_norm": 0.2988776711879864, "learning_rate": 1.9188107437753633e-05, "loss": 0.3609, "num_tokens": 2520187666.0, "step": 3305 }, { "epoch": 4.5107109328326365, "grad_norm": 0.3560607482861981, "learning_rate": 1.9178288174314535e-05, "loss": 0.3439, "num_tokens": 2520939976.0, "step": 3306 }, { "epoch": 4.5120764700862, "grad_norm": 0.3251661078938475, "learning_rate": 1.9168469771657518e-05, "loss": 0.357, "num_tokens": 2521724011.0, "step": 3307 }, { "epoch": 4.513442007339763, "grad_norm": 0.30636069309631647, "learning_rate": 1.9158652232777748e-05, "loss": 0.3298, "num_tokens": 2522455080.0, "step": 3308 }, { "epoch": 4.514807544593326, "grad_norm": 0.3244027118498426, "learning_rate": 1.914883556067013e-05, "loss": 0.3497, "num_tokens": 2523213036.0, "step": 3309 }, { "epoch": 4.5161730818468895, "grad_norm": 0.31557664719218825, "learning_rate": 1.9139019758329315e-05, "loss": 0.3421, "num_tokens": 2524100233.0, "step": 3310 }, { "epoch": 4.517538619100453, "grad_norm": 0.318771994408985, "learning_rate": 1.9129204828749674e-05, "loss": 0.3356, "num_tokens": 2524830608.0, "step": 3311 }, { "epoch": 4.518904156354015, "grad_norm": 0.31828617160615047, "learning_rate": 1.9119390774925315e-05, "loss": 0.3465, "num_tokens": 2525507673.0, "step": 3312 }, { "epoch": 4.520269693607578, "grad_norm": 0.33188619134500374, "learning_rate": 1.9109577599850083e-05, "loss": 0.3457, "num_tokens": 2526187291.0, "step": 3313 }, { "epoch": 4.521635230861142, "grad_norm": 0.3178952401942032, "learning_rate": 1.909976530651754e-05, "loss": 0.3448, "num_tokens": 2526971354.0, "step": 3314 }, { "epoch": 4.523000768114705, "grad_norm": 0.3151490899477546, "learning_rate": 1.908995389792102e-05, "loss": 0.3456, "num_tokens": 2527729679.0, "step": 3315 }, { "epoch": 4.524366305368268, "grad_norm": 0.31909265849491186, "learning_rate": 1.908014337705353e-05, "loss": 0.3357, "num_tokens": 2528525732.0, "step": 3316 }, { "epoch": 4.525731842621831, "grad_norm": 0.3245851938406935, "learning_rate": 1.9070333746907855e-05, "loss": 0.3532, "num_tokens": 2529330692.0, "step": 3317 }, { "epoch": 4.527097379875395, "grad_norm": 0.3245631816554141, "learning_rate": 1.906052501047648e-05, "loss": 0.3406, "num_tokens": 2530092824.0, "step": 3318 }, { "epoch": 4.528462917128958, "grad_norm": 0.3138996185927402, "learning_rate": 1.905071717075162e-05, "loss": 0.3486, "num_tokens": 2530860365.0, "step": 3319 }, { "epoch": 4.529828454382521, "grad_norm": 0.31840536981246315, "learning_rate": 1.904091023072524e-05, "loss": 0.35, "num_tokens": 2531559643.0, "step": 3320 }, { "epoch": 4.531193991636084, "grad_norm": 0.3277225999697569, "learning_rate": 1.903110419338899e-05, "loss": 0.3536, "num_tokens": 2532359462.0, "step": 3321 }, { "epoch": 4.532559528889648, "grad_norm": 0.3164657271574981, "learning_rate": 1.9021299061734283e-05, "loss": 0.3489, "num_tokens": 2533120591.0, "step": 3322 }, { "epoch": 4.533925066143211, "grad_norm": 0.3060392836844503, "learning_rate": 1.9011494838752234e-05, "loss": 0.3455, "num_tokens": 2533901847.0, "step": 3323 }, { "epoch": 4.535290603396774, "grad_norm": 0.34378957263167786, "learning_rate": 1.900169152743369e-05, "loss": 0.3495, "num_tokens": 2534727619.0, "step": 3324 }, { "epoch": 4.536656140650337, "grad_norm": 0.2886534877925538, "learning_rate": 1.8991889130769216e-05, "loss": 0.3379, "num_tokens": 2535548012.0, "step": 3325 }, { "epoch": 4.538021677903901, "grad_norm": 0.33290862211265365, "learning_rate": 1.89820876517491e-05, "loss": 0.3496, "num_tokens": 2536296194.0, "step": 3326 }, { "epoch": 4.539387215157463, "grad_norm": 0.3227011790652056, "learning_rate": 1.8972287093363345e-05, "loss": 0.3382, "num_tokens": 2536967574.0, "step": 3327 }, { "epoch": 4.540752752411027, "grad_norm": 0.3161428402051027, "learning_rate": 1.896248745860168e-05, "loss": 0.3275, "num_tokens": 2537750449.0, "step": 3328 }, { "epoch": 4.5421182896645895, "grad_norm": 0.33489711014717216, "learning_rate": 1.895268875045355e-05, "loss": 0.3582, "num_tokens": 2538525532.0, "step": 3329 }, { "epoch": 4.543483826918153, "grad_norm": 0.31670603543727244, "learning_rate": 1.8942890971908105e-05, "loss": 0.3313, "num_tokens": 2539307832.0, "step": 3330 }, { "epoch": 4.544849364171716, "grad_norm": 0.293894558094675, "learning_rate": 1.893309412595424e-05, "loss": 0.3431, "num_tokens": 2540070224.0, "step": 3331 }, { "epoch": 4.546214901425279, "grad_norm": 0.32413295536198655, "learning_rate": 1.8923298215580534e-05, "loss": 0.3531, "num_tokens": 2540806333.0, "step": 3332 }, { "epoch": 4.5475804386788425, "grad_norm": 0.35601570786711506, "learning_rate": 1.891350324377531e-05, "loss": 0.3662, "num_tokens": 2541550248.0, "step": 3333 }, { "epoch": 4.548945975932406, "grad_norm": 0.3171341734138707, "learning_rate": 1.890370921352658e-05, "loss": 0.3388, "num_tokens": 2542252341.0, "step": 3334 }, { "epoch": 4.550311513185969, "grad_norm": 0.32722123630657013, "learning_rate": 1.8893916127822075e-05, "loss": 0.3612, "num_tokens": 2543048499.0, "step": 3335 }, { "epoch": 4.551677050439532, "grad_norm": 0.30943199833192, "learning_rate": 1.888412398964925e-05, "loss": 0.3415, "num_tokens": 2543837208.0, "step": 3336 }, { "epoch": 4.5530425876930956, "grad_norm": 0.3414314434241047, "learning_rate": 1.8874332801995258e-05, "loss": 0.3445, "num_tokens": 2544468680.0, "step": 3337 }, { "epoch": 4.554408124946659, "grad_norm": 0.3274067275394388, "learning_rate": 1.8864542567846968e-05, "loss": 0.3626, "num_tokens": 2545261263.0, "step": 3338 }, { "epoch": 4.555773662200222, "grad_norm": 0.33058956898774583, "learning_rate": 1.885475329019095e-05, "loss": 0.3675, "num_tokens": 2545984283.0, "step": 3339 }, { "epoch": 4.557139199453785, "grad_norm": 0.3121068172371353, "learning_rate": 1.8844964972013504e-05, "loss": 0.3313, "num_tokens": 2546719893.0, "step": 3340 }, { "epoch": 4.558504736707349, "grad_norm": 0.3317143390013421, "learning_rate": 1.88351776163006e-05, "loss": 0.3578, "num_tokens": 2547469000.0, "step": 3341 }, { "epoch": 4.559870273960912, "grad_norm": 0.3421124936576216, "learning_rate": 1.882539122603796e-05, "loss": 0.3407, "num_tokens": 2548285537.0, "step": 3342 }, { "epoch": 4.561235811214475, "grad_norm": 0.3437521359798431, "learning_rate": 1.881560580421097e-05, "loss": 0.3522, "num_tokens": 2549126393.0, "step": 3343 }, { "epoch": 4.562601348468037, "grad_norm": 0.3125843539026942, "learning_rate": 1.8805821353804747e-05, "loss": 0.3489, "num_tokens": 2549971245.0, "step": 3344 }, { "epoch": 4.563966885721602, "grad_norm": 0.3490690998384822, "learning_rate": 1.8796037877804098e-05, "loss": 0.3436, "num_tokens": 2550709560.0, "step": 3345 }, { "epoch": 4.565332422975164, "grad_norm": 0.29760329516780526, "learning_rate": 1.878625537919354e-05, "loss": 0.3566, "num_tokens": 2551515851.0, "step": 3346 }, { "epoch": 4.566697960228727, "grad_norm": 0.3438539179582479, "learning_rate": 1.87764738609573e-05, "loss": 0.3579, "num_tokens": 2552266174.0, "step": 3347 }, { "epoch": 4.5680634974822905, "grad_norm": 0.357101184804039, "learning_rate": 1.8766693326079274e-05, "loss": 0.3545, "num_tokens": 2552988256.0, "step": 3348 }, { "epoch": 4.569429034735854, "grad_norm": 0.3601538680295159, "learning_rate": 1.8756913777543098e-05, "loss": 0.3755, "num_tokens": 2553789787.0, "step": 3349 }, { "epoch": 4.570794571989417, "grad_norm": 0.3249813575994115, "learning_rate": 1.8747135218332088e-05, "loss": 0.3393, "num_tokens": 2554535598.0, "step": 3350 }, { "epoch": 4.57216010924298, "grad_norm": 0.33650566193831644, "learning_rate": 1.873735765142924e-05, "loss": 0.3408, "num_tokens": 2555277493.0, "step": 3351 }, { "epoch": 4.5735256464965435, "grad_norm": 0.3274104117519009, "learning_rate": 1.8727581079817288e-05, "loss": 0.3542, "num_tokens": 2556037836.0, "step": 3352 }, { "epoch": 4.574891183750107, "grad_norm": 0.304606902437341, "learning_rate": 1.8717805506478624e-05, "loss": 0.342, "num_tokens": 2556854768.0, "step": 3353 }, { "epoch": 4.57625672100367, "grad_norm": 0.31345808943470044, "learning_rate": 1.8708030934395373e-05, "loss": 0.3361, "num_tokens": 2557586391.0, "step": 3354 }, { "epoch": 4.577622258257233, "grad_norm": 0.30419738497944493, "learning_rate": 1.869825736654931e-05, "loss": 0.3323, "num_tokens": 2558331440.0, "step": 3355 }, { "epoch": 4.5789877955107965, "grad_norm": 0.28372743107365234, "learning_rate": 1.8688484805921944e-05, "loss": 0.3515, "num_tokens": 2559170428.0, "step": 3356 }, { "epoch": 4.58035333276436, "grad_norm": 0.29356301377359145, "learning_rate": 1.8678713255494455e-05, "loss": 0.3561, "num_tokens": 2559888014.0, "step": 3357 }, { "epoch": 4.581718870017923, "grad_norm": 0.3080417976191071, "learning_rate": 1.866894271824771e-05, "loss": 0.3292, "num_tokens": 2560626387.0, "step": 3358 }, { "epoch": 4.583084407271486, "grad_norm": 0.2936644145107914, "learning_rate": 1.8659173197162296e-05, "loss": 0.3366, "num_tokens": 2561300602.0, "step": 3359 }, { "epoch": 4.5844499445250495, "grad_norm": 0.2957702490378655, "learning_rate": 1.8649404695218455e-05, "loss": 0.345, "num_tokens": 2562128829.0, "step": 3360 }, { "epoch": 4.585815481778612, "grad_norm": 0.2948025527963055, "learning_rate": 1.863963721539614e-05, "loss": 0.3363, "num_tokens": 2562827276.0, "step": 3361 }, { "epoch": 4.587181019032175, "grad_norm": 0.30298457658454303, "learning_rate": 1.8629870760674988e-05, "loss": 0.3575, "num_tokens": 2563572334.0, "step": 3362 }, { "epoch": 4.588546556285738, "grad_norm": 0.31483398633950027, "learning_rate": 1.862010533403432e-05, "loss": 0.3642, "num_tokens": 2564385427.0, "step": 3363 }, { "epoch": 4.589912093539302, "grad_norm": 0.3089824854240472, "learning_rate": 1.8610340938453135e-05, "loss": 0.3462, "num_tokens": 2565140180.0, "step": 3364 }, { "epoch": 4.591277630792865, "grad_norm": 0.3096328335600115, "learning_rate": 1.860057757691015e-05, "loss": 0.3486, "num_tokens": 2565894902.0, "step": 3365 }, { "epoch": 4.592643168046428, "grad_norm": 0.311559535709403, "learning_rate": 1.8590815252383722e-05, "loss": 0.351, "num_tokens": 2566706943.0, "step": 3366 }, { "epoch": 4.594008705299991, "grad_norm": 0.34681847744906824, "learning_rate": 1.8581053967851916e-05, "loss": 0.3411, "num_tokens": 2567496426.0, "step": 3367 }, { "epoch": 4.595374242553555, "grad_norm": 0.31595918348643737, "learning_rate": 1.8571293726292494e-05, "loss": 0.3489, "num_tokens": 2568301001.0, "step": 3368 }, { "epoch": 4.596739779807118, "grad_norm": 0.32106886229590204, "learning_rate": 1.856153453068286e-05, "loss": 0.3534, "num_tokens": 2569107954.0, "step": 3369 }, { "epoch": 4.598105317060681, "grad_norm": 0.3235948194278206, "learning_rate": 1.8551776384000142e-05, "loss": 0.3479, "num_tokens": 2569814167.0, "step": 3370 }, { "epoch": 4.599470854314244, "grad_norm": 0.29299387505922136, "learning_rate": 1.8542019289221116e-05, "loss": 0.3469, "num_tokens": 2570570103.0, "step": 3371 }, { "epoch": 4.600836391567808, "grad_norm": 0.36543146545995286, "learning_rate": 1.8532263249322255e-05, "loss": 0.3616, "num_tokens": 2571375071.0, "step": 3372 }, { "epoch": 4.602201928821371, "grad_norm": 0.3085275797408251, "learning_rate": 1.85225082672797e-05, "loss": 0.3487, "num_tokens": 2572210349.0, "step": 3373 }, { "epoch": 4.603567466074934, "grad_norm": 0.29860358461861414, "learning_rate": 1.8512754346069274e-05, "loss": 0.3452, "num_tokens": 2573010038.0, "step": 3374 }, { "epoch": 4.604933003328497, "grad_norm": 0.30208223328873374, "learning_rate": 1.850300148866648e-05, "loss": 0.3312, "num_tokens": 2573771652.0, "step": 3375 }, { "epoch": 4.60629854058206, "grad_norm": 0.3207553865804577, "learning_rate": 1.8493249698046484e-05, "loss": 0.3574, "num_tokens": 2574514024.0, "step": 3376 }, { "epoch": 4.607664077835624, "grad_norm": 0.30536463393304486, "learning_rate": 1.8483498977184152e-05, "loss": 0.3464, "num_tokens": 2575260299.0, "step": 3377 }, { "epoch": 4.609029615089186, "grad_norm": 0.3182739044378789, "learning_rate": 1.8473749329053977e-05, "loss": 0.3499, "num_tokens": 2576056104.0, "step": 3378 }, { "epoch": 4.6103951523427495, "grad_norm": 0.3106695232217753, "learning_rate": 1.8464000756630186e-05, "loss": 0.354, "num_tokens": 2576795730.0, "step": 3379 }, { "epoch": 4.611760689596313, "grad_norm": 0.3089199425781793, "learning_rate": 1.845425326288663e-05, "loss": 0.3369, "num_tokens": 2577525944.0, "step": 3380 }, { "epoch": 4.613126226849876, "grad_norm": 0.29903735288884653, "learning_rate": 1.8444506850796852e-05, "loss": 0.3403, "num_tokens": 2578180828.0, "step": 3381 }, { "epoch": 4.614491764103439, "grad_norm": 0.3338291378020907, "learning_rate": 1.8434761523334063e-05, "loss": 0.3076, "num_tokens": 2578856727.0, "step": 3382 }, { "epoch": 4.615857301357003, "grad_norm": 0.347818829842605, "learning_rate": 1.842501728347113e-05, "loss": 0.3409, "num_tokens": 2579600204.0, "step": 3383 }, { "epoch": 4.617222838610566, "grad_norm": 0.32617580112522, "learning_rate": 1.8415274134180605e-05, "loss": 0.3388, "num_tokens": 2580358411.0, "step": 3384 }, { "epoch": 4.618588375864129, "grad_norm": 0.32216627890810506, "learning_rate": 1.8405532078434695e-05, "loss": 0.3374, "num_tokens": 2581186137.0, "step": 3385 }, { "epoch": 4.619953913117692, "grad_norm": 0.3096730184172973, "learning_rate": 1.8395791119205294e-05, "loss": 0.3502, "num_tokens": 2581968330.0, "step": 3386 }, { "epoch": 4.621319450371256, "grad_norm": 0.34115578482388914, "learning_rate": 1.8386051259463928e-05, "loss": 0.3489, "num_tokens": 2582745545.0, "step": 3387 }, { "epoch": 4.622684987624819, "grad_norm": 0.31920555467039213, "learning_rate": 1.8376312502181824e-05, "loss": 0.3449, "num_tokens": 2583482951.0, "step": 3388 }, { "epoch": 4.624050524878382, "grad_norm": 0.3037792318514636, "learning_rate": 1.8366574850329847e-05, "loss": 0.34, "num_tokens": 2584221626.0, "step": 3389 }, { "epoch": 4.625416062131945, "grad_norm": 0.3477820039024502, "learning_rate": 1.835683830687853e-05, "loss": 0.3544, "num_tokens": 2585020015.0, "step": 3390 }, { "epoch": 4.626781599385509, "grad_norm": 0.3065651702076384, "learning_rate": 1.8347102874798075e-05, "loss": 0.3509, "num_tokens": 2585814417.0, "step": 3391 }, { "epoch": 4.628147136639072, "grad_norm": 0.3047927609774334, "learning_rate": 1.833736855705834e-05, "loss": 0.3448, "num_tokens": 2586625349.0, "step": 3392 }, { "epoch": 4.629512673892634, "grad_norm": 0.3192857458441644, "learning_rate": 1.832763535662884e-05, "loss": 0.3552, "num_tokens": 2587410176.0, "step": 3393 }, { "epoch": 4.6308782111461975, "grad_norm": 0.3005816608750761, "learning_rate": 1.8317903276478755e-05, "loss": 0.3375, "num_tokens": 2588142873.0, "step": 3394 }, { "epoch": 4.632243748399761, "grad_norm": 0.32715287377307634, "learning_rate": 1.8308172319576924e-05, "loss": 0.3416, "num_tokens": 2588841395.0, "step": 3395 }, { "epoch": 4.633609285653324, "grad_norm": 0.27925938092082025, "learning_rate": 1.829844248889184e-05, "loss": 0.3371, "num_tokens": 2589591040.0, "step": 3396 }, { "epoch": 4.634974822906887, "grad_norm": 0.3286660322039907, "learning_rate": 1.8288713787391655e-05, "loss": 0.3271, "num_tokens": 2590353775.0, "step": 3397 }, { "epoch": 4.6363403601604505, "grad_norm": 0.34813534092260673, "learning_rate": 1.8278986218044167e-05, "loss": 0.3476, "num_tokens": 2591175150.0, "step": 3398 }, { "epoch": 4.637705897414014, "grad_norm": 0.3259961819345449, "learning_rate": 1.8269259783816838e-05, "loss": 0.338, "num_tokens": 2591933313.0, "step": 3399 }, { "epoch": 4.639071434667577, "grad_norm": 0.3267754253111732, "learning_rate": 1.8259534487676793e-05, "loss": 0.3584, "num_tokens": 2592706622.0, "step": 3400 }, { "epoch": 4.64043697192114, "grad_norm": 0.3116091569712068, "learning_rate": 1.824981033259078e-05, "loss": 0.3604, "num_tokens": 2593506766.0, "step": 3401 }, { "epoch": 4.6418025091747035, "grad_norm": 0.3269937718909499, "learning_rate": 1.8240087321525236e-05, "loss": 0.3497, "num_tokens": 2594192449.0, "step": 3402 }, { "epoch": 4.643168046428267, "grad_norm": 0.3162672470019984, "learning_rate": 1.8230365457446213e-05, "loss": 0.3805, "num_tokens": 2595070814.0, "step": 3403 }, { "epoch": 4.64453358368183, "grad_norm": 0.31796024307785015, "learning_rate": 1.8220644743319448e-05, "loss": 0.3475, "num_tokens": 2595865510.0, "step": 3404 }, { "epoch": 4.645899120935393, "grad_norm": 0.3254921877893145, "learning_rate": 1.8210925182110305e-05, "loss": 0.3399, "num_tokens": 2596615035.0, "step": 3405 }, { "epoch": 4.6472646581889565, "grad_norm": 0.32335579069982806, "learning_rate": 1.8201206776783786e-05, "loss": 0.335, "num_tokens": 2597330167.0, "step": 3406 }, { "epoch": 4.64863019544252, "grad_norm": 0.29453834194932815, "learning_rate": 1.8191489530304577e-05, "loss": 0.3392, "num_tokens": 2598059193.0, "step": 3407 }, { "epoch": 4.649995732696082, "grad_norm": 0.3268693978945437, "learning_rate": 1.8181773445636975e-05, "loss": 0.365, "num_tokens": 2598818097.0, "step": 3408 }, { "epoch": 4.651361269949646, "grad_norm": 0.2987759648685659, "learning_rate": 1.8172058525744948e-05, "loss": 0.3513, "num_tokens": 2599628169.0, "step": 3409 }, { "epoch": 4.652726807203209, "grad_norm": 0.309034211022751, "learning_rate": 1.8162344773592085e-05, "loss": 0.3329, "num_tokens": 2600280794.0, "step": 3410 }, { "epoch": 4.654092344456772, "grad_norm": 0.3261873681158273, "learning_rate": 1.815263219214164e-05, "loss": 0.3462, "num_tokens": 2601052424.0, "step": 3411 }, { "epoch": 4.655457881710335, "grad_norm": 0.28755782766171234, "learning_rate": 1.8142920784356497e-05, "loss": 0.3293, "num_tokens": 2601859565.0, "step": 3412 }, { "epoch": 4.656823418963898, "grad_norm": 0.2950798572538018, "learning_rate": 1.8133210553199175e-05, "loss": 0.3366, "num_tokens": 2602585349.0, "step": 3413 }, { "epoch": 4.658188956217462, "grad_norm": 0.32476046781409373, "learning_rate": 1.8123501501631874e-05, "loss": 0.3305, "num_tokens": 2603312276.0, "step": 3414 }, { "epoch": 4.659554493471025, "grad_norm": 0.3234332905456533, "learning_rate": 1.811379363261638e-05, "loss": 0.363, "num_tokens": 2604069856.0, "step": 3415 }, { "epoch": 4.660920030724588, "grad_norm": 0.342003550800321, "learning_rate": 1.810408694911415e-05, "loss": 0.3552, "num_tokens": 2604817797.0, "step": 3416 }, { "epoch": 4.662285567978151, "grad_norm": 0.3204821245702183, "learning_rate": 1.8094381454086278e-05, "loss": 0.3457, "num_tokens": 2605619999.0, "step": 3417 }, { "epoch": 4.663651105231715, "grad_norm": 0.3044295217627995, "learning_rate": 1.8084677150493486e-05, "loss": 0.335, "num_tokens": 2606353413.0, "step": 3418 }, { "epoch": 4.665016642485278, "grad_norm": 0.31616248430348226, "learning_rate": 1.8074974041296137e-05, "loss": 0.3474, "num_tokens": 2607151533.0, "step": 3419 }, { "epoch": 4.666382179738841, "grad_norm": 0.3169759288043412, "learning_rate": 1.806527212945423e-05, "loss": 0.3446, "num_tokens": 2607906933.0, "step": 3420 }, { "epoch": 4.667747716992404, "grad_norm": 0.3145388037578768, "learning_rate": 1.80555714179274e-05, "loss": 0.3485, "num_tokens": 2608675546.0, "step": 3421 }, { "epoch": 4.669113254245968, "grad_norm": 0.3158643633659084, "learning_rate": 1.80458719096749e-05, "loss": 0.3451, "num_tokens": 2609520908.0, "step": 3422 }, { "epoch": 4.670478791499531, "grad_norm": 0.3101172295229198, "learning_rate": 1.803617360765565e-05, "loss": 0.3429, "num_tokens": 2610236046.0, "step": 3423 }, { "epoch": 4.671844328753094, "grad_norm": 0.31339246557178985, "learning_rate": 1.8026476514828176e-05, "loss": 0.3509, "num_tokens": 2610994910.0, "step": 3424 }, { "epoch": 4.6732098660066566, "grad_norm": 0.30294084282966377, "learning_rate": 1.8016780634150635e-05, "loss": 0.3303, "num_tokens": 2611725688.0, "step": 3425 }, { "epoch": 4.67457540326022, "grad_norm": 0.30244438121868905, "learning_rate": 1.8007085968580825e-05, "loss": 0.3554, "num_tokens": 2612546821.0, "step": 3426 }, { "epoch": 4.675940940513783, "grad_norm": 0.2914270368282031, "learning_rate": 1.799739252107617e-05, "loss": 0.3577, "num_tokens": 2613366501.0, "step": 3427 }, { "epoch": 4.677306477767346, "grad_norm": 0.29510074313483375, "learning_rate": 1.7987700294593716e-05, "loss": 0.3285, "num_tokens": 2614117450.0, "step": 3428 }, { "epoch": 4.67867201502091, "grad_norm": 0.31854703400872525, "learning_rate": 1.7978009292090145e-05, "loss": 0.3579, "num_tokens": 2614974965.0, "step": 3429 }, { "epoch": 4.680037552274473, "grad_norm": 0.30981726131937803, "learning_rate": 1.7968319516521763e-05, "loss": 0.3562, "num_tokens": 2615787674.0, "step": 3430 }, { "epoch": 4.681403089528036, "grad_norm": 0.33965972411268375, "learning_rate": 1.795863097084449e-05, "loss": 0.3377, "num_tokens": 2616555299.0, "step": 3431 }, { "epoch": 4.682768626781599, "grad_norm": 0.2762333270718851, "learning_rate": 1.79489436580139e-05, "loss": 0.3525, "num_tokens": 2617320667.0, "step": 3432 }, { "epoch": 4.684134164035163, "grad_norm": 0.3412984695777027, "learning_rate": 1.7939257580985162e-05, "loss": 0.3467, "num_tokens": 2618049419.0, "step": 3433 }, { "epoch": 4.685499701288726, "grad_norm": 0.33023502810072775, "learning_rate": 1.7929572742713083e-05, "loss": 0.3494, "num_tokens": 2618887091.0, "step": 3434 }, { "epoch": 4.686865238542289, "grad_norm": 0.3072818026565498, "learning_rate": 1.791988914615208e-05, "loss": 0.334, "num_tokens": 2619647365.0, "step": 3435 }, { "epoch": 4.688230775795852, "grad_norm": 0.32750888957118185, "learning_rate": 1.7910206794256208e-05, "loss": 0.3354, "num_tokens": 2620471031.0, "step": 3436 }, { "epoch": 4.689596313049416, "grad_norm": 0.3114329872378945, "learning_rate": 1.790052568997913e-05, "loss": 0.3529, "num_tokens": 2621228066.0, "step": 3437 }, { "epoch": 4.690961850302979, "grad_norm": 0.32549460795432567, "learning_rate": 1.7890845836274126e-05, "loss": 0.3516, "num_tokens": 2622012784.0, "step": 3438 }, { "epoch": 4.692327387556542, "grad_norm": 0.3677518039752484, "learning_rate": 1.7881167236094107e-05, "loss": 0.3574, "num_tokens": 2622753444.0, "step": 3439 }, { "epoch": 4.693692924810105, "grad_norm": 0.301842323398526, "learning_rate": 1.787148989239159e-05, "loss": 0.3573, "num_tokens": 2623531027.0, "step": 3440 }, { "epoch": 4.695058462063669, "grad_norm": 0.3091280648697102, "learning_rate": 1.786181380811872e-05, "loss": 0.34, "num_tokens": 2624247871.0, "step": 3441 }, { "epoch": 4.696423999317231, "grad_norm": 0.34108851268095186, "learning_rate": 1.7852138986227246e-05, "loss": 0.3522, "num_tokens": 2624938296.0, "step": 3442 }, { "epoch": 4.697789536570794, "grad_norm": 0.29738632943239096, "learning_rate": 1.784246542966854e-05, "loss": 0.3401, "num_tokens": 2625731718.0, "step": 3443 }, { "epoch": 4.6991550738243575, "grad_norm": 0.3061778205597906, "learning_rate": 1.7832793141393584e-05, "loss": 0.3586, "num_tokens": 2626510612.0, "step": 3444 }, { "epoch": 4.700520611077921, "grad_norm": 0.32861431375171696, "learning_rate": 1.7823122124352977e-05, "loss": 0.3446, "num_tokens": 2627311777.0, "step": 3445 }, { "epoch": 4.701886148331484, "grad_norm": 0.31696789789087915, "learning_rate": 1.781345238149692e-05, "loss": 0.3452, "num_tokens": 2628058491.0, "step": 3446 }, { "epoch": 4.703251685585047, "grad_norm": 0.33287498854960346, "learning_rate": 1.780378391577524e-05, "loss": 0.3509, "num_tokens": 2628888563.0, "step": 3447 }, { "epoch": 4.7046172228386105, "grad_norm": 0.3099186063912354, "learning_rate": 1.7794116730137362e-05, "loss": 0.3436, "num_tokens": 2629637704.0, "step": 3448 }, { "epoch": 4.705982760092174, "grad_norm": 0.29167232331734044, "learning_rate": 1.7784450827532323e-05, "loss": 0.3455, "num_tokens": 2630385190.0, "step": 3449 }, { "epoch": 4.707348297345737, "grad_norm": 0.30294377796491145, "learning_rate": 1.7774786210908784e-05, "loss": 0.3648, "num_tokens": 2631152963.0, "step": 3450 }, { "epoch": 4.7087138345993, "grad_norm": 0.3261554070254651, "learning_rate": 1.7765122883214997e-05, "loss": 0.3301, "num_tokens": 2631867937.0, "step": 3451 }, { "epoch": 4.7100793718528635, "grad_norm": 0.33875580509746905, "learning_rate": 1.7755460847398815e-05, "loss": 0.3437, "num_tokens": 2632602049.0, "step": 3452 }, { "epoch": 4.711444909106427, "grad_norm": 0.31386733479272366, "learning_rate": 1.7745800106407717e-05, "loss": 0.3485, "num_tokens": 2633337686.0, "step": 3453 }, { "epoch": 4.71281044635999, "grad_norm": 0.3088874241925341, "learning_rate": 1.773614066318877e-05, "loss": 0.3552, "num_tokens": 2634062899.0, "step": 3454 }, { "epoch": 4.714175983613553, "grad_norm": 0.3542878647116571, "learning_rate": 1.7726482520688657e-05, "loss": 0.3608, "num_tokens": 2634824158.0, "step": 3455 }, { "epoch": 4.7155415208671165, "grad_norm": 0.3640869970473081, "learning_rate": 1.771682568185365e-05, "loss": 0.3526, "num_tokens": 2635593791.0, "step": 3456 }, { "epoch": 4.716907058120679, "grad_norm": 0.3142900401993257, "learning_rate": 1.770717014962965e-05, "loss": 0.3449, "num_tokens": 2636338475.0, "step": 3457 }, { "epoch": 4.718272595374243, "grad_norm": 0.31389050596330936, "learning_rate": 1.7697515926962116e-05, "loss": 0.345, "num_tokens": 2637073299.0, "step": 3458 }, { "epoch": 4.719638132627805, "grad_norm": 0.33839752529467554, "learning_rate": 1.768786301679616e-05, "loss": 0.356, "num_tokens": 2637795017.0, "step": 3459 }, { "epoch": 4.721003669881369, "grad_norm": 0.30417586192046214, "learning_rate": 1.7678211422076453e-05, "loss": 0.3298, "num_tokens": 2638492681.0, "step": 3460 }, { "epoch": 4.722369207134932, "grad_norm": 0.2995961455154876, "learning_rate": 1.7668561145747278e-05, "loss": 0.3366, "num_tokens": 2639232869.0, "step": 3461 }, { "epoch": 4.723734744388495, "grad_norm": 0.2955134787308375, "learning_rate": 1.7658912190752522e-05, "loss": 0.3305, "num_tokens": 2639977426.0, "step": 3462 }, { "epoch": 4.725100281642058, "grad_norm": 0.3134335821951478, "learning_rate": 1.7649264560035656e-05, "loss": 0.3384, "num_tokens": 2640760372.0, "step": 3463 }, { "epoch": 4.726465818895622, "grad_norm": 0.3218917483126715, "learning_rate": 1.763961825653976e-05, "loss": 0.3484, "num_tokens": 2641560339.0, "step": 3464 }, { "epoch": 4.727831356149185, "grad_norm": 0.32073038604570925, "learning_rate": 1.76299732832075e-05, "loss": 0.3441, "num_tokens": 2642327895.0, "step": 3465 }, { "epoch": 4.729196893402748, "grad_norm": 0.32594763551111045, "learning_rate": 1.762032964298115e-05, "loss": 0.3592, "num_tokens": 2643067545.0, "step": 3466 }, { "epoch": 4.730562430656311, "grad_norm": 0.30804198984846304, "learning_rate": 1.7610687338802552e-05, "loss": 0.3408, "num_tokens": 2643870442.0, "step": 3467 }, { "epoch": 4.731927967909875, "grad_norm": 0.3307955597533594, "learning_rate": 1.760104637361316e-05, "loss": 0.3552, "num_tokens": 2644616278.0, "step": 3468 }, { "epoch": 4.733293505163438, "grad_norm": 0.31801343954444766, "learning_rate": 1.7591406750354018e-05, "loss": 0.3409, "num_tokens": 2645417567.0, "step": 3469 }, { "epoch": 4.734659042417001, "grad_norm": 0.32139896227435105, "learning_rate": 1.7581768471965756e-05, "loss": 0.3314, "num_tokens": 2646177617.0, "step": 3470 }, { "epoch": 4.7360245796705644, "grad_norm": 0.31123427524640856, "learning_rate": 1.75721315413886e-05, "loss": 0.342, "num_tokens": 2646940155.0, "step": 3471 }, { "epoch": 4.737390116924128, "grad_norm": 0.3264209346318759, "learning_rate": 1.7562495961562352e-05, "loss": 0.3505, "num_tokens": 2647764375.0, "step": 3472 }, { "epoch": 4.738755654177691, "grad_norm": 0.2852624896938898, "learning_rate": 1.7552861735426415e-05, "loss": 0.333, "num_tokens": 2648516705.0, "step": 3473 }, { "epoch": 4.740121191431253, "grad_norm": 0.3216880956262033, "learning_rate": 1.754322886591977e-05, "loss": 0.342, "num_tokens": 2649260302.0, "step": 3474 }, { "epoch": 4.741486728684817, "grad_norm": 0.29065353890913814, "learning_rate": 1.7533597355981005e-05, "loss": 0.3413, "num_tokens": 2650048652.0, "step": 3475 }, { "epoch": 4.74285226593838, "grad_norm": 0.29023239436209786, "learning_rate": 1.7523967208548253e-05, "loss": 0.3482, "num_tokens": 2650786394.0, "step": 3476 }, { "epoch": 4.744217803191943, "grad_norm": 0.33154779466732726, "learning_rate": 1.751433842655926e-05, "loss": 0.3672, "num_tokens": 2651584884.0, "step": 3477 }, { "epoch": 4.745583340445506, "grad_norm": 0.29512274078537803, "learning_rate": 1.7504711012951366e-05, "loss": 0.3439, "num_tokens": 2652368070.0, "step": 3478 }, { "epoch": 4.74694887769907, "grad_norm": 0.3100758956952758, "learning_rate": 1.7495084970661462e-05, "loss": 0.3475, "num_tokens": 2653197918.0, "step": 3479 }, { "epoch": 4.748314414952633, "grad_norm": 0.2964631303808998, "learning_rate": 1.7485460302626042e-05, "loss": 0.3492, "num_tokens": 2654018814.0, "step": 3480 }, { "epoch": 4.749679952206196, "grad_norm": 0.32186330861662893, "learning_rate": 1.7475837011781175e-05, "loss": 0.336, "num_tokens": 2654760579.0, "step": 3481 }, { "epoch": 4.751045489459759, "grad_norm": 0.3427254654203508, "learning_rate": 1.7466215101062513e-05, "loss": 0.3492, "num_tokens": 2655530938.0, "step": 3482 }, { "epoch": 4.752411026713323, "grad_norm": 0.30069799888959375, "learning_rate": 1.7456594573405284e-05, "loss": 0.3559, "num_tokens": 2656333173.0, "step": 3483 }, { "epoch": 4.753776563966886, "grad_norm": 0.3175421996646432, "learning_rate": 1.7446975431744286e-05, "loss": 0.355, "num_tokens": 2657124467.0, "step": 3484 }, { "epoch": 4.755142101220449, "grad_norm": 0.3362320032579536, "learning_rate": 1.743735767901392e-05, "loss": 0.3353, "num_tokens": 2657828303.0, "step": 3485 }, { "epoch": 4.756507638474012, "grad_norm": 0.2986384053455996, "learning_rate": 1.7427741318148124e-05, "loss": 0.3465, "num_tokens": 2658563237.0, "step": 3486 }, { "epoch": 4.757873175727576, "grad_norm": 0.3045000085660609, "learning_rate": 1.741812635208045e-05, "loss": 0.327, "num_tokens": 2659327453.0, "step": 3487 }, { "epoch": 4.759238712981139, "grad_norm": 0.30885938711767963, "learning_rate": 1.7408512783743998e-05, "loss": 0.3283, "num_tokens": 2660005865.0, "step": 3488 }, { "epoch": 4.760604250234701, "grad_norm": 0.31449768245833826, "learning_rate": 1.7398900616071457e-05, "loss": 0.3548, "num_tokens": 2660789628.0, "step": 3489 }, { "epoch": 4.761969787488265, "grad_norm": 0.3368587659121489, "learning_rate": 1.7389289851995083e-05, "loss": 0.3388, "num_tokens": 2661476657.0, "step": 3490 }, { "epoch": 4.763335324741828, "grad_norm": 0.284219778266966, "learning_rate": 1.7379680494446702e-05, "loss": 0.3507, "num_tokens": 2662264009.0, "step": 3491 }, { "epoch": 4.764700861995391, "grad_norm": 0.35759573794407634, "learning_rate": 1.7370072546357713e-05, "loss": 0.36, "num_tokens": 2663131683.0, "step": 3492 }, { "epoch": 4.766066399248954, "grad_norm": 0.3023077668472424, "learning_rate": 1.7360466010659084e-05, "loss": 0.3537, "num_tokens": 2664015817.0, "step": 3493 }, { "epoch": 4.7674319365025175, "grad_norm": 0.31813580676094655, "learning_rate": 1.7350860890281356e-05, "loss": 0.3649, "num_tokens": 2664863719.0, "step": 3494 }, { "epoch": 4.768797473756081, "grad_norm": 0.3631227582538788, "learning_rate": 1.7341257188154625e-05, "loss": 0.3521, "num_tokens": 2665668907.0, "step": 3495 }, { "epoch": 4.770163011009644, "grad_norm": 0.3028465913632222, "learning_rate": 1.7331654907208583e-05, "loss": 0.3451, "num_tokens": 2666494912.0, "step": 3496 }, { "epoch": 4.771528548263207, "grad_norm": 0.2878476557243835, "learning_rate": 1.7322054050372455e-05, "loss": 0.3555, "num_tokens": 2667311062.0, "step": 3497 }, { "epoch": 4.7728940855167705, "grad_norm": 0.3466958468547949, "learning_rate": 1.7312454620575055e-05, "loss": 0.3268, "num_tokens": 2667997722.0, "step": 3498 }, { "epoch": 4.774259622770334, "grad_norm": 0.2929434220032979, "learning_rate": 1.7302856620744748e-05, "loss": 0.3402, "num_tokens": 2668764375.0, "step": 3499 }, { "epoch": 4.775625160023897, "grad_norm": 0.30538019539347, "learning_rate": 1.7293260053809472e-05, "loss": 0.3467, "num_tokens": 2669486381.0, "step": 3500 }, { "epoch": 4.77699069727746, "grad_norm": 0.3344717631488837, "learning_rate": 1.728366492269672e-05, "loss": 0.3581, "num_tokens": 2670286373.0, "step": 3501 }, { "epoch": 4.7783562345310235, "grad_norm": 0.3020353155648783, "learning_rate": 1.7274071230333546e-05, "loss": 0.3435, "num_tokens": 2671045700.0, "step": 3502 }, { "epoch": 4.779721771784587, "grad_norm": 0.3360715499674096, "learning_rate": 1.726447897964659e-05, "loss": 0.3383, "num_tokens": 2671735477.0, "step": 3503 }, { "epoch": 4.78108730903815, "grad_norm": 0.301112623832923, "learning_rate": 1.725488817356201e-05, "loss": 0.3482, "num_tokens": 2672527227.0, "step": 3504 }, { "epoch": 4.782452846291713, "grad_norm": 0.33551538828564115, "learning_rate": 1.724529881500556e-05, "loss": 0.356, "num_tokens": 2673358011.0, "step": 3505 }, { "epoch": 4.783818383545276, "grad_norm": 0.3325095214909383, "learning_rate": 1.7235710906902533e-05, "loss": 0.356, "num_tokens": 2674107022.0, "step": 3506 }, { "epoch": 4.785183920798839, "grad_norm": 0.34278270490563273, "learning_rate": 1.722612445217778e-05, "loss": 0.3536, "num_tokens": 2674869998.0, "step": 3507 }, { "epoch": 4.786549458052402, "grad_norm": 0.30574195101150364, "learning_rate": 1.7216539453755726e-05, "loss": 0.3547, "num_tokens": 2675624791.0, "step": 3508 }, { "epoch": 4.787914995305965, "grad_norm": 0.31989851662887764, "learning_rate": 1.7206955914560325e-05, "loss": 0.3516, "num_tokens": 2676405504.0, "step": 3509 }, { "epoch": 4.789280532559529, "grad_norm": 0.30350894040233367, "learning_rate": 1.7197373837515104e-05, "loss": 0.3461, "num_tokens": 2677178275.0, "step": 3510 }, { "epoch": 4.790646069813092, "grad_norm": 0.3352254433804761, "learning_rate": 1.7187793225543138e-05, "loss": 0.3453, "num_tokens": 2677910825.0, "step": 3511 }, { "epoch": 4.792011607066655, "grad_norm": 0.3031885736022401, "learning_rate": 1.7178214081567066e-05, "loss": 0.3364, "num_tokens": 2678684964.0, "step": 3512 }, { "epoch": 4.793377144320218, "grad_norm": 0.288693804544515, "learning_rate": 1.7168636408509064e-05, "loss": 0.325, "num_tokens": 2679461280.0, "step": 3513 }, { "epoch": 4.794742681573782, "grad_norm": 0.32146262213189963, "learning_rate": 1.715906020929086e-05, "loss": 0.3405, "num_tokens": 2680276281.0, "step": 3514 }, { "epoch": 4.796108218827345, "grad_norm": 0.2823912276616036, "learning_rate": 1.714948548683375e-05, "loss": 0.337, "num_tokens": 2681129655.0, "step": 3515 }, { "epoch": 4.797473756080908, "grad_norm": 0.29898270972697605, "learning_rate": 1.7139912244058555e-05, "loss": 0.3545, "num_tokens": 2681932261.0, "step": 3516 }, { "epoch": 4.7988392933344715, "grad_norm": 0.31685315056610325, "learning_rate": 1.7130340483885664e-05, "loss": 0.352, "num_tokens": 2682698141.0, "step": 3517 }, { "epoch": 4.800204830588035, "grad_norm": 0.31424296976264765, "learning_rate": 1.7120770209235002e-05, "loss": 0.3373, "num_tokens": 2683447516.0, "step": 3518 }, { "epoch": 4.801570367841598, "grad_norm": 0.3021936986386316, "learning_rate": 1.7111201423026054e-05, "loss": 0.3343, "num_tokens": 2684190675.0, "step": 3519 }, { "epoch": 4.802935905095161, "grad_norm": 0.3144618528219201, "learning_rate": 1.7101634128177825e-05, "loss": 0.3492, "num_tokens": 2684943531.0, "step": 3520 }, { "epoch": 4.804301442348724, "grad_norm": 0.3400371875941816, "learning_rate": 1.7092068327608903e-05, "loss": 0.3423, "num_tokens": 2685659176.0, "step": 3521 }, { "epoch": 4.805666979602288, "grad_norm": 0.3050118037480941, "learning_rate": 1.708250402423739e-05, "loss": 0.341, "num_tokens": 2686413442.0, "step": 3522 }, { "epoch": 4.80703251685585, "grad_norm": 0.3342875135294262, "learning_rate": 1.707294122098093e-05, "loss": 0.3726, "num_tokens": 2687150164.0, "step": 3523 }, { "epoch": 4.808398054109413, "grad_norm": 0.31359416434299336, "learning_rate": 1.7063379920756743e-05, "loss": 0.3498, "num_tokens": 2687866521.0, "step": 3524 }, { "epoch": 4.809763591362977, "grad_norm": 0.32986970133658533, "learning_rate": 1.7053820126481545e-05, "loss": 0.3387, "num_tokens": 2688701906.0, "step": 3525 }, { "epoch": 4.81112912861654, "grad_norm": 0.29967178909212633, "learning_rate": 1.7044261841071633e-05, "loss": 0.3539, "num_tokens": 2689404506.0, "step": 3526 }, { "epoch": 4.812494665870103, "grad_norm": 0.30754275843010676, "learning_rate": 1.7034705067442815e-05, "loss": 0.3346, "num_tokens": 2690207394.0, "step": 3527 }, { "epoch": 4.813860203123666, "grad_norm": 0.3244939628511778, "learning_rate": 1.7025149808510456e-05, "loss": 0.3461, "num_tokens": 2690948274.0, "step": 3528 }, { "epoch": 4.81522574037723, "grad_norm": 0.29058527253932326, "learning_rate": 1.7015596067189444e-05, "loss": 0.3438, "num_tokens": 2691757063.0, "step": 3529 }, { "epoch": 4.816591277630793, "grad_norm": 0.3416943679998623, "learning_rate": 1.7006043846394217e-05, "loss": 0.3362, "num_tokens": 2692546647.0, "step": 3530 }, { "epoch": 4.817956814884356, "grad_norm": 0.26676946575288457, "learning_rate": 1.6996493149038747e-05, "loss": 0.3274, "num_tokens": 2693296491.0, "step": 3531 }, { "epoch": 4.819322352137919, "grad_norm": 0.32388415024462763, "learning_rate": 1.6986943978036522e-05, "loss": 0.3685, "num_tokens": 2694076947.0, "step": 3532 }, { "epoch": 4.820687889391483, "grad_norm": 0.3653207768600629, "learning_rate": 1.6977396336300605e-05, "loss": 0.3673, "num_tokens": 2694795910.0, "step": 3533 }, { "epoch": 4.822053426645046, "grad_norm": 0.3086542425546716, "learning_rate": 1.6967850226743544e-05, "loss": 0.3631, "num_tokens": 2695595776.0, "step": 3534 }, { "epoch": 4.823418963898609, "grad_norm": 0.3088239093097917, "learning_rate": 1.6958305652277464e-05, "loss": 0.3449, "num_tokens": 2696356047.0, "step": 3535 }, { "epoch": 4.824784501152172, "grad_norm": 0.32887920705250595, "learning_rate": 1.694876261581399e-05, "loss": 0.33, "num_tokens": 2697091620.0, "step": 3536 }, { "epoch": 4.826150038405736, "grad_norm": 0.3093048208206274, "learning_rate": 1.693922112026429e-05, "loss": 0.3312, "num_tokens": 2697788249.0, "step": 3537 }, { "epoch": 4.827515575659298, "grad_norm": 0.29608550598407984, "learning_rate": 1.6929681168539068e-05, "loss": 0.3481, "num_tokens": 2698629778.0, "step": 3538 }, { "epoch": 4.828881112912862, "grad_norm": 0.323342618709132, "learning_rate": 1.6920142763548532e-05, "loss": 0.3376, "num_tokens": 2699357028.0, "step": 3539 }, { "epoch": 4.8302466501664245, "grad_norm": 0.3445315721517824, "learning_rate": 1.691060590820246e-05, "loss": 0.3624, "num_tokens": 2700151783.0, "step": 3540 }, { "epoch": 4.831612187419988, "grad_norm": 0.3244840661586043, "learning_rate": 1.6901070605410115e-05, "loss": 0.3483, "num_tokens": 2700880652.0, "step": 3541 }, { "epoch": 4.832977724673551, "grad_norm": 0.3349782642782902, "learning_rate": 1.689153685808032e-05, "loss": 0.3512, "num_tokens": 2701613737.0, "step": 3542 }, { "epoch": 4.834343261927114, "grad_norm": 0.30188730370212, "learning_rate": 1.6882004669121394e-05, "loss": 0.327, "num_tokens": 2702398696.0, "step": 3543 }, { "epoch": 4.8357087991806775, "grad_norm": 0.31814096993758684, "learning_rate": 1.6872474041441213e-05, "loss": 0.3582, "num_tokens": 2703244713.0, "step": 3544 }, { "epoch": 4.837074336434241, "grad_norm": 0.31830530410745306, "learning_rate": 1.686294497794714e-05, "loss": 0.3307, "num_tokens": 2703977497.0, "step": 3545 }, { "epoch": 4.838439873687804, "grad_norm": 0.30943244481601084, "learning_rate": 1.6853417481546084e-05, "loss": 0.3745, "num_tokens": 2704826854.0, "step": 3546 }, { "epoch": 4.839805410941367, "grad_norm": 0.31982212684281164, "learning_rate": 1.684389155514448e-05, "loss": 0.3527, "num_tokens": 2705615619.0, "step": 3547 }, { "epoch": 4.8411709481949305, "grad_norm": 0.3231415902158014, "learning_rate": 1.6834367201648264e-05, "loss": 0.3542, "num_tokens": 2706392137.0, "step": 3548 }, { "epoch": 4.842536485448494, "grad_norm": 0.3095352625361604, "learning_rate": 1.682484442396292e-05, "loss": 0.3604, "num_tokens": 2707169899.0, "step": 3549 }, { "epoch": 4.843902022702057, "grad_norm": 0.32086932630001147, "learning_rate": 1.681532322499342e-05, "loss": 0.3361, "num_tokens": 2707889627.0, "step": 3550 }, { "epoch": 4.84526755995562, "grad_norm": 0.3255379605703873, "learning_rate": 1.6805803607644275e-05, "loss": 0.3589, "num_tokens": 2708638035.0, "step": 3551 }, { "epoch": 4.846633097209184, "grad_norm": 0.28756506653374436, "learning_rate": 1.6796285574819505e-05, "loss": 0.3396, "num_tokens": 2709405308.0, "step": 3552 }, { "epoch": 4.847998634462747, "grad_norm": 0.3674226752597721, "learning_rate": 1.678676912942266e-05, "loss": 0.3524, "num_tokens": 2710251963.0, "step": 3553 }, { "epoch": 4.84936417171631, "grad_norm": 0.30971860610564883, "learning_rate": 1.677725427435678e-05, "loss": 0.3673, "num_tokens": 2711018406.0, "step": 3554 }, { "epoch": 4.850729708969872, "grad_norm": 0.2930032633492866, "learning_rate": 1.6767741012524444e-05, "loss": 0.3223, "num_tokens": 2711759197.0, "step": 3555 }, { "epoch": 4.852095246223436, "grad_norm": 0.3077204525805023, "learning_rate": 1.675822934682774e-05, "loss": 0.3672, "num_tokens": 2712655373.0, "step": 3556 }, { "epoch": 4.853460783476999, "grad_norm": 0.28417784531536194, "learning_rate": 1.6748719280168253e-05, "loss": 0.3411, "num_tokens": 2713446651.0, "step": 3557 }, { "epoch": 4.854826320730562, "grad_norm": 0.29885777072728575, "learning_rate": 1.6739210815447107e-05, "loss": 0.3572, "num_tokens": 2714220570.0, "step": 3558 }, { "epoch": 4.8561918579841254, "grad_norm": 0.29977157962438794, "learning_rate": 1.672970395556491e-05, "loss": 0.3352, "num_tokens": 2714983794.0, "step": 3559 }, { "epoch": 4.857557395237689, "grad_norm": 0.2804016709324082, "learning_rate": 1.672019870342181e-05, "loss": 0.3286, "num_tokens": 2715720729.0, "step": 3560 }, { "epoch": 4.858922932491252, "grad_norm": 0.3057397650890275, "learning_rate": 1.671069506191743e-05, "loss": 0.3504, "num_tokens": 2716502020.0, "step": 3561 }, { "epoch": 4.860288469744815, "grad_norm": 0.29845546154164326, "learning_rate": 1.6701193033950923e-05, "loss": 0.339, "num_tokens": 2717196436.0, "step": 3562 }, { "epoch": 4.8616540069983785, "grad_norm": 0.3205977859269924, "learning_rate": 1.6691692622420955e-05, "loss": 0.3491, "num_tokens": 2717925040.0, "step": 3563 }, { "epoch": 4.863019544251942, "grad_norm": 0.31506627378796953, "learning_rate": 1.6682193830225678e-05, "loss": 0.3313, "num_tokens": 2718688108.0, "step": 3564 }, { "epoch": 4.864385081505505, "grad_norm": 0.29438491778651177, "learning_rate": 1.6672696660262775e-05, "loss": 0.3628, "num_tokens": 2719534484.0, "step": 3565 }, { "epoch": 4.865750618759068, "grad_norm": 0.29573741949759647, "learning_rate": 1.666320111542941e-05, "loss": 0.3377, "num_tokens": 2720348891.0, "step": 3566 }, { "epoch": 4.8671161560126315, "grad_norm": 0.3099455065042236, "learning_rate": 1.6653707198622272e-05, "loss": 0.3305, "num_tokens": 2721079414.0, "step": 3567 }, { "epoch": 4.868481693266195, "grad_norm": 0.31305879361084604, "learning_rate": 1.664421491273754e-05, "loss": 0.3423, "num_tokens": 2721833865.0, "step": 3568 }, { "epoch": 4.869847230519758, "grad_norm": 0.3206544602474789, "learning_rate": 1.6634724260670894e-05, "loss": 0.3526, "num_tokens": 2722596641.0, "step": 3569 }, { "epoch": 4.87121276777332, "grad_norm": 0.310802225478032, "learning_rate": 1.662523524531753e-05, "loss": 0.3547, "num_tokens": 2723309796.0, "step": 3570 }, { "epoch": 4.8725783050268845, "grad_norm": 0.32962021283444687, "learning_rate": 1.6615747869572127e-05, "loss": 0.3424, "num_tokens": 2724062240.0, "step": 3571 }, { "epoch": 4.873943842280447, "grad_norm": 0.30823087007569133, "learning_rate": 1.6606262136328882e-05, "loss": 0.3412, "num_tokens": 2724784911.0, "step": 3572 }, { "epoch": 4.87530937953401, "grad_norm": 0.2953112136675549, "learning_rate": 1.659677804848147e-05, "loss": 0.3589, "num_tokens": 2725660346.0, "step": 3573 }, { "epoch": 4.876674916787573, "grad_norm": 0.34756157600864374, "learning_rate": 1.6587295608923088e-05, "loss": 0.3455, "num_tokens": 2726467827.0, "step": 3574 }, { "epoch": 4.878040454041137, "grad_norm": 0.3093278630257819, "learning_rate": 1.6577814820546407e-05, "loss": 0.3646, "num_tokens": 2727285599.0, "step": 3575 }, { "epoch": 4.8794059912947, "grad_norm": 0.3171227204181055, "learning_rate": 1.6568335686243608e-05, "loss": 0.3444, "num_tokens": 2728032046.0, "step": 3576 }, { "epoch": 4.880771528548263, "grad_norm": 0.344211173276502, "learning_rate": 1.6558858208906375e-05, "loss": 0.3571, "num_tokens": 2728796451.0, "step": 3577 }, { "epoch": 4.882137065801826, "grad_norm": 0.3243030213528027, "learning_rate": 1.654938239142586e-05, "loss": 0.3323, "num_tokens": 2729582635.0, "step": 3578 }, { "epoch": 4.88350260305539, "grad_norm": 0.29347339877021406, "learning_rate": 1.653990823669273e-05, "loss": 0.3447, "num_tokens": 2730321865.0, "step": 3579 }, { "epoch": 4.884868140308953, "grad_norm": 0.33261879256996635, "learning_rate": 1.6530435747597143e-05, "loss": 0.3293, "num_tokens": 2731093657.0, "step": 3580 }, { "epoch": 4.886233677562516, "grad_norm": 0.34823487041115797, "learning_rate": 1.6520964927028743e-05, "loss": 0.3623, "num_tokens": 2731884885.0, "step": 3581 }, { "epoch": 4.887599214816079, "grad_norm": 0.282632769476786, "learning_rate": 1.651149577787666e-05, "loss": 0.3415, "num_tokens": 2732693043.0, "step": 3582 }, { "epoch": 4.888964752069643, "grad_norm": 0.30575748693560495, "learning_rate": 1.6502028303029542e-05, "loss": 0.3453, "num_tokens": 2733435647.0, "step": 3583 }, { "epoch": 4.890330289323206, "grad_norm": 0.3494982615251806, "learning_rate": 1.6492562505375484e-05, "loss": 0.3686, "num_tokens": 2734171505.0, "step": 3584 }, { "epoch": 4.891695826576769, "grad_norm": 0.35278515321208287, "learning_rate": 1.6483098387802098e-05, "loss": 0.348, "num_tokens": 2734883730.0, "step": 3585 }, { "epoch": 4.893061363830332, "grad_norm": 0.31009102261858434, "learning_rate": 1.647363595319648e-05, "loss": 0.3485, "num_tokens": 2735634590.0, "step": 3586 }, { "epoch": 4.894426901083895, "grad_norm": 0.323768318535404, "learning_rate": 1.64641752044452e-05, "loss": 0.3393, "num_tokens": 2736399928.0, "step": 3587 }, { "epoch": 4.895792438337458, "grad_norm": 0.3349468360033143, "learning_rate": 1.6454716144434334e-05, "loss": 0.3512, "num_tokens": 2737162876.0, "step": 3588 }, { "epoch": 4.897157975591021, "grad_norm": 0.3076576348715843, "learning_rate": 1.6445258776049423e-05, "loss": 0.3527, "num_tokens": 2737920368.0, "step": 3589 }, { "epoch": 4.8985235128445845, "grad_norm": 0.3297192191492759, "learning_rate": 1.643580310217551e-05, "loss": 0.3531, "num_tokens": 2738693557.0, "step": 3590 }, { "epoch": 4.899889050098148, "grad_norm": 0.3040963070601063, "learning_rate": 1.6426349125697097e-05, "loss": 0.3644, "num_tokens": 2739477387.0, "step": 3591 }, { "epoch": 4.901254587351711, "grad_norm": 0.3203480003932142, "learning_rate": 1.6416896849498204e-05, "loss": 0.3617, "num_tokens": 2740316512.0, "step": 3592 }, { "epoch": 4.902620124605274, "grad_norm": 0.29989638823003795, "learning_rate": 1.640744627646229e-05, "loss": 0.343, "num_tokens": 2741100699.0, "step": 3593 }, { "epoch": 4.9039856618588376, "grad_norm": 0.3471230745471101, "learning_rate": 1.6397997409472325e-05, "loss": 0.3612, "num_tokens": 2741911977.0, "step": 3594 }, { "epoch": 4.905351199112401, "grad_norm": 0.2981510500066408, "learning_rate": 1.638855025141075e-05, "loss": 0.3338, "num_tokens": 2742631056.0, "step": 3595 }, { "epoch": 4.906716736365964, "grad_norm": 0.3426974212945193, "learning_rate": 1.637910480515948e-05, "loss": 0.3543, "num_tokens": 2743348445.0, "step": 3596 }, { "epoch": 4.908082273619527, "grad_norm": 0.3270807666370433, "learning_rate": 1.6369661073599913e-05, "loss": 0.3574, "num_tokens": 2744105719.0, "step": 3597 }, { "epoch": 4.909447810873091, "grad_norm": 0.3459215487749899, "learning_rate": 1.6360219059612923e-05, "loss": 0.35, "num_tokens": 2744835988.0, "step": 3598 }, { "epoch": 4.910813348126654, "grad_norm": 0.3169744687017708, "learning_rate": 1.6350778766078867e-05, "loss": 0.3318, "num_tokens": 2745620653.0, "step": 3599 }, { "epoch": 4.912178885380217, "grad_norm": 0.3218992006770818, "learning_rate": 1.6341340195877557e-05, "loss": 0.3536, "num_tokens": 2746332298.0, "step": 3600 }, { "epoch": 4.91354442263378, "grad_norm": 0.32802434571334216, "learning_rate": 1.6331903351888295e-05, "loss": 0.3385, "num_tokens": 2747033935.0, "step": 3601 }, { "epoch": 4.914909959887343, "grad_norm": 0.33121997142437887, "learning_rate": 1.6322468236989865e-05, "loss": 0.325, "num_tokens": 2747729987.0, "step": 3602 }, { "epoch": 4.916275497140907, "grad_norm": 0.3315898449341991, "learning_rate": 1.631303485406049e-05, "loss": 0.3571, "num_tokens": 2748484715.0, "step": 3603 }, { "epoch": 4.917641034394469, "grad_norm": 0.31537200205876803, "learning_rate": 1.6303603205977907e-05, "loss": 0.3703, "num_tokens": 2749251234.0, "step": 3604 }, { "epoch": 4.9190065716480325, "grad_norm": 0.32849908260570376, "learning_rate": 1.6294173295619296e-05, "loss": 0.3569, "num_tokens": 2749986031.0, "step": 3605 }, { "epoch": 4.920372108901596, "grad_norm": 0.3657179315097416, "learning_rate": 1.6284745125861313e-05, "loss": 0.3426, "num_tokens": 2750693276.0, "step": 3606 }, { "epoch": 4.921737646155159, "grad_norm": 0.32774793251358886, "learning_rate": 1.6275318699580082e-05, "loss": 0.3568, "num_tokens": 2751419063.0, "step": 3607 }, { "epoch": 4.923103183408722, "grad_norm": 0.29270118832441583, "learning_rate": 1.6265894019651205e-05, "loss": 0.3456, "num_tokens": 2752235292.0, "step": 3608 }, { "epoch": 4.9244687206622855, "grad_norm": 0.3188174691216849, "learning_rate": 1.6256471088949742e-05, "loss": 0.3356, "num_tokens": 2752981819.0, "step": 3609 }, { "epoch": 4.925834257915849, "grad_norm": 0.3155259441275917, "learning_rate": 1.624704991035021e-05, "loss": 0.3465, "num_tokens": 2753737463.0, "step": 3610 }, { "epoch": 4.927199795169412, "grad_norm": 0.33162839328692484, "learning_rate": 1.6237630486726623e-05, "loss": 0.3393, "num_tokens": 2754539426.0, "step": 3611 }, { "epoch": 4.928565332422975, "grad_norm": 0.2987137784524191, "learning_rate": 1.6228212820952414e-05, "loss": 0.3458, "num_tokens": 2755325882.0, "step": 3612 }, { "epoch": 4.9299308696765385, "grad_norm": 0.29982240233776647, "learning_rate": 1.621879691590053e-05, "loss": 0.3402, "num_tokens": 2756043334.0, "step": 3613 }, { "epoch": 4.931296406930102, "grad_norm": 0.33291026686681635, "learning_rate": 1.620938277444334e-05, "loss": 0.3371, "num_tokens": 2756755073.0, "step": 3614 }, { "epoch": 4.932661944183665, "grad_norm": 0.3038915945548779, "learning_rate": 1.6199970399452697e-05, "loss": 0.3361, "num_tokens": 2757492095.0, "step": 3615 }, { "epoch": 4.934027481437228, "grad_norm": 0.31932149050824893, "learning_rate": 1.6190559793799906e-05, "loss": 0.3429, "num_tokens": 2758317606.0, "step": 3616 }, { "epoch": 4.9353930186907915, "grad_norm": 0.3269935569901894, "learning_rate": 1.6181150960355734e-05, "loss": 0.3452, "num_tokens": 2759043312.0, "step": 3617 }, { "epoch": 4.936758555944355, "grad_norm": 0.30551533065610226, "learning_rate": 1.6171743901990414e-05, "loss": 0.3588, "num_tokens": 2759838458.0, "step": 3618 }, { "epoch": 4.938124093197917, "grad_norm": 0.297782365435162, "learning_rate": 1.6162338621573624e-05, "loss": 0.3292, "num_tokens": 2760573341.0, "step": 3619 }, { "epoch": 4.93948963045148, "grad_norm": 0.3152202477974792, "learning_rate": 1.6152935121974525e-05, "loss": 0.3473, "num_tokens": 2761411063.0, "step": 3620 }, { "epoch": 4.940855167705044, "grad_norm": 0.2940083379075142, "learning_rate": 1.6143533406061692e-05, "loss": 0.3517, "num_tokens": 2762103709.0, "step": 3621 }, { "epoch": 4.942220704958607, "grad_norm": 0.29167252108087577, "learning_rate": 1.6134133476703203e-05, "loss": 0.3505, "num_tokens": 2762953505.0, "step": 3622 }, { "epoch": 4.94358624221217, "grad_norm": 0.31670069566115067, "learning_rate": 1.6124735336766558e-05, "loss": 0.3285, "num_tokens": 2763703710.0, "step": 3623 }, { "epoch": 4.944951779465733, "grad_norm": 0.2999720685092725, "learning_rate": 1.611533898911872e-05, "loss": 0.3735, "num_tokens": 2764462657.0, "step": 3624 }, { "epoch": 4.946317316719297, "grad_norm": 0.3034058648365359, "learning_rate": 1.6105944436626123e-05, "loss": 0.339, "num_tokens": 2765188444.0, "step": 3625 }, { "epoch": 4.94768285397286, "grad_norm": 0.2864043984125416, "learning_rate": 1.6096551682154627e-05, "loss": 0.3552, "num_tokens": 2765965507.0, "step": 3626 }, { "epoch": 4.949048391226423, "grad_norm": 0.31009832450316893, "learning_rate": 1.6087160728569557e-05, "loss": 0.3583, "num_tokens": 2766842067.0, "step": 3627 }, { "epoch": 4.950413928479986, "grad_norm": 0.29344143012805834, "learning_rate": 1.6077771578735676e-05, "loss": 0.3331, "num_tokens": 2767582428.0, "step": 3628 }, { "epoch": 4.95177946573355, "grad_norm": 0.27937632079389285, "learning_rate": 1.6068384235517226e-05, "loss": 0.3546, "num_tokens": 2768317341.0, "step": 3629 }, { "epoch": 4.953145002987113, "grad_norm": 0.30460333117094834, "learning_rate": 1.6058998701777864e-05, "loss": 0.3405, "num_tokens": 2769082895.0, "step": 3630 }, { "epoch": 4.954510540240676, "grad_norm": 0.2895170235013401, "learning_rate": 1.6049614980380722e-05, "loss": 0.3381, "num_tokens": 2769872620.0, "step": 3631 }, { "epoch": 4.955876077494239, "grad_norm": 0.33770803107442127, "learning_rate": 1.6040233074188356e-05, "loss": 0.36, "num_tokens": 2770615475.0, "step": 3632 }, { "epoch": 4.957241614747803, "grad_norm": 0.3023149636836252, "learning_rate": 1.6030852986062782e-05, "loss": 0.3598, "num_tokens": 2771407134.0, "step": 3633 }, { "epoch": 4.958607152001366, "grad_norm": 0.2743994222709408, "learning_rate": 1.6021474718865463e-05, "loss": 0.323, "num_tokens": 2772155870.0, "step": 3634 }, { "epoch": 4.959972689254929, "grad_norm": 0.3180705364726274, "learning_rate": 1.6012098275457296e-05, "loss": 0.343, "num_tokens": 2772910956.0, "step": 3635 }, { "epoch": 4.9613382265084915, "grad_norm": 0.31287509999218527, "learning_rate": 1.600272365869864e-05, "loss": 0.3362, "num_tokens": 2773711165.0, "step": 3636 }, { "epoch": 4.962703763762055, "grad_norm": 0.2803125238354578, "learning_rate": 1.599335087144926e-05, "loss": 0.3378, "num_tokens": 2774518130.0, "step": 3637 }, { "epoch": 4.964069301015618, "grad_norm": 0.3018943435304752, "learning_rate": 1.598397991656841e-05, "loss": 0.3396, "num_tokens": 2775271792.0, "step": 3638 }, { "epoch": 4.965434838269181, "grad_norm": 0.30850695702861647, "learning_rate": 1.5974610796914765e-05, "loss": 0.3533, "num_tokens": 2776003204.0, "step": 3639 }, { "epoch": 4.966800375522745, "grad_norm": 0.27717414022445314, "learning_rate": 1.5965243515346417e-05, "loss": 0.3239, "num_tokens": 2776781439.0, "step": 3640 }, { "epoch": 4.968165912776308, "grad_norm": 0.305269858031393, "learning_rate": 1.5955878074720938e-05, "loss": 0.3455, "num_tokens": 2777534767.0, "step": 3641 }, { "epoch": 4.969531450029871, "grad_norm": 0.29906422014042494, "learning_rate": 1.5946514477895303e-05, "loss": 0.3442, "num_tokens": 2778328549.0, "step": 3642 }, { "epoch": 4.970896987283434, "grad_norm": 0.29444458072298996, "learning_rate": 1.5937152727725954e-05, "loss": 0.3346, "num_tokens": 2779012715.0, "step": 3643 }, { "epoch": 4.972262524536998, "grad_norm": 0.30357946068551145, "learning_rate": 1.5927792827068738e-05, "loss": 0.3345, "num_tokens": 2779781463.0, "step": 3644 }, { "epoch": 4.973628061790561, "grad_norm": 0.3048381327248012, "learning_rate": 1.5918434778778974e-05, "loss": 0.3327, "num_tokens": 2780532354.0, "step": 3645 }, { "epoch": 4.974993599044124, "grad_norm": 0.2949281233763973, "learning_rate": 1.590907858571138e-05, "loss": 0.3267, "num_tokens": 2781274866.0, "step": 3646 }, { "epoch": 4.976359136297687, "grad_norm": 0.31198548714399216, "learning_rate": 1.589972425072014e-05, "loss": 0.3488, "num_tokens": 2782104841.0, "step": 3647 }, { "epoch": 4.977724673551251, "grad_norm": 0.30334041853533716, "learning_rate": 1.5890371776658858e-05, "loss": 0.3533, "num_tokens": 2782912449.0, "step": 3648 }, { "epoch": 4.979090210804814, "grad_norm": 0.3190759694742525, "learning_rate": 1.5881021166380547e-05, "loss": 0.3521, "num_tokens": 2783653345.0, "step": 3649 }, { "epoch": 4.980455748058377, "grad_norm": 0.31778684138740615, "learning_rate": 1.5871672422737697e-05, "loss": 0.3453, "num_tokens": 2784429840.0, "step": 3650 }, { "epoch": 4.9818212853119395, "grad_norm": 0.28171735608696746, "learning_rate": 1.5862325548582193e-05, "loss": 0.3491, "num_tokens": 2785198177.0, "step": 3651 }, { "epoch": 4.983186822565504, "grad_norm": 0.33144937241135014, "learning_rate": 1.5852980546765365e-05, "loss": 0.3557, "num_tokens": 2786001429.0, "step": 3652 }, { "epoch": 4.984552359819066, "grad_norm": 0.30889931482811644, "learning_rate": 1.5843637420137964e-05, "loss": 0.3414, "num_tokens": 2786759508.0, "step": 3653 }, { "epoch": 4.985917897072629, "grad_norm": 0.3054884584336677, "learning_rate": 1.5834296171550187e-05, "loss": 0.3487, "num_tokens": 2787496913.0, "step": 3654 }, { "epoch": 4.9872834343261925, "grad_norm": 0.3159458340302714, "learning_rate": 1.5824956803851622e-05, "loss": 0.3329, "num_tokens": 2788260660.0, "step": 3655 }, { "epoch": 4.988648971579756, "grad_norm": 0.31247538574481226, "learning_rate": 1.5815619319891317e-05, "loss": 0.3329, "num_tokens": 2789039060.0, "step": 3656 }, { "epoch": 4.990014508833319, "grad_norm": 0.29997463419270654, "learning_rate": 1.5806283722517747e-05, "loss": 0.3544, "num_tokens": 2789797427.0, "step": 3657 }, { "epoch": 4.991380046086882, "grad_norm": 0.2948499011989241, "learning_rate": 1.5796950014578777e-05, "loss": 0.3258, "num_tokens": 2790494979.0, "step": 3658 }, { "epoch": 4.9927455833404455, "grad_norm": 0.2890868402103023, "learning_rate": 1.578761819892173e-05, "loss": 0.3685, "num_tokens": 2791259668.0, "step": 3659 }, { "epoch": 4.994111120594009, "grad_norm": 0.3036194128551678, "learning_rate": 1.577828827839333e-05, "loss": 0.3614, "num_tokens": 2792032408.0, "step": 3660 }, { "epoch": 4.995476657847572, "grad_norm": 0.3267341912963437, "learning_rate": 1.5768960255839744e-05, "loss": 0.3391, "num_tokens": 2792767945.0, "step": 3661 }, { "epoch": 4.996842195101135, "grad_norm": 0.33388287532078237, "learning_rate": 1.5759634134106536e-05, "loss": 0.349, "num_tokens": 2793428592.0, "step": 3662 }, { "epoch": 4.9982077323546985, "grad_norm": 0.323216546533206, "learning_rate": 1.5750309916038707e-05, "loss": 0.3485, "num_tokens": 2794167181.0, "step": 3663 }, { "epoch": 4.999573269608262, "grad_norm": 0.2971727762586377, "learning_rate": 1.574098760448067e-05, "loss": 0.3546, "num_tokens": 2794978290.0, "step": 3664 }, { "epoch": 5.0, "grad_norm": 0.2971727762586377, "learning_rate": 1.573166720227626e-05, "loss": 0.316, "num_tokens": 2795210680.0, "step": 3665 }, { "epoch": 5.001365537253563, "grad_norm": 0.5240247087176564, "learning_rate": 1.572234871226873e-05, "loss": 0.3037, "num_tokens": 2795964972.0, "step": 3666 }, { "epoch": 5.0027310745071265, "grad_norm": 0.414029821487259, "learning_rate": 1.5713032137300745e-05, "loss": 0.311, "num_tokens": 2796643409.0, "step": 3667 }, { "epoch": 5.00409661176069, "grad_norm": 0.4078163662578258, "learning_rate": 1.5703717480214392e-05, "loss": 0.3365, "num_tokens": 2797453033.0, "step": 3668 }, { "epoch": 5.005462149014253, "grad_norm": 0.340230165422914, "learning_rate": 1.5694404743851162e-05, "loss": 0.3056, "num_tokens": 2798174075.0, "step": 3669 }, { "epoch": 5.006827686267816, "grad_norm": 0.3163476567108623, "learning_rate": 1.568509393105198e-05, "loss": 0.3075, "num_tokens": 2799004723.0, "step": 3670 }, { "epoch": 5.0081932235213795, "grad_norm": 0.3386774755991487, "learning_rate": 1.567578504465716e-05, "loss": 0.3136, "num_tokens": 2799755750.0, "step": 3671 }, { "epoch": 5.009558760774943, "grad_norm": 0.4036203381886491, "learning_rate": 1.5666478087506445e-05, "loss": 0.3198, "num_tokens": 2800564904.0, "step": 3672 }, { "epoch": 5.010924298028506, "grad_norm": 0.3712739233786008, "learning_rate": 1.5657173062438986e-05, "loss": 0.3012, "num_tokens": 2801252036.0, "step": 3673 }, { "epoch": 5.012289835282068, "grad_norm": 0.3363101136289323, "learning_rate": 1.5647869972293333e-05, "loss": 0.3129, "num_tokens": 2802019845.0, "step": 3674 }, { "epoch": 5.013655372535632, "grad_norm": 0.3350725768655609, "learning_rate": 1.5638568819907475e-05, "loss": 0.3195, "num_tokens": 2802841037.0, "step": 3675 }, { "epoch": 5.015020909789195, "grad_norm": 0.31497804093204607, "learning_rate": 1.5629269608118768e-05, "loss": 0.3199, "num_tokens": 2803582236.0, "step": 3676 }, { "epoch": 5.016386447042758, "grad_norm": 0.3285627636331061, "learning_rate": 1.5619972339764024e-05, "loss": 0.315, "num_tokens": 2804409437.0, "step": 3677 }, { "epoch": 5.017751984296321, "grad_norm": 0.3217344739007156, "learning_rate": 1.5610677017679417e-05, "loss": 0.3177, "num_tokens": 2805154820.0, "step": 3678 }, { "epoch": 5.019117521549885, "grad_norm": 0.3206835606608309, "learning_rate": 1.5601383644700554e-05, "loss": 0.317, "num_tokens": 2805941653.0, "step": 3679 }, { "epoch": 5.020483058803448, "grad_norm": 0.30841318912929977, "learning_rate": 1.5592092223662442e-05, "loss": 0.3146, "num_tokens": 2806774308.0, "step": 3680 }, { "epoch": 5.021848596057011, "grad_norm": 0.27882145710390355, "learning_rate": 1.5582802757399485e-05, "loss": 0.3308, "num_tokens": 2807512591.0, "step": 3681 }, { "epoch": 5.023214133310574, "grad_norm": 0.30426561338481356, "learning_rate": 1.5573515248745506e-05, "loss": 0.3075, "num_tokens": 2808283677.0, "step": 3682 }, { "epoch": 5.024579670564138, "grad_norm": 0.3085334579430855, "learning_rate": 1.5564229700533712e-05, "loss": 0.3162, "num_tokens": 2808976730.0, "step": 3683 }, { "epoch": 5.025945207817701, "grad_norm": 0.3055278521555962, "learning_rate": 1.5554946115596733e-05, "loss": 0.3182, "num_tokens": 2809735010.0, "step": 3684 }, { "epoch": 5.027310745071264, "grad_norm": 0.34368391220985983, "learning_rate": 1.5545664496766583e-05, "loss": 0.3231, "num_tokens": 2810470048.0, "step": 3685 }, { "epoch": 5.028676282324827, "grad_norm": 0.3205026265841974, "learning_rate": 1.553638484687468e-05, "loss": 0.3226, "num_tokens": 2811258588.0, "step": 3686 }, { "epoch": 5.030041819578391, "grad_norm": 0.2974222360441414, "learning_rate": 1.5527107168751845e-05, "loss": 0.3225, "num_tokens": 2812074882.0, "step": 3687 }, { "epoch": 5.031407356831954, "grad_norm": 0.326610956419848, "learning_rate": 1.551783146522829e-05, "loss": 0.2949, "num_tokens": 2812746233.0, "step": 3688 }, { "epoch": 5.032772894085517, "grad_norm": 0.30616181849553636, "learning_rate": 1.5508557739133647e-05, "loss": 0.3273, "num_tokens": 2813594249.0, "step": 3689 }, { "epoch": 5.03413843133908, "grad_norm": 0.32096321400527705, "learning_rate": 1.54992859932969e-05, "loss": 0.2992, "num_tokens": 2814329150.0, "step": 3690 }, { "epoch": 5.035503968592643, "grad_norm": 0.3121756635188722, "learning_rate": 1.549001623054649e-05, "loss": 0.3094, "num_tokens": 2815023690.0, "step": 3691 }, { "epoch": 5.036869505846206, "grad_norm": 0.30671948836463214, "learning_rate": 1.5480748453710193e-05, "loss": 0.3009, "num_tokens": 2815717499.0, "step": 3692 }, { "epoch": 5.038235043099769, "grad_norm": 0.31184663040907706, "learning_rate": 1.547148266561523e-05, "loss": 0.3173, "num_tokens": 2816481407.0, "step": 3693 }, { "epoch": 5.039600580353333, "grad_norm": 0.31867688825973844, "learning_rate": 1.5462218869088176e-05, "loss": 0.3, "num_tokens": 2817263917.0, "step": 3694 }, { "epoch": 5.040966117606896, "grad_norm": 0.30860242928116544, "learning_rate": 1.5452957066955012e-05, "loss": 0.3198, "num_tokens": 2818118964.0, "step": 3695 }, { "epoch": 5.042331654860459, "grad_norm": 0.30785395326782283, "learning_rate": 1.5443697262041126e-05, "loss": 0.3233, "num_tokens": 2818851156.0, "step": 3696 }, { "epoch": 5.043697192114022, "grad_norm": 0.3242118554883565, "learning_rate": 1.5434439457171274e-05, "loss": 0.3214, "num_tokens": 2819665255.0, "step": 3697 }, { "epoch": 5.045062729367586, "grad_norm": 0.30300091115959926, "learning_rate": 1.5425183655169616e-05, "loss": 0.2999, "num_tokens": 2820434186.0, "step": 3698 }, { "epoch": 5.046428266621149, "grad_norm": 0.2927331555133609, "learning_rate": 1.541592985885968e-05, "loss": 0.3198, "num_tokens": 2821179220.0, "step": 3699 }, { "epoch": 5.047793803874712, "grad_norm": 0.3607479844345091, "learning_rate": 1.540667807106443e-05, "loss": 0.3159, "num_tokens": 2821918596.0, "step": 3700 }, { "epoch": 5.049159341128275, "grad_norm": 0.309974406432981, "learning_rate": 1.5397428294606163e-05, "loss": 0.3064, "num_tokens": 2822697516.0, "step": 3701 }, { "epoch": 5.050524878381839, "grad_norm": 0.3002661555514354, "learning_rate": 1.538818053230658e-05, "loss": 0.3118, "num_tokens": 2823421267.0, "step": 3702 }, { "epoch": 5.051890415635402, "grad_norm": 0.34422562219924235, "learning_rate": 1.5378934786986794e-05, "loss": 0.323, "num_tokens": 2824164920.0, "step": 3703 }, { "epoch": 5.053255952888965, "grad_norm": 0.3281738550195569, "learning_rate": 1.536969106146727e-05, "loss": 0.3125, "num_tokens": 2824857444.0, "step": 3704 }, { "epoch": 5.054621490142528, "grad_norm": 0.3217603249308727, "learning_rate": 1.5360449358567864e-05, "loss": 0.3188, "num_tokens": 2825571935.0, "step": 3705 }, { "epoch": 5.055987027396091, "grad_norm": 0.3260127570062259, "learning_rate": 1.5351209681107823e-05, "loss": 0.3275, "num_tokens": 2826424442.0, "step": 3706 }, { "epoch": 5.057352564649654, "grad_norm": 0.3600411175851736, "learning_rate": 1.534197203190578e-05, "loss": 0.3381, "num_tokens": 2827244630.0, "step": 3707 }, { "epoch": 5.058718101903217, "grad_norm": 0.31818700948513706, "learning_rate": 1.5332736413779726e-05, "loss": 0.3168, "num_tokens": 2827980597.0, "step": 3708 }, { "epoch": 5.0600836391567805, "grad_norm": 0.30428796383960266, "learning_rate": 1.532350282954707e-05, "loss": 0.3189, "num_tokens": 2828694470.0, "step": 3709 }, { "epoch": 5.061449176410344, "grad_norm": 0.3487122721113879, "learning_rate": 1.531427128202456e-05, "loss": 0.3124, "num_tokens": 2829360585.0, "step": 3710 }, { "epoch": 5.062814713663907, "grad_norm": 0.30723143697212624, "learning_rate": 1.5305041774028345e-05, "loss": 0.3124, "num_tokens": 2830203222.0, "step": 3711 }, { "epoch": 5.06418025091747, "grad_norm": 0.32115871631693055, "learning_rate": 1.5295814308373954e-05, "loss": 0.3177, "num_tokens": 2830991306.0, "step": 3712 }, { "epoch": 5.0655457881710335, "grad_norm": 0.3410877971728179, "learning_rate": 1.528658888787628e-05, "loss": 0.3213, "num_tokens": 2831704948.0, "step": 3713 }, { "epoch": 5.066911325424597, "grad_norm": 0.3301099698978964, "learning_rate": 1.527736551534961e-05, "loss": 0.33, "num_tokens": 2832444884.0, "step": 3714 }, { "epoch": 5.06827686267816, "grad_norm": 0.29494338533190734, "learning_rate": 1.5268144193607582e-05, "loss": 0.313, "num_tokens": 2833255268.0, "step": 3715 }, { "epoch": 5.069642399931723, "grad_norm": 0.2770162551573571, "learning_rate": 1.525892492546323e-05, "loss": 0.2966, "num_tokens": 2833992878.0, "step": 3716 }, { "epoch": 5.0710079371852865, "grad_norm": 0.35019306671025735, "learning_rate": 1.524970771372896e-05, "loss": 0.3242, "num_tokens": 2834710899.0, "step": 3717 }, { "epoch": 5.07237347443885, "grad_norm": 0.32175436800215673, "learning_rate": 1.5240492561216522e-05, "loss": 0.3074, "num_tokens": 2835427858.0, "step": 3718 }, { "epoch": 5.073739011692413, "grad_norm": 0.3029101247698951, "learning_rate": 1.523127947073709e-05, "loss": 0.3016, "num_tokens": 2836266860.0, "step": 3719 }, { "epoch": 5.075104548945976, "grad_norm": 0.32871803291442975, "learning_rate": 1.522206844510115e-05, "loss": 0.3292, "num_tokens": 2837056039.0, "step": 3720 }, { "epoch": 5.0764700861995395, "grad_norm": 0.31745037492886846, "learning_rate": 1.521285948711861e-05, "loss": 0.2983, "num_tokens": 2837827937.0, "step": 3721 }, { "epoch": 5.077835623453102, "grad_norm": 0.31384106537748985, "learning_rate": 1.5203652599598706e-05, "loss": 0.3247, "num_tokens": 2838594254.0, "step": 3722 }, { "epoch": 5.079201160706665, "grad_norm": 0.30673173238127444, "learning_rate": 1.5194447785350073e-05, "loss": 0.3122, "num_tokens": 2839353558.0, "step": 3723 }, { "epoch": 5.080566697960228, "grad_norm": 0.3288640707766841, "learning_rate": 1.5185245047180699e-05, "loss": 0.3056, "num_tokens": 2840127778.0, "step": 3724 }, { "epoch": 5.081932235213792, "grad_norm": 0.297414274866257, "learning_rate": 1.5176044387897939e-05, "loss": 0.3172, "num_tokens": 2840824736.0, "step": 3725 }, { "epoch": 5.083297772467355, "grad_norm": 0.3462379980695344, "learning_rate": 1.5166845810308518e-05, "loss": 0.3172, "num_tokens": 2841547904.0, "step": 3726 }, { "epoch": 5.084663309720918, "grad_norm": 0.33227898153313, "learning_rate": 1.5157649317218509e-05, "loss": 0.3058, "num_tokens": 2842292439.0, "step": 3727 }, { "epoch": 5.086028846974481, "grad_norm": 0.29913923709163676, "learning_rate": 1.5148454911433392e-05, "loss": 0.3096, "num_tokens": 2843018473.0, "step": 3728 }, { "epoch": 5.087394384228045, "grad_norm": 0.2834578535391443, "learning_rate": 1.5139262595757954e-05, "loss": 0.3134, "num_tokens": 2843820438.0, "step": 3729 }, { "epoch": 5.088759921481608, "grad_norm": 0.3214314250138854, "learning_rate": 1.5130072372996392e-05, "loss": 0.3144, "num_tokens": 2844602083.0, "step": 3730 }, { "epoch": 5.090125458735171, "grad_norm": 0.31056724076779224, "learning_rate": 1.5120884245952234e-05, "loss": 0.3155, "num_tokens": 2845430361.0, "step": 3731 }, { "epoch": 5.0914909959887344, "grad_norm": 0.3056387624646174, "learning_rate": 1.5111698217428385e-05, "loss": 0.3242, "num_tokens": 2846251497.0, "step": 3732 }, { "epoch": 5.092856533242298, "grad_norm": 0.3058529611567639, "learning_rate": 1.510251429022711e-05, "loss": 0.3036, "num_tokens": 2846985223.0, "step": 3733 }, { "epoch": 5.094222070495861, "grad_norm": 0.3494602699395946, "learning_rate": 1.5093332467150008e-05, "loss": 0.3114, "num_tokens": 2847711644.0, "step": 3734 }, { "epoch": 5.095587607749424, "grad_norm": 0.2906883283114877, "learning_rate": 1.5084152750998079e-05, "loss": 0.3132, "num_tokens": 2848517942.0, "step": 3735 }, { "epoch": 5.0969531450029875, "grad_norm": 0.2970056010402122, "learning_rate": 1.5074975144571631e-05, "loss": 0.3246, "num_tokens": 2849223412.0, "step": 3736 }, { "epoch": 5.098318682256551, "grad_norm": 0.32525012997435865, "learning_rate": 1.5065799650670383e-05, "loss": 0.3106, "num_tokens": 2849990418.0, "step": 3737 }, { "epoch": 5.099684219510113, "grad_norm": 0.29125021453503497, "learning_rate": 1.5056626272093358e-05, "loss": 0.3158, "num_tokens": 2850760436.0, "step": 3738 }, { "epoch": 5.101049756763676, "grad_norm": 0.30471158802849746, "learning_rate": 1.5047455011638974e-05, "loss": 0.3083, "num_tokens": 2851452950.0, "step": 3739 }, { "epoch": 5.10241529401724, "grad_norm": 0.3013623350848778, "learning_rate": 1.5038285872104972e-05, "loss": 0.3277, "num_tokens": 2852175881.0, "step": 3740 }, { "epoch": 5.103780831270803, "grad_norm": 0.3506859454076072, "learning_rate": 1.5029118856288465e-05, "loss": 0.3075, "num_tokens": 2852957481.0, "step": 3741 }, { "epoch": 5.105146368524366, "grad_norm": 0.2964779652916265, "learning_rate": 1.5019953966985914e-05, "loss": 0.3246, "num_tokens": 2853750089.0, "step": 3742 }, { "epoch": 5.106511905777929, "grad_norm": 0.3189443682243356, "learning_rate": 1.5010791206993123e-05, "loss": 0.3279, "num_tokens": 2854544525.0, "step": 3743 }, { "epoch": 5.107877443031493, "grad_norm": 0.3161106886113253, "learning_rate": 1.500163057910526e-05, "loss": 0.3122, "num_tokens": 2855364298.0, "step": 3744 }, { "epoch": 5.109242980285056, "grad_norm": 0.31623697265642814, "learning_rate": 1.4992472086116828e-05, "loss": 0.3226, "num_tokens": 2856089749.0, "step": 3745 }, { "epoch": 5.110608517538619, "grad_norm": 0.3244991009237146, "learning_rate": 1.4983315730821706e-05, "loss": 0.3202, "num_tokens": 2856825111.0, "step": 3746 }, { "epoch": 5.111974054792182, "grad_norm": 0.3175542169666718, "learning_rate": 1.4974161516013071e-05, "loss": 0.3251, "num_tokens": 2857597980.0, "step": 3747 }, { "epoch": 5.113339592045746, "grad_norm": 0.3317780699486216, "learning_rate": 1.4965009444483505e-05, "loss": 0.3375, "num_tokens": 2858340334.0, "step": 3748 }, { "epoch": 5.114705129299309, "grad_norm": 0.3183782699542311, "learning_rate": 1.4955859519024896e-05, "loss": 0.3325, "num_tokens": 2859133115.0, "step": 3749 }, { "epoch": 5.116070666552872, "grad_norm": 0.33267803442247623, "learning_rate": 1.4946711742428495e-05, "loss": 0.3305, "num_tokens": 2859931061.0, "step": 3750 }, { "epoch": 5.117436203806435, "grad_norm": 0.34014721103657414, "learning_rate": 1.4937566117484887e-05, "loss": 0.2996, "num_tokens": 2860671638.0, "step": 3751 }, { "epoch": 5.118801741059999, "grad_norm": 0.32294367815621977, "learning_rate": 1.4928422646984007e-05, "loss": 0.3185, "num_tokens": 2861466618.0, "step": 3752 }, { "epoch": 5.120167278313562, "grad_norm": 0.32115224557516664, "learning_rate": 1.4919281333715136e-05, "loss": 0.3146, "num_tokens": 2862204795.0, "step": 3753 }, { "epoch": 5.121532815567125, "grad_norm": 0.3237373434269598, "learning_rate": 1.4910142180466883e-05, "loss": 0.3134, "num_tokens": 2862941357.0, "step": 3754 }, { "epoch": 5.1228983528206875, "grad_norm": 0.290519325767907, "learning_rate": 1.4901005190027222e-05, "loss": 0.3173, "num_tokens": 2863732177.0, "step": 3755 }, { "epoch": 5.124263890074251, "grad_norm": 0.311754631507248, "learning_rate": 1.4891870365183447e-05, "loss": 0.3108, "num_tokens": 2864558257.0, "step": 3756 }, { "epoch": 5.125629427327814, "grad_norm": 0.3083537787698677, "learning_rate": 1.488273770872219e-05, "loss": 0.334, "num_tokens": 2865332519.0, "step": 3757 }, { "epoch": 5.126994964581377, "grad_norm": 0.29633506691897726, "learning_rate": 1.4873607223429445e-05, "loss": 0.3156, "num_tokens": 2866081744.0, "step": 3758 }, { "epoch": 5.1283605018349405, "grad_norm": 0.3047971590358222, "learning_rate": 1.4864478912090508e-05, "loss": 0.3093, "num_tokens": 2866771195.0, "step": 3759 }, { "epoch": 5.129726039088504, "grad_norm": 0.311720947607952, "learning_rate": 1.4855352777490054e-05, "loss": 0.3241, "num_tokens": 2867479050.0, "step": 3760 }, { "epoch": 5.131091576342067, "grad_norm": 0.31072535496239884, "learning_rate": 1.4846228822412046e-05, "loss": 0.3201, "num_tokens": 2868244794.0, "step": 3761 }, { "epoch": 5.13245711359563, "grad_norm": 0.3075603905339214, "learning_rate": 1.4837107049639832e-05, "loss": 0.3044, "num_tokens": 2869069644.0, "step": 3762 }, { "epoch": 5.1338226508491935, "grad_norm": 0.3211861179786699, "learning_rate": 1.4827987461956049e-05, "loss": 0.3113, "num_tokens": 2869873024.0, "step": 3763 }, { "epoch": 5.135188188102757, "grad_norm": 0.29281714725243285, "learning_rate": 1.48188700621427e-05, "loss": 0.322, "num_tokens": 2870678881.0, "step": 3764 }, { "epoch": 5.13655372535632, "grad_norm": 0.31263674342863385, "learning_rate": 1.4809754852981115e-05, "loss": 0.3109, "num_tokens": 2871524178.0, "step": 3765 }, { "epoch": 5.137919262609883, "grad_norm": 0.29651111286466875, "learning_rate": 1.4800641837251935e-05, "loss": 0.3052, "num_tokens": 2872279319.0, "step": 3766 }, { "epoch": 5.1392847998634466, "grad_norm": 0.3016514014148143, "learning_rate": 1.4791531017735153e-05, "loss": 0.3213, "num_tokens": 2873073161.0, "step": 3767 }, { "epoch": 5.14065033711701, "grad_norm": 0.2987136845959705, "learning_rate": 1.4782422397210083e-05, "loss": 0.3002, "num_tokens": 2873833436.0, "step": 3768 }, { "epoch": 5.142015874370573, "grad_norm": 0.34647031639522874, "learning_rate": 1.477331597845538e-05, "loss": 0.3296, "num_tokens": 2874557440.0, "step": 3769 }, { "epoch": 5.143381411624135, "grad_norm": 0.32982611835463393, "learning_rate": 1.4764211764249004e-05, "loss": 0.3331, "num_tokens": 2875323341.0, "step": 3770 }, { "epoch": 5.144746948877699, "grad_norm": 0.3170897414957033, "learning_rate": 1.4755109757368264e-05, "loss": 0.3051, "num_tokens": 2876016624.0, "step": 3771 }, { "epoch": 5.146112486131262, "grad_norm": 0.32450384451699726, "learning_rate": 1.4746009960589796e-05, "loss": 0.3188, "num_tokens": 2876723689.0, "step": 3772 }, { "epoch": 5.147478023384825, "grad_norm": 0.3167688512393823, "learning_rate": 1.4736912376689532e-05, "loss": 0.2991, "num_tokens": 2877452052.0, "step": 3773 }, { "epoch": 5.148843560638388, "grad_norm": 0.3164719554746893, "learning_rate": 1.4727817008442777e-05, "loss": 0.3318, "num_tokens": 2878273742.0, "step": 3774 }, { "epoch": 5.150209097891952, "grad_norm": 0.2957868108989803, "learning_rate": 1.4718723858624108e-05, "loss": 0.3324, "num_tokens": 2879075204.0, "step": 3775 }, { "epoch": 5.151574635145515, "grad_norm": 0.3259940425781791, "learning_rate": 1.4709632930007476e-05, "loss": 0.3062, "num_tokens": 2879853777.0, "step": 3776 }, { "epoch": 5.152940172399078, "grad_norm": 0.3119232774477423, "learning_rate": 1.4700544225366114e-05, "loss": 0.3171, "num_tokens": 2880626968.0, "step": 3777 }, { "epoch": 5.1543057096526415, "grad_norm": 0.31673412681766583, "learning_rate": 1.4691457747472595e-05, "loss": 0.3186, "num_tokens": 2881373668.0, "step": 3778 }, { "epoch": 5.155671246906205, "grad_norm": 0.3306420336066037, "learning_rate": 1.4682373499098814e-05, "loss": 0.3282, "num_tokens": 2882119236.0, "step": 3779 }, { "epoch": 5.157036784159768, "grad_norm": 0.3017030183679057, "learning_rate": 1.4673291483015977e-05, "loss": 0.3058, "num_tokens": 2882914376.0, "step": 3780 }, { "epoch": 5.158402321413331, "grad_norm": 0.3021865941185806, "learning_rate": 1.4664211701994618e-05, "loss": 0.3288, "num_tokens": 2883704652.0, "step": 3781 }, { "epoch": 5.1597678586668945, "grad_norm": 0.3158215162572718, "learning_rate": 1.4655134158804574e-05, "loss": 0.3122, "num_tokens": 2884510638.0, "step": 3782 }, { "epoch": 5.161133395920458, "grad_norm": 0.30672892973439975, "learning_rate": 1.464605885621503e-05, "loss": 0.3031, "num_tokens": 2885284587.0, "step": 3783 }, { "epoch": 5.162498933174021, "grad_norm": 0.26407733576547626, "learning_rate": 1.463698579699445e-05, "loss": 0.3034, "num_tokens": 2886097012.0, "step": 3784 }, { "epoch": 5.163864470427584, "grad_norm": 0.316020194311423, "learning_rate": 1.4627914983910643e-05, "loss": 0.3282, "num_tokens": 2886829757.0, "step": 3785 }, { "epoch": 5.1652300076811475, "grad_norm": 0.31995747349943254, "learning_rate": 1.4618846419730713e-05, "loss": 0.3249, "num_tokens": 2887610830.0, "step": 3786 }, { "epoch": 5.16659554493471, "grad_norm": 0.29000904608388217, "learning_rate": 1.4609780107221094e-05, "loss": 0.304, "num_tokens": 2888281944.0, "step": 3787 }, { "epoch": 5.167961082188273, "grad_norm": 0.3140213934248489, "learning_rate": 1.460071604914752e-05, "loss": 0.3316, "num_tokens": 2889179974.0, "step": 3788 }, { "epoch": 5.169326619441836, "grad_norm": 0.3318543681619917, "learning_rate": 1.459165424827504e-05, "loss": 0.3156, "num_tokens": 2889957121.0, "step": 3789 }, { "epoch": 5.1706921566954, "grad_norm": 0.2884514584557764, "learning_rate": 1.4582594707368026e-05, "loss": 0.304, "num_tokens": 2890692868.0, "step": 3790 }, { "epoch": 5.172057693948963, "grad_norm": 0.3027903816919719, "learning_rate": 1.4573537429190139e-05, "loss": 0.3298, "num_tokens": 2891503383.0, "step": 3791 }, { "epoch": 5.173423231202526, "grad_norm": 0.29915121275353157, "learning_rate": 1.456448241650437e-05, "loss": 0.3119, "num_tokens": 2892299751.0, "step": 3792 }, { "epoch": 5.174788768456089, "grad_norm": 0.3220630450348768, "learning_rate": 1.4555429672073014e-05, "loss": 0.3343, "num_tokens": 2893042581.0, "step": 3793 }, { "epoch": 5.176154305709653, "grad_norm": 0.3225197625328078, "learning_rate": 1.4546379198657672e-05, "loss": 0.3117, "num_tokens": 2893673349.0, "step": 3794 }, { "epoch": 5.177519842963216, "grad_norm": 0.30861469277906917, "learning_rate": 1.4537330999019248e-05, "loss": 0.3188, "num_tokens": 2894460176.0, "step": 3795 }, { "epoch": 5.178885380216779, "grad_norm": 0.3250165580283648, "learning_rate": 1.4528285075917947e-05, "loss": 0.3225, "num_tokens": 2895186534.0, "step": 3796 }, { "epoch": 5.180250917470342, "grad_norm": 0.34545519858030216, "learning_rate": 1.4519241432113304e-05, "loss": 0.3298, "num_tokens": 2895975883.0, "step": 3797 }, { "epoch": 5.181616454723906, "grad_norm": 0.33911953925979105, "learning_rate": 1.4510200070364135e-05, "loss": 0.3101, "num_tokens": 2896692568.0, "step": 3798 }, { "epoch": 5.182981991977469, "grad_norm": 0.285900397342956, "learning_rate": 1.4501160993428562e-05, "loss": 0.3093, "num_tokens": 2897458690.0, "step": 3799 }, { "epoch": 5.184347529231032, "grad_norm": 0.31264943188804467, "learning_rate": 1.4492124204064029e-05, "loss": 0.3303, "num_tokens": 2898234091.0, "step": 3800 }, { "epoch": 5.185713066484595, "grad_norm": 0.30269874666825086, "learning_rate": 1.4483089705027267e-05, "loss": 0.311, "num_tokens": 2899010777.0, "step": 3801 }, { "epoch": 5.187078603738159, "grad_norm": 0.3064928818414869, "learning_rate": 1.447405749907429e-05, "loss": 0.3051, "num_tokens": 2899760328.0, "step": 3802 }, { "epoch": 5.188444140991721, "grad_norm": 0.3218899962453956, "learning_rate": 1.4465027588960467e-05, "loss": 0.2998, "num_tokens": 2900535013.0, "step": 3803 }, { "epoch": 5.189809678245284, "grad_norm": 0.29284996578745315, "learning_rate": 1.4455999977440414e-05, "loss": 0.3094, "num_tokens": 2901259567.0, "step": 3804 }, { "epoch": 5.1911752154988475, "grad_norm": 0.28992067214118766, "learning_rate": 1.444697466726805e-05, "loss": 0.311, "num_tokens": 2902039980.0, "step": 3805 }, { "epoch": 5.192540752752411, "grad_norm": 0.3191962042203301, "learning_rate": 1.4437951661196637e-05, "loss": 0.3226, "num_tokens": 2902888144.0, "step": 3806 }, { "epoch": 5.193906290005974, "grad_norm": 0.2899218355336441, "learning_rate": 1.4428930961978678e-05, "loss": 0.3115, "num_tokens": 2903800533.0, "step": 3807 }, { "epoch": 5.195271827259537, "grad_norm": 0.29609922086122564, "learning_rate": 1.4419912572366012e-05, "loss": 0.3387, "num_tokens": 2904650321.0, "step": 3808 }, { "epoch": 5.1966373645131005, "grad_norm": 0.3014174566580907, "learning_rate": 1.4410896495109752e-05, "loss": 0.3116, "num_tokens": 2905378165.0, "step": 3809 }, { "epoch": 5.198002901766664, "grad_norm": 0.2952992131918432, "learning_rate": 1.4401882732960317e-05, "loss": 0.3209, "num_tokens": 2906165429.0, "step": 3810 }, { "epoch": 5.199368439020227, "grad_norm": 0.3110469761502004, "learning_rate": 1.4392871288667415e-05, "loss": 0.309, "num_tokens": 2906881168.0, "step": 3811 }, { "epoch": 5.20073397627379, "grad_norm": 0.30507884094208576, "learning_rate": 1.438386216498004e-05, "loss": 0.3291, "num_tokens": 2907700720.0, "step": 3812 }, { "epoch": 5.202099513527354, "grad_norm": 0.31533243958324353, "learning_rate": 1.4374855364646503e-05, "loss": 0.3084, "num_tokens": 2908502238.0, "step": 3813 }, { "epoch": 5.203465050780917, "grad_norm": 0.29745164656308387, "learning_rate": 1.436585089041436e-05, "loss": 0.3159, "num_tokens": 2909205202.0, "step": 3814 }, { "epoch": 5.20483058803448, "grad_norm": 0.31902851429835916, "learning_rate": 1.435684874503052e-05, "loss": 0.3202, "num_tokens": 2909930358.0, "step": 3815 }, { "epoch": 5.206196125288043, "grad_norm": 0.32072935618229326, "learning_rate": 1.4347848931241122e-05, "loss": 0.3176, "num_tokens": 2910651026.0, "step": 3816 }, { "epoch": 5.207561662541607, "grad_norm": 0.31382181950413246, "learning_rate": 1.4338851451791631e-05, "loss": 0.3133, "num_tokens": 2911457416.0, "step": 3817 }, { "epoch": 5.20892719979517, "grad_norm": 0.3425248316456271, "learning_rate": 1.432985630942679e-05, "loss": 0.3178, "num_tokens": 2912175709.0, "step": 3818 }, { "epoch": 5.210292737048732, "grad_norm": 0.2914054785834843, "learning_rate": 1.4320863506890619e-05, "loss": 0.3355, "num_tokens": 2912991295.0, "step": 3819 }, { "epoch": 5.2116582743022954, "grad_norm": 0.3445857621556914, "learning_rate": 1.4311873046926432e-05, "loss": 0.3117, "num_tokens": 2913724781.0, "step": 3820 }, { "epoch": 5.213023811555859, "grad_norm": 0.3097416658433111, "learning_rate": 1.4302884932276836e-05, "loss": 0.317, "num_tokens": 2914462084.0, "step": 3821 }, { "epoch": 5.214389348809422, "grad_norm": 0.30995939351320667, "learning_rate": 1.4293899165683725e-05, "loss": 0.2975, "num_tokens": 2915148584.0, "step": 3822 }, { "epoch": 5.215754886062985, "grad_norm": 0.32240492443540936, "learning_rate": 1.428491574988824e-05, "loss": 0.3359, "num_tokens": 2915892592.0, "step": 3823 }, { "epoch": 5.2171204233165485, "grad_norm": 0.3393461762281155, "learning_rate": 1.4275934687630862e-05, "loss": 0.3094, "num_tokens": 2916648004.0, "step": 3824 }, { "epoch": 5.218485960570112, "grad_norm": 0.31485359719612693, "learning_rate": 1.42669559816513e-05, "loss": 0.3089, "num_tokens": 2917369755.0, "step": 3825 }, { "epoch": 5.219851497823675, "grad_norm": 0.339985789332744, "learning_rate": 1.425797963468858e-05, "loss": 0.3137, "num_tokens": 2918139154.0, "step": 3826 }, { "epoch": 5.221217035077238, "grad_norm": 0.28780013558377565, "learning_rate": 1.4249005649481e-05, "loss": 0.326, "num_tokens": 2918883890.0, "step": 3827 }, { "epoch": 5.2225825723308015, "grad_norm": 0.3374372581521269, "learning_rate": 1.4240034028766125e-05, "loss": 0.3101, "num_tokens": 2919586829.0, "step": 3828 }, { "epoch": 5.223948109584365, "grad_norm": 0.3413359270969593, "learning_rate": 1.4231064775280813e-05, "loss": 0.3445, "num_tokens": 2920327593.0, "step": 3829 }, { "epoch": 5.225313646837928, "grad_norm": 0.33440345845327996, "learning_rate": 1.4222097891761194e-05, "loss": 0.3315, "num_tokens": 2921118664.0, "step": 3830 }, { "epoch": 5.226679184091491, "grad_norm": 0.314181146836879, "learning_rate": 1.4213133380942678e-05, "loss": 0.3065, "num_tokens": 2921833209.0, "step": 3831 }, { "epoch": 5.2280447213450545, "grad_norm": 0.30235466665886895, "learning_rate": 1.4204171245559936e-05, "loss": 0.3107, "num_tokens": 2922624987.0, "step": 3832 }, { "epoch": 5.229410258598618, "grad_norm": 0.3147371496420386, "learning_rate": 1.4195211488346953e-05, "loss": 0.321, "num_tokens": 2923419893.0, "step": 3833 }, { "epoch": 5.230775795852181, "grad_norm": 0.32313544762277435, "learning_rate": 1.4186254112036944e-05, "loss": 0.3106, "num_tokens": 2924142121.0, "step": 3834 }, { "epoch": 5.232141333105743, "grad_norm": 0.30073730331515197, "learning_rate": 1.4177299119362417e-05, "loss": 0.3276, "num_tokens": 2924989374.0, "step": 3835 }, { "epoch": 5.233506870359307, "grad_norm": 0.2930996994481403, "learning_rate": 1.4168346513055165e-05, "loss": 0.3174, "num_tokens": 2925790629.0, "step": 3836 }, { "epoch": 5.23487240761287, "grad_norm": 0.31949193416379895, "learning_rate": 1.4159396295846222e-05, "loss": 0.3276, "num_tokens": 2926646421.0, "step": 3837 }, { "epoch": 5.236237944866433, "grad_norm": 0.3205711609937783, "learning_rate": 1.4150448470465925e-05, "loss": 0.3391, "num_tokens": 2927405311.0, "step": 3838 }, { "epoch": 5.237603482119996, "grad_norm": 0.2934383461558718, "learning_rate": 1.4141503039643864e-05, "loss": 0.3291, "num_tokens": 2928146186.0, "step": 3839 }, { "epoch": 5.23896901937356, "grad_norm": 0.31127869965995525, "learning_rate": 1.4132560006108911e-05, "loss": 0.3207, "num_tokens": 2928999750.0, "step": 3840 }, { "epoch": 5.240334556627123, "grad_norm": 0.32419407994645744, "learning_rate": 1.412361937258917e-05, "loss": 0.3342, "num_tokens": 2929786118.0, "step": 3841 }, { "epoch": 5.241700093880686, "grad_norm": 0.2859458120992879, "learning_rate": 1.4114681141812079e-05, "loss": 0.3038, "num_tokens": 2930540256.0, "step": 3842 }, { "epoch": 5.243065631134249, "grad_norm": 0.2954280916913282, "learning_rate": 1.4105745316504282e-05, "loss": 0.3139, "num_tokens": 2931309753.0, "step": 3843 }, { "epoch": 5.244431168387813, "grad_norm": 0.3188728275128959, "learning_rate": 1.4096811899391715e-05, "loss": 0.3249, "num_tokens": 2932054402.0, "step": 3844 }, { "epoch": 5.245796705641376, "grad_norm": 0.3338144910606884, "learning_rate": 1.408788089319959e-05, "loss": 0.3296, "num_tokens": 2932769018.0, "step": 3845 }, { "epoch": 5.247162242894939, "grad_norm": 0.30862356447465006, "learning_rate": 1.4078952300652353e-05, "loss": 0.3167, "num_tokens": 2933567115.0, "step": 3846 }, { "epoch": 5.248527780148502, "grad_norm": 0.29220356998608576, "learning_rate": 1.4070026124473733e-05, "loss": 0.3095, "num_tokens": 2934334057.0, "step": 3847 }, { "epoch": 5.249893317402066, "grad_norm": 0.3138936608143327, "learning_rate": 1.4061102367386728e-05, "loss": 0.3208, "num_tokens": 2935128766.0, "step": 3848 }, { "epoch": 5.251258854655629, "grad_norm": 0.31937908108555757, "learning_rate": 1.4052181032113596e-05, "loss": 0.3183, "num_tokens": 2935873209.0, "step": 3849 }, { "epoch": 5.252624391909192, "grad_norm": 0.28331705690032777, "learning_rate": 1.404326212137583e-05, "loss": 0.3024, "num_tokens": 2936628137.0, "step": 3850 }, { "epoch": 5.2539899291627545, "grad_norm": 0.30058438402097487, "learning_rate": 1.403434563789421e-05, "loss": 0.3158, "num_tokens": 2937402047.0, "step": 3851 }, { "epoch": 5.255355466416318, "grad_norm": 0.32953431021390356, "learning_rate": 1.4025431584388779e-05, "loss": 0.3323, "num_tokens": 2938163175.0, "step": 3852 }, { "epoch": 5.256721003669881, "grad_norm": 0.341333002019573, "learning_rate": 1.4016519963578818e-05, "loss": 0.3153, "num_tokens": 2938919984.0, "step": 3853 }, { "epoch": 5.258086540923444, "grad_norm": 0.3071650517791861, "learning_rate": 1.4007610778182889e-05, "loss": 0.3086, "num_tokens": 2939690312.0, "step": 3854 }, { "epoch": 5.2594520781770076, "grad_norm": 0.3357245946079651, "learning_rate": 1.3998704030918784e-05, "loss": 0.2975, "num_tokens": 2940500635.0, "step": 3855 }, { "epoch": 5.260817615430571, "grad_norm": 0.34529630928967153, "learning_rate": 1.3989799724503575e-05, "loss": 0.3141, "num_tokens": 2941266229.0, "step": 3856 }, { "epoch": 5.262183152684134, "grad_norm": 0.30420331397192096, "learning_rate": 1.3980897861653575e-05, "loss": 0.3237, "num_tokens": 2942046011.0, "step": 3857 }, { "epoch": 5.263548689937697, "grad_norm": 0.3214216326218839, "learning_rate": 1.397199844508436e-05, "loss": 0.3174, "num_tokens": 2942822616.0, "step": 3858 }, { "epoch": 5.264914227191261, "grad_norm": 0.30757393315442266, "learning_rate": 1.3963101477510765e-05, "loss": 0.3351, "num_tokens": 2943726274.0, "step": 3859 }, { "epoch": 5.266279764444824, "grad_norm": 0.30740280519780594, "learning_rate": 1.3954206961646853e-05, "loss": 0.3221, "num_tokens": 2944519120.0, "step": 3860 }, { "epoch": 5.267645301698387, "grad_norm": 0.3519175433104601, "learning_rate": 1.3945314900205959e-05, "loss": 0.3289, "num_tokens": 2945287946.0, "step": 3861 }, { "epoch": 5.26901083895195, "grad_norm": 0.29948828316737464, "learning_rate": 1.3936425295900675e-05, "loss": 0.3159, "num_tokens": 2946050946.0, "step": 3862 }, { "epoch": 5.270376376205514, "grad_norm": 0.33749438212535254, "learning_rate": 1.3927538151442836e-05, "loss": 0.3346, "num_tokens": 2946838789.0, "step": 3863 }, { "epoch": 5.271741913459077, "grad_norm": 0.3226179026505166, "learning_rate": 1.3918653469543508e-05, "loss": 0.3029, "num_tokens": 2947594649.0, "step": 3864 }, { "epoch": 5.27310745071264, "grad_norm": 0.29868406421031884, "learning_rate": 1.3909771252913038e-05, "loss": 0.3192, "num_tokens": 2948385309.0, "step": 3865 }, { "epoch": 5.274472987966203, "grad_norm": 0.32300596564457085, "learning_rate": 1.3900891504261002e-05, "loss": 0.3218, "num_tokens": 2949147582.0, "step": 3866 }, { "epoch": 5.275838525219767, "grad_norm": 0.3159473936681644, "learning_rate": 1.3892014226296219e-05, "loss": 0.3114, "num_tokens": 2949926277.0, "step": 3867 }, { "epoch": 5.277204062473329, "grad_norm": 0.3128731555128646, "learning_rate": 1.3883139421726779e-05, "loss": 0.3218, "num_tokens": 2950730013.0, "step": 3868 }, { "epoch": 5.278569599726892, "grad_norm": 0.3318210077072103, "learning_rate": 1.3874267093259981e-05, "loss": 0.3267, "num_tokens": 2951485273.0, "step": 3869 }, { "epoch": 5.2799351369804555, "grad_norm": 0.3077260229731271, "learning_rate": 1.38653972436024e-05, "loss": 0.3141, "num_tokens": 2952191219.0, "step": 3870 }, { "epoch": 5.281300674234019, "grad_norm": 0.3188933476338234, "learning_rate": 1.3856529875459838e-05, "loss": 0.3463, "num_tokens": 2952952636.0, "step": 3871 }, { "epoch": 5.282666211487582, "grad_norm": 0.3092454465372411, "learning_rate": 1.3847664991537355e-05, "loss": 0.3314, "num_tokens": 2953693609.0, "step": 3872 }, { "epoch": 5.284031748741145, "grad_norm": 0.34181150414460565, "learning_rate": 1.3838802594539228e-05, "loss": 0.3343, "num_tokens": 2954524429.0, "step": 3873 }, { "epoch": 5.2853972859947085, "grad_norm": 0.3116994915151457, "learning_rate": 1.3829942687169e-05, "loss": 0.3064, "num_tokens": 2955351525.0, "step": 3874 }, { "epoch": 5.286762823248272, "grad_norm": 0.30015727833329936, "learning_rate": 1.382108527212945e-05, "loss": 0.3196, "num_tokens": 2956084086.0, "step": 3875 }, { "epoch": 5.288128360501835, "grad_norm": 0.31235225180878456, "learning_rate": 1.3812230352122567e-05, "loss": 0.3279, "num_tokens": 2956835961.0, "step": 3876 }, { "epoch": 5.289493897755398, "grad_norm": 0.31870956684459806, "learning_rate": 1.380337792984964e-05, "loss": 0.3113, "num_tokens": 2957563220.0, "step": 3877 }, { "epoch": 5.2908594350089615, "grad_norm": 0.3000272218971839, "learning_rate": 1.3794528008011132e-05, "loss": 0.3295, "num_tokens": 2958364073.0, "step": 3878 }, { "epoch": 5.292224972262525, "grad_norm": 0.3048684284155521, "learning_rate": 1.378568058930678e-05, "loss": 0.3098, "num_tokens": 2959060095.0, "step": 3879 }, { "epoch": 5.293590509516088, "grad_norm": 0.3252680015812635, "learning_rate": 1.3776835676435547e-05, "loss": 0.3097, "num_tokens": 2959773835.0, "step": 3880 }, { "epoch": 5.294956046769651, "grad_norm": 0.29865498817829805, "learning_rate": 1.3767993272095643e-05, "loss": 0.3186, "num_tokens": 2960500556.0, "step": 3881 }, { "epoch": 5.2963215840232145, "grad_norm": 0.3451808117830505, "learning_rate": 1.3759153378984487e-05, "loss": 0.3115, "num_tokens": 2961237474.0, "step": 3882 }, { "epoch": 5.297687121276777, "grad_norm": 0.29259340800214123, "learning_rate": 1.3750315999798755e-05, "loss": 0.3303, "num_tokens": 2962054518.0, "step": 3883 }, { "epoch": 5.29905265853034, "grad_norm": 0.31558101903368735, "learning_rate": 1.3741481137234354e-05, "loss": 0.3255, "num_tokens": 2962898403.0, "step": 3884 }, { "epoch": 5.300418195783903, "grad_norm": 0.3293913313754179, "learning_rate": 1.3732648793986397e-05, "loss": 0.3332, "num_tokens": 2963694779.0, "step": 3885 }, { "epoch": 5.301783733037467, "grad_norm": 0.2966022175898156, "learning_rate": 1.3723818972749279e-05, "loss": 0.3059, "num_tokens": 2964479884.0, "step": 3886 }, { "epoch": 5.30314927029103, "grad_norm": 0.2892574607418559, "learning_rate": 1.3714991676216575e-05, "loss": 0.3096, "num_tokens": 2965260789.0, "step": 3887 }, { "epoch": 5.304514807544593, "grad_norm": 0.27797926646505905, "learning_rate": 1.3706166907081113e-05, "loss": 0.3232, "num_tokens": 2966052441.0, "step": 3888 }, { "epoch": 5.305880344798156, "grad_norm": 0.31274020596552093, "learning_rate": 1.3697344668034967e-05, "loss": 0.3219, "num_tokens": 2966838727.0, "step": 3889 }, { "epoch": 5.30724588205172, "grad_norm": 0.3275361958930572, "learning_rate": 1.3688524961769396e-05, "loss": 0.3215, "num_tokens": 2967651690.0, "step": 3890 }, { "epoch": 5.308611419305283, "grad_norm": 0.3162363178896744, "learning_rate": 1.3679707790974917e-05, "loss": 0.3197, "num_tokens": 2968414422.0, "step": 3891 }, { "epoch": 5.309976956558846, "grad_norm": 0.3036762575324904, "learning_rate": 1.367089315834127e-05, "loss": 0.3124, "num_tokens": 2969132221.0, "step": 3892 }, { "epoch": 5.311342493812409, "grad_norm": 0.3181045355187036, "learning_rate": 1.3662081066557424e-05, "loss": 0.3192, "num_tokens": 2969875938.0, "step": 3893 }, { "epoch": 5.312708031065973, "grad_norm": 0.30502816705654423, "learning_rate": 1.365327151831155e-05, "loss": 0.3186, "num_tokens": 2970621325.0, "step": 3894 }, { "epoch": 5.314073568319536, "grad_norm": 0.35504328363669674, "learning_rate": 1.364446451629108e-05, "loss": 0.3215, "num_tokens": 2971298137.0, "step": 3895 }, { "epoch": 5.315439105573099, "grad_norm": 0.2979100597102859, "learning_rate": 1.3635660063182632e-05, "loss": 0.3128, "num_tokens": 2972063341.0, "step": 3896 }, { "epoch": 5.316804642826662, "grad_norm": 0.3280056098882309, "learning_rate": 1.3626858161672074e-05, "loss": 0.336, "num_tokens": 2972838274.0, "step": 3897 }, { "epoch": 5.318170180080226, "grad_norm": 0.33034419425950584, "learning_rate": 1.3618058814444484e-05, "loss": 0.3039, "num_tokens": 2973615333.0, "step": 3898 }, { "epoch": 5.319535717333789, "grad_norm": 0.32083471924545504, "learning_rate": 1.360926202418415e-05, "loss": 0.3358, "num_tokens": 2974414900.0, "step": 3899 }, { "epoch": 5.320901254587351, "grad_norm": 0.30974302708654816, "learning_rate": 1.3600467793574605e-05, "loss": 0.3324, "num_tokens": 2975235244.0, "step": 3900 }, { "epoch": 5.322266791840915, "grad_norm": 0.35014806344929905, "learning_rate": 1.359167612529858e-05, "loss": 0.3065, "num_tokens": 2975928748.0, "step": 3901 }, { "epoch": 5.323632329094478, "grad_norm": 0.2831112344143584, "learning_rate": 1.3582887022038045e-05, "loss": 0.3235, "num_tokens": 2976715939.0, "step": 3902 }, { "epoch": 5.324997866348041, "grad_norm": 0.30458224829960123, "learning_rate": 1.3574100486474152e-05, "loss": 0.3151, "num_tokens": 2977560098.0, "step": 3903 }, { "epoch": 5.326363403601604, "grad_norm": 0.400501146659686, "learning_rate": 1.3565316521287316e-05, "loss": 0.3154, "num_tokens": 2978261356.0, "step": 3904 }, { "epoch": 5.327728940855168, "grad_norm": 0.31489397949472403, "learning_rate": 1.3556535129157138e-05, "loss": 0.328, "num_tokens": 2979037412.0, "step": 3905 }, { "epoch": 5.329094478108731, "grad_norm": 0.31850781898127434, "learning_rate": 1.3547756312762422e-05, "loss": 0.3292, "num_tokens": 2979814603.0, "step": 3906 }, { "epoch": 5.330460015362294, "grad_norm": 0.3157475448587537, "learning_rate": 1.3538980074781231e-05, "loss": 0.3069, "num_tokens": 2980564647.0, "step": 3907 }, { "epoch": 5.331825552615857, "grad_norm": 0.35145106922228464, "learning_rate": 1.3530206417890797e-05, "loss": 0.3258, "num_tokens": 2981310300.0, "step": 3908 }, { "epoch": 5.333191089869421, "grad_norm": 0.31377126106395437, "learning_rate": 1.352143534476759e-05, "loss": 0.3234, "num_tokens": 2982033185.0, "step": 3909 }, { "epoch": 5.334556627122984, "grad_norm": 0.3298728415680622, "learning_rate": 1.3512666858087285e-05, "loss": 0.3228, "num_tokens": 2982848474.0, "step": 3910 }, { "epoch": 5.335922164376547, "grad_norm": 0.3357704455660853, "learning_rate": 1.3503900960524772e-05, "loss": 0.3355, "num_tokens": 2983653628.0, "step": 3911 }, { "epoch": 5.33728770163011, "grad_norm": 0.351364120341169, "learning_rate": 1.3495137654754137e-05, "loss": 0.3127, "num_tokens": 2984416772.0, "step": 3912 }, { "epoch": 5.338653238883674, "grad_norm": 0.32302754171786446, "learning_rate": 1.3486376943448686e-05, "loss": 0.3022, "num_tokens": 2985157701.0, "step": 3913 }, { "epoch": 5.340018776137237, "grad_norm": 0.3062462390531375, "learning_rate": 1.3477618829280936e-05, "loss": 0.3256, "num_tokens": 2985898060.0, "step": 3914 }, { "epoch": 5.341384313390799, "grad_norm": 0.358115269376158, "learning_rate": 1.3468863314922614e-05, "loss": 0.3094, "num_tokens": 2986605338.0, "step": 3915 }, { "epoch": 5.342749850644363, "grad_norm": 0.3223835674895282, "learning_rate": 1.3460110403044642e-05, "loss": 0.3057, "num_tokens": 2987388621.0, "step": 3916 }, { "epoch": 5.344115387897926, "grad_norm": 0.3156959747305689, "learning_rate": 1.3451360096317152e-05, "loss": 0.3288, "num_tokens": 2988145617.0, "step": 3917 }, { "epoch": 5.345480925151489, "grad_norm": 0.35614115367077703, "learning_rate": 1.3442612397409482e-05, "loss": 0.3348, "num_tokens": 2988849071.0, "step": 3918 }, { "epoch": 5.346846462405052, "grad_norm": 0.3239557654971014, "learning_rate": 1.3433867308990186e-05, "loss": 0.3307, "num_tokens": 2989698036.0, "step": 3919 }, { "epoch": 5.3482119996586155, "grad_norm": 0.3467427505438037, "learning_rate": 1.3425124833727012e-05, "loss": 0.3162, "num_tokens": 2990416949.0, "step": 3920 }, { "epoch": 5.349577536912179, "grad_norm": 0.3531248630506508, "learning_rate": 1.3416384974286895e-05, "loss": 0.2999, "num_tokens": 2991073570.0, "step": 3921 }, { "epoch": 5.350943074165742, "grad_norm": 0.3145411847102799, "learning_rate": 1.3407647733336001e-05, "loss": 0.3144, "num_tokens": 2991738334.0, "step": 3922 }, { "epoch": 5.352308611419305, "grad_norm": 0.36142320766648883, "learning_rate": 1.3398913113539678e-05, "loss": 0.3286, "num_tokens": 2992531767.0, "step": 3923 }, { "epoch": 5.3536741486728685, "grad_norm": 0.37363225380171894, "learning_rate": 1.3390181117562483e-05, "loss": 0.3106, "num_tokens": 2993317909.0, "step": 3924 }, { "epoch": 5.355039685926432, "grad_norm": 0.29921267517071987, "learning_rate": 1.3381451748068179e-05, "loss": 0.3127, "num_tokens": 2994073804.0, "step": 3925 }, { "epoch": 5.356405223179995, "grad_norm": 0.36452930436215036, "learning_rate": 1.33727250077197e-05, "loss": 0.3188, "num_tokens": 2994838006.0, "step": 3926 }, { "epoch": 5.357770760433558, "grad_norm": 0.3620240859449809, "learning_rate": 1.3364000899179205e-05, "loss": 0.3257, "num_tokens": 2995652277.0, "step": 3927 }, { "epoch": 5.3591362976871215, "grad_norm": 0.30268609598219665, "learning_rate": 1.3355279425108054e-05, "loss": 0.3327, "num_tokens": 2996398720.0, "step": 3928 }, { "epoch": 5.360501834940685, "grad_norm": 0.3448156173759298, "learning_rate": 1.3346560588166764e-05, "loss": 0.3278, "num_tokens": 2997131561.0, "step": 3929 }, { "epoch": 5.361867372194248, "grad_norm": 0.3483728962601337, "learning_rate": 1.3337844391015096e-05, "loss": 0.3181, "num_tokens": 2997934342.0, "step": 3930 }, { "epoch": 5.363232909447811, "grad_norm": 0.34933150971281035, "learning_rate": 1.3329130836311978e-05, "loss": 0.3187, "num_tokens": 2998687850.0, "step": 3931 }, { "epoch": 5.364598446701374, "grad_norm": 0.29981620961417527, "learning_rate": 1.3320419926715536e-05, "loss": 0.3179, "num_tokens": 2999417879.0, "step": 3932 }, { "epoch": 5.365963983954937, "grad_norm": 0.3434905567944371, "learning_rate": 1.3311711664883098e-05, "loss": 0.314, "num_tokens": 3000213010.0, "step": 3933 }, { "epoch": 5.3673295212085, "grad_norm": 0.32657283574863183, "learning_rate": 1.3303006053471177e-05, "loss": 0.3123, "num_tokens": 3000940295.0, "step": 3934 }, { "epoch": 5.368695058462063, "grad_norm": 0.32788851958198423, "learning_rate": 1.3294303095135468e-05, "loss": 0.3183, "num_tokens": 3001682320.0, "step": 3935 }, { "epoch": 5.370060595715627, "grad_norm": 0.3319579035282572, "learning_rate": 1.3285602792530869e-05, "loss": 0.3361, "num_tokens": 3002515148.0, "step": 3936 }, { "epoch": 5.37142613296919, "grad_norm": 0.2857122233144583, "learning_rate": 1.3276905148311481e-05, "loss": 0.3234, "num_tokens": 3003256720.0, "step": 3937 }, { "epoch": 5.372791670222753, "grad_norm": 0.3126226027035348, "learning_rate": 1.3268210165130559e-05, "loss": 0.3086, "num_tokens": 3003989555.0, "step": 3938 }, { "epoch": 5.374157207476316, "grad_norm": 0.3099893664288033, "learning_rate": 1.3259517845640576e-05, "loss": 0.3214, "num_tokens": 3004778789.0, "step": 3939 }, { "epoch": 5.37552274472988, "grad_norm": 0.31103533534814654, "learning_rate": 1.3250828192493179e-05, "loss": 0.3232, "num_tokens": 3005603740.0, "step": 3940 }, { "epoch": 5.376888281983443, "grad_norm": 0.303073117163565, "learning_rate": 1.3242141208339204e-05, "loss": 0.324, "num_tokens": 3006477235.0, "step": 3941 }, { "epoch": 5.378253819237006, "grad_norm": 0.2763997527326936, "learning_rate": 1.3233456895828677e-05, "loss": 0.3355, "num_tokens": 3007257369.0, "step": 3942 }, { "epoch": 5.379619356490569, "grad_norm": 0.29614991944014535, "learning_rate": 1.3224775257610811e-05, "loss": 0.3202, "num_tokens": 3008091255.0, "step": 3943 }, { "epoch": 5.380984893744133, "grad_norm": 0.31535241588210955, "learning_rate": 1.3216096296333985e-05, "loss": 0.3281, "num_tokens": 3008835966.0, "step": 3944 }, { "epoch": 5.382350430997696, "grad_norm": 0.2994159445928475, "learning_rate": 1.320742001464578e-05, "loss": 0.3186, "num_tokens": 3009581954.0, "step": 3945 }, { "epoch": 5.383715968251259, "grad_norm": 0.34620383119442466, "learning_rate": 1.319874641519296e-05, "loss": 0.3109, "num_tokens": 3010312747.0, "step": 3946 }, { "epoch": 5.3850815055048225, "grad_norm": 0.2912227070531855, "learning_rate": 1.3190075500621448e-05, "loss": 0.3235, "num_tokens": 3011082766.0, "step": 3947 }, { "epoch": 5.386447042758386, "grad_norm": 0.2939352201851818, "learning_rate": 1.3181407273576388e-05, "loss": 0.3022, "num_tokens": 3011822170.0, "step": 3948 }, { "epoch": 5.387812580011948, "grad_norm": 0.35914785229222623, "learning_rate": 1.3172741736702061e-05, "loss": 0.3301, "num_tokens": 3012652051.0, "step": 3949 }, { "epoch": 5.389178117265511, "grad_norm": 0.30682976270499646, "learning_rate": 1.3164078892641951e-05, "loss": 0.3179, "num_tokens": 3013437448.0, "step": 3950 }, { "epoch": 5.390543654519075, "grad_norm": 0.3024943905412036, "learning_rate": 1.315541874403873e-05, "loss": 0.3069, "num_tokens": 3014133575.0, "step": 3951 }, { "epoch": 5.391909191772638, "grad_norm": 0.3056812644204493, "learning_rate": 1.3146761293534219e-05, "loss": 0.3128, "num_tokens": 3014806358.0, "step": 3952 }, { "epoch": 5.393274729026201, "grad_norm": 0.32683919555944924, "learning_rate": 1.3138106543769433e-05, "loss": 0.3421, "num_tokens": 3015569583.0, "step": 3953 }, { "epoch": 5.394640266279764, "grad_norm": 0.3058866715876599, "learning_rate": 1.3129454497384565e-05, "loss": 0.3251, "num_tokens": 3016278910.0, "step": 3954 }, { "epoch": 5.396005803533328, "grad_norm": 0.3120180175903004, "learning_rate": 1.3120805157018985e-05, "loss": 0.3355, "num_tokens": 3017074705.0, "step": 3955 }, { "epoch": 5.397371340786891, "grad_norm": 0.30643123274957484, "learning_rate": 1.3112158525311214e-05, "loss": 0.3136, "num_tokens": 3017833239.0, "step": 3956 }, { "epoch": 5.398736878040454, "grad_norm": 0.3114139810071699, "learning_rate": 1.310351460489899e-05, "loss": 0.3334, "num_tokens": 3018622790.0, "step": 3957 }, { "epoch": 5.400102415294017, "grad_norm": 0.2938631549600634, "learning_rate": 1.309487339841918e-05, "loss": 0.3114, "num_tokens": 3019347669.0, "step": 3958 }, { "epoch": 5.401467952547581, "grad_norm": 0.3142162247497182, "learning_rate": 1.3086234908507849e-05, "loss": 0.3261, "num_tokens": 3020126173.0, "step": 3959 }, { "epoch": 5.402833489801144, "grad_norm": 0.30251892661901647, "learning_rate": 1.3077599137800232e-05, "loss": 0.3285, "num_tokens": 3020908127.0, "step": 3960 }, { "epoch": 5.404199027054707, "grad_norm": 0.30974979253620577, "learning_rate": 1.3068966088930713e-05, "loss": 0.3287, "num_tokens": 3021648448.0, "step": 3961 }, { "epoch": 5.40556456430827, "grad_norm": 0.3513368840305017, "learning_rate": 1.306033576453287e-05, "loss": 0.3116, "num_tokens": 3022383335.0, "step": 3962 }, { "epoch": 5.406930101561834, "grad_norm": 0.271718946785928, "learning_rate": 1.3051708167239441e-05, "loss": 0.3276, "num_tokens": 3023156801.0, "step": 3963 }, { "epoch": 5.408295638815396, "grad_norm": 0.30610414760553245, "learning_rate": 1.304308329968234e-05, "loss": 0.324, "num_tokens": 3023959678.0, "step": 3964 }, { "epoch": 5.409661176068959, "grad_norm": 0.31638407664564766, "learning_rate": 1.3034461164492623e-05, "loss": 0.3252, "num_tokens": 3024689913.0, "step": 3965 }, { "epoch": 5.4110267133225225, "grad_norm": 0.3013684843889557, "learning_rate": 1.3025841764300548e-05, "loss": 0.3226, "num_tokens": 3025407564.0, "step": 3966 }, { "epoch": 5.412392250576086, "grad_norm": 0.33343469260990444, "learning_rate": 1.3017225101735518e-05, "loss": 0.3213, "num_tokens": 3026177276.0, "step": 3967 }, { "epoch": 5.413757787829649, "grad_norm": 0.28871714532566684, "learning_rate": 1.3008611179426082e-05, "loss": 0.3231, "num_tokens": 3026923881.0, "step": 3968 }, { "epoch": 5.415123325083212, "grad_norm": 0.3509082537910586, "learning_rate": 1.3000000000000006e-05, "loss": 0.332, "num_tokens": 3027687394.0, "step": 3969 }, { "epoch": 5.4164888623367755, "grad_norm": 0.29636523277227217, "learning_rate": 1.2991391566084165e-05, "loss": 0.3183, "num_tokens": 3028389599.0, "step": 3970 }, { "epoch": 5.417854399590339, "grad_norm": 0.3298114146810703, "learning_rate": 1.298278588030463e-05, "loss": 0.3289, "num_tokens": 3029169920.0, "step": 3971 }, { "epoch": 5.419219936843902, "grad_norm": 0.3124925893991876, "learning_rate": 1.2974182945286615e-05, "loss": 0.3154, "num_tokens": 3029941531.0, "step": 3972 }, { "epoch": 5.420585474097465, "grad_norm": 0.33090097214785963, "learning_rate": 1.2965582763654521e-05, "loss": 0.3203, "num_tokens": 3030651189.0, "step": 3973 }, { "epoch": 5.4219510113510285, "grad_norm": 0.31267180145006196, "learning_rate": 1.2956985338031864e-05, "loss": 0.3102, "num_tokens": 3031366648.0, "step": 3974 }, { "epoch": 5.423316548604592, "grad_norm": 0.2849694322224067, "learning_rate": 1.2948390671041373e-05, "loss": 0.3084, "num_tokens": 3032151233.0, "step": 3975 }, { "epoch": 5.424682085858155, "grad_norm": 0.3297235860804055, "learning_rate": 1.2939798765304897e-05, "loss": 0.3232, "num_tokens": 3032860335.0, "step": 3976 }, { "epoch": 5.426047623111718, "grad_norm": 0.31126675560380324, "learning_rate": 1.293120962344344e-05, "loss": 0.3241, "num_tokens": 3033667505.0, "step": 3977 }, { "epoch": 5.4274131603652815, "grad_norm": 0.3239430842112817, "learning_rate": 1.2922623248077204e-05, "loss": 0.3265, "num_tokens": 3034450014.0, "step": 3978 }, { "epoch": 5.428778697618845, "grad_norm": 0.2997876371069391, "learning_rate": 1.2914039641825503e-05, "loss": 0.3126, "num_tokens": 3035308364.0, "step": 3979 }, { "epoch": 5.430144234872408, "grad_norm": 0.2918370007996775, "learning_rate": 1.2905458807306823e-05, "loss": 0.3208, "num_tokens": 3036121851.0, "step": 3980 }, { "epoch": 5.43150977212597, "grad_norm": 0.31734488376791276, "learning_rate": 1.2896880747138813e-05, "loss": 0.3176, "num_tokens": 3036833305.0, "step": 3981 }, { "epoch": 5.432875309379534, "grad_norm": 0.3120944063960534, "learning_rate": 1.288830546393827e-05, "loss": 0.3185, "num_tokens": 3037622713.0, "step": 3982 }, { "epoch": 5.434240846633097, "grad_norm": 0.31051191854164367, "learning_rate": 1.2879732960321131e-05, "loss": 0.3137, "num_tokens": 3038534317.0, "step": 3983 }, { "epoch": 5.43560638388666, "grad_norm": 0.3105443087438612, "learning_rate": 1.28711632389025e-05, "loss": 0.3182, "num_tokens": 3039338160.0, "step": 3984 }, { "epoch": 5.436971921140223, "grad_norm": 0.28151400048547615, "learning_rate": 1.2862596302296635e-05, "loss": 0.3198, "num_tokens": 3040113293.0, "step": 3985 }, { "epoch": 5.438337458393787, "grad_norm": 0.3539967698171404, "learning_rate": 1.2854032153116918e-05, "loss": 0.3418, "num_tokens": 3040894044.0, "step": 3986 }, { "epoch": 5.43970299564735, "grad_norm": 0.2856637615377639, "learning_rate": 1.2845470793975925e-05, "loss": 0.3113, "num_tokens": 3041723690.0, "step": 3987 }, { "epoch": 5.441068532900913, "grad_norm": 0.2936594716081331, "learning_rate": 1.2836912227485337e-05, "loss": 0.3191, "num_tokens": 3042486863.0, "step": 3988 }, { "epoch": 5.4424340701544764, "grad_norm": 0.3123864733526829, "learning_rate": 1.282835645625601e-05, "loss": 0.3059, "num_tokens": 3043273006.0, "step": 3989 }, { "epoch": 5.44379960740804, "grad_norm": 0.313612460734279, "learning_rate": 1.2819803482897944e-05, "loss": 0.3241, "num_tokens": 3043955791.0, "step": 3990 }, { "epoch": 5.445165144661603, "grad_norm": 0.30171685749139365, "learning_rate": 1.2811253310020266e-05, "loss": 0.3298, "num_tokens": 3044834069.0, "step": 3991 }, { "epoch": 5.446530681915166, "grad_norm": 0.32494545271235387, "learning_rate": 1.2802705940231277e-05, "loss": 0.329, "num_tokens": 3045602629.0, "step": 3992 }, { "epoch": 5.4478962191687295, "grad_norm": 0.32790324647664926, "learning_rate": 1.2794161376138403e-05, "loss": 0.3329, "num_tokens": 3046360595.0, "step": 3993 }, { "epoch": 5.449261756422293, "grad_norm": 0.3099823913997471, "learning_rate": 1.2785619620348228e-05, "loss": 0.3248, "num_tokens": 3047199492.0, "step": 3994 }, { "epoch": 5.450627293675856, "grad_norm": 0.31910002673005566, "learning_rate": 1.2777080675466453e-05, "loss": 0.3035, "num_tokens": 3047803460.0, "step": 3995 }, { "epoch": 5.451992830929418, "grad_norm": 0.3019582917482871, "learning_rate": 1.2768544544097967e-05, "loss": 0.322, "num_tokens": 3048637447.0, "step": 3996 }, { "epoch": 5.453358368182982, "grad_norm": 0.30254745815796247, "learning_rate": 1.2760011228846754e-05, "loss": 0.3333, "num_tokens": 3049428743.0, "step": 3997 }, { "epoch": 5.454723905436545, "grad_norm": 0.30922076136664345, "learning_rate": 1.2751480732315962e-05, "loss": 0.3323, "num_tokens": 3050164842.0, "step": 3998 }, { "epoch": 5.456089442690108, "grad_norm": 0.31101410458178963, "learning_rate": 1.2742953057107888e-05, "loss": 0.3049, "num_tokens": 3050907511.0, "step": 3999 }, { "epoch": 5.457454979943671, "grad_norm": 0.2790423961916549, "learning_rate": 1.2734428205823944e-05, "loss": 0.3169, "num_tokens": 3051682836.0, "step": 4000 }, { "epoch": 5.458820517197235, "grad_norm": 0.30363757658133245, "learning_rate": 1.2725906181064693e-05, "loss": 0.3274, "num_tokens": 3052478045.0, "step": 4001 }, { "epoch": 5.460186054450798, "grad_norm": 0.3084301713103453, "learning_rate": 1.271738698542984e-05, "loss": 0.3235, "num_tokens": 3053210446.0, "step": 4002 }, { "epoch": 5.461551591704361, "grad_norm": 0.27331131616316967, "learning_rate": 1.2708870621518222e-05, "loss": 0.3219, "num_tokens": 3054006308.0, "step": 4003 }, { "epoch": 5.462917128957924, "grad_norm": 0.3164052146705712, "learning_rate": 1.270035709192781e-05, "loss": 0.3366, "num_tokens": 3054741990.0, "step": 4004 }, { "epoch": 5.464282666211488, "grad_norm": 0.30990437049166786, "learning_rate": 1.2691846399255727e-05, "loss": 0.3354, "num_tokens": 3055526044.0, "step": 4005 }, { "epoch": 5.465648203465051, "grad_norm": 0.30473270589789775, "learning_rate": 1.2683338546098199e-05, "loss": 0.3337, "num_tokens": 3056252404.0, "step": 4006 }, { "epoch": 5.467013740718614, "grad_norm": 0.3181537364782996, "learning_rate": 1.2674833535050607e-05, "loss": 0.3293, "num_tokens": 3056966395.0, "step": 4007 }, { "epoch": 5.468379277972177, "grad_norm": 0.3001474585760613, "learning_rate": 1.2666331368707475e-05, "loss": 0.3138, "num_tokens": 3057664301.0, "step": 4008 }, { "epoch": 5.469744815225741, "grad_norm": 0.3126898366160934, "learning_rate": 1.2657832049662426e-05, "loss": 0.3419, "num_tokens": 3058438685.0, "step": 4009 }, { "epoch": 5.471110352479304, "grad_norm": 0.31562567952341397, "learning_rate": 1.2649335580508244e-05, "loss": 0.3154, "num_tokens": 3059214596.0, "step": 4010 }, { "epoch": 5.472475889732867, "grad_norm": 0.30650090177802647, "learning_rate": 1.2640841963836836e-05, "loss": 0.3221, "num_tokens": 3060016497.0, "step": 4011 }, { "epoch": 5.47384142698643, "grad_norm": 0.2971108615941606, "learning_rate": 1.2632351202239232e-05, "loss": 0.31, "num_tokens": 3060726131.0, "step": 4012 }, { "epoch": 5.475206964239993, "grad_norm": 0.3183829109130609, "learning_rate": 1.2623863298305597e-05, "loss": 0.3237, "num_tokens": 3061575665.0, "step": 4013 }, { "epoch": 5.476572501493556, "grad_norm": 0.32177360558377605, "learning_rate": 1.2615378254625235e-05, "loss": 0.3136, "num_tokens": 3062279694.0, "step": 4014 }, { "epoch": 5.477938038747119, "grad_norm": 0.2859588128807173, "learning_rate": 1.2606896073786543e-05, "loss": 0.3316, "num_tokens": 3063111464.0, "step": 4015 }, { "epoch": 5.4793035760006825, "grad_norm": 0.3003122828558798, "learning_rate": 1.2598416758377078e-05, "loss": 0.329, "num_tokens": 3063836991.0, "step": 4016 }, { "epoch": 5.480669113254246, "grad_norm": 0.30844551757338684, "learning_rate": 1.2589940310983519e-05, "loss": 0.3095, "num_tokens": 3064568829.0, "step": 4017 }, { "epoch": 5.482034650507809, "grad_norm": 0.3118630079861652, "learning_rate": 1.2581466734191652e-05, "loss": 0.3221, "num_tokens": 3065329798.0, "step": 4018 }, { "epoch": 5.483400187761372, "grad_norm": 0.3059413985340785, "learning_rate": 1.25729960305864e-05, "loss": 0.3268, "num_tokens": 3066050636.0, "step": 4019 }, { "epoch": 5.4847657250149355, "grad_norm": 0.3293002155805584, "learning_rate": 1.2564528202751811e-05, "loss": 0.3226, "num_tokens": 3066815131.0, "step": 4020 }, { "epoch": 5.486131262268499, "grad_norm": 0.3008520031355244, "learning_rate": 1.2556063253271054e-05, "loss": 0.3197, "num_tokens": 3067613614.0, "step": 4021 }, { "epoch": 5.487496799522062, "grad_norm": 0.3320273044150711, "learning_rate": 1.2547601184726421e-05, "loss": 0.3376, "num_tokens": 3068410579.0, "step": 4022 }, { "epoch": 5.488862336775625, "grad_norm": 0.3031547745254982, "learning_rate": 1.253914199969931e-05, "loss": 0.3319, "num_tokens": 3069256654.0, "step": 4023 }, { "epoch": 5.4902278740291885, "grad_norm": 0.3040631680741209, "learning_rate": 1.2530685700770262e-05, "loss": 0.319, "num_tokens": 3070071738.0, "step": 4024 }, { "epoch": 5.491593411282752, "grad_norm": 0.2865896951040955, "learning_rate": 1.2522232290518924e-05, "loss": 0.3317, "num_tokens": 3070909780.0, "step": 4025 }, { "epoch": 5.492958948536315, "grad_norm": 0.31005856225073714, "learning_rate": 1.2513781771524073e-05, "loss": 0.3163, "num_tokens": 3071648369.0, "step": 4026 }, { "epoch": 5.494324485789878, "grad_norm": 0.30718257809985655, "learning_rate": 1.2505334146363585e-05, "loss": 0.3165, "num_tokens": 3072395992.0, "step": 4027 }, { "epoch": 5.495690023043442, "grad_norm": 0.316017635058756, "learning_rate": 1.249688941761447e-05, "loss": 0.3195, "num_tokens": 3073163013.0, "step": 4028 }, { "epoch": 5.497055560297005, "grad_norm": 0.30225013364205444, "learning_rate": 1.2488447587852859e-05, "loss": 0.3267, "num_tokens": 3073916595.0, "step": 4029 }, { "epoch": 5.498421097550567, "grad_norm": 0.30595540304647495, "learning_rate": 1.2480008659653961e-05, "loss": 0.3189, "num_tokens": 3074579261.0, "step": 4030 }, { "epoch": 5.49978663480413, "grad_norm": 0.32854204971934037, "learning_rate": 1.2471572635592157e-05, "loss": 0.3157, "num_tokens": 3075404828.0, "step": 4031 }, { "epoch": 5.501152172057694, "grad_norm": 0.28816514580596814, "learning_rate": 1.2463139518240896e-05, "loss": 0.3173, "num_tokens": 3076147419.0, "step": 4032 }, { "epoch": 5.502517709311257, "grad_norm": 0.31489100979252094, "learning_rate": 1.2454709310172763e-05, "loss": 0.3292, "num_tokens": 3076899160.0, "step": 4033 }, { "epoch": 5.50388324656482, "grad_norm": 0.2853441962783889, "learning_rate": 1.2446282013959449e-05, "loss": 0.3137, "num_tokens": 3077621583.0, "step": 4034 }, { "epoch": 5.5052487838183835, "grad_norm": 0.30209210034989414, "learning_rate": 1.2437857632171762e-05, "loss": 0.3053, "num_tokens": 3078382603.0, "step": 4035 }, { "epoch": 5.506614321071947, "grad_norm": 0.3096404953606984, "learning_rate": 1.2429436167379594e-05, "loss": 0.3178, "num_tokens": 3079120246.0, "step": 4036 }, { "epoch": 5.50797985832551, "grad_norm": 0.2954048517189814, "learning_rate": 1.2421017622152e-05, "loss": 0.3291, "num_tokens": 3079897172.0, "step": 4037 }, { "epoch": 5.509345395579073, "grad_norm": 0.28486412112024884, "learning_rate": 1.2412601999057099e-05, "loss": 0.3148, "num_tokens": 3080690619.0, "step": 4038 }, { "epoch": 5.5107109328326365, "grad_norm": 0.3082864711261617, "learning_rate": 1.2404189300662121e-05, "loss": 0.315, "num_tokens": 3081505700.0, "step": 4039 }, { "epoch": 5.5120764700862, "grad_norm": 0.2897305580756642, "learning_rate": 1.2395779529533443e-05, "loss": 0.3302, "num_tokens": 3082290722.0, "step": 4040 }, { "epoch": 5.513442007339763, "grad_norm": 0.2858144140859202, "learning_rate": 1.2387372688236499e-05, "loss": 0.3245, "num_tokens": 3083052892.0, "step": 4041 }, { "epoch": 5.514807544593326, "grad_norm": 0.31730843885458465, "learning_rate": 1.2378968779335859e-05, "loss": 0.3096, "num_tokens": 3083776991.0, "step": 4042 }, { "epoch": 5.5161730818468895, "grad_norm": 0.2990492903824726, "learning_rate": 1.2370567805395192e-05, "loss": 0.3065, "num_tokens": 3084468521.0, "step": 4043 }, { "epoch": 5.517538619100453, "grad_norm": 0.3165436930185869, "learning_rate": 1.2362169768977284e-05, "loss": 0.3137, "num_tokens": 3085226689.0, "step": 4044 }, { "epoch": 5.518904156354015, "grad_norm": 0.29757991042207727, "learning_rate": 1.2353774672643987e-05, "loss": 0.3248, "num_tokens": 3086000490.0, "step": 4045 }, { "epoch": 5.520269693607578, "grad_norm": 0.2767151580248194, "learning_rate": 1.2345382518956302e-05, "loss": 0.3274, "num_tokens": 3086797562.0, "step": 4046 }, { "epoch": 5.521635230861142, "grad_norm": 0.3180542464517941, "learning_rate": 1.2336993310474306e-05, "loss": 0.3198, "num_tokens": 3087569172.0, "step": 4047 }, { "epoch": 5.523000768114705, "grad_norm": 0.3055646967213881, "learning_rate": 1.232860704975717e-05, "loss": 0.3191, "num_tokens": 3088338864.0, "step": 4048 }, { "epoch": 5.524366305368268, "grad_norm": 0.3018244336403417, "learning_rate": 1.2320223739363204e-05, "loss": 0.3023, "num_tokens": 3089019602.0, "step": 4049 }, { "epoch": 5.525731842621831, "grad_norm": 0.28788045153123565, "learning_rate": 1.2311843381849773e-05, "loss": 0.318, "num_tokens": 3089852351.0, "step": 4050 }, { "epoch": 5.527097379875395, "grad_norm": 0.3142173999986975, "learning_rate": 1.230346597977337e-05, "loss": 0.3194, "num_tokens": 3090563410.0, "step": 4051 }, { "epoch": 5.528462917128958, "grad_norm": 0.3111324534274379, "learning_rate": 1.2295091535689571e-05, "loss": 0.3377, "num_tokens": 3091345855.0, "step": 4052 }, { "epoch": 5.529828454382521, "grad_norm": 0.3056059694037532, "learning_rate": 1.2286720052153069e-05, "loss": 0.3263, "num_tokens": 3092141335.0, "step": 4053 }, { "epoch": 5.531193991636084, "grad_norm": 0.30406962288551886, "learning_rate": 1.2278351531717628e-05, "loss": 0.3267, "num_tokens": 3092951165.0, "step": 4054 }, { "epoch": 5.532559528889648, "grad_norm": 0.28707514384154675, "learning_rate": 1.2269985976936126e-05, "loss": 0.345, "num_tokens": 3093795756.0, "step": 4055 }, { "epoch": 5.533925066143211, "grad_norm": 0.3202924764771139, "learning_rate": 1.2261623390360535e-05, "loss": 0.3268, "num_tokens": 3094598915.0, "step": 4056 }, { "epoch": 5.535290603396774, "grad_norm": 0.32646287827896897, "learning_rate": 1.2253263774541901e-05, "loss": 0.3145, "num_tokens": 3095311056.0, "step": 4057 }, { "epoch": 5.536656140650337, "grad_norm": 0.30607556816531056, "learning_rate": 1.224490713203041e-05, "loss": 0.3209, "num_tokens": 3096137685.0, "step": 4058 }, { "epoch": 5.538021677903901, "grad_norm": 0.3000038613480245, "learning_rate": 1.2236553465375289e-05, "loss": 0.3252, "num_tokens": 3096925322.0, "step": 4059 }, { "epoch": 5.539387215157463, "grad_norm": 0.316074804804346, "learning_rate": 1.2228202777124887e-05, "loss": 0.3254, "num_tokens": 3097668110.0, "step": 4060 }, { "epoch": 5.540752752411027, "grad_norm": 0.29911674838978475, "learning_rate": 1.221985506982664e-05, "loss": 0.3286, "num_tokens": 3098403403.0, "step": 4061 }, { "epoch": 5.5421182896645895, "grad_norm": 0.332683512462865, "learning_rate": 1.2211510346027066e-05, "loss": 0.3109, "num_tokens": 3099170392.0, "step": 4062 }, { "epoch": 5.543483826918153, "grad_norm": 0.31435460602702747, "learning_rate": 1.2203168608271782e-05, "loss": 0.3272, "num_tokens": 3099942311.0, "step": 4063 }, { "epoch": 5.544849364171716, "grad_norm": 0.29648458226953445, "learning_rate": 1.2194829859105491e-05, "loss": 0.3124, "num_tokens": 3100812410.0, "step": 4064 }, { "epoch": 5.546214901425279, "grad_norm": 0.2931048057495925, "learning_rate": 1.218649410107199e-05, "loss": 0.3309, "num_tokens": 3101666417.0, "step": 4065 }, { "epoch": 5.5475804386788425, "grad_norm": 0.30324374242725916, "learning_rate": 1.2178161336714138e-05, "loss": 0.3294, "num_tokens": 3102413714.0, "step": 4066 }, { "epoch": 5.548945975932406, "grad_norm": 0.3088911047105499, "learning_rate": 1.2169831568573928e-05, "loss": 0.3126, "num_tokens": 3103176374.0, "step": 4067 }, { "epoch": 5.550311513185969, "grad_norm": 0.2944872538236688, "learning_rate": 1.2161504799192394e-05, "loss": 0.299, "num_tokens": 3103846975.0, "step": 4068 }, { "epoch": 5.551677050439532, "grad_norm": 0.33423647840850046, "learning_rate": 1.2153181031109672e-05, "loss": 0.338, "num_tokens": 3104597477.0, "step": 4069 }, { "epoch": 5.5530425876930956, "grad_norm": 0.30758710153876667, "learning_rate": 1.2144860266864998e-05, "loss": 0.3349, "num_tokens": 3105344862.0, "step": 4070 }, { "epoch": 5.554408124946659, "grad_norm": 0.3169219974244094, "learning_rate": 1.2136542508996658e-05, "loss": 0.3151, "num_tokens": 3106093499.0, "step": 4071 }, { "epoch": 5.555773662200222, "grad_norm": 0.2978136652115031, "learning_rate": 1.2128227760042051e-05, "loss": 0.3385, "num_tokens": 3106863822.0, "step": 4072 }, { "epoch": 5.557139199453785, "grad_norm": 0.31322205266568615, "learning_rate": 1.2119916022537644e-05, "loss": 0.3164, "num_tokens": 3107622345.0, "step": 4073 }, { "epoch": 5.558504736707349, "grad_norm": 0.3214948124829084, "learning_rate": 1.2111607299018995e-05, "loss": 0.3249, "num_tokens": 3108354943.0, "step": 4074 }, { "epoch": 5.559870273960912, "grad_norm": 0.30719167033571315, "learning_rate": 1.2103301592020713e-05, "loss": 0.3267, "num_tokens": 3109129219.0, "step": 4075 }, { "epoch": 5.561235811214475, "grad_norm": 0.3023208390658271, "learning_rate": 1.209499890407654e-05, "loss": 0.3254, "num_tokens": 3109889531.0, "step": 4076 }, { "epoch": 5.562601348468037, "grad_norm": 0.3149589514160636, "learning_rate": 1.2086699237719249e-05, "loss": 0.3379, "num_tokens": 3110625203.0, "step": 4077 }, { "epoch": 5.563966885721602, "grad_norm": 0.28848763911264313, "learning_rate": 1.207840259548071e-05, "loss": 0.3221, "num_tokens": 3111468690.0, "step": 4078 }, { "epoch": 5.565332422975164, "grad_norm": 0.29810751989752593, "learning_rate": 1.2070108979891878e-05, "loss": 0.319, "num_tokens": 3112182456.0, "step": 4079 }, { "epoch": 5.566697960228727, "grad_norm": 0.3082606952261968, "learning_rate": 1.2061818393482763e-05, "loss": 0.3261, "num_tokens": 3112976043.0, "step": 4080 }, { "epoch": 5.5680634974822905, "grad_norm": 0.29213324393113743, "learning_rate": 1.2053530838782468e-05, "loss": 0.3139, "num_tokens": 3113768274.0, "step": 4081 }, { "epoch": 5.569429034735854, "grad_norm": 0.323693515671729, "learning_rate": 1.2045246318319168e-05, "loss": 0.3073, "num_tokens": 3114557570.0, "step": 4082 }, { "epoch": 5.570794571989417, "grad_norm": 0.27073899179101335, "learning_rate": 1.2036964834620123e-05, "loss": 0.318, "num_tokens": 3115328265.0, "step": 4083 }, { "epoch": 5.57216010924298, "grad_norm": 0.28847033265445665, "learning_rate": 1.2028686390211637e-05, "loss": 0.3232, "num_tokens": 3116078488.0, "step": 4084 }, { "epoch": 5.5735256464965435, "grad_norm": 0.3263806831322436, "learning_rate": 1.202041098761911e-05, "loss": 0.3219, "num_tokens": 3116872104.0, "step": 4085 }, { "epoch": 5.574891183750107, "grad_norm": 0.3411477328382117, "learning_rate": 1.2012138629367013e-05, "loss": 0.3287, "num_tokens": 3117603523.0, "step": 4086 }, { "epoch": 5.57625672100367, "grad_norm": 0.2823143020127927, "learning_rate": 1.2003869317978885e-05, "loss": 0.3257, "num_tokens": 3118463469.0, "step": 4087 }, { "epoch": 5.577622258257233, "grad_norm": 0.28194251252293556, "learning_rate": 1.1995603055977339e-05, "loss": 0.3036, "num_tokens": 3119223708.0, "step": 4088 }, { "epoch": 5.5789877955107965, "grad_norm": 0.34505126925887997, "learning_rate": 1.1987339845884036e-05, "loss": 0.3104, "num_tokens": 3119923912.0, "step": 4089 }, { "epoch": 5.58035333276436, "grad_norm": 0.3177934975329451, "learning_rate": 1.1979079690219736e-05, "loss": 0.304, "num_tokens": 3120739563.0, "step": 4090 }, { "epoch": 5.581718870017923, "grad_norm": 0.32903586745213276, "learning_rate": 1.1970822591504257e-05, "loss": 0.3176, "num_tokens": 3121473005.0, "step": 4091 }, { "epoch": 5.583084407271486, "grad_norm": 0.30015075303097255, "learning_rate": 1.1962568552256476e-05, "loss": 0.3167, "num_tokens": 3122266349.0, "step": 4092 }, { "epoch": 5.5844499445250495, "grad_norm": 0.288074272375479, "learning_rate": 1.195431757499435e-05, "loss": 0.3112, "num_tokens": 3123071981.0, "step": 4093 }, { "epoch": 5.585815481778612, "grad_norm": 0.32127773070599835, "learning_rate": 1.1946069662234887e-05, "loss": 0.3186, "num_tokens": 3123918070.0, "step": 4094 }, { "epoch": 5.587181019032175, "grad_norm": 0.30871451784610654, "learning_rate": 1.193782481649417e-05, "loss": 0.32, "num_tokens": 3124719503.0, "step": 4095 }, { "epoch": 5.588546556285738, "grad_norm": 0.3257749299817924, "learning_rate": 1.1929583040287347e-05, "loss": 0.3326, "num_tokens": 3125485855.0, "step": 4096 }, { "epoch": 5.589912093539302, "grad_norm": 0.30747255769357645, "learning_rate": 1.1921344336128633e-05, "loss": 0.3068, "num_tokens": 3126223479.0, "step": 4097 }, { "epoch": 5.591277630792865, "grad_norm": 0.2916656700835596, "learning_rate": 1.1913108706531284e-05, "loss": 0.3258, "num_tokens": 3126977866.0, "step": 4098 }, { "epoch": 5.592643168046428, "grad_norm": 0.3127787605678545, "learning_rate": 1.1904876154007643e-05, "loss": 0.329, "num_tokens": 3127721005.0, "step": 4099 }, { "epoch": 5.594008705299991, "grad_norm": 0.3066714702549104, "learning_rate": 1.1896646681069108e-05, "loss": 0.3253, "num_tokens": 3128438761.0, "step": 4100 }, { "epoch": 5.595374242553555, "grad_norm": 0.3123953580776338, "learning_rate": 1.1888420290226127e-05, "loss": 0.3161, "num_tokens": 3129258469.0, "step": 4101 }, { "epoch": 5.596739779807118, "grad_norm": 0.2821636576743343, "learning_rate": 1.1880196983988227e-05, "loss": 0.328, "num_tokens": 3130016861.0, "step": 4102 }, { "epoch": 5.598105317060681, "grad_norm": 0.32157886514451794, "learning_rate": 1.1871976764863969e-05, "loss": 0.3279, "num_tokens": 3130761304.0, "step": 4103 }, { "epoch": 5.599470854314244, "grad_norm": 0.2885170638964732, "learning_rate": 1.1863759635360994e-05, "loss": 0.336, "num_tokens": 3131668800.0, "step": 4104 }, { "epoch": 5.600836391567808, "grad_norm": 0.29963038651262563, "learning_rate": 1.1855545597985989e-05, "loss": 0.3135, "num_tokens": 3132386950.0, "step": 4105 }, { "epoch": 5.602201928821371, "grad_norm": 0.28919531998911946, "learning_rate": 1.184733465524471e-05, "loss": 0.3065, "num_tokens": 3133173587.0, "step": 4106 }, { "epoch": 5.603567466074934, "grad_norm": 0.314393308870802, "learning_rate": 1.1839126809641953e-05, "loss": 0.3317, "num_tokens": 3133967098.0, "step": 4107 }, { "epoch": 5.604933003328497, "grad_norm": 0.3128972067603415, "learning_rate": 1.1830922063681567e-05, "loss": 0.3312, "num_tokens": 3134726030.0, "step": 4108 }, { "epoch": 5.60629854058206, "grad_norm": 0.3115001104627975, "learning_rate": 1.1822720419866488e-05, "loss": 0.3131, "num_tokens": 3135477071.0, "step": 4109 }, { "epoch": 5.607664077835624, "grad_norm": 0.30876981636898043, "learning_rate": 1.1814521880698649e-05, "loss": 0.3371, "num_tokens": 3136310599.0, "step": 4110 }, { "epoch": 5.609029615089186, "grad_norm": 0.28930729339985023, "learning_rate": 1.1806326448679105e-05, "loss": 0.3247, "num_tokens": 3137059520.0, "step": 4111 }, { "epoch": 5.6103951523427495, "grad_norm": 0.325231005475645, "learning_rate": 1.1798134126307906e-05, "loss": 0.3163, "num_tokens": 3137818275.0, "step": 4112 }, { "epoch": 5.611760689596313, "grad_norm": 0.33062267399574435, "learning_rate": 1.1789944916084177e-05, "loss": 0.3295, "num_tokens": 3138555024.0, "step": 4113 }, { "epoch": 5.613126226849876, "grad_norm": 0.2941643174260025, "learning_rate": 1.1781758820506098e-05, "loss": 0.325, "num_tokens": 3139425746.0, "step": 4114 }, { "epoch": 5.614491764103439, "grad_norm": 0.3221467442251328, "learning_rate": 1.1773575842070893e-05, "loss": 0.3257, "num_tokens": 3140206696.0, "step": 4115 }, { "epoch": 5.615857301357003, "grad_norm": 0.32042445990768437, "learning_rate": 1.176539598327482e-05, "loss": 0.3212, "num_tokens": 3140937706.0, "step": 4116 }, { "epoch": 5.617222838610566, "grad_norm": 0.31855224716819536, "learning_rate": 1.1757219246613212e-05, "loss": 0.3113, "num_tokens": 3141740337.0, "step": 4117 }, { "epoch": 5.618588375864129, "grad_norm": 0.28391884217207747, "learning_rate": 1.174904563458044e-05, "loss": 0.3144, "num_tokens": 3142455034.0, "step": 4118 }, { "epoch": 5.619953913117692, "grad_norm": 0.3068268835289114, "learning_rate": 1.1740875149669907e-05, "loss": 0.3245, "num_tokens": 3143258328.0, "step": 4119 }, { "epoch": 5.621319450371256, "grad_norm": 0.3273188166357368, "learning_rate": 1.1732707794374086e-05, "loss": 0.3276, "num_tokens": 3143988079.0, "step": 4120 }, { "epoch": 5.622684987624819, "grad_norm": 0.30599322527907774, "learning_rate": 1.1724543571184476e-05, "loss": 0.3282, "num_tokens": 3144745912.0, "step": 4121 }, { "epoch": 5.624050524878382, "grad_norm": 0.32328858413372685, "learning_rate": 1.1716382482591629e-05, "loss": 0.3281, "num_tokens": 3145570708.0, "step": 4122 }, { "epoch": 5.625416062131945, "grad_norm": 0.3006832427451345, "learning_rate": 1.170822453108515e-05, "loss": 0.3184, "num_tokens": 3146417194.0, "step": 4123 }, { "epoch": 5.626781599385509, "grad_norm": 0.29170434928621636, "learning_rate": 1.1700069719153654e-05, "loss": 0.311, "num_tokens": 3147240342.0, "step": 4124 }, { "epoch": 5.628147136639072, "grad_norm": 0.3238438128188253, "learning_rate": 1.169191804928484e-05, "loss": 0.3336, "num_tokens": 3148007324.0, "step": 4125 }, { "epoch": 5.629512673892634, "grad_norm": 0.3237419160868097, "learning_rate": 1.1683769523965425e-05, "loss": 0.3359, "num_tokens": 3148783698.0, "step": 4126 }, { "epoch": 5.6308782111461975, "grad_norm": 0.31452985857149496, "learning_rate": 1.1675624145681177e-05, "loss": 0.3201, "num_tokens": 3149564510.0, "step": 4127 }, { "epoch": 5.632243748399761, "grad_norm": 0.35289350487049964, "learning_rate": 1.1667481916916876e-05, "loss": 0.343, "num_tokens": 3150325159.0, "step": 4128 }, { "epoch": 5.633609285653324, "grad_norm": 0.29177544028885516, "learning_rate": 1.1659342840156394e-05, "loss": 0.3298, "num_tokens": 3151096884.0, "step": 4129 }, { "epoch": 5.634974822906887, "grad_norm": 0.32466592423290686, "learning_rate": 1.1651206917882587e-05, "loss": 0.3147, "num_tokens": 3151809893.0, "step": 4130 }, { "epoch": 5.6363403601604505, "grad_norm": 0.3177138115423458, "learning_rate": 1.1643074152577382e-05, "loss": 0.3089, "num_tokens": 3152617124.0, "step": 4131 }, { "epoch": 5.637705897414014, "grad_norm": 0.29621549380126877, "learning_rate": 1.1634944546721739e-05, "loss": 0.3148, "num_tokens": 3153397838.0, "step": 4132 }, { "epoch": 5.639071434667577, "grad_norm": 0.297719439797249, "learning_rate": 1.1626818102795637e-05, "loss": 0.3226, "num_tokens": 3154158712.0, "step": 4133 }, { "epoch": 5.64043697192114, "grad_norm": 0.3394443764373018, "learning_rate": 1.1618694823278104e-05, "loss": 0.3305, "num_tokens": 3154899646.0, "step": 4134 }, { "epoch": 5.6418025091747035, "grad_norm": 0.2979614031065191, "learning_rate": 1.1610574710647211e-05, "loss": 0.31, "num_tokens": 3155668206.0, "step": 4135 }, { "epoch": 5.643168046428267, "grad_norm": 0.3007850349808742, "learning_rate": 1.160245776738005e-05, "loss": 0.3156, "num_tokens": 3156406095.0, "step": 4136 }, { "epoch": 5.64453358368183, "grad_norm": 0.30350874582762577, "learning_rate": 1.1594343995952728e-05, "loss": 0.326, "num_tokens": 3157169685.0, "step": 4137 }, { "epoch": 5.645899120935393, "grad_norm": 0.2916064093563091, "learning_rate": 1.1586233398840437e-05, "loss": 0.3207, "num_tokens": 3157876834.0, "step": 4138 }, { "epoch": 5.6472646581889565, "grad_norm": 0.3154677653075232, "learning_rate": 1.1578125978517353e-05, "loss": 0.3245, "num_tokens": 3158662827.0, "step": 4139 }, { "epoch": 5.64863019544252, "grad_norm": 0.30805195318471335, "learning_rate": 1.157002173745669e-05, "loss": 0.3204, "num_tokens": 3159472617.0, "step": 4140 }, { "epoch": 5.649995732696082, "grad_norm": 0.3179197162749536, "learning_rate": 1.1561920678130722e-05, "loss": 0.3271, "num_tokens": 3160301663.0, "step": 4141 }, { "epoch": 5.651361269949646, "grad_norm": 0.29166358443931667, "learning_rate": 1.1553822803010715e-05, "loss": 0.3292, "num_tokens": 3161069948.0, "step": 4142 }, { "epoch": 5.652726807203209, "grad_norm": 0.3229257788747124, "learning_rate": 1.1545728114566985e-05, "loss": 0.3284, "num_tokens": 3161789416.0, "step": 4143 }, { "epoch": 5.654092344456772, "grad_norm": 0.3117086530860928, "learning_rate": 1.1537636615268873e-05, "loss": 0.3119, "num_tokens": 3162567169.0, "step": 4144 }, { "epoch": 5.655457881710335, "grad_norm": 0.297516581214965, "learning_rate": 1.1529548307584748e-05, "loss": 0.3232, "num_tokens": 3163366386.0, "step": 4145 }, { "epoch": 5.656823418963898, "grad_norm": 0.2911522901330559, "learning_rate": 1.1521463193981983e-05, "loss": 0.3261, "num_tokens": 3164158679.0, "step": 4146 }, { "epoch": 5.658188956217462, "grad_norm": 0.29447617020202116, "learning_rate": 1.1513381276927024e-05, "loss": 0.3223, "num_tokens": 3164915297.0, "step": 4147 }, { "epoch": 5.659554493471025, "grad_norm": 0.2945079714362806, "learning_rate": 1.1505302558885293e-05, "loss": 0.3226, "num_tokens": 3165729861.0, "step": 4148 }, { "epoch": 5.660920030724588, "grad_norm": 0.3044466458092491, "learning_rate": 1.1497227042321269e-05, "loss": 0.3261, "num_tokens": 3166450051.0, "step": 4149 }, { "epoch": 5.662285567978151, "grad_norm": 0.30264424417111396, "learning_rate": 1.1489154729698444e-05, "loss": 0.3226, "num_tokens": 3167189343.0, "step": 4150 }, { "epoch": 5.663651105231715, "grad_norm": 0.33313929130840775, "learning_rate": 1.1481085623479316e-05, "loss": 0.3111, "num_tokens": 3168016654.0, "step": 4151 }, { "epoch": 5.665016642485278, "grad_norm": 0.30589502882768593, "learning_rate": 1.1473019726125427e-05, "loss": 0.3112, "num_tokens": 3168769333.0, "step": 4152 }, { "epoch": 5.666382179738841, "grad_norm": 0.29260745696933405, "learning_rate": 1.1464957040097337e-05, "loss": 0.322, "num_tokens": 3169567851.0, "step": 4153 }, { "epoch": 5.667747716992404, "grad_norm": 0.30654867780349293, "learning_rate": 1.1456897567854627e-05, "loss": 0.3189, "num_tokens": 3170285210.0, "step": 4154 }, { "epoch": 5.669113254245968, "grad_norm": 0.3001907539359777, "learning_rate": 1.1448841311855881e-05, "loss": 0.3261, "num_tokens": 3171021253.0, "step": 4155 }, { "epoch": 5.670478791499531, "grad_norm": 0.3084284726291612, "learning_rate": 1.144078827455872e-05, "loss": 0.3273, "num_tokens": 3171781660.0, "step": 4156 }, { "epoch": 5.671844328753094, "grad_norm": 0.31909861635277065, "learning_rate": 1.143273845841977e-05, "loss": 0.3311, "num_tokens": 3172620979.0, "step": 4157 }, { "epoch": 5.6732098660066566, "grad_norm": 0.2941798597676558, "learning_rate": 1.142469186589469e-05, "loss": 0.3146, "num_tokens": 3173336689.0, "step": 4158 }, { "epoch": 5.67457540326022, "grad_norm": 0.29889945276170776, "learning_rate": 1.1416648499438154e-05, "loss": 0.3202, "num_tokens": 3174085565.0, "step": 4159 }, { "epoch": 5.675940940513783, "grad_norm": 0.3163427443814193, "learning_rate": 1.1408608361503824e-05, "loss": 0.3288, "num_tokens": 3174782401.0, "step": 4160 }, { "epoch": 5.677306477767346, "grad_norm": 0.30940499124582604, "learning_rate": 1.1400571454544412e-05, "loss": 0.3295, "num_tokens": 3175571919.0, "step": 4161 }, { "epoch": 5.67867201502091, "grad_norm": 0.29320538658360557, "learning_rate": 1.1392537781011628e-05, "loss": 0.3155, "num_tokens": 3176314937.0, "step": 4162 }, { "epoch": 5.680037552274473, "grad_norm": 0.2980219089077121, "learning_rate": 1.1384507343356194e-05, "loss": 0.3255, "num_tokens": 3177094054.0, "step": 4163 }, { "epoch": 5.681403089528036, "grad_norm": 0.3069542124084137, "learning_rate": 1.1376480144027853e-05, "loss": 0.3254, "num_tokens": 3177757331.0, "step": 4164 }, { "epoch": 5.682768626781599, "grad_norm": 0.2867270951923549, "learning_rate": 1.1368456185475351e-05, "loss": 0.3074, "num_tokens": 3178489502.0, "step": 4165 }, { "epoch": 5.684134164035163, "grad_norm": 0.312273962312952, "learning_rate": 1.1360435470146455e-05, "loss": 0.3156, "num_tokens": 3179243334.0, "step": 4166 }, { "epoch": 5.685499701288726, "grad_norm": 0.2931544244407575, "learning_rate": 1.1352418000487941e-05, "loss": 0.3211, "num_tokens": 3180018285.0, "step": 4167 }, { "epoch": 5.686865238542289, "grad_norm": 0.3092559067418983, "learning_rate": 1.134440377894559e-05, "loss": 0.3325, "num_tokens": 3180757264.0, "step": 4168 }, { "epoch": 5.688230775795852, "grad_norm": 0.2951216146121922, "learning_rate": 1.1336392807964185e-05, "loss": 0.3315, "num_tokens": 3181566912.0, "step": 4169 }, { "epoch": 5.689596313049416, "grad_norm": 0.2801409208763023, "learning_rate": 1.1328385089987532e-05, "loss": 0.3182, "num_tokens": 3182335422.0, "step": 4170 }, { "epoch": 5.690961850302979, "grad_norm": 0.29395248396149204, "learning_rate": 1.1320380627458447e-05, "loss": 0.3137, "num_tokens": 3183039877.0, "step": 4171 }, { "epoch": 5.692327387556542, "grad_norm": 0.29202468732741677, "learning_rate": 1.1312379422818733e-05, "loss": 0.3201, "num_tokens": 3183803075.0, "step": 4172 }, { "epoch": 5.693692924810105, "grad_norm": 0.3110584589844455, "learning_rate": 1.1304381478509216e-05, "loss": 0.3345, "num_tokens": 3184577701.0, "step": 4173 }, { "epoch": 5.695058462063669, "grad_norm": 0.30207887857733806, "learning_rate": 1.1296386796969718e-05, "loss": 0.3183, "num_tokens": 3185303235.0, "step": 4174 }, { "epoch": 5.696423999317231, "grad_norm": 0.27449046084226714, "learning_rate": 1.128839538063908e-05, "loss": 0.3171, "num_tokens": 3186082597.0, "step": 4175 }, { "epoch": 5.697789536570794, "grad_norm": 0.2867749269967168, "learning_rate": 1.1280407231955125e-05, "loss": 0.3281, "num_tokens": 3186984541.0, "step": 4176 }, { "epoch": 5.6991550738243575, "grad_norm": 0.28897636929606807, "learning_rate": 1.1272422353354706e-05, "loss": 0.3284, "num_tokens": 3187802906.0, "step": 4177 }, { "epoch": 5.700520611077921, "grad_norm": 0.2908454700293965, "learning_rate": 1.126444074727365e-05, "loss": 0.3267, "num_tokens": 3188534065.0, "step": 4178 }, { "epoch": 5.701886148331484, "grad_norm": 0.2954685720495762, "learning_rate": 1.1256462416146803e-05, "loss": 0.3171, "num_tokens": 3189333439.0, "step": 4179 }, { "epoch": 5.703251685585047, "grad_norm": 0.3034360897358399, "learning_rate": 1.1248487362408014e-05, "loss": 0.3265, "num_tokens": 3190031977.0, "step": 4180 }, { "epoch": 5.7046172228386105, "grad_norm": 0.2887066416427992, "learning_rate": 1.1240515588490112e-05, "loss": 0.3149, "num_tokens": 3190777536.0, "step": 4181 }, { "epoch": 5.705982760092174, "grad_norm": 0.3016365295175589, "learning_rate": 1.1232547096824966e-05, "loss": 0.3184, "num_tokens": 3191564273.0, "step": 4182 }, { "epoch": 5.707348297345737, "grad_norm": 0.28753695266488755, "learning_rate": 1.1224581889843394e-05, "loss": 0.3387, "num_tokens": 3192287475.0, "step": 4183 }, { "epoch": 5.7087138345993, "grad_norm": 0.3034267989540964, "learning_rate": 1.121661996997524e-05, "loss": 0.3106, "num_tokens": 3193030960.0, "step": 4184 }, { "epoch": 5.7100793718528635, "grad_norm": 0.2912821449244738, "learning_rate": 1.1208661339649348e-05, "loss": 0.3073, "num_tokens": 3193740604.0, "step": 4185 }, { "epoch": 5.711444909106427, "grad_norm": 0.2964042672926495, "learning_rate": 1.1200706001293554e-05, "loss": 0.3266, "num_tokens": 3194527109.0, "step": 4186 }, { "epoch": 5.71281044635999, "grad_norm": 0.2916977957222252, "learning_rate": 1.1192753957334674e-05, "loss": 0.3148, "num_tokens": 3195227991.0, "step": 4187 }, { "epoch": 5.714175983613553, "grad_norm": 0.32573718257013434, "learning_rate": 1.118480521019854e-05, "loss": 0.316, "num_tokens": 3195999454.0, "step": 4188 }, { "epoch": 5.7155415208671165, "grad_norm": 0.2770055910629167, "learning_rate": 1.1176859762309977e-05, "loss": 0.3193, "num_tokens": 3196727210.0, "step": 4189 }, { "epoch": 5.716907058120679, "grad_norm": 0.3050634206012255, "learning_rate": 1.1168917616092777e-05, "loss": 0.3247, "num_tokens": 3197448777.0, "step": 4190 }, { "epoch": 5.718272595374243, "grad_norm": 0.32060517367743696, "learning_rate": 1.1160978773969773e-05, "loss": 0.323, "num_tokens": 3198213864.0, "step": 4191 }, { "epoch": 5.719638132627805, "grad_norm": 0.3171703371046387, "learning_rate": 1.1153043238362738e-05, "loss": 0.3379, "num_tokens": 3198997536.0, "step": 4192 }, { "epoch": 5.721003669881369, "grad_norm": 0.31208596716642645, "learning_rate": 1.1145111011692471e-05, "loss": 0.3157, "num_tokens": 3199745049.0, "step": 4193 }, { "epoch": 5.722369207134932, "grad_norm": 0.3073060596047256, "learning_rate": 1.1137182096378751e-05, "loss": 0.3293, "num_tokens": 3200486957.0, "step": 4194 }, { "epoch": 5.723734744388495, "grad_norm": 0.3187456781068323, "learning_rate": 1.1129256494840344e-05, "loss": 0.303, "num_tokens": 3201174487.0, "step": 4195 }, { "epoch": 5.725100281642058, "grad_norm": 0.309674696602167, "learning_rate": 1.1121334209495005e-05, "loss": 0.3216, "num_tokens": 3201903970.0, "step": 4196 }, { "epoch": 5.726465818895622, "grad_norm": 0.3274159339783298, "learning_rate": 1.1113415242759484e-05, "loss": 0.3211, "num_tokens": 3202700110.0, "step": 4197 }, { "epoch": 5.727831356149185, "grad_norm": 0.32977386834464384, "learning_rate": 1.1105499597049523e-05, "loss": 0.3259, "num_tokens": 3203532829.0, "step": 4198 }, { "epoch": 5.729196893402748, "grad_norm": 0.2712970990626372, "learning_rate": 1.1097587274779822e-05, "loss": 0.3365, "num_tokens": 3204296785.0, "step": 4199 }, { "epoch": 5.730562430656311, "grad_norm": 0.32123321498012464, "learning_rate": 1.1089678278364108e-05, "loss": 0.3287, "num_tokens": 3205011171.0, "step": 4200 }, { "epoch": 5.731927967909875, "grad_norm": 0.31665263398470045, "learning_rate": 1.108177261021507e-05, "loss": 0.315, "num_tokens": 3205748357.0, "step": 4201 }, { "epoch": 5.733293505163438, "grad_norm": 0.30135287659680765, "learning_rate": 1.1073870272744367e-05, "loss": 0.3166, "num_tokens": 3206451879.0, "step": 4202 }, { "epoch": 5.734659042417001, "grad_norm": 0.2970591015023209, "learning_rate": 1.1065971268362685e-05, "loss": 0.3053, "num_tokens": 3207249454.0, "step": 4203 }, { "epoch": 5.7360245796705644, "grad_norm": 0.2820789192578342, "learning_rate": 1.105807559947965e-05, "loss": 0.3227, "num_tokens": 3208014832.0, "step": 4204 }, { "epoch": 5.737390116924128, "grad_norm": 0.3225709011673868, "learning_rate": 1.1050183268503893e-05, "loss": 0.3441, "num_tokens": 3208780386.0, "step": 4205 }, { "epoch": 5.738755654177691, "grad_norm": 0.30921109828355037, "learning_rate": 1.1042294277843029e-05, "loss": 0.3266, "num_tokens": 3209521371.0, "step": 4206 }, { "epoch": 5.740121191431253, "grad_norm": 0.30570936454620296, "learning_rate": 1.1034408629903648e-05, "loss": 0.3225, "num_tokens": 3210274446.0, "step": 4207 }, { "epoch": 5.741486728684817, "grad_norm": 0.284953517825479, "learning_rate": 1.10265263270913e-05, "loss": 0.308, "num_tokens": 3211058488.0, "step": 4208 }, { "epoch": 5.74285226593838, "grad_norm": 0.29749180973632977, "learning_rate": 1.1018647371810562e-05, "loss": 0.3278, "num_tokens": 3211836131.0, "step": 4209 }, { "epoch": 5.744217803191943, "grad_norm": 0.29422691107812926, "learning_rate": 1.1010771766464952e-05, "loss": 0.3166, "num_tokens": 3212598231.0, "step": 4210 }, { "epoch": 5.745583340445506, "grad_norm": 0.28493921701613933, "learning_rate": 1.1002899513456958e-05, "loss": 0.3338, "num_tokens": 3213297670.0, "step": 4211 }, { "epoch": 5.74694887769907, "grad_norm": 0.32842873318521165, "learning_rate": 1.099503061518809e-05, "loss": 0.3133, "num_tokens": 3214129169.0, "step": 4212 }, { "epoch": 5.748314414952633, "grad_norm": 0.3205699360496881, "learning_rate": 1.0987165074058792e-05, "loss": 0.3412, "num_tokens": 3214912290.0, "step": 4213 }, { "epoch": 5.749679952206196, "grad_norm": 0.3016054531953429, "learning_rate": 1.0979302892468505e-05, "loss": 0.3189, "num_tokens": 3215690671.0, "step": 4214 }, { "epoch": 5.751045489459759, "grad_norm": 0.2949867514168805, "learning_rate": 1.0971444072815638e-05, "loss": 0.3315, "num_tokens": 3216452994.0, "step": 4215 }, { "epoch": 5.752411026713323, "grad_norm": 0.3264178547513005, "learning_rate": 1.0963588617497584e-05, "loss": 0.3316, "num_tokens": 3217209036.0, "step": 4216 }, { "epoch": 5.753776563966886, "grad_norm": 0.3124786308226179, "learning_rate": 1.0955736528910692e-05, "loss": 0.3226, "num_tokens": 3217964047.0, "step": 4217 }, { "epoch": 5.755142101220449, "grad_norm": 0.29986049047229035, "learning_rate": 1.0947887809450299e-05, "loss": 0.3331, "num_tokens": 3218753988.0, "step": 4218 }, { "epoch": 5.756507638474012, "grad_norm": 0.2922420814184715, "learning_rate": 1.0940042461510716e-05, "loss": 0.3396, "num_tokens": 3219525105.0, "step": 4219 }, { "epoch": 5.757873175727576, "grad_norm": 0.34242887884749934, "learning_rate": 1.0932200487485196e-05, "loss": 0.3174, "num_tokens": 3220272817.0, "step": 4220 }, { "epoch": 5.759238712981139, "grad_norm": 0.289237317056099, "learning_rate": 1.0924361889766017e-05, "loss": 0.335, "num_tokens": 3221050873.0, "step": 4221 }, { "epoch": 5.760604250234701, "grad_norm": 0.3128220934690245, "learning_rate": 1.0916526670744372e-05, "loss": 0.3288, "num_tokens": 3221844959.0, "step": 4222 }, { "epoch": 5.761969787488265, "grad_norm": 0.3136822838977047, "learning_rate": 1.0908694832810457e-05, "loss": 0.3188, "num_tokens": 3222581258.0, "step": 4223 }, { "epoch": 5.763335324741828, "grad_norm": 0.2909146355201898, "learning_rate": 1.0900866378353424e-05, "loss": 0.314, "num_tokens": 3223325447.0, "step": 4224 }, { "epoch": 5.764700861995391, "grad_norm": 0.3464419461053933, "learning_rate": 1.0893041309761403e-05, "loss": 0.3303, "num_tokens": 3224034726.0, "step": 4225 }, { "epoch": 5.766066399248954, "grad_norm": 0.3098989360541727, "learning_rate": 1.0885219629421471e-05, "loss": 0.3229, "num_tokens": 3224788015.0, "step": 4226 }, { "epoch": 5.7674319365025175, "grad_norm": 0.2910469444004628, "learning_rate": 1.0877401339719688e-05, "loss": 0.3241, "num_tokens": 3225622323.0, "step": 4227 }, { "epoch": 5.768797473756081, "grad_norm": 0.3070187242401082, "learning_rate": 1.0869586443041085e-05, "loss": 0.327, "num_tokens": 3226453591.0, "step": 4228 }, { "epoch": 5.770163011009644, "grad_norm": 0.29175956396216657, "learning_rate": 1.0861774941769629e-05, "loss": 0.3268, "num_tokens": 3227226684.0, "step": 4229 }, { "epoch": 5.771528548263207, "grad_norm": 0.30059618218145706, "learning_rate": 1.0853966838288296e-05, "loss": 0.3256, "num_tokens": 3228114046.0, "step": 4230 }, { "epoch": 5.7728940855167705, "grad_norm": 0.28177433146768077, "learning_rate": 1.084616213497898e-05, "loss": 0.3055, "num_tokens": 3228849217.0, "step": 4231 }, { "epoch": 5.774259622770334, "grad_norm": 0.29006363771161303, "learning_rate": 1.0838360834222563e-05, "loss": 0.3063, "num_tokens": 3229527686.0, "step": 4232 }, { "epoch": 5.775625160023897, "grad_norm": 0.31046546904942945, "learning_rate": 1.0830562938398893e-05, "loss": 0.3165, "num_tokens": 3230322181.0, "step": 4233 }, { "epoch": 5.77699069727746, "grad_norm": 0.29550765842604615, "learning_rate": 1.082276844988676e-05, "loss": 0.3213, "num_tokens": 3231087725.0, "step": 4234 }, { "epoch": 5.7783562345310235, "grad_norm": 0.34273765960080577, "learning_rate": 1.0814977371063926e-05, "loss": 0.3239, "num_tokens": 3231798444.0, "step": 4235 }, { "epoch": 5.779721771784587, "grad_norm": 0.2852668746668438, "learning_rate": 1.0807189704307115e-05, "loss": 0.346, "num_tokens": 3232640034.0, "step": 4236 }, { "epoch": 5.78108730903815, "grad_norm": 0.30158044517277166, "learning_rate": 1.0799405451992002e-05, "loss": 0.3238, "num_tokens": 3233467000.0, "step": 4237 }, { "epoch": 5.782452846291713, "grad_norm": 0.3199365029562698, "learning_rate": 1.0791624616493235e-05, "loss": 0.3363, "num_tokens": 3234204552.0, "step": 4238 }, { "epoch": 5.783818383545276, "grad_norm": 0.3137171398445056, "learning_rate": 1.0783847200184408e-05, "loss": 0.3031, "num_tokens": 3234914773.0, "step": 4239 }, { "epoch": 5.785183920798839, "grad_norm": 0.2834033037104673, "learning_rate": 1.0776073205438063e-05, "loss": 0.3159, "num_tokens": 3235690965.0, "step": 4240 }, { "epoch": 5.786549458052402, "grad_norm": 0.29452773611894495, "learning_rate": 1.0768302634625716e-05, "loss": 0.3184, "num_tokens": 3236457810.0, "step": 4241 }, { "epoch": 5.787914995305965, "grad_norm": 0.31002656208778456, "learning_rate": 1.0760535490117836e-05, "loss": 0.3178, "num_tokens": 3237110740.0, "step": 4242 }, { "epoch": 5.789280532559529, "grad_norm": 0.3181536500298344, "learning_rate": 1.0752771774283834e-05, "loss": 0.332, "num_tokens": 3237857545.0, "step": 4243 }, { "epoch": 5.790646069813092, "grad_norm": 0.30797981743627506, "learning_rate": 1.0745011489492088e-05, "loss": 0.3179, "num_tokens": 3238588908.0, "step": 4244 }, { "epoch": 5.792011607066655, "grad_norm": 0.2735135269022546, "learning_rate": 1.0737254638109923e-05, "loss": 0.3175, "num_tokens": 3239333447.0, "step": 4245 }, { "epoch": 5.793377144320218, "grad_norm": 0.31267998552523063, "learning_rate": 1.072950122250362e-05, "loss": 0.3075, "num_tokens": 3240089534.0, "step": 4246 }, { "epoch": 5.794742681573782, "grad_norm": 0.2921472692428606, "learning_rate": 1.0721751245038413e-05, "loss": 0.3235, "num_tokens": 3240867072.0, "step": 4247 }, { "epoch": 5.796108218827345, "grad_norm": 0.321655937972664, "learning_rate": 1.0714004708078485e-05, "loss": 0.3151, "num_tokens": 3241549888.0, "step": 4248 }, { "epoch": 5.797473756080908, "grad_norm": 0.29551454834464924, "learning_rate": 1.0706261613986961e-05, "loss": 0.3225, "num_tokens": 3242338907.0, "step": 4249 }, { "epoch": 5.7988392933344715, "grad_norm": 0.29582267290616376, "learning_rate": 1.0698521965125927e-05, "loss": 0.308, "num_tokens": 3243071888.0, "step": 4250 }, { "epoch": 5.800204830588035, "grad_norm": 0.300068237005872, "learning_rate": 1.0690785763856421e-05, "loss": 0.3085, "num_tokens": 3243797760.0, "step": 4251 }, { "epoch": 5.801570367841598, "grad_norm": 0.30102218893646654, "learning_rate": 1.0683053012538416e-05, "loss": 0.3075, "num_tokens": 3244515569.0, "step": 4252 }, { "epoch": 5.802935905095161, "grad_norm": 0.3455477108044704, "learning_rate": 1.0675323713530838e-05, "loss": 0.3172, "num_tokens": 3245213256.0, "step": 4253 }, { "epoch": 5.804301442348724, "grad_norm": 0.31322603024091367, "learning_rate": 1.0667597869191567e-05, "loss": 0.3313, "num_tokens": 3245930182.0, "step": 4254 }, { "epoch": 5.805666979602288, "grad_norm": 0.29774921102013124, "learning_rate": 1.0659875481877422e-05, "loss": 0.3197, "num_tokens": 3246765786.0, "step": 4255 }, { "epoch": 5.80703251685585, "grad_norm": 0.2847923140904271, "learning_rate": 1.0652156553944173e-05, "loss": 0.3107, "num_tokens": 3247518844.0, "step": 4256 }, { "epoch": 5.808398054109413, "grad_norm": 0.29189531197600826, "learning_rate": 1.064444108774652e-05, "loss": 0.3271, "num_tokens": 3248212248.0, "step": 4257 }, { "epoch": 5.809763591362977, "grad_norm": 0.3186984366245349, "learning_rate": 1.0636729085638125e-05, "loss": 0.3184, "num_tokens": 3248924615.0, "step": 4258 }, { "epoch": 5.81112912861654, "grad_norm": 0.31408120663764516, "learning_rate": 1.0629020549971583e-05, "loss": 0.3149, "num_tokens": 3249707347.0, "step": 4259 }, { "epoch": 5.812494665870103, "grad_norm": 0.30989570328050375, "learning_rate": 1.0621315483098442e-05, "loss": 0.3194, "num_tokens": 3250419921.0, "step": 4260 }, { "epoch": 5.813860203123666, "grad_norm": 0.32069683816866146, "learning_rate": 1.0613613887369171e-05, "loss": 0.3308, "num_tokens": 3251170917.0, "step": 4261 }, { "epoch": 5.81522574037723, "grad_norm": 0.30518012649044585, "learning_rate": 1.0605915765133203e-05, "loss": 0.3337, "num_tokens": 3252012057.0, "step": 4262 }, { "epoch": 5.816591277630793, "grad_norm": 0.3207560004770347, "learning_rate": 1.0598221118738898e-05, "loss": 0.3304, "num_tokens": 3252781977.0, "step": 4263 }, { "epoch": 5.817956814884356, "grad_norm": 0.28372869127803646, "learning_rate": 1.059052995053356e-05, "loss": 0.3437, "num_tokens": 3253555334.0, "step": 4264 }, { "epoch": 5.819322352137919, "grad_norm": 0.27444932291763774, "learning_rate": 1.0582842262863442e-05, "loss": 0.3119, "num_tokens": 3254276471.0, "step": 4265 }, { "epoch": 5.820687889391483, "grad_norm": 0.3265167813648468, "learning_rate": 1.0575158058073707e-05, "loss": 0.3255, "num_tokens": 3255028291.0, "step": 4266 }, { "epoch": 5.822053426645046, "grad_norm": 0.3289660866914267, "learning_rate": 1.0567477338508478e-05, "loss": 0.3272, "num_tokens": 3255776714.0, "step": 4267 }, { "epoch": 5.823418963898609, "grad_norm": 0.29289174312397886, "learning_rate": 1.0559800106510814e-05, "loss": 0.3333, "num_tokens": 3256567695.0, "step": 4268 }, { "epoch": 5.824784501152172, "grad_norm": 0.3163801343491213, "learning_rate": 1.055212636442271e-05, "loss": 0.3332, "num_tokens": 3257350178.0, "step": 4269 }, { "epoch": 5.826150038405736, "grad_norm": 0.32760251460175005, "learning_rate": 1.0544456114585074e-05, "loss": 0.3243, "num_tokens": 3258108844.0, "step": 4270 }, { "epoch": 5.827515575659298, "grad_norm": 0.31032976819613717, "learning_rate": 1.0536789359337796e-05, "loss": 0.3183, "num_tokens": 3258862161.0, "step": 4271 }, { "epoch": 5.828881112912862, "grad_norm": 0.31158538200447294, "learning_rate": 1.0529126101019651e-05, "loss": 0.3316, "num_tokens": 3259649546.0, "step": 4272 }, { "epoch": 5.8302466501664245, "grad_norm": 0.3121952021051716, "learning_rate": 1.052146634196836e-05, "loss": 0.3073, "num_tokens": 3260378620.0, "step": 4273 }, { "epoch": 5.831612187419988, "grad_norm": 0.29331073812185865, "learning_rate": 1.0513810084520607e-05, "loss": 0.3283, "num_tokens": 3261203666.0, "step": 4274 }, { "epoch": 5.832977724673551, "grad_norm": 0.3030071909369633, "learning_rate": 1.0506157331011965e-05, "loss": 0.3253, "num_tokens": 3261949136.0, "step": 4275 }, { "epoch": 5.834343261927114, "grad_norm": 0.3285036676137428, "learning_rate": 1.049850808377697e-05, "loss": 0.3225, "num_tokens": 3262700366.0, "step": 4276 }, { "epoch": 5.8357087991806775, "grad_norm": 0.3044540665173213, "learning_rate": 1.0490862345149066e-05, "loss": 0.3106, "num_tokens": 3263438965.0, "step": 4277 }, { "epoch": 5.837074336434241, "grad_norm": 0.3149735747343668, "learning_rate": 1.0483220117460654e-05, "loss": 0.3223, "num_tokens": 3264120668.0, "step": 4278 }, { "epoch": 5.838439873687804, "grad_norm": 0.3074617716901, "learning_rate": 1.0475581403043025e-05, "loss": 0.3241, "num_tokens": 3264886474.0, "step": 4279 }, { "epoch": 5.839805410941367, "grad_norm": 0.3145404516286924, "learning_rate": 1.046794620422643e-05, "loss": 0.3264, "num_tokens": 3265675330.0, "step": 4280 }, { "epoch": 5.8411709481949305, "grad_norm": 0.28599336253244256, "learning_rate": 1.0460314523340045e-05, "loss": 0.311, "num_tokens": 3266519191.0, "step": 4281 }, { "epoch": 5.842536485448494, "grad_norm": 0.2941333381140806, "learning_rate": 1.0452686362711943e-05, "loss": 0.3375, "num_tokens": 3267309371.0, "step": 4282 }, { "epoch": 5.843902022702057, "grad_norm": 0.30020610316765656, "learning_rate": 1.0445061724669174e-05, "loss": 0.323, "num_tokens": 3268077241.0, "step": 4283 }, { "epoch": 5.84526755995562, "grad_norm": 0.2813087926028067, "learning_rate": 1.0437440611537663e-05, "loss": 0.2975, "num_tokens": 3268749832.0, "step": 4284 }, { "epoch": 5.846633097209184, "grad_norm": 0.3002767568183153, "learning_rate": 1.0429823025642292e-05, "loss": 0.3199, "num_tokens": 3269447963.0, "step": 4285 }, { "epoch": 5.847998634462747, "grad_norm": 0.2963400847803455, "learning_rate": 1.0422208969306854e-05, "loss": 0.3309, "num_tokens": 3270187708.0, "step": 4286 }, { "epoch": 5.84936417171631, "grad_norm": 0.3302676635521106, "learning_rate": 1.0414598444854074e-05, "loss": 0.3099, "num_tokens": 3270874520.0, "step": 4287 }, { "epoch": 5.850729708969872, "grad_norm": 0.289510323515922, "learning_rate": 1.0406991454605579e-05, "loss": 0.3043, "num_tokens": 3271553203.0, "step": 4288 }, { "epoch": 5.852095246223436, "grad_norm": 0.3216958592585355, "learning_rate": 1.0399388000881942e-05, "loss": 0.3087, "num_tokens": 3272304379.0, "step": 4289 }, { "epoch": 5.853460783476999, "grad_norm": 0.2861320348016959, "learning_rate": 1.0391788086002655e-05, "loss": 0.3109, "num_tokens": 3273074280.0, "step": 4290 }, { "epoch": 5.854826320730562, "grad_norm": 0.33808686213624334, "learning_rate": 1.0384191712286097e-05, "loss": 0.3105, "num_tokens": 3273777879.0, "step": 4291 }, { "epoch": 5.8561918579841254, "grad_norm": 0.3044951644670936, "learning_rate": 1.0376598882049624e-05, "loss": 0.3284, "num_tokens": 3274547114.0, "step": 4292 }, { "epoch": 5.857557395237689, "grad_norm": 0.3115129225384759, "learning_rate": 1.036900959760946e-05, "loss": 0.3301, "num_tokens": 3275295259.0, "step": 4293 }, { "epoch": 5.858922932491252, "grad_norm": 0.28732246857471233, "learning_rate": 1.0361423861280773e-05, "loss": 0.3115, "num_tokens": 3276069554.0, "step": 4294 }, { "epoch": 5.860288469744815, "grad_norm": 0.30553018603203913, "learning_rate": 1.0353841675377647e-05, "loss": 0.3147, "num_tokens": 3276838246.0, "step": 4295 }, { "epoch": 5.8616540069983785, "grad_norm": 0.29406785703695776, "learning_rate": 1.0346263042213068e-05, "loss": 0.3131, "num_tokens": 3277561695.0, "step": 4296 }, { "epoch": 5.863019544251942, "grad_norm": 0.295652927583649, "learning_rate": 1.0338687964098953e-05, "loss": 0.3424, "num_tokens": 3278396040.0, "step": 4297 }, { "epoch": 5.864385081505505, "grad_norm": 0.3059820460605685, "learning_rate": 1.0331116443346132e-05, "loss": 0.3392, "num_tokens": 3279124842.0, "step": 4298 }, { "epoch": 5.865750618759068, "grad_norm": 0.2884491994395566, "learning_rate": 1.0323548482264356e-05, "loss": 0.313, "num_tokens": 3279822553.0, "step": 4299 }, { "epoch": 5.8671161560126315, "grad_norm": 0.32138435277395794, "learning_rate": 1.0315984083162261e-05, "loss": 0.3215, "num_tokens": 3280533997.0, "step": 4300 }, { "epoch": 5.868481693266195, "grad_norm": 0.3048464126762474, "learning_rate": 1.0308423248347442e-05, "loss": 0.3172, "num_tokens": 3281247310.0, "step": 4301 }, { "epoch": 5.869847230519758, "grad_norm": 0.2839574427412979, "learning_rate": 1.0300865980126363e-05, "loss": 0.3103, "num_tokens": 3282052033.0, "step": 4302 }, { "epoch": 5.87121276777332, "grad_norm": 0.28827412297090765, "learning_rate": 1.0293312280804429e-05, "loss": 0.3152, "num_tokens": 3282753732.0, "step": 4303 }, { "epoch": 5.8725783050268845, "grad_norm": 0.3180895699710797, "learning_rate": 1.0285762152685952e-05, "loss": 0.3111, "num_tokens": 3283501176.0, "step": 4304 }, { "epoch": 5.873943842280447, "grad_norm": 0.3055355021758906, "learning_rate": 1.0278215598074133e-05, "loss": 0.3269, "num_tokens": 3284238988.0, "step": 4305 }, { "epoch": 5.87530937953401, "grad_norm": 0.3080074276609502, "learning_rate": 1.0270672619271109e-05, "loss": 0.3117, "num_tokens": 3285067399.0, "step": 4306 }, { "epoch": 5.876674916787573, "grad_norm": 0.27880921506916595, "learning_rate": 1.0263133218577913e-05, "loss": 0.3126, "num_tokens": 3285873888.0, "step": 4307 }, { "epoch": 5.878040454041137, "grad_norm": 0.3027097954291864, "learning_rate": 1.02555973982945e-05, "loss": 0.3155, "num_tokens": 3286664345.0, "step": 4308 }, { "epoch": 5.8794059912947, "grad_norm": 0.2772147347106467, "learning_rate": 1.02480651607197e-05, "loss": 0.3239, "num_tokens": 3287406288.0, "step": 4309 }, { "epoch": 5.880771528548263, "grad_norm": 0.29830994569139624, "learning_rate": 1.0240536508151302e-05, "loss": 0.3259, "num_tokens": 3288196384.0, "step": 4310 }, { "epoch": 5.882137065801826, "grad_norm": 0.28423899149014903, "learning_rate": 1.0233011442885946e-05, "loss": 0.3178, "num_tokens": 3288946042.0, "step": 4311 }, { "epoch": 5.88350260305539, "grad_norm": 0.28555545402621146, "learning_rate": 1.0225489967219215e-05, "loss": 0.3175, "num_tokens": 3289677088.0, "step": 4312 }, { "epoch": 5.884868140308953, "grad_norm": 0.29312642964775965, "learning_rate": 1.0217972083445591e-05, "loss": 0.3066, "num_tokens": 3290449035.0, "step": 4313 }, { "epoch": 5.886233677562516, "grad_norm": 0.2850270495187224, "learning_rate": 1.0210457793858444e-05, "loss": 0.3252, "num_tokens": 3291197208.0, "step": 4314 }, { "epoch": 5.887599214816079, "grad_norm": 0.2831897138117478, "learning_rate": 1.0202947100750058e-05, "loss": 0.3234, "num_tokens": 3291904843.0, "step": 4315 }, { "epoch": 5.888964752069643, "grad_norm": 0.276327381615386, "learning_rate": 1.0195440006411626e-05, "loss": 0.3297, "num_tokens": 3292651610.0, "step": 4316 }, { "epoch": 5.890330289323206, "grad_norm": 0.28351800063457394, "learning_rate": 1.0187936513133245e-05, "loss": 0.3162, "num_tokens": 3293390492.0, "step": 4317 }, { "epoch": 5.891695826576769, "grad_norm": 0.28784740720832236, "learning_rate": 1.0180436623203888e-05, "loss": 0.3147, "num_tokens": 3294147522.0, "step": 4318 }, { "epoch": 5.893061363830332, "grad_norm": 0.2966480833694351, "learning_rate": 1.0172940338911457e-05, "loss": 0.3065, "num_tokens": 3294816452.0, "step": 4319 }, { "epoch": 5.894426901083895, "grad_norm": 0.285253349953474, "learning_rate": 1.016544766254274e-05, "loss": 0.3388, "num_tokens": 3295650715.0, "step": 4320 }, { "epoch": 5.895792438337458, "grad_norm": 0.2744182341424143, "learning_rate": 1.0157958596383433e-05, "loss": 0.3308, "num_tokens": 3296426697.0, "step": 4321 }, { "epoch": 5.897157975591021, "grad_norm": 0.27890808017495017, "learning_rate": 1.0150473142718126e-05, "loss": 0.3175, "num_tokens": 3297169758.0, "step": 4322 }, { "epoch": 5.8985235128445845, "grad_norm": 0.28393840026858724, "learning_rate": 1.0142991303830301e-05, "loss": 0.3123, "num_tokens": 3297918546.0, "step": 4323 }, { "epoch": 5.899889050098148, "grad_norm": 0.3138859113134962, "learning_rate": 1.0135513082002346e-05, "loss": 0.3208, "num_tokens": 3298741246.0, "step": 4324 }, { "epoch": 5.901254587351711, "grad_norm": 0.29688632501031226, "learning_rate": 1.012803847951554e-05, "loss": 0.3024, "num_tokens": 3299477437.0, "step": 4325 }, { "epoch": 5.902620124605274, "grad_norm": 0.2931296059257757, "learning_rate": 1.0120567498650065e-05, "loss": 0.3349, "num_tokens": 3300209536.0, "step": 4326 }, { "epoch": 5.9039856618588376, "grad_norm": 0.300429447457993, "learning_rate": 1.0113100141684995e-05, "loss": 0.3048, "num_tokens": 3300988187.0, "step": 4327 }, { "epoch": 5.905351199112401, "grad_norm": 0.30162993400051413, "learning_rate": 1.0105636410898292e-05, "loss": 0.3254, "num_tokens": 3301695135.0, "step": 4328 }, { "epoch": 5.906716736365964, "grad_norm": 0.2906555828534959, "learning_rate": 1.0098176308566817e-05, "loss": 0.3134, "num_tokens": 3302452563.0, "step": 4329 }, { "epoch": 5.908082273619527, "grad_norm": 0.2918274267768744, "learning_rate": 1.0090719836966323e-05, "loss": 0.3314, "num_tokens": 3303217423.0, "step": 4330 }, { "epoch": 5.909447810873091, "grad_norm": 0.2892673584004003, "learning_rate": 1.0083266998371468e-05, "loss": 0.3308, "num_tokens": 3303971620.0, "step": 4331 }, { "epoch": 5.910813348126654, "grad_norm": 0.2918197737692803, "learning_rate": 1.0075817795055776e-05, "loss": 0.3167, "num_tokens": 3304761851.0, "step": 4332 }, { "epoch": 5.912178885380217, "grad_norm": 0.2985304098652222, "learning_rate": 1.0068372229291683e-05, "loss": 0.32, "num_tokens": 3305456057.0, "step": 4333 }, { "epoch": 5.91354442263378, "grad_norm": 0.29251715643920334, "learning_rate": 1.0060930303350514e-05, "loss": 0.3188, "num_tokens": 3306251305.0, "step": 4334 }, { "epoch": 5.914909959887343, "grad_norm": 0.2956261983965101, "learning_rate": 1.0053492019502457e-05, "loss": 0.3087, "num_tokens": 3306986725.0, "step": 4335 }, { "epoch": 5.916275497140907, "grad_norm": 0.28653593456255366, "learning_rate": 1.004605738001664e-05, "loss": 0.3232, "num_tokens": 3307713896.0, "step": 4336 }, { "epoch": 5.917641034394469, "grad_norm": 0.3037925408762056, "learning_rate": 1.003862638716103e-05, "loss": 0.3145, "num_tokens": 3308501007.0, "step": 4337 }, { "epoch": 5.9190065716480325, "grad_norm": 0.2901976928842585, "learning_rate": 1.0031199043202506e-05, "loss": 0.3247, "num_tokens": 3309187467.0, "step": 4338 }, { "epoch": 5.920372108901596, "grad_norm": 0.31744427015718013, "learning_rate": 1.0023775350406831e-05, "loss": 0.3376, "num_tokens": 3309956356.0, "step": 4339 }, { "epoch": 5.921737646155159, "grad_norm": 0.2814202550165525, "learning_rate": 1.0016355311038653e-05, "loss": 0.3194, "num_tokens": 3310763853.0, "step": 4340 }, { "epoch": 5.923103183408722, "grad_norm": 0.292375984273759, "learning_rate": 1.0008938927361495e-05, "loss": 0.3221, "num_tokens": 3311519765.0, "step": 4341 }, { "epoch": 5.9244687206622855, "grad_norm": 0.3030093641645358, "learning_rate": 1.0001526201637786e-05, "loss": 0.3211, "num_tokens": 3312340164.0, "step": 4342 }, { "epoch": 5.925834257915849, "grad_norm": 0.28586838658677144, "learning_rate": 9.994117136128826e-06, "loss": 0.3272, "num_tokens": 3313110884.0, "step": 4343 }, { "epoch": 5.927199795169412, "grad_norm": 0.29529367175402244, "learning_rate": 9.986711733094785e-06, "loss": 0.3358, "num_tokens": 3313869665.0, "step": 4344 }, { "epoch": 5.928565332422975, "grad_norm": 0.29744670395175327, "learning_rate": 9.979309994794757e-06, "loss": 0.3203, "num_tokens": 3314639717.0, "step": 4345 }, { "epoch": 5.9299308696765385, "grad_norm": 0.29148675509676164, "learning_rate": 9.971911923486672e-06, "loss": 0.3071, "num_tokens": 3315383057.0, "step": 4346 }, { "epoch": 5.931296406930102, "grad_norm": 0.2987827880968984, "learning_rate": 9.964517521427366e-06, "loss": 0.3299, "num_tokens": 3316161996.0, "step": 4347 }, { "epoch": 5.932661944183665, "grad_norm": 0.30031602860346696, "learning_rate": 9.957126790872551e-06, "loss": 0.3294, "num_tokens": 3316908831.0, "step": 4348 }, { "epoch": 5.934027481437228, "grad_norm": 0.30139414007212106, "learning_rate": 9.949739734076827e-06, "loss": 0.3112, "num_tokens": 3317664998.0, "step": 4349 }, { "epoch": 5.9353930186907915, "grad_norm": 0.3128464439250982, "learning_rate": 9.94235635329365e-06, "loss": 0.3224, "num_tokens": 3318394301.0, "step": 4350 }, { "epoch": 5.936758555944355, "grad_norm": 0.30367631101841047, "learning_rate": 9.93497665077538e-06, "loss": 0.3234, "num_tokens": 3319170978.0, "step": 4351 }, { "epoch": 5.938124093197917, "grad_norm": 0.2950342109485987, "learning_rate": 9.927600628773246e-06, "loss": 0.3253, "num_tokens": 3319939756.0, "step": 4352 }, { "epoch": 5.93948963045148, "grad_norm": 0.290729872968571, "learning_rate": 9.920228289537337e-06, "loss": 0.3227, "num_tokens": 3320724501.0, "step": 4353 }, { "epoch": 5.940855167705044, "grad_norm": 0.298989433915885, "learning_rate": 9.91285963531666e-06, "loss": 0.3158, "num_tokens": 3321457597.0, "step": 4354 }, { "epoch": 5.942220704958607, "grad_norm": 0.3070872010913399, "learning_rate": 9.905494668359049e-06, "loss": 0.3201, "num_tokens": 3322235945.0, "step": 4355 }, { "epoch": 5.94358624221217, "grad_norm": 0.27344731518909915, "learning_rate": 9.89813339091125e-06, "loss": 0.3174, "num_tokens": 3323024156.0, "step": 4356 }, { "epoch": 5.944951779465733, "grad_norm": 0.3006375068392689, "learning_rate": 9.890775805218864e-06, "loss": 0.3455, "num_tokens": 3323830175.0, "step": 4357 }, { "epoch": 5.946317316719297, "grad_norm": 0.28734880341761265, "learning_rate": 9.883421913526384e-06, "loss": 0.326, "num_tokens": 3324575269.0, "step": 4358 }, { "epoch": 5.94768285397286, "grad_norm": 0.2900782173799053, "learning_rate": 9.876071718077142e-06, "loss": 0.3434, "num_tokens": 3325306604.0, "step": 4359 }, { "epoch": 5.949048391226423, "grad_norm": 0.29578524035183096, "learning_rate": 9.86872522111338e-06, "loss": 0.3236, "num_tokens": 3326101615.0, "step": 4360 }, { "epoch": 5.950413928479986, "grad_norm": 0.3154859838733374, "learning_rate": 9.861382424876196e-06, "loss": 0.3201, "num_tokens": 3326854259.0, "step": 4361 }, { "epoch": 5.95177946573355, "grad_norm": 0.273487215051921, "learning_rate": 9.854043331605544e-06, "loss": 0.3231, "num_tokens": 3327667602.0, "step": 4362 }, { "epoch": 5.953145002987113, "grad_norm": 0.3006503879907621, "learning_rate": 9.846707943540287e-06, "loss": 0.3117, "num_tokens": 3328415257.0, "step": 4363 }, { "epoch": 5.954510540240676, "grad_norm": 0.28382060598928893, "learning_rate": 9.839376262918117e-06, "loss": 0.3072, "num_tokens": 3329117520.0, "step": 4364 }, { "epoch": 5.955876077494239, "grad_norm": 0.29671316493455563, "learning_rate": 9.832048291975616e-06, "loss": 0.323, "num_tokens": 3329912439.0, "step": 4365 }, { "epoch": 5.957241614747803, "grad_norm": 0.30104570603478337, "learning_rate": 9.82472403294824e-06, "loss": 0.3246, "num_tokens": 3330658866.0, "step": 4366 }, { "epoch": 5.958607152001366, "grad_norm": 0.2860435353490197, "learning_rate": 9.817403488070288e-06, "loss": 0.3192, "num_tokens": 3331435127.0, "step": 4367 }, { "epoch": 5.959972689254929, "grad_norm": 0.2798268077129179, "learning_rate": 9.810086659574948e-06, "loss": 0.3278, "num_tokens": 3332223902.0, "step": 4368 }, { "epoch": 5.9613382265084915, "grad_norm": 0.28169228494889237, "learning_rate": 9.802773549694267e-06, "loss": 0.3133, "num_tokens": 3332997906.0, "step": 4369 }, { "epoch": 5.962703763762055, "grad_norm": 0.270344694156009, "learning_rate": 9.795464160659166e-06, "loss": 0.3299, "num_tokens": 3333831393.0, "step": 4370 }, { "epoch": 5.964069301015618, "grad_norm": 0.29228244410654797, "learning_rate": 9.788158494699405e-06, "loss": 0.3228, "num_tokens": 3334587339.0, "step": 4371 }, { "epoch": 5.965434838269181, "grad_norm": 0.2740201656149236, "learning_rate": 9.780856554043649e-06, "loss": 0.3108, "num_tokens": 3335336624.0, "step": 4372 }, { "epoch": 5.966800375522745, "grad_norm": 0.2983149654532054, "learning_rate": 9.77355834091939e-06, "loss": 0.3198, "num_tokens": 3336144643.0, "step": 4373 }, { "epoch": 5.968165912776308, "grad_norm": 0.2957569166514149, "learning_rate": 9.76626385755299e-06, "loss": 0.3248, "num_tokens": 3336921502.0, "step": 4374 }, { "epoch": 5.969531450029871, "grad_norm": 0.301355873621299, "learning_rate": 9.758973106169699e-06, "loss": 0.3129, "num_tokens": 3337672822.0, "step": 4375 }, { "epoch": 5.970896987283434, "grad_norm": 0.30743208281972784, "learning_rate": 9.751686088993595e-06, "loss": 0.3317, "num_tokens": 3338517051.0, "step": 4376 }, { "epoch": 5.972262524536998, "grad_norm": 0.2946947486377858, "learning_rate": 9.744402808247635e-06, "loss": 0.3106, "num_tokens": 3339200990.0, "step": 4377 }, { "epoch": 5.973628061790561, "grad_norm": 0.30345791461087257, "learning_rate": 9.737123266153634e-06, "loss": 0.3137, "num_tokens": 3339932316.0, "step": 4378 }, { "epoch": 5.974993599044124, "grad_norm": 0.2988559410447275, "learning_rate": 9.72984746493227e-06, "loss": 0.3223, "num_tokens": 3340700745.0, "step": 4379 }, { "epoch": 5.976359136297687, "grad_norm": 0.3035617251039401, "learning_rate": 9.722575406803056e-06, "loss": 0.3274, "num_tokens": 3341448333.0, "step": 4380 }, { "epoch": 5.977724673551251, "grad_norm": 0.2983975150467277, "learning_rate": 9.71530709398441e-06, "loss": 0.3095, "num_tokens": 3342188770.0, "step": 4381 }, { "epoch": 5.979090210804814, "grad_norm": 0.2907538660342643, "learning_rate": 9.708042528693562e-06, "loss": 0.3017, "num_tokens": 3342827064.0, "step": 4382 }, { "epoch": 5.980455748058377, "grad_norm": 0.3170658931589408, "learning_rate": 9.700781713146608e-06, "loss": 0.3098, "num_tokens": 3343515813.0, "step": 4383 }, { "epoch": 5.9818212853119395, "grad_norm": 0.29804927928796726, "learning_rate": 9.693524649558532e-06, "loss": 0.3192, "num_tokens": 3344257150.0, "step": 4384 }, { "epoch": 5.983186822565504, "grad_norm": 0.2826082435503464, "learning_rate": 9.686271340143132e-06, "loss": 0.3126, "num_tokens": 3345005544.0, "step": 4385 }, { "epoch": 5.984552359819066, "grad_norm": 0.3017634201618048, "learning_rate": 9.679021787113083e-06, "loss": 0.313, "num_tokens": 3345769869.0, "step": 4386 }, { "epoch": 5.985917897072629, "grad_norm": 0.30214629832075635, "learning_rate": 9.67177599267991e-06, "loss": 0.3277, "num_tokens": 3346596643.0, "step": 4387 }, { "epoch": 5.9872834343261925, "grad_norm": 0.2834620800503947, "learning_rate": 9.664533959053999e-06, "loss": 0.3196, "num_tokens": 3347315899.0, "step": 4388 }, { "epoch": 5.988648971579756, "grad_norm": 0.3100031569367013, "learning_rate": 9.657295688444567e-06, "loss": 0.3139, "num_tokens": 3348024161.0, "step": 4389 }, { "epoch": 5.990014508833319, "grad_norm": 0.29501979638395215, "learning_rate": 9.650061183059703e-06, "loss": 0.3296, "num_tokens": 3348700695.0, "step": 4390 }, { "epoch": 5.991380046086882, "grad_norm": 0.2841890688190091, "learning_rate": 9.642830445106341e-06, "loss": 0.3222, "num_tokens": 3349506009.0, "step": 4391 }, { "epoch": 5.9927455833404455, "grad_norm": 0.28362209510510494, "learning_rate": 9.635603476790268e-06, "loss": 0.3245, "num_tokens": 3350256018.0, "step": 4392 }, { "epoch": 5.994111120594009, "grad_norm": 0.3079763655493896, "learning_rate": 9.628380280316124e-06, "loss": 0.3161, "num_tokens": 3351011012.0, "step": 4393 }, { "epoch": 5.995476657847572, "grad_norm": 0.2964847621321542, "learning_rate": 9.621160857887379e-06, "loss": 0.3196, "num_tokens": 3351795937.0, "step": 4394 }, { "epoch": 5.996842195101135, "grad_norm": 0.32747945017608, "learning_rate": 9.613945211706377e-06, "loss": 0.3249, "num_tokens": 3352557398.0, "step": 4395 }, { "epoch": 5.9982077323546985, "grad_norm": 0.2669817512283393, "learning_rate": 9.606733343974296e-06, "loss": 0.315, "num_tokens": 3353309510.0, "step": 4396 }, { "epoch": 5.999573269608262, "grad_norm": 0.29998418609451677, "learning_rate": 9.59952525689117e-06, "loss": 0.3399, "num_tokens": 3354066255.0, "step": 4397 }, { "epoch": 6.0, "grad_norm": 0.29998418609451677, "learning_rate": 9.592320952655863e-06, "loss": 0.3046, "num_tokens": 3354252816.0, "step": 4398 }, { "epoch": 6.001365537253563, "grad_norm": 0.611941004991842, "learning_rate": 9.585120433466103e-06, "loss": 0.3001, "num_tokens": 3355009547.0, "step": 4399 }, { "epoch": 6.0027310745071265, "grad_norm": 0.44601629524969877, "learning_rate": 9.577923701518455e-06, "loss": 0.2984, "num_tokens": 3355696719.0, "step": 4400 }, { "epoch": 6.00409661176069, "grad_norm": 0.39917771227701865, "learning_rate": 9.570730759008331e-06, "loss": 0.3028, "num_tokens": 3356437241.0, "step": 4401 }, { "epoch": 6.005462149014253, "grad_norm": 0.35509178808680064, "learning_rate": 9.563541608129996e-06, "loss": 0.3028, "num_tokens": 3357218728.0, "step": 4402 }, { "epoch": 6.006827686267816, "grad_norm": 0.31090565039102036, "learning_rate": 9.556356251076529e-06, "loss": 0.2974, "num_tokens": 3357980336.0, "step": 4403 }, { "epoch": 6.0081932235213795, "grad_norm": 0.30777643250559533, "learning_rate": 9.549174690039884e-06, "loss": 0.306, "num_tokens": 3358761707.0, "step": 4404 }, { "epoch": 6.009558760774943, "grad_norm": 0.3312088791375256, "learning_rate": 9.541996927210847e-06, "loss": 0.2974, "num_tokens": 3359497582.0, "step": 4405 }, { "epoch": 6.010924298028506, "grad_norm": 0.3557144290870905, "learning_rate": 9.53482296477903e-06, "loss": 0.2951, "num_tokens": 3360266768.0, "step": 4406 }, { "epoch": 6.012289835282068, "grad_norm": 0.3942653144678986, "learning_rate": 9.527652804932907e-06, "loss": 0.2999, "num_tokens": 3361038867.0, "step": 4407 }, { "epoch": 6.013655372535632, "grad_norm": 0.3137852759501011, "learning_rate": 9.520486449859783e-06, "loss": 0.3016, "num_tokens": 3361786004.0, "step": 4408 }, { "epoch": 6.015020909789195, "grad_norm": 0.35332120068143746, "learning_rate": 9.5133239017458e-06, "loss": 0.2964, "num_tokens": 3362559674.0, "step": 4409 }, { "epoch": 6.016386447042758, "grad_norm": 0.31222757062594125, "learning_rate": 9.50616516277594e-06, "loss": 0.2783, "num_tokens": 3363236421.0, "step": 4410 }, { "epoch": 6.017751984296321, "grad_norm": 0.28886458827744216, "learning_rate": 9.499010235134038e-06, "loss": 0.3037, "num_tokens": 3363974049.0, "step": 4411 }, { "epoch": 6.019117521549885, "grad_norm": 0.3063305294547598, "learning_rate": 9.491859121002732e-06, "loss": 0.2866, "num_tokens": 3364789946.0, "step": 4412 }, { "epoch": 6.020483058803448, "grad_norm": 0.2962380992690442, "learning_rate": 9.484711822563527e-06, "loss": 0.2831, "num_tokens": 3365568023.0, "step": 4413 }, { "epoch": 6.021848596057011, "grad_norm": 0.28586567962985676, "learning_rate": 9.47756834199676e-06, "loss": 0.2847, "num_tokens": 3366260484.0, "step": 4414 }, { "epoch": 6.023214133310574, "grad_norm": 0.30459808889546036, "learning_rate": 9.470428681481579e-06, "loss": 0.2929, "num_tokens": 3366963404.0, "step": 4415 }, { "epoch": 6.024579670564138, "grad_norm": 0.30806503617040937, "learning_rate": 9.463292843196011e-06, "loss": 0.295, "num_tokens": 3367782536.0, "step": 4416 }, { "epoch": 6.025945207817701, "grad_norm": 0.3054467186215234, "learning_rate": 9.456160829316872e-06, "loss": 0.2854, "num_tokens": 3368492002.0, "step": 4417 }, { "epoch": 6.027310745071264, "grad_norm": 0.29663005271671306, "learning_rate": 9.449032642019833e-06, "loss": 0.2998, "num_tokens": 3369247279.0, "step": 4418 }, { "epoch": 6.028676282324827, "grad_norm": 0.30036337281513986, "learning_rate": 9.441908283479404e-06, "loss": 0.2856, "num_tokens": 3370025726.0, "step": 4419 }, { "epoch": 6.030041819578391, "grad_norm": 0.3053208189954663, "learning_rate": 9.434787755868916e-06, "loss": 0.2926, "num_tokens": 3370697764.0, "step": 4420 }, { "epoch": 6.031407356831954, "grad_norm": 0.30692418680646505, "learning_rate": 9.427671061360526e-06, "loss": 0.298, "num_tokens": 3371422974.0, "step": 4421 }, { "epoch": 6.032772894085517, "grad_norm": 0.28973721460144924, "learning_rate": 9.420558202125232e-06, "loss": 0.2802, "num_tokens": 3372206398.0, "step": 4422 }, { "epoch": 6.03413843133908, "grad_norm": 0.3128089418561304, "learning_rate": 9.413449180332868e-06, "loss": 0.2791, "num_tokens": 3372940430.0, "step": 4423 }, { "epoch": 6.035503968592643, "grad_norm": 0.3268764869551704, "learning_rate": 9.406343998152071e-06, "loss": 0.3038, "num_tokens": 3373715318.0, "step": 4424 }, { "epoch": 6.036869505846206, "grad_norm": 0.2786920449074106, "learning_rate": 9.399242657750346e-06, "loss": 0.2987, "num_tokens": 3374438548.0, "step": 4425 }, { "epoch": 6.038235043099769, "grad_norm": 0.3020405562695889, "learning_rate": 9.392145161293988e-06, "loss": 0.2963, "num_tokens": 3375183388.0, "step": 4426 }, { "epoch": 6.039600580353333, "grad_norm": 0.3174164464208179, "learning_rate": 9.385051510948141e-06, "loss": 0.2928, "num_tokens": 3375949950.0, "step": 4427 }, { "epoch": 6.040966117606896, "grad_norm": 0.314930323317339, "learning_rate": 9.377961708876777e-06, "loss": 0.3089, "num_tokens": 3376741255.0, "step": 4428 }, { "epoch": 6.042331654860459, "grad_norm": 0.3009469821059994, "learning_rate": 9.370875757242678e-06, "loss": 0.2957, "num_tokens": 3377621258.0, "step": 4429 }, { "epoch": 6.043697192114022, "grad_norm": 0.30029268618804617, "learning_rate": 9.363793658207464e-06, "loss": 0.3031, "num_tokens": 3378354161.0, "step": 4430 }, { "epoch": 6.045062729367586, "grad_norm": 0.3059499909359165, "learning_rate": 9.356715413931575e-06, "loss": 0.3013, "num_tokens": 3379080139.0, "step": 4431 }, { "epoch": 6.046428266621149, "grad_norm": 0.3249157019887623, "learning_rate": 9.349641026574285e-06, "loss": 0.2727, "num_tokens": 3379819878.0, "step": 4432 }, { "epoch": 6.047793803874712, "grad_norm": 0.2896993021625353, "learning_rate": 9.342570498293668e-06, "loss": 0.2886, "num_tokens": 3380548057.0, "step": 4433 }, { "epoch": 6.049159341128275, "grad_norm": 0.30473017433319294, "learning_rate": 9.335503831246656e-06, "loss": 0.3033, "num_tokens": 3381369615.0, "step": 4434 }, { "epoch": 6.050524878381839, "grad_norm": 0.29555199259472353, "learning_rate": 9.328441027588967e-06, "loss": 0.2857, "num_tokens": 3382148636.0, "step": 4435 }, { "epoch": 6.051890415635402, "grad_norm": 0.3033423598858527, "learning_rate": 9.321382089475161e-06, "loss": 0.3064, "num_tokens": 3382901160.0, "step": 4436 }, { "epoch": 6.053255952888965, "grad_norm": 0.2979698719558246, "learning_rate": 9.314327019058621e-06, "loss": 0.3057, "num_tokens": 3383738696.0, "step": 4437 }, { "epoch": 6.054621490142528, "grad_norm": 0.31305141941042164, "learning_rate": 9.307275818491533e-06, "loss": 0.2896, "num_tokens": 3384400405.0, "step": 4438 }, { "epoch": 6.055987027396091, "grad_norm": 0.30318905300565535, "learning_rate": 9.300228489924917e-06, "loss": 0.3004, "num_tokens": 3385235183.0, "step": 4439 }, { "epoch": 6.057352564649654, "grad_norm": 0.30403124965537487, "learning_rate": 9.293185035508607e-06, "loss": 0.3002, "num_tokens": 3386014740.0, "step": 4440 }, { "epoch": 6.058718101903217, "grad_norm": 0.3122228361954403, "learning_rate": 9.286145457391265e-06, "loss": 0.3048, "num_tokens": 3386848644.0, "step": 4441 }, { "epoch": 6.0600836391567805, "grad_norm": 0.30358168057600743, "learning_rate": 9.279109757720342e-06, "loss": 0.2845, "num_tokens": 3387609591.0, "step": 4442 }, { "epoch": 6.061449176410344, "grad_norm": 0.2859402478929675, "learning_rate": 9.272077938642147e-06, "loss": 0.301, "num_tokens": 3388412567.0, "step": 4443 }, { "epoch": 6.062814713663907, "grad_norm": 0.28255383548041263, "learning_rate": 9.265050002301773e-06, "loss": 0.315, "num_tokens": 3389198038.0, "step": 4444 }, { "epoch": 6.06418025091747, "grad_norm": 0.3108766453167544, "learning_rate": 9.258025950843132e-06, "loss": 0.2944, "num_tokens": 3390069029.0, "step": 4445 }, { "epoch": 6.0655457881710335, "grad_norm": 0.30121810115426834, "learning_rate": 9.251005786408973e-06, "loss": 0.298, "num_tokens": 3390821019.0, "step": 4446 }, { "epoch": 6.066911325424597, "grad_norm": 0.30845286096483393, "learning_rate": 9.243989511140832e-06, "loss": 0.2966, "num_tokens": 3391566032.0, "step": 4447 }, { "epoch": 6.06827686267816, "grad_norm": 0.2953901000364955, "learning_rate": 9.236977127179077e-06, "loss": 0.2832, "num_tokens": 3392316762.0, "step": 4448 }, { "epoch": 6.069642399931723, "grad_norm": 0.2949890363467683, "learning_rate": 9.229968636662878e-06, "loss": 0.29, "num_tokens": 3393033351.0, "step": 4449 }, { "epoch": 6.0710079371852865, "grad_norm": 0.2970717575593725, "learning_rate": 9.222964041730231e-06, "loss": 0.3058, "num_tokens": 3393730096.0, "step": 4450 }, { "epoch": 6.07237347443885, "grad_norm": 0.32649900900220163, "learning_rate": 9.215963344517927e-06, "loss": 0.2992, "num_tokens": 3394541780.0, "step": 4451 }, { "epoch": 6.073739011692413, "grad_norm": 0.27717588575156554, "learning_rate": 9.208966547161573e-06, "loss": 0.3069, "num_tokens": 3395359254.0, "step": 4452 }, { "epoch": 6.075104548945976, "grad_norm": 0.29106223855803814, "learning_rate": 9.201973651795599e-06, "loss": 0.2945, "num_tokens": 3396095784.0, "step": 4453 }, { "epoch": 6.0764700861995395, "grad_norm": 0.29172753343676366, "learning_rate": 9.194984660553217e-06, "loss": 0.294, "num_tokens": 3396822887.0, "step": 4454 }, { "epoch": 6.077835623453102, "grad_norm": 0.2930169840829567, "learning_rate": 9.18799957556649e-06, "loss": 0.3081, "num_tokens": 3397611851.0, "step": 4455 }, { "epoch": 6.079201160706665, "grad_norm": 0.30107354602156294, "learning_rate": 9.181018398966245e-06, "loss": 0.2954, "num_tokens": 3398318025.0, "step": 4456 }, { "epoch": 6.080566697960228, "grad_norm": 0.2929262070944451, "learning_rate": 9.174041132882142e-06, "loss": 0.2811, "num_tokens": 3399049300.0, "step": 4457 }, { "epoch": 6.081932235213792, "grad_norm": 0.28989867144304554, "learning_rate": 9.167067779442646e-06, "loss": 0.3161, "num_tokens": 3399921574.0, "step": 4458 }, { "epoch": 6.083297772467355, "grad_norm": 0.2844636585320682, "learning_rate": 9.16009834077503e-06, "loss": 0.2914, "num_tokens": 3400719251.0, "step": 4459 }, { "epoch": 6.084663309720918, "grad_norm": 0.2881548351709398, "learning_rate": 9.153132819005355e-06, "loss": 0.3039, "num_tokens": 3401467639.0, "step": 4460 }, { "epoch": 6.086028846974481, "grad_norm": 0.29791812582534244, "learning_rate": 9.146171216258506e-06, "loss": 0.3139, "num_tokens": 3402161919.0, "step": 4461 }, { "epoch": 6.087394384228045, "grad_norm": 0.3124542436551846, "learning_rate": 9.139213534658175e-06, "loss": 0.2979, "num_tokens": 3402916033.0, "step": 4462 }, { "epoch": 6.088759921481608, "grad_norm": 0.2911747024887793, "learning_rate": 9.132259776326826e-06, "loss": 0.2958, "num_tokens": 3403637405.0, "step": 4463 }, { "epoch": 6.090125458735171, "grad_norm": 0.31006874024419995, "learning_rate": 9.125309943385778e-06, "loss": 0.295, "num_tokens": 3404425002.0, "step": 4464 }, { "epoch": 6.0914909959887344, "grad_norm": 0.29586310157400214, "learning_rate": 9.118364037955105e-06, "loss": 0.2917, "num_tokens": 3405215493.0, "step": 4465 }, { "epoch": 6.092856533242298, "grad_norm": 0.27620086860884385, "learning_rate": 9.111422062153708e-06, "loss": 0.2898, "num_tokens": 3406008591.0, "step": 4466 }, { "epoch": 6.094222070495861, "grad_norm": 0.2781765199354798, "learning_rate": 9.104484018099288e-06, "loss": 0.3024, "num_tokens": 3406765138.0, "step": 4467 }, { "epoch": 6.095587607749424, "grad_norm": 0.3097518535450598, "learning_rate": 9.097549907908332e-06, "loss": 0.2875, "num_tokens": 3407468920.0, "step": 4468 }, { "epoch": 6.0969531450029875, "grad_norm": 0.30324249332244363, "learning_rate": 9.090619733696142e-06, "loss": 0.2971, "num_tokens": 3408226598.0, "step": 4469 }, { "epoch": 6.098318682256551, "grad_norm": 0.30630517450409384, "learning_rate": 9.083693497576813e-06, "loss": 0.2934, "num_tokens": 3408954614.0, "step": 4470 }, { "epoch": 6.099684219510113, "grad_norm": 0.28474136679524276, "learning_rate": 9.076771201663244e-06, "loss": 0.2987, "num_tokens": 3409643700.0, "step": 4471 }, { "epoch": 6.101049756763676, "grad_norm": 0.2900340789504531, "learning_rate": 9.069852848067115e-06, "loss": 0.2959, "num_tokens": 3410447795.0, "step": 4472 }, { "epoch": 6.10241529401724, "grad_norm": 0.3020186756362292, "learning_rate": 9.062938438898936e-06, "loss": 0.3027, "num_tokens": 3411245692.0, "step": 4473 }, { "epoch": 6.103780831270803, "grad_norm": 0.285989273283809, "learning_rate": 9.056027976267974e-06, "loss": 0.2995, "num_tokens": 3411963004.0, "step": 4474 }, { "epoch": 6.105146368524366, "grad_norm": 0.30542586927000226, "learning_rate": 9.049121462282324e-06, "loss": 0.3178, "num_tokens": 3412725041.0, "step": 4475 }, { "epoch": 6.106511905777929, "grad_norm": 0.3079907332046366, "learning_rate": 9.042218899048865e-06, "loss": 0.3185, "num_tokens": 3413477648.0, "step": 4476 }, { "epoch": 6.107877443031493, "grad_norm": 0.30832399008615974, "learning_rate": 9.035320288673263e-06, "loss": 0.3042, "num_tokens": 3414244062.0, "step": 4477 }, { "epoch": 6.109242980285056, "grad_norm": 0.2955956504645491, "learning_rate": 9.028425633259987e-06, "loss": 0.2927, "num_tokens": 3415066114.0, "step": 4478 }, { "epoch": 6.110608517538619, "grad_norm": 0.30308215638936276, "learning_rate": 9.0215349349123e-06, "loss": 0.3073, "num_tokens": 3415798601.0, "step": 4479 }, { "epoch": 6.111974054792182, "grad_norm": 0.3239934823466942, "learning_rate": 9.014648195732251e-06, "loss": 0.3028, "num_tokens": 3416591434.0, "step": 4480 }, { "epoch": 6.113339592045746, "grad_norm": 0.29568806648128765, "learning_rate": 9.007765417820693e-06, "loss": 0.3086, "num_tokens": 3417386431.0, "step": 4481 }, { "epoch": 6.114705129299309, "grad_norm": 0.2978299163358654, "learning_rate": 9.000886603277266e-06, "loss": 0.3027, "num_tokens": 3418202590.0, "step": 4482 }, { "epoch": 6.116070666552872, "grad_norm": 0.31877036986846036, "learning_rate": 8.994011754200385e-06, "loss": 0.3142, "num_tokens": 3419044902.0, "step": 4483 }, { "epoch": 6.117436203806435, "grad_norm": 0.31466504688169045, "learning_rate": 8.987140872687277e-06, "loss": 0.291, "num_tokens": 3419861317.0, "step": 4484 }, { "epoch": 6.118801741059999, "grad_norm": 0.28364328418961604, "learning_rate": 8.980273960833955e-06, "loss": 0.3037, "num_tokens": 3420613672.0, "step": 4485 }, { "epoch": 6.120167278313562, "grad_norm": 0.29308091126614777, "learning_rate": 8.97341102073521e-06, "loss": 0.2958, "num_tokens": 3421490447.0, "step": 4486 }, { "epoch": 6.121532815567125, "grad_norm": 0.29112122991640205, "learning_rate": 8.966552054484625e-06, "loss": 0.3017, "num_tokens": 3422250044.0, "step": 4487 }, { "epoch": 6.1228983528206875, "grad_norm": 0.2963598404045903, "learning_rate": 8.959697064174575e-06, "loss": 0.2958, "num_tokens": 3423065073.0, "step": 4488 }, { "epoch": 6.124263890074251, "grad_norm": 0.3054631079786136, "learning_rate": 8.952846051896229e-06, "loss": 0.298, "num_tokens": 3423817357.0, "step": 4489 }, { "epoch": 6.125629427327814, "grad_norm": 0.31616388579494636, "learning_rate": 8.945999019739531e-06, "loss": 0.331, "num_tokens": 3424653070.0, "step": 4490 }, { "epoch": 6.126994964581377, "grad_norm": 0.3018638026347556, "learning_rate": 8.939155969793205e-06, "loss": 0.3177, "num_tokens": 3425514557.0, "step": 4491 }, { "epoch": 6.1283605018349405, "grad_norm": 0.3249937068046554, "learning_rate": 8.932316904144775e-06, "loss": 0.295, "num_tokens": 3426267036.0, "step": 4492 }, { "epoch": 6.129726039088504, "grad_norm": 0.28744629313546194, "learning_rate": 8.925481824880548e-06, "loss": 0.3011, "num_tokens": 3427004673.0, "step": 4493 }, { "epoch": 6.131091576342067, "grad_norm": 0.30200426473438013, "learning_rate": 8.918650734085608e-06, "loss": 0.2941, "num_tokens": 3427737863.0, "step": 4494 }, { "epoch": 6.13245711359563, "grad_norm": 0.3016882248849372, "learning_rate": 8.91182363384382e-06, "loss": 0.2996, "num_tokens": 3428510172.0, "step": 4495 }, { "epoch": 6.1338226508491935, "grad_norm": 0.31575571651607504, "learning_rate": 8.905000526237843e-06, "loss": 0.3009, "num_tokens": 3429166656.0, "step": 4496 }, { "epoch": 6.135188188102757, "grad_norm": 0.30339826975488837, "learning_rate": 8.898181413349107e-06, "loss": 0.3017, "num_tokens": 3429905954.0, "step": 4497 }, { "epoch": 6.13655372535632, "grad_norm": 0.3058700067696704, "learning_rate": 8.89136629725783e-06, "loss": 0.2908, "num_tokens": 3430563869.0, "step": 4498 }, { "epoch": 6.137919262609883, "grad_norm": 0.3655071970746073, "learning_rate": 8.884555180043017e-06, "loss": 0.288, "num_tokens": 3431279791.0, "step": 4499 }, { "epoch": 6.1392847998634466, "grad_norm": 0.30025366129677883, "learning_rate": 8.877748063782429e-06, "loss": 0.3008, "num_tokens": 3432049749.0, "step": 4500 }, { "epoch": 6.14065033711701, "grad_norm": 0.3050515964040872, "learning_rate": 8.870944950552632e-06, "loss": 0.2849, "num_tokens": 3432829417.0, "step": 4501 }, { "epoch": 6.142015874370573, "grad_norm": 0.3120770891413019, "learning_rate": 8.864145842428957e-06, "loss": 0.295, "num_tokens": 3433598933.0, "step": 4502 }, { "epoch": 6.143381411624135, "grad_norm": 0.3326133199197015, "learning_rate": 8.857350741485526e-06, "loss": 0.292, "num_tokens": 3434403845.0, "step": 4503 }, { "epoch": 6.144746948877699, "grad_norm": 0.28206065066368446, "learning_rate": 8.850559649795216e-06, "loss": 0.2957, "num_tokens": 3435176577.0, "step": 4504 }, { "epoch": 6.146112486131262, "grad_norm": 0.29115323211568434, "learning_rate": 8.843772569429712e-06, "loss": 0.3072, "num_tokens": 3436058064.0, "step": 4505 }, { "epoch": 6.147478023384825, "grad_norm": 0.2939777346990723, "learning_rate": 8.83698950245945e-06, "loss": 0.3048, "num_tokens": 3436878576.0, "step": 4506 }, { "epoch": 6.148843560638388, "grad_norm": 0.30356175111703443, "learning_rate": 8.830210450953636e-06, "loss": 0.3022, "num_tokens": 3437673093.0, "step": 4507 }, { "epoch": 6.150209097891952, "grad_norm": 0.2963812813362353, "learning_rate": 8.823435416980293e-06, "loss": 0.2975, "num_tokens": 3438375178.0, "step": 4508 }, { "epoch": 6.151574635145515, "grad_norm": 0.3205406059385608, "learning_rate": 8.816664402606167e-06, "loss": 0.2824, "num_tokens": 3439112672.0, "step": 4509 }, { "epoch": 6.152940172399078, "grad_norm": 0.301594446688867, "learning_rate": 8.809897409896811e-06, "loss": 0.3027, "num_tokens": 3439899433.0, "step": 4510 }, { "epoch": 6.1543057096526415, "grad_norm": 0.3117370558443123, "learning_rate": 8.803134440916541e-06, "loss": 0.2806, "num_tokens": 3440609305.0, "step": 4511 }, { "epoch": 6.155671246906205, "grad_norm": 0.3225636597448134, "learning_rate": 8.796375497728451e-06, "loss": 0.3039, "num_tokens": 3441238978.0, "step": 4512 }, { "epoch": 6.157036784159768, "grad_norm": 0.31236805622940217, "learning_rate": 8.789620582394385e-06, "loss": 0.2903, "num_tokens": 3442056775.0, "step": 4513 }, { "epoch": 6.158402321413331, "grad_norm": 0.2924745791629228, "learning_rate": 8.782869696974996e-06, "loss": 0.2869, "num_tokens": 3442815152.0, "step": 4514 }, { "epoch": 6.1597678586668945, "grad_norm": 0.3110409634994387, "learning_rate": 8.776122843529679e-06, "loss": 0.3044, "num_tokens": 3443518406.0, "step": 4515 }, { "epoch": 6.161133395920458, "grad_norm": 0.28705331430409226, "learning_rate": 8.769380024116593e-06, "loss": 0.3068, "num_tokens": 3444280412.0, "step": 4516 }, { "epoch": 6.162498933174021, "grad_norm": 0.29157150668699044, "learning_rate": 8.762641240792707e-06, "loss": 0.2981, "num_tokens": 3444984664.0, "step": 4517 }, { "epoch": 6.163864470427584, "grad_norm": 0.29277369778379114, "learning_rate": 8.755906495613706e-06, "loss": 0.2992, "num_tokens": 3445790250.0, "step": 4518 }, { "epoch": 6.1652300076811475, "grad_norm": 0.2877170001583034, "learning_rate": 8.749175790634084e-06, "loss": 0.2945, "num_tokens": 3446562538.0, "step": 4519 }, { "epoch": 6.16659554493471, "grad_norm": 0.2849011285917565, "learning_rate": 8.74244912790708e-06, "loss": 0.3083, "num_tokens": 3447364960.0, "step": 4520 }, { "epoch": 6.167961082188273, "grad_norm": 0.2903092910886752, "learning_rate": 8.735726509484722e-06, "loss": 0.2938, "num_tokens": 3448198825.0, "step": 4521 }, { "epoch": 6.169326619441836, "grad_norm": 0.28193245416816615, "learning_rate": 8.72900793741777e-06, "loss": 0.3042, "num_tokens": 3448973515.0, "step": 4522 }, { "epoch": 6.1706921566954, "grad_norm": 0.2928020429719269, "learning_rate": 8.72229341375578e-06, "loss": 0.2998, "num_tokens": 3449732948.0, "step": 4523 }, { "epoch": 6.172057693948963, "grad_norm": 0.29385048344473197, "learning_rate": 8.715582940547068e-06, "loss": 0.2884, "num_tokens": 3450423957.0, "step": 4524 }, { "epoch": 6.173423231202526, "grad_norm": 0.32769770937312015, "learning_rate": 8.708876519838685e-06, "loss": 0.3222, "num_tokens": 3451285379.0, "step": 4525 }, { "epoch": 6.174788768456089, "grad_norm": 0.28956665490796607, "learning_rate": 8.702174153676502e-06, "loss": 0.2921, "num_tokens": 3452094627.0, "step": 4526 }, { "epoch": 6.176154305709653, "grad_norm": 0.2978076155462878, "learning_rate": 8.695475844105098e-06, "loss": 0.2943, "num_tokens": 3452861074.0, "step": 4527 }, { "epoch": 6.177519842963216, "grad_norm": 0.2894765103055683, "learning_rate": 8.688781593167844e-06, "loss": 0.2932, "num_tokens": 3453596726.0, "step": 4528 }, { "epoch": 6.178885380216779, "grad_norm": 0.2903842214722527, "learning_rate": 8.682091402906871e-06, "loss": 0.2936, "num_tokens": 3454321168.0, "step": 4529 }, { "epoch": 6.180250917470342, "grad_norm": 0.30365214560492765, "learning_rate": 8.675405275363058e-06, "loss": 0.2788, "num_tokens": 3455015952.0, "step": 4530 }, { "epoch": 6.181616454723906, "grad_norm": 0.3073718513348531, "learning_rate": 8.668723212576054e-06, "loss": 0.3105, "num_tokens": 3455784264.0, "step": 4531 }, { "epoch": 6.182981991977469, "grad_norm": 0.31879171155593256, "learning_rate": 8.662045216584269e-06, "loss": 0.3081, "num_tokens": 3456527875.0, "step": 4532 }, { "epoch": 6.184347529231032, "grad_norm": 0.3033212967087488, "learning_rate": 8.655371289424879e-06, "loss": 0.3055, "num_tokens": 3457290527.0, "step": 4533 }, { "epoch": 6.185713066484595, "grad_norm": 0.3329876512059327, "learning_rate": 8.648701433133791e-06, "loss": 0.3091, "num_tokens": 3458066186.0, "step": 4534 }, { "epoch": 6.187078603738159, "grad_norm": 0.3071315964747986, "learning_rate": 8.642035649745708e-06, "loss": 0.2971, "num_tokens": 3458870754.0, "step": 4535 }, { "epoch": 6.188444140991721, "grad_norm": 0.30633229155024777, "learning_rate": 8.63537394129406e-06, "loss": 0.3046, "num_tokens": 3459651791.0, "step": 4536 }, { "epoch": 6.189809678245284, "grad_norm": 0.2972410567457784, "learning_rate": 8.62871630981105e-06, "loss": 0.2898, "num_tokens": 3460355593.0, "step": 4537 }, { "epoch": 6.1911752154988475, "grad_norm": 0.2978781149510203, "learning_rate": 8.622062757327638e-06, "loss": 0.2992, "num_tokens": 3461035221.0, "step": 4538 }, { "epoch": 6.192540752752411, "grad_norm": 0.3243598105744612, "learning_rate": 8.615413285873523e-06, "loss": 0.3032, "num_tokens": 3461808868.0, "step": 4539 }, { "epoch": 6.193906290005974, "grad_norm": 0.29633640032894376, "learning_rate": 8.608767897477179e-06, "loss": 0.2873, "num_tokens": 3462628996.0, "step": 4540 }, { "epoch": 6.195271827259537, "grad_norm": 0.30298426890029007, "learning_rate": 8.602126594165824e-06, "loss": 0.3041, "num_tokens": 3463377448.0, "step": 4541 }, { "epoch": 6.1966373645131005, "grad_norm": 0.2896477184477959, "learning_rate": 8.595489377965438e-06, "loss": 0.2998, "num_tokens": 3464185628.0, "step": 4542 }, { "epoch": 6.198002901766664, "grad_norm": 0.3108554514067236, "learning_rate": 8.58885625090073e-06, "loss": 0.2979, "num_tokens": 3464979233.0, "step": 4543 }, { "epoch": 6.199368439020227, "grad_norm": 0.29110773698690995, "learning_rate": 8.582227214995205e-06, "loss": 0.2992, "num_tokens": 3465772271.0, "step": 4544 }, { "epoch": 6.20073397627379, "grad_norm": 0.28072358314807627, "learning_rate": 8.575602272271077e-06, "loss": 0.2937, "num_tokens": 3466479562.0, "step": 4545 }, { "epoch": 6.202099513527354, "grad_norm": 0.3153581387166458, "learning_rate": 8.568981424749337e-06, "loss": 0.2986, "num_tokens": 3467190071.0, "step": 4546 }, { "epoch": 6.203465050780917, "grad_norm": 0.3212046020742733, "learning_rate": 8.56236467444972e-06, "loss": 0.3079, "num_tokens": 3468019980.0, "step": 4547 }, { "epoch": 6.20483058803448, "grad_norm": 0.27867904946026706, "learning_rate": 8.5557520233907e-06, "loss": 0.2948, "num_tokens": 3468716464.0, "step": 4548 }, { "epoch": 6.206196125288043, "grad_norm": 0.30355008125393906, "learning_rate": 8.549143473589516e-06, "loss": 0.2882, "num_tokens": 3469433911.0, "step": 4549 }, { "epoch": 6.207561662541607, "grad_norm": 0.2956370782204565, "learning_rate": 8.54253902706215e-06, "loss": 0.2965, "num_tokens": 3470219400.0, "step": 4550 }, { "epoch": 6.20892719979517, "grad_norm": 0.30054980156099803, "learning_rate": 8.535938685823344e-06, "loss": 0.3033, "num_tokens": 3470966642.0, "step": 4551 }, { "epoch": 6.210292737048732, "grad_norm": 0.3082824140323851, "learning_rate": 8.529342451886551e-06, "loss": 0.3023, "num_tokens": 3471709148.0, "step": 4552 }, { "epoch": 6.2116582743022954, "grad_norm": 0.2957663971607134, "learning_rate": 8.522750327264024e-06, "loss": 0.3109, "num_tokens": 3472527777.0, "step": 4553 }, { "epoch": 6.213023811555859, "grad_norm": 0.32234910421137125, "learning_rate": 8.516162313966717e-06, "loss": 0.3019, "num_tokens": 3473302167.0, "step": 4554 }, { "epoch": 6.214389348809422, "grad_norm": 0.30139952585853635, "learning_rate": 8.509578414004353e-06, "loss": 0.2994, "num_tokens": 3474108664.0, "step": 4555 }, { "epoch": 6.215754886062985, "grad_norm": 0.28498954546413857, "learning_rate": 8.5029986293854e-06, "loss": 0.2932, "num_tokens": 3474804538.0, "step": 4556 }, { "epoch": 6.2171204233165485, "grad_norm": 0.3126733830633691, "learning_rate": 8.496422962117051e-06, "loss": 0.3034, "num_tokens": 3475548271.0, "step": 4557 }, { "epoch": 6.218485960570112, "grad_norm": 0.2997448226318243, "learning_rate": 8.48985141420527e-06, "loss": 0.3015, "num_tokens": 3476291397.0, "step": 4558 }, { "epoch": 6.219851497823675, "grad_norm": 0.2985714090831618, "learning_rate": 8.483283987654746e-06, "loss": 0.31, "num_tokens": 3477124355.0, "step": 4559 }, { "epoch": 6.221217035077238, "grad_norm": 0.28822980010129784, "learning_rate": 8.476720684468921e-06, "loss": 0.3028, "num_tokens": 3477906054.0, "step": 4560 }, { "epoch": 6.2225825723308015, "grad_norm": 0.29883639346716545, "learning_rate": 8.47016150664997e-06, "loss": 0.2919, "num_tokens": 3478660140.0, "step": 4561 }, { "epoch": 6.223948109584365, "grad_norm": 0.2897109362851504, "learning_rate": 8.463606456198816e-06, "loss": 0.296, "num_tokens": 3479478041.0, "step": 4562 }, { "epoch": 6.225313646837928, "grad_norm": 0.29260346223692796, "learning_rate": 8.45705553511512e-06, "loss": 0.2956, "num_tokens": 3480180854.0, "step": 4563 }, { "epoch": 6.226679184091491, "grad_norm": 0.2818746899573719, "learning_rate": 8.450508745397283e-06, "loss": 0.2919, "num_tokens": 3480959968.0, "step": 4564 }, { "epoch": 6.2280447213450545, "grad_norm": 0.302230806635026, "learning_rate": 8.443966089042458e-06, "loss": 0.2965, "num_tokens": 3481780576.0, "step": 4565 }, { "epoch": 6.229410258598618, "grad_norm": 0.29168576211246233, "learning_rate": 8.43742756804651e-06, "loss": 0.2913, "num_tokens": 3482538169.0, "step": 4566 }, { "epoch": 6.230775795852181, "grad_norm": 0.30262005700763706, "learning_rate": 8.430893184404067e-06, "loss": 0.3076, "num_tokens": 3483309776.0, "step": 4567 }, { "epoch": 6.232141333105743, "grad_norm": 0.2938160709084442, "learning_rate": 8.424362940108483e-06, "loss": 0.2903, "num_tokens": 3484077354.0, "step": 4568 }, { "epoch": 6.233506870359307, "grad_norm": 0.28810695614779164, "learning_rate": 8.417836837151857e-06, "loss": 0.2861, "num_tokens": 3484863806.0, "step": 4569 }, { "epoch": 6.23487240761287, "grad_norm": 0.28221899781088705, "learning_rate": 8.411314877525022e-06, "loss": 0.3032, "num_tokens": 3485718376.0, "step": 4570 }, { "epoch": 6.236237944866433, "grad_norm": 0.2822629485247184, "learning_rate": 8.40479706321754e-06, "loss": 0.2807, "num_tokens": 3486430360.0, "step": 4571 }, { "epoch": 6.237603482119996, "grad_norm": 0.29790391535713495, "learning_rate": 8.398283396217712e-06, "loss": 0.3074, "num_tokens": 3487188606.0, "step": 4572 }, { "epoch": 6.23896901937356, "grad_norm": 0.3030339765643622, "learning_rate": 8.391773878512582e-06, "loss": 0.2947, "num_tokens": 3487953543.0, "step": 4573 }, { "epoch": 6.240334556627123, "grad_norm": 0.2945316877487374, "learning_rate": 8.385268512087926e-06, "loss": 0.2971, "num_tokens": 3488739397.0, "step": 4574 }, { "epoch": 6.241700093880686, "grad_norm": 0.30035249885458565, "learning_rate": 8.378767298928237e-06, "loss": 0.3074, "num_tokens": 3489591658.0, "step": 4575 }, { "epoch": 6.243065631134249, "grad_norm": 0.2845258956098411, "learning_rate": 8.372270241016758e-06, "loss": 0.2847, "num_tokens": 3490270154.0, "step": 4576 }, { "epoch": 6.244431168387813, "grad_norm": 0.3047053607250908, "learning_rate": 8.365777340335466e-06, "loss": 0.3021, "num_tokens": 3490938599.0, "step": 4577 }, { "epoch": 6.245796705641376, "grad_norm": 0.3073047519455548, "learning_rate": 8.359288598865052e-06, "loss": 0.3003, "num_tokens": 3491691608.0, "step": 4578 }, { "epoch": 6.247162242894939, "grad_norm": 0.287148177163203, "learning_rate": 8.352804018584967e-06, "loss": 0.3032, "num_tokens": 3492428160.0, "step": 4579 }, { "epoch": 6.248527780148502, "grad_norm": 0.30503867263853, "learning_rate": 8.34632360147336e-06, "loss": 0.3117, "num_tokens": 3493268337.0, "step": 4580 }, { "epoch": 6.249893317402066, "grad_norm": 0.2822924291688667, "learning_rate": 8.339847349507132e-06, "loss": 0.3061, "num_tokens": 3494004643.0, "step": 4581 }, { "epoch": 6.251258854655629, "grad_norm": 0.2987770139718527, "learning_rate": 8.333375264661908e-06, "loss": 0.2981, "num_tokens": 3494777242.0, "step": 4582 }, { "epoch": 6.252624391909192, "grad_norm": 0.2920199708174235, "learning_rate": 8.326907348912043e-06, "loss": 0.3097, "num_tokens": 3495611764.0, "step": 4583 }, { "epoch": 6.2539899291627545, "grad_norm": 0.29863487294888247, "learning_rate": 8.32044360423061e-06, "loss": 0.3101, "num_tokens": 3496354529.0, "step": 4584 }, { "epoch": 6.255355466416318, "grad_norm": 0.3147621568018632, "learning_rate": 8.313984032589423e-06, "loss": 0.307, "num_tokens": 3497144767.0, "step": 4585 }, { "epoch": 6.256721003669881, "grad_norm": 0.3037871067997136, "learning_rate": 8.307528635959021e-06, "loss": 0.2937, "num_tokens": 3497844724.0, "step": 4586 }, { "epoch": 6.258086540923444, "grad_norm": 0.3053666538785438, "learning_rate": 8.301077416308651e-06, "loss": 0.3047, "num_tokens": 3498572429.0, "step": 4587 }, { "epoch": 6.2594520781770076, "grad_norm": 0.32049843876177353, "learning_rate": 8.294630375606322e-06, "loss": 0.2856, "num_tokens": 3499266427.0, "step": 4588 }, { "epoch": 6.260817615430571, "grad_norm": 0.3063635083844993, "learning_rate": 8.28818751581873e-06, "loss": 0.305, "num_tokens": 3500080062.0, "step": 4589 }, { "epoch": 6.262183152684134, "grad_norm": 0.28489497233466415, "learning_rate": 8.281748838911323e-06, "loss": 0.3051, "num_tokens": 3500895054.0, "step": 4590 }, { "epoch": 6.263548689937697, "grad_norm": 0.2804996943496296, "learning_rate": 8.275314346848256e-06, "loss": 0.3072, "num_tokens": 3501620088.0, "step": 4591 }, { "epoch": 6.264914227191261, "grad_norm": 0.31933170287594714, "learning_rate": 8.268884041592417e-06, "loss": 0.2893, "num_tokens": 3502307483.0, "step": 4592 }, { "epoch": 6.266279764444824, "grad_norm": 0.29337875050370144, "learning_rate": 8.262457925105414e-06, "loss": 0.2772, "num_tokens": 3502992365.0, "step": 4593 }, { "epoch": 6.267645301698387, "grad_norm": 0.2929465036342751, "learning_rate": 8.256035999347572e-06, "loss": 0.296, "num_tokens": 3503751298.0, "step": 4594 }, { "epoch": 6.26901083895195, "grad_norm": 0.2781374199515906, "learning_rate": 8.249618266277954e-06, "loss": 0.2932, "num_tokens": 3504505387.0, "step": 4595 }, { "epoch": 6.270376376205514, "grad_norm": 0.29374584574885537, "learning_rate": 8.24320472785431e-06, "loss": 0.3038, "num_tokens": 3505223012.0, "step": 4596 }, { "epoch": 6.271741913459077, "grad_norm": 0.31351215364887963, "learning_rate": 8.236795386033164e-06, "loss": 0.3009, "num_tokens": 3505873858.0, "step": 4597 }, { "epoch": 6.27310745071264, "grad_norm": 0.31462577595110136, "learning_rate": 8.230390242769701e-06, "loss": 0.2858, "num_tokens": 3506574211.0, "step": 4598 }, { "epoch": 6.274472987966203, "grad_norm": 0.30947714823436046, "learning_rate": 8.223989300017867e-06, "loss": 0.3005, "num_tokens": 3507348540.0, "step": 4599 }, { "epoch": 6.275838525219767, "grad_norm": 0.2959041003306376, "learning_rate": 8.217592559730312e-06, "loss": 0.2973, "num_tokens": 3508064816.0, "step": 4600 }, { "epoch": 6.277204062473329, "grad_norm": 0.29038540190258083, "learning_rate": 8.2112000238584e-06, "loss": 0.3053, "num_tokens": 3508834231.0, "step": 4601 }, { "epoch": 6.278569599726892, "grad_norm": 0.28018729041453444, "learning_rate": 8.204811694352216e-06, "loss": 0.285, "num_tokens": 3509620070.0, "step": 4602 }, { "epoch": 6.2799351369804555, "grad_norm": 0.27720798861609824, "learning_rate": 8.198427573160565e-06, "loss": 0.2889, "num_tokens": 3510319187.0, "step": 4603 }, { "epoch": 6.281300674234019, "grad_norm": 0.2981710668328452, "learning_rate": 8.19204766223097e-06, "loss": 0.3154, "num_tokens": 3511110675.0, "step": 4604 }, { "epoch": 6.282666211487582, "grad_norm": 0.2958788361934978, "learning_rate": 8.185671963509655e-06, "loss": 0.3067, "num_tokens": 3511831234.0, "step": 4605 }, { "epoch": 6.284031748741145, "grad_norm": 0.30751355272189357, "learning_rate": 8.179300478941585e-06, "loss": 0.3055, "num_tokens": 3512589321.0, "step": 4606 }, { "epoch": 6.2853972859947085, "grad_norm": 0.2982651342219284, "learning_rate": 8.172933210470411e-06, "loss": 0.2999, "num_tokens": 3513447670.0, "step": 4607 }, { "epoch": 6.286762823248272, "grad_norm": 0.28017135701350954, "learning_rate": 8.166570160038514e-06, "loss": 0.3071, "num_tokens": 3514202403.0, "step": 4608 }, { "epoch": 6.288128360501835, "grad_norm": 0.29206093109164966, "learning_rate": 8.160211329586991e-06, "loss": 0.3135, "num_tokens": 3515010614.0, "step": 4609 }, { "epoch": 6.289493897755398, "grad_norm": 0.2886394332217839, "learning_rate": 8.153856721055637e-06, "loss": 0.2984, "num_tokens": 3515788396.0, "step": 4610 }, { "epoch": 6.2908594350089615, "grad_norm": 0.3043043952157243, "learning_rate": 8.147506336382973e-06, "loss": 0.2962, "num_tokens": 3516520038.0, "step": 4611 }, { "epoch": 6.292224972262525, "grad_norm": 0.2971896491863897, "learning_rate": 8.141160177506222e-06, "loss": 0.2993, "num_tokens": 3517320143.0, "step": 4612 }, { "epoch": 6.293590509516088, "grad_norm": 0.3101891490061094, "learning_rate": 8.134818246361331e-06, "loss": 0.3084, "num_tokens": 3518097785.0, "step": 4613 }, { "epoch": 6.294956046769651, "grad_norm": 0.2997885593889986, "learning_rate": 8.128480544882934e-06, "loss": 0.2903, "num_tokens": 3518852718.0, "step": 4614 }, { "epoch": 6.2963215840232145, "grad_norm": 0.2902880624514265, "learning_rate": 8.12214707500441e-06, "loss": 0.3085, "num_tokens": 3519572072.0, "step": 4615 }, { "epoch": 6.297687121276777, "grad_norm": 0.3094027170647446, "learning_rate": 8.115817838657813e-06, "loss": 0.3182, "num_tokens": 3520359111.0, "step": 4616 }, { "epoch": 6.29905265853034, "grad_norm": 0.2997775991692503, "learning_rate": 8.109492837773912e-06, "loss": 0.3051, "num_tokens": 3521137754.0, "step": 4617 }, { "epoch": 6.300418195783903, "grad_norm": 0.28619263672344264, "learning_rate": 8.103172074282207e-06, "loss": 0.306, "num_tokens": 3521896847.0, "step": 4618 }, { "epoch": 6.301783733037467, "grad_norm": 0.3077105342354075, "learning_rate": 8.096855550110878e-06, "loss": 0.2946, "num_tokens": 3522641389.0, "step": 4619 }, { "epoch": 6.30314927029103, "grad_norm": 0.31028539476166817, "learning_rate": 8.090543267186829e-06, "loss": 0.2976, "num_tokens": 3523373801.0, "step": 4620 }, { "epoch": 6.304514807544593, "grad_norm": 0.3106386047566905, "learning_rate": 8.08423522743566e-06, "loss": 0.297, "num_tokens": 3524114552.0, "step": 4621 }, { "epoch": 6.305880344798156, "grad_norm": 0.2982938327043602, "learning_rate": 8.077931432781685e-06, "loss": 0.3084, "num_tokens": 3525025106.0, "step": 4622 }, { "epoch": 6.30724588205172, "grad_norm": 0.2721416196217965, "learning_rate": 8.071631885147914e-06, "loss": 0.2923, "num_tokens": 3525793061.0, "step": 4623 }, { "epoch": 6.308611419305283, "grad_norm": 0.29309478015933915, "learning_rate": 8.065336586456067e-06, "loss": 0.295, "num_tokens": 3526553122.0, "step": 4624 }, { "epoch": 6.309976956558846, "grad_norm": 0.29553673181250245, "learning_rate": 8.05904553862657e-06, "loss": 0.2966, "num_tokens": 3527355848.0, "step": 4625 }, { "epoch": 6.311342493812409, "grad_norm": 0.31561934312266704, "learning_rate": 8.052758743578547e-06, "loss": 0.3124, "num_tokens": 3528120215.0, "step": 4626 }, { "epoch": 6.312708031065973, "grad_norm": 0.3206047593879715, "learning_rate": 8.046476203229834e-06, "loss": 0.2978, "num_tokens": 3528894356.0, "step": 4627 }, { "epoch": 6.314073568319536, "grad_norm": 0.29151823415821854, "learning_rate": 8.040197919496951e-06, "loss": 0.307, "num_tokens": 3529765730.0, "step": 4628 }, { "epoch": 6.315439105573099, "grad_norm": 0.2683600891773982, "learning_rate": 8.033923894295132e-06, "loss": 0.3087, "num_tokens": 3530548872.0, "step": 4629 }, { "epoch": 6.316804642826662, "grad_norm": 0.2973521114030235, "learning_rate": 8.027654129538316e-06, "loss": 0.3025, "num_tokens": 3531326054.0, "step": 4630 }, { "epoch": 6.318170180080226, "grad_norm": 0.306139460923637, "learning_rate": 8.021388627139137e-06, "loss": 0.294, "num_tokens": 3532057861.0, "step": 4631 }, { "epoch": 6.319535717333789, "grad_norm": 0.3109575178552593, "learning_rate": 8.015127389008923e-06, "loss": 0.3129, "num_tokens": 3532875188.0, "step": 4632 }, { "epoch": 6.320901254587351, "grad_norm": 0.3111879133251611, "learning_rate": 8.008870417057709e-06, "loss": 0.3184, "num_tokens": 3533637412.0, "step": 4633 }, { "epoch": 6.322266791840915, "grad_norm": 0.2934050740755671, "learning_rate": 8.002617713194225e-06, "loss": 0.2895, "num_tokens": 3534342808.0, "step": 4634 }, { "epoch": 6.323632329094478, "grad_norm": 0.32926659767772354, "learning_rate": 7.996369279325901e-06, "loss": 0.2907, "num_tokens": 3535110054.0, "step": 4635 }, { "epoch": 6.324997866348041, "grad_norm": 0.29981851658785896, "learning_rate": 7.990125117358873e-06, "loss": 0.3016, "num_tokens": 3535836612.0, "step": 4636 }, { "epoch": 6.326363403601604, "grad_norm": 0.3158475180024076, "learning_rate": 7.983885229197948e-06, "loss": 0.2956, "num_tokens": 3536573923.0, "step": 4637 }, { "epoch": 6.327728940855168, "grad_norm": 0.30049832325890086, "learning_rate": 7.977649616746653e-06, "loss": 0.3086, "num_tokens": 3537321773.0, "step": 4638 }, { "epoch": 6.329094478108731, "grad_norm": 0.30061218621210034, "learning_rate": 7.971418281907209e-06, "loss": 0.2934, "num_tokens": 3538040214.0, "step": 4639 }, { "epoch": 6.330460015362294, "grad_norm": 0.30327403446341283, "learning_rate": 7.965191226580515e-06, "loss": 0.304, "num_tokens": 3538740712.0, "step": 4640 }, { "epoch": 6.331825552615857, "grad_norm": 0.29990715636007015, "learning_rate": 7.958968452666184e-06, "loss": 0.3041, "num_tokens": 3539472508.0, "step": 4641 }, { "epoch": 6.333191089869421, "grad_norm": 0.3095489089732085, "learning_rate": 7.952749962062512e-06, "loss": 0.3044, "num_tokens": 3540207921.0, "step": 4642 }, { "epoch": 6.334556627122984, "grad_norm": 0.29821151145784963, "learning_rate": 7.946535756666492e-06, "loss": 0.3165, "num_tokens": 3541059715.0, "step": 4643 }, { "epoch": 6.335922164376547, "grad_norm": 0.27282140860878784, "learning_rate": 7.940325838373808e-06, "loss": 0.3004, "num_tokens": 3541872348.0, "step": 4644 }, { "epoch": 6.33728770163011, "grad_norm": 0.29724972398866517, "learning_rate": 7.934120209078843e-06, "loss": 0.3065, "num_tokens": 3542563785.0, "step": 4645 }, { "epoch": 6.338653238883674, "grad_norm": 0.29475720678737444, "learning_rate": 7.927918870674657e-06, "loss": 0.3019, "num_tokens": 3543395323.0, "step": 4646 }, { "epoch": 6.340018776137237, "grad_norm": 0.28466452413845234, "learning_rate": 7.921721825053013e-06, "loss": 0.2977, "num_tokens": 3544132690.0, "step": 4647 }, { "epoch": 6.341384313390799, "grad_norm": 0.2958956650756493, "learning_rate": 7.915529074104365e-06, "loss": 0.3074, "num_tokens": 3544918936.0, "step": 4648 }, { "epoch": 6.342749850644363, "grad_norm": 0.29383506858950265, "learning_rate": 7.909340619717848e-06, "loss": 0.2949, "num_tokens": 3545656346.0, "step": 4649 }, { "epoch": 6.344115387897926, "grad_norm": 0.28398639024411826, "learning_rate": 7.90315646378129e-06, "loss": 0.3065, "num_tokens": 3546321398.0, "step": 4650 }, { "epoch": 6.345480925151489, "grad_norm": 0.31414963929478595, "learning_rate": 7.896976608181213e-06, "loss": 0.3138, "num_tokens": 3547078530.0, "step": 4651 }, { "epoch": 6.346846462405052, "grad_norm": 0.28729024625011995, "learning_rate": 7.890801054802825e-06, "loss": 0.3039, "num_tokens": 3547874258.0, "step": 4652 }, { "epoch": 6.3482119996586155, "grad_norm": 0.28950099603058793, "learning_rate": 7.884629805530017e-06, "loss": 0.2891, "num_tokens": 3548627865.0, "step": 4653 }, { "epoch": 6.349577536912179, "grad_norm": 0.29193820885191624, "learning_rate": 7.878462862245375e-06, "loss": 0.3177, "num_tokens": 3549423721.0, "step": 4654 }, { "epoch": 6.350943074165742, "grad_norm": 0.30699938921793946, "learning_rate": 7.872300226830159e-06, "loss": 0.3042, "num_tokens": 3550157737.0, "step": 4655 }, { "epoch": 6.352308611419305, "grad_norm": 0.2987639264778801, "learning_rate": 7.866141901164324e-06, "loss": 0.3003, "num_tokens": 3550943083.0, "step": 4656 }, { "epoch": 6.3536741486728685, "grad_norm": 0.29171414886577446, "learning_rate": 7.859987887126512e-06, "loss": 0.3092, "num_tokens": 3551699719.0, "step": 4657 }, { "epoch": 6.355039685926432, "grad_norm": 0.29728047770832133, "learning_rate": 7.853838186594035e-06, "loss": 0.3002, "num_tokens": 3552438492.0, "step": 4658 }, { "epoch": 6.356405223179995, "grad_norm": 0.2971607276735271, "learning_rate": 7.847692801442921e-06, "loss": 0.294, "num_tokens": 3553167155.0, "step": 4659 }, { "epoch": 6.357770760433558, "grad_norm": 0.30909509923058, "learning_rate": 7.841551733547843e-06, "loss": 0.301, "num_tokens": 3553891467.0, "step": 4660 }, { "epoch": 6.3591362976871215, "grad_norm": 0.3164656110759712, "learning_rate": 7.835414984782181e-06, "loss": 0.298, "num_tokens": 3554615623.0, "step": 4661 }, { "epoch": 6.360501834940685, "grad_norm": 0.32973838151158297, "learning_rate": 7.829282557017994e-06, "loss": 0.3127, "num_tokens": 3555347041.0, "step": 4662 }, { "epoch": 6.361867372194248, "grad_norm": 0.3106520623681838, "learning_rate": 7.823154452126015e-06, "loss": 0.2875, "num_tokens": 3556119211.0, "step": 4663 }, { "epoch": 6.363232909447811, "grad_norm": 0.30385778270018804, "learning_rate": 7.817030671975666e-06, "loss": 0.3046, "num_tokens": 3556841686.0, "step": 4664 }, { "epoch": 6.364598446701374, "grad_norm": 0.3014543594515548, "learning_rate": 7.810911218435044e-06, "loss": 0.3274, "num_tokens": 3557625885.0, "step": 4665 }, { "epoch": 6.365963983954937, "grad_norm": 0.3049823399386101, "learning_rate": 7.804796093370938e-06, "loss": 0.3085, "num_tokens": 3558487340.0, "step": 4666 }, { "epoch": 6.3673295212085, "grad_norm": 0.30003390402254254, "learning_rate": 7.798685298648792e-06, "loss": 0.2893, "num_tokens": 3559233752.0, "step": 4667 }, { "epoch": 6.368695058462063, "grad_norm": 0.31360428865226697, "learning_rate": 7.792578836132767e-06, "loss": 0.2888, "num_tokens": 3560019530.0, "step": 4668 }, { "epoch": 6.370060595715627, "grad_norm": 0.29209925554468447, "learning_rate": 7.786476707685662e-06, "loss": 0.3046, "num_tokens": 3560818457.0, "step": 4669 }, { "epoch": 6.37142613296919, "grad_norm": 0.3241516390722224, "learning_rate": 7.780378915168976e-06, "loss": 0.2896, "num_tokens": 3561567263.0, "step": 4670 }, { "epoch": 6.372791670222753, "grad_norm": 0.2879487799007486, "learning_rate": 7.77428546044289e-06, "loss": 0.3148, "num_tokens": 3562364537.0, "step": 4671 }, { "epoch": 6.374157207476316, "grad_norm": 0.30313673233849026, "learning_rate": 7.768196345366241e-06, "loss": 0.3189, "num_tokens": 3563196530.0, "step": 4672 }, { "epoch": 6.37552274472988, "grad_norm": 0.2974449111818501, "learning_rate": 7.762111571796562e-06, "loss": 0.3069, "num_tokens": 3563980698.0, "step": 4673 }, { "epoch": 6.376888281983443, "grad_norm": 0.32146450743529303, "learning_rate": 7.756031141590051e-06, "loss": 0.3021, "num_tokens": 3564723579.0, "step": 4674 }, { "epoch": 6.378253819237006, "grad_norm": 0.2938443546907603, "learning_rate": 7.749955056601588e-06, "loss": 0.2949, "num_tokens": 3565479726.0, "step": 4675 }, { "epoch": 6.379619356490569, "grad_norm": 0.2963861245804469, "learning_rate": 7.74388331868471e-06, "loss": 0.3104, "num_tokens": 3566287084.0, "step": 4676 }, { "epoch": 6.380984893744133, "grad_norm": 0.2983077871670581, "learning_rate": 7.737815929691665e-06, "loss": 0.3176, "num_tokens": 3567103567.0, "step": 4677 }, { "epoch": 6.382350430997696, "grad_norm": 0.3056113404025851, "learning_rate": 7.731752891473334e-06, "loss": 0.3028, "num_tokens": 3567937581.0, "step": 4678 }, { "epoch": 6.383715968251259, "grad_norm": 0.29843775186605814, "learning_rate": 7.725694205879284e-06, "loss": 0.2893, "num_tokens": 3568629369.0, "step": 4679 }, { "epoch": 6.3850815055048225, "grad_norm": 0.2823066314148754, "learning_rate": 7.71963987475777e-06, "loss": 0.2918, "num_tokens": 3569371182.0, "step": 4680 }, { "epoch": 6.386447042758386, "grad_norm": 0.31135340451085003, "learning_rate": 7.713589899955698e-06, "loss": 0.303, "num_tokens": 3570143252.0, "step": 4681 }, { "epoch": 6.387812580011948, "grad_norm": 0.2884616428805699, "learning_rate": 7.707544283318656e-06, "loss": 0.3028, "num_tokens": 3570932834.0, "step": 4682 }, { "epoch": 6.389178117265511, "grad_norm": 0.3065482408690861, "learning_rate": 7.701503026690898e-06, "loss": 0.2942, "num_tokens": 3571671921.0, "step": 4683 }, { "epoch": 6.390543654519075, "grad_norm": 0.3284874264076778, "learning_rate": 7.695466131915357e-06, "loss": 0.2859, "num_tokens": 3572378021.0, "step": 4684 }, { "epoch": 6.391909191772638, "grad_norm": 0.3090843549950797, "learning_rate": 7.689433600833612e-06, "loss": 0.3163, "num_tokens": 3573148087.0, "step": 4685 }, { "epoch": 6.393274729026201, "grad_norm": 0.31977986184985796, "learning_rate": 7.68340543528595e-06, "loss": 0.2983, "num_tokens": 3573886052.0, "step": 4686 }, { "epoch": 6.394640266279764, "grad_norm": 0.2846476233376592, "learning_rate": 7.677381637111285e-06, "loss": 0.3173, "num_tokens": 3574623596.0, "step": 4687 }, { "epoch": 6.396005803533328, "grad_norm": 0.30881357027645084, "learning_rate": 7.671362208147217e-06, "loss": 0.3109, "num_tokens": 3575381442.0, "step": 4688 }, { "epoch": 6.397371340786891, "grad_norm": 0.3113243113395768, "learning_rate": 7.665347150230029e-06, "loss": 0.3038, "num_tokens": 3576216422.0, "step": 4689 }, { "epoch": 6.398736878040454, "grad_norm": 0.2819316744426183, "learning_rate": 7.659336465194638e-06, "loss": 0.2854, "num_tokens": 3576924796.0, "step": 4690 }, { "epoch": 6.400102415294017, "grad_norm": 0.2931931136430947, "learning_rate": 7.65333015487465e-06, "loss": 0.3056, "num_tokens": 3577681690.0, "step": 4691 }, { "epoch": 6.401467952547581, "grad_norm": 0.28743842397941877, "learning_rate": 7.647328221102332e-06, "loss": 0.3186, "num_tokens": 3578391121.0, "step": 4692 }, { "epoch": 6.402833489801144, "grad_norm": 0.30603149005869734, "learning_rate": 7.641330665708616e-06, "loss": 0.289, "num_tokens": 3579136786.0, "step": 4693 }, { "epoch": 6.404199027054707, "grad_norm": 0.2975391280107239, "learning_rate": 7.635337490523088e-06, "loss": 0.3025, "num_tokens": 3579886292.0, "step": 4694 }, { "epoch": 6.40556456430827, "grad_norm": 0.27343983187562565, "learning_rate": 7.62934869737401e-06, "loss": 0.3076, "num_tokens": 3580696816.0, "step": 4695 }, { "epoch": 6.406930101561834, "grad_norm": 0.2976784548663288, "learning_rate": 7.62336428808831e-06, "loss": 0.2974, "num_tokens": 3581384894.0, "step": 4696 }, { "epoch": 6.408295638815396, "grad_norm": 0.3121133553587404, "learning_rate": 7.617384264491558e-06, "loss": 0.3124, "num_tokens": 3582141568.0, "step": 4697 }, { "epoch": 6.409661176068959, "grad_norm": 0.29436267589389226, "learning_rate": 7.611408628408017e-06, "loss": 0.3047, "num_tokens": 3582908066.0, "step": 4698 }, { "epoch": 6.4110267133225225, "grad_norm": 0.2767988522125657, "learning_rate": 7.605437381660583e-06, "loss": 0.2945, "num_tokens": 3583619650.0, "step": 4699 }, { "epoch": 6.412392250576086, "grad_norm": 0.28759266203795114, "learning_rate": 7.599470526070827e-06, "loss": 0.3092, "num_tokens": 3584395909.0, "step": 4700 }, { "epoch": 6.413757787829649, "grad_norm": 0.3026390786963697, "learning_rate": 7.5935080634589855e-06, "loss": 0.3022, "num_tokens": 3585119871.0, "step": 4701 }, { "epoch": 6.415123325083212, "grad_norm": 0.28880176409993713, "learning_rate": 7.587549995643937e-06, "loss": 0.3011, "num_tokens": 3585897030.0, "step": 4702 }, { "epoch": 6.4164888623367755, "grad_norm": 0.2853691143770594, "learning_rate": 7.581596324443232e-06, "loss": 0.2931, "num_tokens": 3586670896.0, "step": 4703 }, { "epoch": 6.417854399590339, "grad_norm": 0.28550239920314074, "learning_rate": 7.575647051673084e-06, "loss": 0.2957, "num_tokens": 3587366213.0, "step": 4704 }, { "epoch": 6.419219936843902, "grad_norm": 0.29568555428971316, "learning_rate": 7.569702179148357e-06, "loss": 0.3023, "num_tokens": 3588061765.0, "step": 4705 }, { "epoch": 6.420585474097465, "grad_norm": 0.30354357017348055, "learning_rate": 7.5637617086825645e-06, "loss": 0.3086, "num_tokens": 3588956382.0, "step": 4706 }, { "epoch": 6.4219510113510285, "grad_norm": 0.2659231414215999, "learning_rate": 7.557825642087904e-06, "loss": 0.2981, "num_tokens": 3589758235.0, "step": 4707 }, { "epoch": 6.423316548604592, "grad_norm": 0.27039029304804785, "learning_rate": 7.551893981175196e-06, "loss": 0.3121, "num_tokens": 3590468603.0, "step": 4708 }, { "epoch": 6.424682085858155, "grad_norm": 0.32471257788378804, "learning_rate": 7.545966727753942e-06, "loss": 0.3047, "num_tokens": 3591286349.0, "step": 4709 }, { "epoch": 6.426047623111718, "grad_norm": 0.2936350289063608, "learning_rate": 7.540043883632293e-06, "loss": 0.3073, "num_tokens": 3592059649.0, "step": 4710 }, { "epoch": 6.4274131603652815, "grad_norm": 0.28936591413344365, "learning_rate": 7.5341254506170425e-06, "loss": 0.3088, "num_tokens": 3592828068.0, "step": 4711 }, { "epoch": 6.428778697618845, "grad_norm": 0.3055488521429299, "learning_rate": 7.528211430513651e-06, "loss": 0.3061, "num_tokens": 3593538463.0, "step": 4712 }, { "epoch": 6.430144234872408, "grad_norm": 0.2846526969691956, "learning_rate": 7.522301825126235e-06, "loss": 0.2991, "num_tokens": 3594261662.0, "step": 4713 }, { "epoch": 6.43150977212597, "grad_norm": 0.3117240479588225, "learning_rate": 7.5163966362575546e-06, "loss": 0.3081, "num_tokens": 3594982771.0, "step": 4714 }, { "epoch": 6.432875309379534, "grad_norm": 0.29494975935610823, "learning_rate": 7.51049586570903e-06, "loss": 0.3015, "num_tokens": 3595740416.0, "step": 4715 }, { "epoch": 6.434240846633097, "grad_norm": 0.29995123985966876, "learning_rate": 7.504599515280734e-06, "loss": 0.2946, "num_tokens": 3596513326.0, "step": 4716 }, { "epoch": 6.43560638388666, "grad_norm": 0.3056212601986264, "learning_rate": 7.498707586771379e-06, "loss": 0.2976, "num_tokens": 3597320666.0, "step": 4717 }, { "epoch": 6.436971921140223, "grad_norm": 0.2773763665203659, "learning_rate": 7.492820081978343e-06, "loss": 0.3113, "num_tokens": 3598039515.0, "step": 4718 }, { "epoch": 6.438337458393787, "grad_norm": 0.2993587519807072, "learning_rate": 7.486937002697651e-06, "loss": 0.2824, "num_tokens": 3598730197.0, "step": 4719 }, { "epoch": 6.43970299564735, "grad_norm": 0.28808451385892575, "learning_rate": 7.481058350723967e-06, "loss": 0.3035, "num_tokens": 3599487307.0, "step": 4720 }, { "epoch": 6.441068532900913, "grad_norm": 0.30448973426426185, "learning_rate": 7.475184127850619e-06, "loss": 0.3067, "num_tokens": 3600270378.0, "step": 4721 }, { "epoch": 6.4424340701544764, "grad_norm": 0.2921905195340493, "learning_rate": 7.469314335869577e-06, "loss": 0.2866, "num_tokens": 3601058466.0, "step": 4722 }, { "epoch": 6.44379960740804, "grad_norm": 0.2859839333689969, "learning_rate": 7.4634489765714594e-06, "loss": 0.2956, "num_tokens": 3601839270.0, "step": 4723 }, { "epoch": 6.445165144661603, "grad_norm": 0.2732945919974261, "learning_rate": 7.457588051745537e-06, "loss": 0.294, "num_tokens": 3602527181.0, "step": 4724 }, { "epoch": 6.446530681915166, "grad_norm": 0.3034092273316307, "learning_rate": 7.451731563179724e-06, "loss": 0.3207, "num_tokens": 3603318558.0, "step": 4725 }, { "epoch": 6.4478962191687295, "grad_norm": 0.2814555270317181, "learning_rate": 7.445879512660574e-06, "loss": 0.3046, "num_tokens": 3604040605.0, "step": 4726 }, { "epoch": 6.449261756422293, "grad_norm": 0.310052831988517, "learning_rate": 7.440031901973301e-06, "loss": 0.3227, "num_tokens": 3604834282.0, "step": 4727 }, { "epoch": 6.450627293675856, "grad_norm": 0.3047389575874249, "learning_rate": 7.434188732901762e-06, "loss": 0.2948, "num_tokens": 3605558682.0, "step": 4728 }, { "epoch": 6.451992830929418, "grad_norm": 0.3061289525530711, "learning_rate": 7.428350007228442e-06, "loss": 0.3059, "num_tokens": 3606373347.0, "step": 4729 }, { "epoch": 6.453358368182982, "grad_norm": 0.28990113019166897, "learning_rate": 7.422515726734491e-06, "loss": 0.306, "num_tokens": 3607172486.0, "step": 4730 }, { "epoch": 6.454723905436545, "grad_norm": 0.2859076575470067, "learning_rate": 7.416685893199694e-06, "loss": 0.302, "num_tokens": 3607940230.0, "step": 4731 }, { "epoch": 6.456089442690108, "grad_norm": 0.2809828980474259, "learning_rate": 7.410860508402485e-06, "loss": 0.2916, "num_tokens": 3608714880.0, "step": 4732 }, { "epoch": 6.457454979943671, "grad_norm": 0.2687663648270643, "learning_rate": 7.405039574119932e-06, "loss": 0.3055, "num_tokens": 3609574849.0, "step": 4733 }, { "epoch": 6.458820517197235, "grad_norm": 0.2841734736724961, "learning_rate": 7.39922309212775e-06, "loss": 0.3065, "num_tokens": 3610230051.0, "step": 4734 }, { "epoch": 6.460186054450798, "grad_norm": 0.3035768679467728, "learning_rate": 7.393411064200295e-06, "loss": 0.3116, "num_tokens": 3610990679.0, "step": 4735 }, { "epoch": 6.461551591704361, "grad_norm": 0.31889917796164474, "learning_rate": 7.387603492110569e-06, "loss": 0.3068, "num_tokens": 3611700606.0, "step": 4736 }, { "epoch": 6.462917128957924, "grad_norm": 0.2934886074367756, "learning_rate": 7.3818003776302105e-06, "loss": 0.2988, "num_tokens": 3612447433.0, "step": 4737 }, { "epoch": 6.464282666211488, "grad_norm": 0.28996244711801217, "learning_rate": 7.376001722529493e-06, "loss": 0.2901, "num_tokens": 3613188646.0, "step": 4738 }, { "epoch": 6.465648203465051, "grad_norm": 0.28807899390278824, "learning_rate": 7.370207528577338e-06, "loss": 0.2946, "num_tokens": 3613943320.0, "step": 4739 }, { "epoch": 6.467013740718614, "grad_norm": 0.2854622335083322, "learning_rate": 7.364417797541308e-06, "loss": 0.2983, "num_tokens": 3614720354.0, "step": 4740 }, { "epoch": 6.468379277972177, "grad_norm": 0.28985847744851717, "learning_rate": 7.358632531187585e-06, "loss": 0.3007, "num_tokens": 3615558618.0, "step": 4741 }, { "epoch": 6.469744815225741, "grad_norm": 0.268511081015245, "learning_rate": 7.352851731281024e-06, "loss": 0.2895, "num_tokens": 3616309017.0, "step": 4742 }, { "epoch": 6.471110352479304, "grad_norm": 0.28709568318556666, "learning_rate": 7.347075399585081e-06, "loss": 0.2919, "num_tokens": 3617083777.0, "step": 4743 }, { "epoch": 6.472475889732867, "grad_norm": 0.27674916373955205, "learning_rate": 7.341303537861868e-06, "loss": 0.3075, "num_tokens": 3617860559.0, "step": 4744 }, { "epoch": 6.47384142698643, "grad_norm": 0.31069433096519167, "learning_rate": 7.335536147872131e-06, "loss": 0.2963, "num_tokens": 3618579668.0, "step": 4745 }, { "epoch": 6.475206964239993, "grad_norm": 0.3032859927715298, "learning_rate": 7.329773231375258e-06, "loss": 0.2896, "num_tokens": 3619314303.0, "step": 4746 }, { "epoch": 6.476572501493556, "grad_norm": 0.28339066487869313, "learning_rate": 7.324014790129248e-06, "loss": 0.2889, "num_tokens": 3620108500.0, "step": 4747 }, { "epoch": 6.477938038747119, "grad_norm": 0.2828120384000507, "learning_rate": 7.3182608258907724e-06, "loss": 0.2993, "num_tokens": 3620834478.0, "step": 4748 }, { "epoch": 6.4793035760006825, "grad_norm": 0.29326533371775737, "learning_rate": 7.312511340415108e-06, "loss": 0.3048, "num_tokens": 3621583313.0, "step": 4749 }, { "epoch": 6.480669113254246, "grad_norm": 0.32271276110554353, "learning_rate": 7.306766335456162e-06, "loss": 0.2895, "num_tokens": 3622376276.0, "step": 4750 }, { "epoch": 6.482034650507809, "grad_norm": 0.2673891662567735, "learning_rate": 7.3010258127665065e-06, "loss": 0.292, "num_tokens": 3623135906.0, "step": 4751 }, { "epoch": 6.483400187761372, "grad_norm": 0.3010077101777421, "learning_rate": 7.295289774097312e-06, "loss": 0.2984, "num_tokens": 3623972103.0, "step": 4752 }, { "epoch": 6.4847657250149355, "grad_norm": 0.2746923896033789, "learning_rate": 7.289558221198401e-06, "loss": 0.3045, "num_tokens": 3624740520.0, "step": 4753 }, { "epoch": 6.486131262268499, "grad_norm": 0.2808143124613418, "learning_rate": 7.2838311558182195e-06, "loss": 0.2969, "num_tokens": 3625508282.0, "step": 4754 }, { "epoch": 6.487496799522062, "grad_norm": 0.28455843333965325, "learning_rate": 7.278108579703858e-06, "loss": 0.2942, "num_tokens": 3626263570.0, "step": 4755 }, { "epoch": 6.488862336775625, "grad_norm": 0.2957029330567113, "learning_rate": 7.27239049460101e-06, "loss": 0.3153, "num_tokens": 3627041910.0, "step": 4756 }, { "epoch": 6.4902278740291885, "grad_norm": 0.28384518213019866, "learning_rate": 7.266676902254025e-06, "loss": 0.2914, "num_tokens": 3627798121.0, "step": 4757 }, { "epoch": 6.491593411282752, "grad_norm": 0.2781887455700226, "learning_rate": 7.260967804405876e-06, "loss": 0.304, "num_tokens": 3628617831.0, "step": 4758 }, { "epoch": 6.492958948536315, "grad_norm": 0.2862352988621492, "learning_rate": 7.255263202798146e-06, "loss": 0.2882, "num_tokens": 3629336062.0, "step": 4759 }, { "epoch": 6.494324485789878, "grad_norm": 0.28847927199346923, "learning_rate": 7.249563099171084e-06, "loss": 0.3206, "num_tokens": 3630136894.0, "step": 4760 }, { "epoch": 6.495690023043442, "grad_norm": 0.27788780596468193, "learning_rate": 7.243867495263527e-06, "loss": 0.2934, "num_tokens": 3630831243.0, "step": 4761 }, { "epoch": 6.497055560297005, "grad_norm": 0.29676565518586173, "learning_rate": 7.238176392812966e-06, "loss": 0.2886, "num_tokens": 3631583309.0, "step": 4762 }, { "epoch": 6.498421097550567, "grad_norm": 0.2861904318147343, "learning_rate": 7.232489793555504e-06, "loss": 0.3088, "num_tokens": 3632375695.0, "step": 4763 }, { "epoch": 6.49978663480413, "grad_norm": 0.2777897959094846, "learning_rate": 7.226807699225884e-06, "loss": 0.3069, "num_tokens": 3633109951.0, "step": 4764 }, { "epoch": 6.501152172057694, "grad_norm": 0.31979771574931337, "learning_rate": 7.221130111557459e-06, "loss": 0.3005, "num_tokens": 3633861680.0, "step": 4765 }, { "epoch": 6.502517709311257, "grad_norm": 0.30549856110857654, "learning_rate": 7.215457032282219e-06, "loss": 0.3048, "num_tokens": 3634606790.0, "step": 4766 }, { "epoch": 6.50388324656482, "grad_norm": 0.30006587301999044, "learning_rate": 7.20978846313078e-06, "loss": 0.2988, "num_tokens": 3635356600.0, "step": 4767 }, { "epoch": 6.5052487838183835, "grad_norm": 0.27935690830351184, "learning_rate": 7.20412440583236e-06, "loss": 0.2947, "num_tokens": 3636035498.0, "step": 4768 }, { "epoch": 6.506614321071947, "grad_norm": 0.29030799071375607, "learning_rate": 7.198464862114838e-06, "loss": 0.302, "num_tokens": 3636799787.0, "step": 4769 }, { "epoch": 6.50797985832551, "grad_norm": 0.2999975857897557, "learning_rate": 7.1928098337046835e-06, "loss": 0.3002, "num_tokens": 3637565458.0, "step": 4770 }, { "epoch": 6.509345395579073, "grad_norm": 0.2757440591011246, "learning_rate": 7.187159322327004e-06, "loss": 0.3156, "num_tokens": 3638317227.0, "step": 4771 }, { "epoch": 6.5107109328326365, "grad_norm": 0.2920990813279582, "learning_rate": 7.1815133297055295e-06, "loss": 0.3064, "num_tokens": 3639077023.0, "step": 4772 }, { "epoch": 6.5120764700862, "grad_norm": 0.2985908838726272, "learning_rate": 7.175871857562599e-06, "loss": 0.2998, "num_tokens": 3639882942.0, "step": 4773 }, { "epoch": 6.513442007339763, "grad_norm": 0.28458528230524166, "learning_rate": 7.1702349076191845e-06, "loss": 0.3079, "num_tokens": 3640609825.0, "step": 4774 }, { "epoch": 6.514807544593326, "grad_norm": 0.2986497493898195, "learning_rate": 7.164602481594881e-06, "loss": 0.3073, "num_tokens": 3641366935.0, "step": 4775 }, { "epoch": 6.5161730818468895, "grad_norm": 0.28107008316817594, "learning_rate": 7.158974581207898e-06, "loss": 0.2958, "num_tokens": 3642090363.0, "step": 4776 }, { "epoch": 6.517538619100453, "grad_norm": 0.30465416792394884, "learning_rate": 7.153351208175048e-06, "loss": 0.3001, "num_tokens": 3642852870.0, "step": 4777 }, { "epoch": 6.518904156354015, "grad_norm": 0.30526622845223067, "learning_rate": 7.147732364211801e-06, "loss": 0.3044, "num_tokens": 3643649150.0, "step": 4778 }, { "epoch": 6.520269693607578, "grad_norm": 0.2769059014777644, "learning_rate": 7.142118051032209e-06, "loss": 0.2979, "num_tokens": 3644368437.0, "step": 4779 }, { "epoch": 6.521635230861142, "grad_norm": 0.2883934163690336, "learning_rate": 7.136508270348959e-06, "loss": 0.3038, "num_tokens": 3645072397.0, "step": 4780 }, { "epoch": 6.523000768114705, "grad_norm": 0.27798109178915253, "learning_rate": 7.130903023873359e-06, "loss": 0.2929, "num_tokens": 3645817812.0, "step": 4781 }, { "epoch": 6.524366305368268, "grad_norm": 0.2690270616914759, "learning_rate": 7.1253023133153135e-06, "loss": 0.3216, "num_tokens": 3646607288.0, "step": 4782 }, { "epoch": 6.525731842621831, "grad_norm": 0.2864734103788774, "learning_rate": 7.119706140383364e-06, "loss": 0.3144, "num_tokens": 3647410401.0, "step": 4783 }, { "epoch": 6.527097379875395, "grad_norm": 0.2852599423001945, "learning_rate": 7.114114506784662e-06, "loss": 0.3133, "num_tokens": 3648146124.0, "step": 4784 }, { "epoch": 6.528462917128958, "grad_norm": 0.281124632936939, "learning_rate": 7.108527414224974e-06, "loss": 0.3162, "num_tokens": 3648964007.0, "step": 4785 }, { "epoch": 6.529828454382521, "grad_norm": 0.28175362382250196, "learning_rate": 7.10294486440867e-06, "loss": 0.3006, "num_tokens": 3649729878.0, "step": 4786 }, { "epoch": 6.531193991636084, "grad_norm": 0.2885743607057571, "learning_rate": 7.097366859038757e-06, "loss": 0.2928, "num_tokens": 3650463165.0, "step": 4787 }, { "epoch": 6.532559528889648, "grad_norm": 0.290144537874731, "learning_rate": 7.091793399816834e-06, "loss": 0.3012, "num_tokens": 3651201748.0, "step": 4788 }, { "epoch": 6.533925066143211, "grad_norm": 0.2854454815005936, "learning_rate": 7.086224488443128e-06, "loss": 0.3195, "num_tokens": 3652092189.0, "step": 4789 }, { "epoch": 6.535290603396774, "grad_norm": 0.295521059013521, "learning_rate": 7.08066012661647e-06, "loss": 0.3034, "num_tokens": 3652873036.0, "step": 4790 }, { "epoch": 6.536656140650337, "grad_norm": 0.2950926187888295, "learning_rate": 7.075100316034303e-06, "loss": 0.3045, "num_tokens": 3653637831.0, "step": 4791 }, { "epoch": 6.538021677903901, "grad_norm": 0.2832394762367411, "learning_rate": 7.069545058392688e-06, "loss": 0.3144, "num_tokens": 3654425575.0, "step": 4792 }, { "epoch": 6.539387215157463, "grad_norm": 0.29871210324981035, "learning_rate": 7.063994355386293e-06, "loss": 0.2999, "num_tokens": 3655244904.0, "step": 4793 }, { "epoch": 6.540752752411027, "grad_norm": 0.2964230195199834, "learning_rate": 7.0584482087084e-06, "loss": 0.2998, "num_tokens": 3656016792.0, "step": 4794 }, { "epoch": 6.5421182896645895, "grad_norm": 0.28639828209783397, "learning_rate": 7.05290662005089e-06, "loss": 0.3116, "num_tokens": 3656772482.0, "step": 4795 }, { "epoch": 6.543483826918153, "grad_norm": 0.2886942806005213, "learning_rate": 7.047369591104267e-06, "loss": 0.3128, "num_tokens": 3657523431.0, "step": 4796 }, { "epoch": 6.544849364171716, "grad_norm": 0.3052148718244778, "learning_rate": 7.04183712355764e-06, "loss": 0.304, "num_tokens": 3658304464.0, "step": 4797 }, { "epoch": 6.546214901425279, "grad_norm": 0.2956025079382483, "learning_rate": 7.036309219098723e-06, "loss": 0.3087, "num_tokens": 3659140542.0, "step": 4798 }, { "epoch": 6.5475804386788425, "grad_norm": 0.29961845644481505, "learning_rate": 7.030785879413844e-06, "loss": 0.3195, "num_tokens": 3659934477.0, "step": 4799 }, { "epoch": 6.548945975932406, "grad_norm": 0.2725031222885836, "learning_rate": 7.025267106187927e-06, "loss": 0.3096, "num_tokens": 3660760644.0, "step": 4800 }, { "epoch": 6.550311513185969, "grad_norm": 0.29248029019758504, "learning_rate": 7.0197529011045165e-06, "loss": 0.3058, "num_tokens": 3661503341.0, "step": 4801 }, { "epoch": 6.551677050439532, "grad_norm": 0.3159932749082512, "learning_rate": 7.014243265845757e-06, "loss": 0.3, "num_tokens": 3662262489.0, "step": 4802 }, { "epoch": 6.5530425876930956, "grad_norm": 0.29704515334434456, "learning_rate": 7.008738202092399e-06, "loss": 0.3038, "num_tokens": 3662991156.0, "step": 4803 }, { "epoch": 6.554408124946659, "grad_norm": 0.2823322541645102, "learning_rate": 7.003237711523803e-06, "loss": 0.2994, "num_tokens": 3663740009.0, "step": 4804 }, { "epoch": 6.555773662200222, "grad_norm": 0.30252040499176097, "learning_rate": 6.997741795817921e-06, "loss": 0.3104, "num_tokens": 3664588188.0, "step": 4805 }, { "epoch": 6.557139199453785, "grad_norm": 0.29136406096425166, "learning_rate": 6.992250456651326e-06, "loss": 0.2985, "num_tokens": 3665370956.0, "step": 4806 }, { "epoch": 6.558504736707349, "grad_norm": 0.28685792041228414, "learning_rate": 6.986763695699186e-06, "loss": 0.3019, "num_tokens": 3666065625.0, "step": 4807 }, { "epoch": 6.559870273960912, "grad_norm": 0.30330649921123565, "learning_rate": 6.981281514635279e-06, "loss": 0.298, "num_tokens": 3666795284.0, "step": 4808 }, { "epoch": 6.561235811214475, "grad_norm": 0.2965263043343765, "learning_rate": 6.975803915131972e-06, "loss": 0.2983, "num_tokens": 3667517081.0, "step": 4809 }, { "epoch": 6.562601348468037, "grad_norm": 0.29580239146231846, "learning_rate": 6.970330898860248e-06, "loss": 0.3113, "num_tokens": 3668306469.0, "step": 4810 }, { "epoch": 6.563966885721602, "grad_norm": 0.29024930795056614, "learning_rate": 6.964862467489692e-06, "loss": 0.2949, "num_tokens": 3669123553.0, "step": 4811 }, { "epoch": 6.565332422975164, "grad_norm": 0.2972706379640035, "learning_rate": 6.959398622688471e-06, "loss": 0.3048, "num_tokens": 3669894804.0, "step": 4812 }, { "epoch": 6.566697960228727, "grad_norm": 0.2974362050806468, "learning_rate": 6.953939366123388e-06, "loss": 0.2977, "num_tokens": 3670699111.0, "step": 4813 }, { "epoch": 6.5680634974822905, "grad_norm": 0.27657371169786266, "learning_rate": 6.948484699459808e-06, "loss": 0.3078, "num_tokens": 3671498463.0, "step": 4814 }, { "epoch": 6.569429034735854, "grad_norm": 0.30135499335201027, "learning_rate": 6.94303462436172e-06, "loss": 0.3125, "num_tokens": 3672329311.0, "step": 4815 }, { "epoch": 6.570794571989417, "grad_norm": 0.28979737300053093, "learning_rate": 6.937589142491709e-06, "loss": 0.3211, "num_tokens": 3673106078.0, "step": 4816 }, { "epoch": 6.57216010924298, "grad_norm": 0.3069523393759541, "learning_rate": 6.932148255510955e-06, "loss": 0.3224, "num_tokens": 3673862511.0, "step": 4817 }, { "epoch": 6.5735256464965435, "grad_norm": 0.2831901956610899, "learning_rate": 6.926711965079233e-06, "loss": 0.3021, "num_tokens": 3674651619.0, "step": 4818 }, { "epoch": 6.574891183750107, "grad_norm": 0.32640327179933326, "learning_rate": 6.92128027285492e-06, "loss": 0.287, "num_tokens": 3675421560.0, "step": 4819 }, { "epoch": 6.57625672100367, "grad_norm": 0.2886095296105779, "learning_rate": 6.915853180494998e-06, "loss": 0.3053, "num_tokens": 3676168927.0, "step": 4820 }, { "epoch": 6.577622258257233, "grad_norm": 0.31250549691351326, "learning_rate": 6.910430689655022e-06, "loss": 0.294, "num_tokens": 3676945516.0, "step": 4821 }, { "epoch": 6.5789877955107965, "grad_norm": 0.2822974483261971, "learning_rate": 6.905012801989176e-06, "loss": 0.2961, "num_tokens": 3677747801.0, "step": 4822 }, { "epoch": 6.58035333276436, "grad_norm": 0.28166519136154583, "learning_rate": 6.899599519150214e-06, "loss": 0.2986, "num_tokens": 3678562736.0, "step": 4823 }, { "epoch": 6.581718870017923, "grad_norm": 0.28344202246937855, "learning_rate": 6.8941908427894944e-06, "loss": 0.3214, "num_tokens": 3679339737.0, "step": 4824 }, { "epoch": 6.583084407271486, "grad_norm": 0.29289972714157964, "learning_rate": 6.888786774556972e-06, "loss": 0.3123, "num_tokens": 3680147407.0, "step": 4825 }, { "epoch": 6.5844499445250495, "grad_norm": 0.2951485066406855, "learning_rate": 6.883387316101198e-06, "loss": 0.2961, "num_tokens": 3680920441.0, "step": 4826 }, { "epoch": 6.585815481778612, "grad_norm": 0.2737698869274869, "learning_rate": 6.877992469069308e-06, "loss": 0.2898, "num_tokens": 3681649657.0, "step": 4827 }, { "epoch": 6.587181019032175, "grad_norm": 0.29819709630279706, "learning_rate": 6.8726022351070334e-06, "loss": 0.3017, "num_tokens": 3682381101.0, "step": 4828 }, { "epoch": 6.588546556285738, "grad_norm": 0.29315950378317773, "learning_rate": 6.867216615858708e-06, "loss": 0.3095, "num_tokens": 3683183073.0, "step": 4829 }, { "epoch": 6.589912093539302, "grad_norm": 0.28277638202916083, "learning_rate": 6.861835612967242e-06, "loss": 0.3039, "num_tokens": 3683941267.0, "step": 4830 }, { "epoch": 6.591277630792865, "grad_norm": 0.2672895404527418, "learning_rate": 6.856459228074161e-06, "loss": 0.2939, "num_tokens": 3684741206.0, "step": 4831 }, { "epoch": 6.592643168046428, "grad_norm": 0.2750611591224955, "learning_rate": 6.851087462819554e-06, "loss": 0.2956, "num_tokens": 3685479790.0, "step": 4832 }, { "epoch": 6.594008705299991, "grad_norm": 0.28734179252162273, "learning_rate": 6.845720318842117e-06, "loss": 0.3039, "num_tokens": 3686224594.0, "step": 4833 }, { "epoch": 6.595374242553555, "grad_norm": 0.2842877002917975, "learning_rate": 6.840357797779142e-06, "loss": 0.3027, "num_tokens": 3686971812.0, "step": 4834 }, { "epoch": 6.596739779807118, "grad_norm": 0.31602572757256897, "learning_rate": 6.834999901266487e-06, "loss": 0.3075, "num_tokens": 3687677318.0, "step": 4835 }, { "epoch": 6.598105317060681, "grad_norm": 0.2880633224732342, "learning_rate": 6.829646630938623e-06, "loss": 0.2973, "num_tokens": 3688487786.0, "step": 4836 }, { "epoch": 6.599470854314244, "grad_norm": 0.2806323466637477, "learning_rate": 6.824297988428601e-06, "loss": 0.3152, "num_tokens": 3689217720.0, "step": 4837 }, { "epoch": 6.600836391567808, "grad_norm": 0.29406821761939844, "learning_rate": 6.818953975368061e-06, "loss": 0.3079, "num_tokens": 3689963805.0, "step": 4838 }, { "epoch": 6.602201928821371, "grad_norm": 0.28788542545678386, "learning_rate": 6.813614593387221e-06, "loss": 0.2983, "num_tokens": 3690700761.0, "step": 4839 }, { "epoch": 6.603567466074934, "grad_norm": 0.2857587029543282, "learning_rate": 6.808279844114911e-06, "loss": 0.2962, "num_tokens": 3691429771.0, "step": 4840 }, { "epoch": 6.604933003328497, "grad_norm": 0.2958676918267845, "learning_rate": 6.802949729178516e-06, "loss": 0.3081, "num_tokens": 3692150449.0, "step": 4841 }, { "epoch": 6.60629854058206, "grad_norm": 0.29079641087389146, "learning_rate": 6.797624250204034e-06, "loss": 0.3125, "num_tokens": 3692970592.0, "step": 4842 }, { "epoch": 6.607664077835624, "grad_norm": 0.28646211768945873, "learning_rate": 6.7923034088160365e-06, "loss": 0.2926, "num_tokens": 3693687446.0, "step": 4843 }, { "epoch": 6.609029615089186, "grad_norm": 0.2909134629451016, "learning_rate": 6.786987206637676e-06, "loss": 0.2894, "num_tokens": 3694429415.0, "step": 4844 }, { "epoch": 6.6103951523427495, "grad_norm": 0.29929365095053356, "learning_rate": 6.7816756452906995e-06, "loss": 0.3088, "num_tokens": 3695137197.0, "step": 4845 }, { "epoch": 6.611760689596313, "grad_norm": 0.29753856246131244, "learning_rate": 6.7763687263954326e-06, "loss": 0.2883, "num_tokens": 3695880173.0, "step": 4846 }, { "epoch": 6.613126226849876, "grad_norm": 0.30510365826844793, "learning_rate": 6.771066451570793e-06, "loss": 0.3053, "num_tokens": 3696658450.0, "step": 4847 }, { "epoch": 6.614491764103439, "grad_norm": 0.33346704915837605, "learning_rate": 6.765768822434262e-06, "loss": 0.2815, "num_tokens": 3697385255.0, "step": 4848 }, { "epoch": 6.615857301357003, "grad_norm": 0.30949427676801583, "learning_rate": 6.7604758406019325e-06, "loss": 0.2939, "num_tokens": 3698167435.0, "step": 4849 }, { "epoch": 6.617222838610566, "grad_norm": 0.27357578996955356, "learning_rate": 6.755187507688456e-06, "loss": 0.3106, "num_tokens": 3698967758.0, "step": 4850 }, { "epoch": 6.618588375864129, "grad_norm": 0.31171098220868804, "learning_rate": 6.749903825307067e-06, "loss": 0.3096, "num_tokens": 3699754695.0, "step": 4851 }, { "epoch": 6.619953913117692, "grad_norm": 0.2839208588598692, "learning_rate": 6.744624795069603e-06, "loss": 0.3146, "num_tokens": 3700560364.0, "step": 4852 }, { "epoch": 6.621319450371256, "grad_norm": 0.28715536505923556, "learning_rate": 6.7393504185864565e-06, "loss": 0.3132, "num_tokens": 3701296715.0, "step": 4853 }, { "epoch": 6.622684987624819, "grad_norm": 0.30127565118049243, "learning_rate": 6.734080697466614e-06, "loss": 0.3223, "num_tokens": 3702122819.0, "step": 4854 }, { "epoch": 6.624050524878382, "grad_norm": 0.28969923154851235, "learning_rate": 6.72881563331764e-06, "loss": 0.2873, "num_tokens": 3702869826.0, "step": 4855 }, { "epoch": 6.625416062131945, "grad_norm": 0.28176983630515445, "learning_rate": 6.723555227745682e-06, "loss": 0.2927, "num_tokens": 3703667911.0, "step": 4856 }, { "epoch": 6.626781599385509, "grad_norm": 0.2763271549502513, "learning_rate": 6.718299482355448e-06, "loss": 0.2888, "num_tokens": 3704374127.0, "step": 4857 }, { "epoch": 6.628147136639072, "grad_norm": 0.28624783615969923, "learning_rate": 6.713048398750258e-06, "loss": 0.3049, "num_tokens": 3705185823.0, "step": 4858 }, { "epoch": 6.629512673892634, "grad_norm": 0.2803727906979525, "learning_rate": 6.7078019785319745e-06, "loss": 0.3214, "num_tokens": 3705958395.0, "step": 4859 }, { "epoch": 6.6308782111461975, "grad_norm": 0.3059733590504472, "learning_rate": 6.702560223301058e-06, "loss": 0.3001, "num_tokens": 3706688539.0, "step": 4860 }, { "epoch": 6.632243748399761, "grad_norm": 0.3066605878441961, "learning_rate": 6.697323134656546e-06, "loss": 0.3015, "num_tokens": 3707515907.0, "step": 4861 }, { "epoch": 6.633609285653324, "grad_norm": 0.27586350031260043, "learning_rate": 6.69209071419604e-06, "loss": 0.2956, "num_tokens": 3708351392.0, "step": 4862 }, { "epoch": 6.634974822906887, "grad_norm": 0.2728621647004553, "learning_rate": 6.686862963515728e-06, "loss": 0.2968, "num_tokens": 3709123282.0, "step": 4863 }, { "epoch": 6.6363403601604505, "grad_norm": 0.2857911727082444, "learning_rate": 6.681639884210369e-06, "loss": 0.2943, "num_tokens": 3709912489.0, "step": 4864 }, { "epoch": 6.637705897414014, "grad_norm": 0.293062295578744, "learning_rate": 6.676421477873305e-06, "loss": 0.3013, "num_tokens": 3710640352.0, "step": 4865 }, { "epoch": 6.639071434667577, "grad_norm": 0.3029604227809178, "learning_rate": 6.671207746096435e-06, "loss": 0.2994, "num_tokens": 3711468154.0, "step": 4866 }, { "epoch": 6.64043697192114, "grad_norm": 0.2837121407804943, "learning_rate": 6.665998690470248e-06, "loss": 0.3235, "num_tokens": 3712280015.0, "step": 4867 }, { "epoch": 6.6418025091747035, "grad_norm": 0.29450084260856146, "learning_rate": 6.660794312583803e-06, "loss": 0.295, "num_tokens": 3713061381.0, "step": 4868 }, { "epoch": 6.643168046428267, "grad_norm": 0.2905034981551081, "learning_rate": 6.655594614024726e-06, "loss": 0.3149, "num_tokens": 3713804399.0, "step": 4869 }, { "epoch": 6.64453358368183, "grad_norm": 0.293445378901972, "learning_rate": 6.650399596379229e-06, "loss": 0.3174, "num_tokens": 3714572796.0, "step": 4870 }, { "epoch": 6.645899120935393, "grad_norm": 0.3024712089925851, "learning_rate": 6.645209261232075e-06, "loss": 0.3066, "num_tokens": 3715305150.0, "step": 4871 }, { "epoch": 6.6472646581889565, "grad_norm": 0.3028248153310855, "learning_rate": 6.640023610166617e-06, "loss": 0.3019, "num_tokens": 3716122203.0, "step": 4872 }, { "epoch": 6.64863019544252, "grad_norm": 0.2820892662880436, "learning_rate": 6.6348426447647756e-06, "loss": 0.2806, "num_tokens": 3716873064.0, "step": 4873 }, { "epoch": 6.649995732696082, "grad_norm": 0.29682373953928426, "learning_rate": 6.629666366607031e-06, "loss": 0.2778, "num_tokens": 3717562519.0, "step": 4874 }, { "epoch": 6.651361269949646, "grad_norm": 0.29385640261143037, "learning_rate": 6.624494777272443e-06, "loss": 0.3068, "num_tokens": 3718371766.0, "step": 4875 }, { "epoch": 6.652726807203209, "grad_norm": 0.29663293613150066, "learning_rate": 6.619327878338646e-06, "loss": 0.2962, "num_tokens": 3719163033.0, "step": 4876 }, { "epoch": 6.654092344456772, "grad_norm": 0.2868612743621081, "learning_rate": 6.614165671381831e-06, "loss": 0.3098, "num_tokens": 3719898484.0, "step": 4877 }, { "epoch": 6.655457881710335, "grad_norm": 0.30854148686234006, "learning_rate": 6.609008157976764e-06, "loss": 0.3011, "num_tokens": 3720597694.0, "step": 4878 }, { "epoch": 6.656823418963898, "grad_norm": 0.31380276926129475, "learning_rate": 6.603855339696787e-06, "loss": 0.298, "num_tokens": 3721256361.0, "step": 4879 }, { "epoch": 6.658188956217462, "grad_norm": 0.30712593777644437, "learning_rate": 6.598707218113791e-06, "loss": 0.3057, "num_tokens": 3722039498.0, "step": 4880 }, { "epoch": 6.659554493471025, "grad_norm": 0.3015380884555517, "learning_rate": 6.5935637947982485e-06, "loss": 0.3009, "num_tokens": 3722817309.0, "step": 4881 }, { "epoch": 6.660920030724588, "grad_norm": 0.2902247291189878, "learning_rate": 6.588425071319202e-06, "loss": 0.2893, "num_tokens": 3723557858.0, "step": 4882 }, { "epoch": 6.662285567978151, "grad_norm": 0.2964395304608082, "learning_rate": 6.583291049244244e-06, "loss": 0.2986, "num_tokens": 3724285837.0, "step": 4883 }, { "epoch": 6.663651105231715, "grad_norm": 0.3042053397453765, "learning_rate": 6.578161730139544e-06, "loss": 0.3126, "num_tokens": 3725080782.0, "step": 4884 }, { "epoch": 6.665016642485278, "grad_norm": 0.2861159822304546, "learning_rate": 6.573037115569838e-06, "loss": 0.2958, "num_tokens": 3725826676.0, "step": 4885 }, { "epoch": 6.666382179738841, "grad_norm": 0.3065095391550887, "learning_rate": 6.567917207098422e-06, "loss": 0.3075, "num_tokens": 3726582591.0, "step": 4886 }, { "epoch": 6.667747716992404, "grad_norm": 0.2861012891044838, "learning_rate": 6.56280200628716e-06, "loss": 0.2933, "num_tokens": 3727341210.0, "step": 4887 }, { "epoch": 6.669113254245968, "grad_norm": 0.296369755635909, "learning_rate": 6.557691514696482e-06, "loss": 0.3047, "num_tokens": 3728081889.0, "step": 4888 }, { "epoch": 6.670478791499531, "grad_norm": 0.2893253554980538, "learning_rate": 6.552585733885369e-06, "loss": 0.2897, "num_tokens": 3728818306.0, "step": 4889 }, { "epoch": 6.671844328753094, "grad_norm": 0.28813906899731373, "learning_rate": 6.547484665411379e-06, "loss": 0.3089, "num_tokens": 3729539453.0, "step": 4890 }, { "epoch": 6.6732098660066566, "grad_norm": 0.3093556067568672, "learning_rate": 6.54238831083063e-06, "loss": 0.3037, "num_tokens": 3730260660.0, "step": 4891 }, { "epoch": 6.67457540326022, "grad_norm": 0.3042108059155777, "learning_rate": 6.537296671697786e-06, "loss": 0.3071, "num_tokens": 3731053538.0, "step": 4892 }, { "epoch": 6.675940940513783, "grad_norm": 0.26780573946817937, "learning_rate": 6.532209749566103e-06, "loss": 0.2878, "num_tokens": 3731823775.0, "step": 4893 }, { "epoch": 6.677306477767346, "grad_norm": 0.2825619742207979, "learning_rate": 6.5271275459873685e-06, "loss": 0.2838, "num_tokens": 3732471064.0, "step": 4894 }, { "epoch": 6.67867201502091, "grad_norm": 0.29374847441123664, "learning_rate": 6.522050062511947e-06, "loss": 0.3023, "num_tokens": 3733266382.0, "step": 4895 }, { "epoch": 6.680037552274473, "grad_norm": 0.29043948312786627, "learning_rate": 6.516977300688758e-06, "loss": 0.304, "num_tokens": 3734064279.0, "step": 4896 }, { "epoch": 6.681403089528036, "grad_norm": 0.29611750485842153, "learning_rate": 6.511909262065287e-06, "loss": 0.2998, "num_tokens": 3734864126.0, "step": 4897 }, { "epoch": 6.682768626781599, "grad_norm": 0.2860336520279, "learning_rate": 6.506845948187561e-06, "loss": 0.3018, "num_tokens": 3735651652.0, "step": 4898 }, { "epoch": 6.684134164035163, "grad_norm": 0.27989335645017316, "learning_rate": 6.501787360600187e-06, "loss": 0.2976, "num_tokens": 3736421919.0, "step": 4899 }, { "epoch": 6.685499701288726, "grad_norm": 0.2768312559389223, "learning_rate": 6.496733500846324e-06, "loss": 0.3089, "num_tokens": 3737214326.0, "step": 4900 }, { "epoch": 6.686865238542289, "grad_norm": 0.28527998802650184, "learning_rate": 6.491684370467669e-06, "loss": 0.3081, "num_tokens": 3738018102.0, "step": 4901 }, { "epoch": 6.688230775795852, "grad_norm": 0.27427612093871034, "learning_rate": 6.486639971004515e-06, "loss": 0.3032, "num_tokens": 3738812017.0, "step": 4902 }, { "epoch": 6.689596313049416, "grad_norm": 0.29320447397866756, "learning_rate": 6.481600303995674e-06, "loss": 0.2838, "num_tokens": 3739540111.0, "step": 4903 }, { "epoch": 6.690961850302979, "grad_norm": 0.29974878053697884, "learning_rate": 6.476565370978536e-06, "loss": 0.2973, "num_tokens": 3740274556.0, "step": 4904 }, { "epoch": 6.692327387556542, "grad_norm": 0.3060214997016435, "learning_rate": 6.4715351734890465e-06, "loss": 0.302, "num_tokens": 3741075465.0, "step": 4905 }, { "epoch": 6.693692924810105, "grad_norm": 0.29877417167200665, "learning_rate": 6.4665097130616875e-06, "loss": 0.3239, "num_tokens": 3741915610.0, "step": 4906 }, { "epoch": 6.695058462063669, "grad_norm": 0.2873019014512984, "learning_rate": 6.461488991229518e-06, "loss": 0.3092, "num_tokens": 3742706368.0, "step": 4907 }, { "epoch": 6.696423999317231, "grad_norm": 0.28042245080529354, "learning_rate": 6.456473009524142e-06, "loss": 0.3069, "num_tokens": 3743389555.0, "step": 4908 }, { "epoch": 6.697789536570794, "grad_norm": 0.2955303093409405, "learning_rate": 6.451461769475724e-06, "loss": 0.3092, "num_tokens": 3744121560.0, "step": 4909 }, { "epoch": 6.6991550738243575, "grad_norm": 0.30819678063561057, "learning_rate": 6.446455272612959e-06, "loss": 0.2885, "num_tokens": 3744903609.0, "step": 4910 }, { "epoch": 6.700520611077921, "grad_norm": 0.26284994859794, "learning_rate": 6.441453520463133e-06, "loss": 0.3083, "num_tokens": 3745673216.0, "step": 4911 }, { "epoch": 6.701886148331484, "grad_norm": 0.2836991968427516, "learning_rate": 6.436456514552053e-06, "loss": 0.3055, "num_tokens": 3746439228.0, "step": 4912 }, { "epoch": 6.703251685585047, "grad_norm": 0.2891639477836564, "learning_rate": 6.431464256404084e-06, "loss": 0.2983, "num_tokens": 3747191897.0, "step": 4913 }, { "epoch": 6.7046172228386105, "grad_norm": 0.2896856261704876, "learning_rate": 6.426476747542163e-06, "loss": 0.3073, "num_tokens": 3747957549.0, "step": 4914 }, { "epoch": 6.705982760092174, "grad_norm": 0.3134952609064321, "learning_rate": 6.421493989487749e-06, "loss": 0.2972, "num_tokens": 3748693883.0, "step": 4915 }, { "epoch": 6.707348297345737, "grad_norm": 0.29241420451859795, "learning_rate": 6.4165159837608714e-06, "loss": 0.3037, "num_tokens": 3749499329.0, "step": 4916 }, { "epoch": 6.7087138345993, "grad_norm": 0.28245414906367794, "learning_rate": 6.411542731880104e-06, "loss": 0.3027, "num_tokens": 3750307157.0, "step": 4917 }, { "epoch": 6.7100793718528635, "grad_norm": 0.29761102242104026, "learning_rate": 6.406574235362576e-06, "loss": 0.3055, "num_tokens": 3751108463.0, "step": 4918 }, { "epoch": 6.711444909106427, "grad_norm": 0.28026172499806035, "learning_rate": 6.401610495723944e-06, "loss": 0.2881, "num_tokens": 3751852071.0, "step": 4919 }, { "epoch": 6.71281044635999, "grad_norm": 0.2806182234953125, "learning_rate": 6.396651514478451e-06, "loss": 0.3016, "num_tokens": 3752661377.0, "step": 4920 }, { "epoch": 6.714175983613553, "grad_norm": 0.3031529434025972, "learning_rate": 6.391697293138856e-06, "loss": 0.2974, "num_tokens": 3753459166.0, "step": 4921 }, { "epoch": 6.7155415208671165, "grad_norm": 0.273173028986241, "learning_rate": 6.386747833216473e-06, "loss": 0.3167, "num_tokens": 3754313391.0, "step": 4922 }, { "epoch": 6.716907058120679, "grad_norm": 0.29273429434394105, "learning_rate": 6.38180313622118e-06, "loss": 0.2999, "num_tokens": 3755051643.0, "step": 4923 }, { "epoch": 6.718272595374243, "grad_norm": 0.2828310162342511, "learning_rate": 6.376863203661379e-06, "loss": 0.3083, "num_tokens": 3755840840.0, "step": 4924 }, { "epoch": 6.719638132627805, "grad_norm": 0.2921327811927366, "learning_rate": 6.3719280370440354e-06, "loss": 0.2932, "num_tokens": 3756641170.0, "step": 4925 }, { "epoch": 6.721003669881369, "grad_norm": 0.2835320775274295, "learning_rate": 6.366997637874652e-06, "loss": 0.2988, "num_tokens": 3757405074.0, "step": 4926 }, { "epoch": 6.722369207134932, "grad_norm": 0.29504246487401825, "learning_rate": 6.362072007657286e-06, "loss": 0.2983, "num_tokens": 3758130099.0, "step": 4927 }, { "epoch": 6.723734744388495, "grad_norm": 0.28344603147100217, "learning_rate": 6.357151147894525e-06, "loss": 0.3064, "num_tokens": 3758891801.0, "step": 4928 }, { "epoch": 6.725100281642058, "grad_norm": 0.2984310528210109, "learning_rate": 6.352235060087516e-06, "loss": 0.3076, "num_tokens": 3759670298.0, "step": 4929 }, { "epoch": 6.726465818895622, "grad_norm": 0.3004360637554939, "learning_rate": 6.347323745735946e-06, "loss": 0.2917, "num_tokens": 3760409195.0, "step": 4930 }, { "epoch": 6.727831356149185, "grad_norm": 0.2821041243046113, "learning_rate": 6.342417206338033e-06, "loss": 0.3134, "num_tokens": 3761175529.0, "step": 4931 }, { "epoch": 6.729196893402748, "grad_norm": 0.28266274486513393, "learning_rate": 6.337515443390567e-06, "loss": 0.3016, "num_tokens": 3761901582.0, "step": 4932 }, { "epoch": 6.730562430656311, "grad_norm": 0.2977661714242812, "learning_rate": 6.332618458388852e-06, "loss": 0.3093, "num_tokens": 3762676076.0, "step": 4933 }, { "epoch": 6.731927967909875, "grad_norm": 0.27986108791076314, "learning_rate": 6.327726252826747e-06, "loss": 0.302, "num_tokens": 3763498810.0, "step": 4934 }, { "epoch": 6.733293505163438, "grad_norm": 0.28302292006102275, "learning_rate": 6.322838828196654e-06, "loss": 0.2961, "num_tokens": 3764265053.0, "step": 4935 }, { "epoch": 6.734659042417001, "grad_norm": 0.29070088508617065, "learning_rate": 6.317956185989517e-06, "loss": 0.3117, "num_tokens": 3765071239.0, "step": 4936 }, { "epoch": 6.7360245796705644, "grad_norm": 0.2815162224589744, "learning_rate": 6.313078327694814e-06, "loss": 0.316, "num_tokens": 3765904500.0, "step": 4937 }, { "epoch": 6.737390116924128, "grad_norm": 0.2704737797265164, "learning_rate": 6.308205254800569e-06, "loss": 0.3018, "num_tokens": 3766668435.0, "step": 4938 }, { "epoch": 6.738755654177691, "grad_norm": 0.2868476430816966, "learning_rate": 6.303336968793354e-06, "loss": 0.2849, "num_tokens": 3767442022.0, "step": 4939 }, { "epoch": 6.740121191431253, "grad_norm": 0.27669720195727765, "learning_rate": 6.298473471158253e-06, "loss": 0.3122, "num_tokens": 3768215505.0, "step": 4940 }, { "epoch": 6.741486728684817, "grad_norm": 0.29175296484249635, "learning_rate": 6.29361476337893e-06, "loss": 0.3009, "num_tokens": 3768892235.0, "step": 4941 }, { "epoch": 6.74285226593838, "grad_norm": 0.30097705767254196, "learning_rate": 6.288760846937555e-06, "loss": 0.3012, "num_tokens": 3769776220.0, "step": 4942 }, { "epoch": 6.744217803191943, "grad_norm": 0.28619844283579154, "learning_rate": 6.2839117233148484e-06, "loss": 0.2942, "num_tokens": 3770488861.0, "step": 4943 }, { "epoch": 6.745583340445506, "grad_norm": 0.3076155477386356, "learning_rate": 6.279067393990072e-06, "loss": 0.2935, "num_tokens": 3771198472.0, "step": 4944 }, { "epoch": 6.74694887769907, "grad_norm": 0.29502130949732047, "learning_rate": 6.2742278604410135e-06, "loss": 0.3045, "num_tokens": 3771961797.0, "step": 4945 }, { "epoch": 6.748314414952633, "grad_norm": 0.2951749992171346, "learning_rate": 6.269393124144012e-06, "loss": 0.3066, "num_tokens": 3772739107.0, "step": 4946 }, { "epoch": 6.749679952206196, "grad_norm": 0.299867150664693, "learning_rate": 6.264563186573933e-06, "loss": 0.3073, "num_tokens": 3773502271.0, "step": 4947 }, { "epoch": 6.751045489459759, "grad_norm": 0.29270934868735066, "learning_rate": 6.2597380492041805e-06, "loss": 0.3064, "num_tokens": 3774242645.0, "step": 4948 }, { "epoch": 6.752411026713323, "grad_norm": 0.2721703576548412, "learning_rate": 6.254917713506699e-06, "loss": 0.314, "num_tokens": 3775086458.0, "step": 4949 }, { "epoch": 6.753776563966886, "grad_norm": 0.2790647407672826, "learning_rate": 6.250102180951962e-06, "loss": 0.2919, "num_tokens": 3775843821.0, "step": 4950 }, { "epoch": 6.755142101220449, "grad_norm": 0.2793794853002977, "learning_rate": 6.245291453008977e-06, "loss": 0.3004, "num_tokens": 3776655378.0, "step": 4951 }, { "epoch": 6.756507638474012, "grad_norm": 0.2898843024156639, "learning_rate": 6.240485531145292e-06, "loss": 0.3047, "num_tokens": 3777461879.0, "step": 4952 }, { "epoch": 6.757873175727576, "grad_norm": 0.29427022874789344, "learning_rate": 6.235684416826989e-06, "loss": 0.2984, "num_tokens": 3778223092.0, "step": 4953 }, { "epoch": 6.759238712981139, "grad_norm": 0.29535756649459677, "learning_rate": 6.230888111518673e-06, "loss": 0.3095, "num_tokens": 3778985790.0, "step": 4954 }, { "epoch": 6.760604250234701, "grad_norm": 0.2762479012916092, "learning_rate": 6.226096616683489e-06, "loss": 0.2937, "num_tokens": 3779709374.0, "step": 4955 }, { "epoch": 6.761969787488265, "grad_norm": 0.2929379164276804, "learning_rate": 6.221309933783119e-06, "loss": 0.3082, "num_tokens": 3780483118.0, "step": 4956 }, { "epoch": 6.763335324741828, "grad_norm": 0.2732483447168066, "learning_rate": 6.216528064277772e-06, "loss": 0.2924, "num_tokens": 3781283858.0, "step": 4957 }, { "epoch": 6.764700861995391, "grad_norm": 0.28999254410772046, "learning_rate": 6.211751009626187e-06, "loss": 0.3108, "num_tokens": 3782077737.0, "step": 4958 }, { "epoch": 6.766066399248954, "grad_norm": 0.28885102708644544, "learning_rate": 6.206978771285643e-06, "loss": 0.2805, "num_tokens": 3782772764.0, "step": 4959 }, { "epoch": 6.7674319365025175, "grad_norm": 0.2926173894152502, "learning_rate": 6.202211350711934e-06, "loss": 0.299, "num_tokens": 3783554966.0, "step": 4960 }, { "epoch": 6.768797473756081, "grad_norm": 0.27337200300351117, "learning_rate": 6.197448749359399e-06, "loss": 0.3057, "num_tokens": 3784310842.0, "step": 4961 }, { "epoch": 6.770163011009644, "grad_norm": 0.3095220356042182, "learning_rate": 6.192690968680902e-06, "loss": 0.3037, "num_tokens": 3785048854.0, "step": 4962 }, { "epoch": 6.771528548263207, "grad_norm": 0.29574283856072553, "learning_rate": 6.187938010127831e-06, "loss": 0.3069, "num_tokens": 3785824720.0, "step": 4963 }, { "epoch": 6.7728940855167705, "grad_norm": 0.27089406717700837, "learning_rate": 6.183189875150113e-06, "loss": 0.3154, "num_tokens": 3786591500.0, "step": 4964 }, { "epoch": 6.774259622770334, "grad_norm": 0.29297625461724985, "learning_rate": 6.178446565196194e-06, "loss": 0.3158, "num_tokens": 3787397898.0, "step": 4965 }, { "epoch": 6.775625160023897, "grad_norm": 0.2906808388095171, "learning_rate": 6.173708081713056e-06, "loss": 0.3033, "num_tokens": 3788116540.0, "step": 4966 }, { "epoch": 6.77699069727746, "grad_norm": 0.29956296756130574, "learning_rate": 6.1689744261462046e-06, "loss": 0.3011, "num_tokens": 3788856192.0, "step": 4967 }, { "epoch": 6.7783562345310235, "grad_norm": 0.2799491334423892, "learning_rate": 6.164245599939672e-06, "loss": 0.3089, "num_tokens": 3789642109.0, "step": 4968 }, { "epoch": 6.779721771784587, "grad_norm": 0.2977270058425338, "learning_rate": 6.159521604536017e-06, "loss": 0.3055, "num_tokens": 3790393148.0, "step": 4969 }, { "epoch": 6.78108730903815, "grad_norm": 0.3077491417206324, "learning_rate": 6.15480244137633e-06, "loss": 0.2942, "num_tokens": 3791179744.0, "step": 4970 }, { "epoch": 6.782452846291713, "grad_norm": 0.27389568965380934, "learning_rate": 6.150088111900221e-06, "loss": 0.2986, "num_tokens": 3791989393.0, "step": 4971 }, { "epoch": 6.783818383545276, "grad_norm": 0.2768922473880302, "learning_rate": 6.1453786175458275e-06, "loss": 0.281, "num_tokens": 3792755321.0, "step": 4972 }, { "epoch": 6.785183920798839, "grad_norm": 0.2701005047744615, "learning_rate": 6.14067395974981e-06, "loss": 0.31, "num_tokens": 3793559913.0, "step": 4973 }, { "epoch": 6.786549458052402, "grad_norm": 0.29314009153794823, "learning_rate": 6.13597413994736e-06, "loss": 0.3067, "num_tokens": 3794307912.0, "step": 4974 }, { "epoch": 6.787914995305965, "grad_norm": 0.29869423130969636, "learning_rate": 6.1312791595721855e-06, "loss": 0.2896, "num_tokens": 3795024514.0, "step": 4975 }, { "epoch": 6.789280532559529, "grad_norm": 0.2903749665030764, "learning_rate": 6.12658902005653e-06, "loss": 0.2899, "num_tokens": 3795813635.0, "step": 4976 }, { "epoch": 6.790646069813092, "grad_norm": 0.2853643943993002, "learning_rate": 6.121903722831139e-06, "loss": 0.3073, "num_tokens": 3796530108.0, "step": 4977 }, { "epoch": 6.792011607066655, "grad_norm": 0.29366750621844795, "learning_rate": 6.1172232693253005e-06, "loss": 0.2819, "num_tokens": 3797198179.0, "step": 4978 }, { "epoch": 6.793377144320218, "grad_norm": 0.3015980012770455, "learning_rate": 6.112547660966821e-06, "loss": 0.2859, "num_tokens": 3797902352.0, "step": 4979 }, { "epoch": 6.794742681573782, "grad_norm": 0.2931621651019501, "learning_rate": 6.107876899182025e-06, "loss": 0.3078, "num_tokens": 3798656909.0, "step": 4980 }, { "epoch": 6.796108218827345, "grad_norm": 0.2758387591350508, "learning_rate": 6.103210985395749e-06, "loss": 0.3023, "num_tokens": 3799396961.0, "step": 4981 }, { "epoch": 6.797473756080908, "grad_norm": 0.2890325151027791, "learning_rate": 6.098549921031378e-06, "loss": 0.3145, "num_tokens": 3800264390.0, "step": 4982 }, { "epoch": 6.7988392933344715, "grad_norm": 0.28044805662757477, "learning_rate": 6.093893707510794e-06, "loss": 0.2927, "num_tokens": 3801063181.0, "step": 4983 }, { "epoch": 6.800204830588035, "grad_norm": 0.29934207784478867, "learning_rate": 6.0892423462543955e-06, "loss": 0.2869, "num_tokens": 3801811720.0, "step": 4984 }, { "epoch": 6.801570367841598, "grad_norm": 0.2829669886267452, "learning_rate": 6.0845958386811285e-06, "loss": 0.3137, "num_tokens": 3802586615.0, "step": 4985 }, { "epoch": 6.802935905095161, "grad_norm": 0.2898723139645192, "learning_rate": 6.079954186208432e-06, "loss": 0.2934, "num_tokens": 3803334416.0, "step": 4986 }, { "epoch": 6.804301442348724, "grad_norm": 0.2867078892289295, "learning_rate": 6.075317390252273e-06, "loss": 0.3179, "num_tokens": 3804120562.0, "step": 4987 }, { "epoch": 6.805666979602288, "grad_norm": 0.29612212993442444, "learning_rate": 6.070685452227141e-06, "loss": 0.3098, "num_tokens": 3804907449.0, "step": 4988 }, { "epoch": 6.80703251685585, "grad_norm": 0.3133889792509136, "learning_rate": 6.06605837354604e-06, "loss": 0.2934, "num_tokens": 3805605039.0, "step": 4989 }, { "epoch": 6.808398054109413, "grad_norm": 0.28502029338427715, "learning_rate": 6.061436155620484e-06, "loss": 0.3109, "num_tokens": 3806454566.0, "step": 4990 }, { "epoch": 6.809763591362977, "grad_norm": 0.290457225268607, "learning_rate": 6.056818799860516e-06, "loss": 0.3029, "num_tokens": 3807283743.0, "step": 4991 }, { "epoch": 6.81112912861654, "grad_norm": 0.2723104973610973, "learning_rate": 6.052206307674696e-06, "loss": 0.3094, "num_tokens": 3808003476.0, "step": 4992 }, { "epoch": 6.812494665870103, "grad_norm": 0.29537852838265044, "learning_rate": 6.047598680470086e-06, "loss": 0.2953, "num_tokens": 3808761311.0, "step": 4993 }, { "epoch": 6.813860203123666, "grad_norm": 0.27627312194920456, "learning_rate": 6.042995919652284e-06, "loss": 0.3096, "num_tokens": 3809601840.0, "step": 4994 }, { "epoch": 6.81522574037723, "grad_norm": 0.2803494310937334, "learning_rate": 6.038398026625386e-06, "loss": 0.2794, "num_tokens": 3810296487.0, "step": 4995 }, { "epoch": 6.816591277630793, "grad_norm": 0.2876838197164577, "learning_rate": 6.03380500279201e-06, "loss": 0.3022, "num_tokens": 3811048864.0, "step": 4996 }, { "epoch": 6.817956814884356, "grad_norm": 0.29184572018946997, "learning_rate": 6.029216849553292e-06, "loss": 0.2904, "num_tokens": 3811750342.0, "step": 4997 }, { "epoch": 6.819322352137919, "grad_norm": 0.2895114028536241, "learning_rate": 6.0246335683088796e-06, "loss": 0.3085, "num_tokens": 3812495918.0, "step": 4998 }, { "epoch": 6.820687889391483, "grad_norm": 0.3012590554062551, "learning_rate": 6.020055160456929e-06, "loss": 0.3044, "num_tokens": 3813294193.0, "step": 4999 }, { "epoch": 6.822053426645046, "grad_norm": 0.2785972294157619, "learning_rate": 6.015481627394115e-06, "loss": 0.301, "num_tokens": 3814097527.0, "step": 5000 }, { "epoch": 6.823418963898609, "grad_norm": 0.2642437729344967, "learning_rate": 6.010912970515631e-06, "loss": 0.3206, "num_tokens": 3814895511.0, "step": 5001 }, { "epoch": 6.824784501152172, "grad_norm": 0.2881802799308491, "learning_rate": 6.0063491912151614e-06, "loss": 0.3134, "num_tokens": 3815668973.0, "step": 5002 }, { "epoch": 6.826150038405736, "grad_norm": 0.2731011137279772, "learning_rate": 6.001790290884938e-06, "loss": 0.3099, "num_tokens": 3816435258.0, "step": 5003 }, { "epoch": 6.827515575659298, "grad_norm": 0.2839832760841372, "learning_rate": 5.997236270915666e-06, "loss": 0.3121, "num_tokens": 3817205954.0, "step": 5004 }, { "epoch": 6.828881112912862, "grad_norm": 0.28053281955149256, "learning_rate": 5.992687132696591e-06, "loss": 0.3027, "num_tokens": 3817991659.0, "step": 5005 }, { "epoch": 6.8302466501664245, "grad_norm": 0.2781042557090496, "learning_rate": 5.988142877615454e-06, "loss": 0.311, "num_tokens": 3818833465.0, "step": 5006 }, { "epoch": 6.831612187419988, "grad_norm": 0.292724228327413, "learning_rate": 5.9836035070585095e-06, "loss": 0.3083, "num_tokens": 3819547114.0, "step": 5007 }, { "epoch": 6.832977724673551, "grad_norm": 0.28818084959199997, "learning_rate": 5.979069022410523e-06, "loss": 0.2905, "num_tokens": 3820328263.0, "step": 5008 }, { "epoch": 6.834343261927114, "grad_norm": 0.274818359643631, "learning_rate": 5.9745394250547695e-06, "loss": 0.3019, "num_tokens": 3821154686.0, "step": 5009 }, { "epoch": 6.8357087991806775, "grad_norm": 0.2733703632043778, "learning_rate": 5.970014716373037e-06, "loss": 0.3114, "num_tokens": 3821893728.0, "step": 5010 }, { "epoch": 6.837074336434241, "grad_norm": 0.2889295136979445, "learning_rate": 5.965494897745609e-06, "loss": 0.3096, "num_tokens": 3822652488.0, "step": 5011 }, { "epoch": 6.838439873687804, "grad_norm": 0.28748794940328687, "learning_rate": 5.960979970551298e-06, "loss": 0.3016, "num_tokens": 3823479741.0, "step": 5012 }, { "epoch": 6.839805410941367, "grad_norm": 0.2811321979288326, "learning_rate": 5.956469936167405e-06, "loss": 0.2969, "num_tokens": 3824186577.0, "step": 5013 }, { "epoch": 6.8411709481949305, "grad_norm": 0.28479155968351166, "learning_rate": 5.951964795969748e-06, "loss": 0.3019, "num_tokens": 3824975533.0, "step": 5014 }, { "epoch": 6.842536485448494, "grad_norm": 0.28250653453282515, "learning_rate": 5.947464551332654e-06, "loss": 0.3131, "num_tokens": 3825818876.0, "step": 5015 }, { "epoch": 6.843902022702057, "grad_norm": 0.2862041290068137, "learning_rate": 5.942969203628946e-06, "loss": 0.3036, "num_tokens": 3826577474.0, "step": 5016 }, { "epoch": 6.84526755995562, "grad_norm": 0.27998466063473776, "learning_rate": 5.938478754229964e-06, "loss": 0.3108, "num_tokens": 3827338494.0, "step": 5017 }, { "epoch": 6.846633097209184, "grad_norm": 0.2856965844209647, "learning_rate": 5.933993204505546e-06, "loss": 0.3009, "num_tokens": 3828093501.0, "step": 5018 }, { "epoch": 6.847998634462747, "grad_norm": 0.2820787911197648, "learning_rate": 5.929512555824047e-06, "loss": 0.2844, "num_tokens": 3828802521.0, "step": 5019 }, { "epoch": 6.84936417171631, "grad_norm": 0.288959365525045, "learning_rate": 5.9250368095523045e-06, "loss": 0.3106, "num_tokens": 3829577322.0, "step": 5020 }, { "epoch": 6.850729708969872, "grad_norm": 0.28152872328832856, "learning_rate": 5.920565967055693e-06, "loss": 0.3041, "num_tokens": 3830378979.0, "step": 5021 }, { "epoch": 6.852095246223436, "grad_norm": 0.27870701668281417, "learning_rate": 5.916100029698059e-06, "loss": 0.3018, "num_tokens": 3831153186.0, "step": 5022 }, { "epoch": 6.853460783476999, "grad_norm": 0.28230236467529973, "learning_rate": 5.911638998841774e-06, "loss": 0.31, "num_tokens": 3831932008.0, "step": 5023 }, { "epoch": 6.854826320730562, "grad_norm": 0.2897173437121138, "learning_rate": 5.9071828758477036e-06, "loss": 0.3053, "num_tokens": 3832698183.0, "step": 5024 }, { "epoch": 6.8561918579841254, "grad_norm": 0.28684876496207057, "learning_rate": 5.902731662075215e-06, "loss": 0.2871, "num_tokens": 3833439499.0, "step": 5025 }, { "epoch": 6.857557395237689, "grad_norm": 0.2806748815711873, "learning_rate": 5.8982853588821835e-06, "loss": 0.3093, "num_tokens": 3834150219.0, "step": 5026 }, { "epoch": 6.858922932491252, "grad_norm": 0.2972629573414805, "learning_rate": 5.89384396762498e-06, "loss": 0.292, "num_tokens": 3834860406.0, "step": 5027 }, { "epoch": 6.860288469744815, "grad_norm": 0.2880387916720618, "learning_rate": 5.88940748965849e-06, "loss": 0.3084, "num_tokens": 3835644412.0, "step": 5028 }, { "epoch": 6.8616540069983785, "grad_norm": 0.2914535760783687, "learning_rate": 5.8849759263360795e-06, "loss": 0.3024, "num_tokens": 3836365438.0, "step": 5029 }, { "epoch": 6.863019544251942, "grad_norm": 0.28857575501030686, "learning_rate": 5.880549279009633e-06, "loss": 0.3101, "num_tokens": 3837117155.0, "step": 5030 }, { "epoch": 6.864385081505505, "grad_norm": 0.30278777918343586, "learning_rate": 5.8761275490295244e-06, "loss": 0.2981, "num_tokens": 3837823810.0, "step": 5031 }, { "epoch": 6.865750618759068, "grad_norm": 0.30288008032483077, "learning_rate": 5.871710737744638e-06, "loss": 0.3, "num_tokens": 3838534500.0, "step": 5032 }, { "epoch": 6.8671161560126315, "grad_norm": 0.3020952657550729, "learning_rate": 5.8672988465023515e-06, "loss": 0.3066, "num_tokens": 3839369779.0, "step": 5033 }, { "epoch": 6.868481693266195, "grad_norm": 0.27344342238852776, "learning_rate": 5.862891876648536e-06, "loss": 0.3181, "num_tokens": 3840143420.0, "step": 5034 }, { "epoch": 6.869847230519758, "grad_norm": 0.2891251575659213, "learning_rate": 5.8584898295275696e-06, "loss": 0.3055, "num_tokens": 3840932082.0, "step": 5035 }, { "epoch": 6.87121276777332, "grad_norm": 0.2851261103680355, "learning_rate": 5.854092706482327e-06, "loss": 0.3047, "num_tokens": 3841694212.0, "step": 5036 }, { "epoch": 6.8725783050268845, "grad_norm": 0.28685985486211996, "learning_rate": 5.849700508854179e-06, "loss": 0.3066, "num_tokens": 3842486151.0, "step": 5037 }, { "epoch": 6.873943842280447, "grad_norm": 0.2787647804925751, "learning_rate": 5.845313237982999e-06, "loss": 0.3087, "num_tokens": 3843261714.0, "step": 5038 }, { "epoch": 6.87530937953401, "grad_norm": 0.2882376643279525, "learning_rate": 5.840930895207149e-06, "loss": 0.3025, "num_tokens": 3844006127.0, "step": 5039 }, { "epoch": 6.876674916787573, "grad_norm": 0.26862163659054883, "learning_rate": 5.836553481863492e-06, "loss": 0.3105, "num_tokens": 3844796036.0, "step": 5040 }, { "epoch": 6.878040454041137, "grad_norm": 0.29049527955961035, "learning_rate": 5.8321809992873865e-06, "loss": 0.304, "num_tokens": 3845533214.0, "step": 5041 }, { "epoch": 6.8794059912947, "grad_norm": 0.2751816466275523, "learning_rate": 5.827813448812693e-06, "loss": 0.3043, "num_tokens": 3846285358.0, "step": 5042 }, { "epoch": 6.880771528548263, "grad_norm": 0.2926432179262122, "learning_rate": 5.823450831771757e-06, "loss": 0.2989, "num_tokens": 3847041612.0, "step": 5043 }, { "epoch": 6.882137065801826, "grad_norm": 0.28068537429609997, "learning_rate": 5.8190931494954265e-06, "loss": 0.3004, "num_tokens": 3847812082.0, "step": 5044 }, { "epoch": 6.88350260305539, "grad_norm": 0.2886628753214944, "learning_rate": 5.814740403313042e-06, "loss": 0.3048, "num_tokens": 3848620062.0, "step": 5045 }, { "epoch": 6.884868140308953, "grad_norm": 0.2726145379810471, "learning_rate": 5.81039259455243e-06, "loss": 0.2929, "num_tokens": 3849348078.0, "step": 5046 }, { "epoch": 6.886233677562516, "grad_norm": 0.2821291702331937, "learning_rate": 5.806049724539931e-06, "loss": 0.3018, "num_tokens": 3850105462.0, "step": 5047 }, { "epoch": 6.887599214816079, "grad_norm": 0.28405875041367984, "learning_rate": 5.8017117946003576e-06, "loss": 0.3089, "num_tokens": 3850848302.0, "step": 5048 }, { "epoch": 6.888964752069643, "grad_norm": 0.2927165836683703, "learning_rate": 5.797378806057029e-06, "loss": 0.2977, "num_tokens": 3851549222.0, "step": 5049 }, { "epoch": 6.890330289323206, "grad_norm": 0.293413787645001, "learning_rate": 5.793050760231749e-06, "loss": 0.3065, "num_tokens": 3852279486.0, "step": 5050 }, { "epoch": 6.891695826576769, "grad_norm": 0.2842223774574781, "learning_rate": 5.788727658444823e-06, "loss": 0.3004, "num_tokens": 3853023235.0, "step": 5051 }, { "epoch": 6.893061363830332, "grad_norm": 0.29001126856811815, "learning_rate": 5.784409502015035e-06, "loss": 0.3079, "num_tokens": 3853804095.0, "step": 5052 }, { "epoch": 6.894426901083895, "grad_norm": 0.2867991009638734, "learning_rate": 5.780096292259669e-06, "loss": 0.2954, "num_tokens": 3854634477.0, "step": 5053 }, { "epoch": 6.895792438337458, "grad_norm": 0.2626064157333756, "learning_rate": 5.775788030494503e-06, "loss": 0.3092, "num_tokens": 3855461523.0, "step": 5054 }, { "epoch": 6.897157975591021, "grad_norm": 0.2871478851897046, "learning_rate": 5.771484718033793e-06, "loss": 0.295, "num_tokens": 3856284734.0, "step": 5055 }, { "epoch": 6.8985235128445845, "grad_norm": 0.25550178275414454, "learning_rate": 5.767186356190305e-06, "loss": 0.2834, "num_tokens": 3857004904.0, "step": 5056 }, { "epoch": 6.899889050098148, "grad_norm": 0.29360931667812196, "learning_rate": 5.762892946275271e-06, "loss": 0.3006, "num_tokens": 3857767665.0, "step": 5057 }, { "epoch": 6.901254587351711, "grad_norm": 0.2794225009062899, "learning_rate": 5.758604489598432e-06, "loss": 0.3026, "num_tokens": 3858507107.0, "step": 5058 }, { "epoch": 6.902620124605274, "grad_norm": 0.2902283488918725, "learning_rate": 5.754320987468006e-06, "loss": 0.2955, "num_tokens": 3859253769.0, "step": 5059 }, { "epoch": 6.9039856618588376, "grad_norm": 0.2919020798201435, "learning_rate": 5.750042441190713e-06, "loss": 0.3141, "num_tokens": 3860080498.0, "step": 5060 }, { "epoch": 6.905351199112401, "grad_norm": 0.27791260474869856, "learning_rate": 5.745768852071738e-06, "loss": 0.2862, "num_tokens": 3860793528.0, "step": 5061 }, { "epoch": 6.906716736365964, "grad_norm": 0.30307225218682715, "learning_rate": 5.7415002214147775e-06, "loss": 0.3158, "num_tokens": 3861572081.0, "step": 5062 }, { "epoch": 6.908082273619527, "grad_norm": 0.2731158829338556, "learning_rate": 5.737236550522005e-06, "loss": 0.2951, "num_tokens": 3862373481.0, "step": 5063 }, { "epoch": 6.909447810873091, "grad_norm": 0.28879869996757723, "learning_rate": 5.7329778406940775e-06, "loss": 0.3112, "num_tokens": 3863095340.0, "step": 5064 }, { "epoch": 6.910813348126654, "grad_norm": 0.29442120698387964, "learning_rate": 5.72872409323015e-06, "loss": 0.2916, "num_tokens": 3863842990.0, "step": 5065 }, { "epoch": 6.912178885380217, "grad_norm": 0.28129951829083555, "learning_rate": 5.724475309427851e-06, "loss": 0.2971, "num_tokens": 3864608076.0, "step": 5066 }, { "epoch": 6.91354442263378, "grad_norm": 0.2924581716490116, "learning_rate": 5.7202314905833e-06, "loss": 0.2998, "num_tokens": 3865371467.0, "step": 5067 }, { "epoch": 6.914909959887343, "grad_norm": 0.2819685257668616, "learning_rate": 5.715992637991106e-06, "loss": 0.3026, "num_tokens": 3866195731.0, "step": 5068 }, { "epoch": 6.916275497140907, "grad_norm": 0.28346447091914845, "learning_rate": 5.711758752944358e-06, "loss": 0.2999, "num_tokens": 3866932539.0, "step": 5069 }, { "epoch": 6.917641034394469, "grad_norm": 0.2943894592890218, "learning_rate": 5.707529836734631e-06, "loss": 0.3019, "num_tokens": 3867667610.0, "step": 5070 }, { "epoch": 6.9190065716480325, "grad_norm": 0.2851964474940891, "learning_rate": 5.7033058906519825e-06, "loss": 0.2975, "num_tokens": 3868387990.0, "step": 5071 }, { "epoch": 6.920372108901596, "grad_norm": 0.2945266209684662, "learning_rate": 5.6990869159849615e-06, "loss": 0.299, "num_tokens": 3869145399.0, "step": 5072 }, { "epoch": 6.921737646155159, "grad_norm": 0.2899576640049709, "learning_rate": 5.694872914020581e-06, "loss": 0.3051, "num_tokens": 3870018970.0, "step": 5073 }, { "epoch": 6.923103183408722, "grad_norm": 0.26282858064908093, "learning_rate": 5.690663886044368e-06, "loss": 0.3099, "num_tokens": 3870775909.0, "step": 5074 }, { "epoch": 6.9244687206622855, "grad_norm": 0.30310526643597113, "learning_rate": 5.686459833340302e-06, "loss": 0.3101, "num_tokens": 3871577356.0, "step": 5075 }, { "epoch": 6.925834257915849, "grad_norm": 0.2906402220970064, "learning_rate": 5.682260757190861e-06, "loss": 0.3105, "num_tokens": 3872360216.0, "step": 5076 }, { "epoch": 6.927199795169412, "grad_norm": 0.2915319550361507, "learning_rate": 5.678066658877006e-06, "loss": 0.2867, "num_tokens": 3873143448.0, "step": 5077 }, { "epoch": 6.928565332422975, "grad_norm": 0.26828246739193656, "learning_rate": 5.6738775396781676e-06, "loss": 0.3028, "num_tokens": 3873878392.0, "step": 5078 }, { "epoch": 6.9299308696765385, "grad_norm": 0.3066901705421582, "learning_rate": 5.669693400872266e-06, "loss": 0.2945, "num_tokens": 3874635322.0, "step": 5079 }, { "epoch": 6.931296406930102, "grad_norm": 0.2742249323418004, "learning_rate": 5.6655142437357035e-06, "loss": 0.3055, "num_tokens": 3875413840.0, "step": 5080 }, { "epoch": 6.932661944183665, "grad_norm": 0.27395330856353545, "learning_rate": 5.6613400695433605e-06, "loss": 0.3191, "num_tokens": 3876167627.0, "step": 5081 }, { "epoch": 6.934027481437228, "grad_norm": 0.31645438763638406, "learning_rate": 5.65717087956859e-06, "loss": 0.3001, "num_tokens": 3876937321.0, "step": 5082 }, { "epoch": 6.9353930186907915, "grad_norm": 0.27889638879918793, "learning_rate": 5.653006675083243e-06, "loss": 0.3164, "num_tokens": 3877754464.0, "step": 5083 }, { "epoch": 6.936758555944355, "grad_norm": 0.27060496771332293, "learning_rate": 5.648847457357631e-06, "loss": 0.2889, "num_tokens": 3878573801.0, "step": 5084 }, { "epoch": 6.938124093197917, "grad_norm": 0.2906987054865664, "learning_rate": 5.644693227660545e-06, "loss": 0.2985, "num_tokens": 3879322391.0, "step": 5085 }, { "epoch": 6.93948963045148, "grad_norm": 0.2861533904466326, "learning_rate": 5.6405439872592734e-06, "loss": 0.3084, "num_tokens": 3880078095.0, "step": 5086 }, { "epoch": 6.940855167705044, "grad_norm": 0.29598945313034264, "learning_rate": 5.63639973741956e-06, "loss": 0.304, "num_tokens": 3880792429.0, "step": 5087 }, { "epoch": 6.942220704958607, "grad_norm": 0.30242768874979653, "learning_rate": 5.632260479405642e-06, "loss": 0.2981, "num_tokens": 3881590359.0, "step": 5088 }, { "epoch": 6.94358624221217, "grad_norm": 0.29168693927792244, "learning_rate": 5.628126214480222e-06, "loss": 0.3055, "num_tokens": 3882303463.0, "step": 5089 }, { "epoch": 6.944951779465733, "grad_norm": 0.3015311640184358, "learning_rate": 5.623996943904494e-06, "loss": 0.309, "num_tokens": 3883126732.0, "step": 5090 }, { "epoch": 6.946317316719297, "grad_norm": 0.29755097394450336, "learning_rate": 5.619872668938106e-06, "loss": 0.2962, "num_tokens": 3883865117.0, "step": 5091 }, { "epoch": 6.94768285397286, "grad_norm": 0.27799380283921965, "learning_rate": 5.615753390839209e-06, "loss": 0.3048, "num_tokens": 3884616440.0, "step": 5092 }, { "epoch": 6.949048391226423, "grad_norm": 0.30351401870120964, "learning_rate": 5.611639110864412e-06, "loss": 0.308, "num_tokens": 3885409433.0, "step": 5093 }, { "epoch": 6.950413928479986, "grad_norm": 0.2984888517547274, "learning_rate": 5.6075298302687975e-06, "loss": 0.3137, "num_tokens": 3886130430.0, "step": 5094 }, { "epoch": 6.95177946573355, "grad_norm": 0.27902344556327774, "learning_rate": 5.603425550305941e-06, "loss": 0.2985, "num_tokens": 3886959051.0, "step": 5095 }, { "epoch": 6.953145002987113, "grad_norm": 0.2738247752748768, "learning_rate": 5.59932627222787e-06, "loss": 0.2966, "num_tokens": 3887738326.0, "step": 5096 }, { "epoch": 6.954510540240676, "grad_norm": 0.27136504380519805, "learning_rate": 5.595231997285099e-06, "loss": 0.3118, "num_tokens": 3888520295.0, "step": 5097 }, { "epoch": 6.955876077494239, "grad_norm": 0.3066687126791328, "learning_rate": 5.591142726726617e-06, "loss": 0.3101, "num_tokens": 3889234019.0, "step": 5098 }, { "epoch": 6.957241614747803, "grad_norm": 0.32412348660995366, "learning_rate": 5.587058461799885e-06, "loss": 0.3223, "num_tokens": 3890025719.0, "step": 5099 }, { "epoch": 6.958607152001366, "grad_norm": 0.2960706031196194, "learning_rate": 5.582979203750829e-06, "loss": 0.3125, "num_tokens": 3890729564.0, "step": 5100 }, { "epoch": 6.959972689254929, "grad_norm": 0.3102363054882868, "learning_rate": 5.578904953823856e-06, "loss": 0.308, "num_tokens": 3891490208.0, "step": 5101 }, { "epoch": 6.9613382265084915, "grad_norm": 0.2923864898168412, "learning_rate": 5.574835713261846e-06, "loss": 0.3124, "num_tokens": 3892264007.0, "step": 5102 }, { "epoch": 6.962703763762055, "grad_norm": 0.29604430321052255, "learning_rate": 5.570771483306146e-06, "loss": 0.3056, "num_tokens": 3893051470.0, "step": 5103 }, { "epoch": 6.964069301015618, "grad_norm": 0.2940594766081426, "learning_rate": 5.566712265196579e-06, "loss": 0.3114, "num_tokens": 3893775920.0, "step": 5104 }, { "epoch": 6.965434838269181, "grad_norm": 0.2950641069216973, "learning_rate": 5.562658060171433e-06, "loss": 0.3077, "num_tokens": 3894556649.0, "step": 5105 }, { "epoch": 6.966800375522745, "grad_norm": 0.2806623483674088, "learning_rate": 5.558608869467472e-06, "loss": 0.291, "num_tokens": 3895252351.0, "step": 5106 }, { "epoch": 6.968165912776308, "grad_norm": 0.2914489965320985, "learning_rate": 5.554564694319928e-06, "loss": 0.3049, "num_tokens": 3895975972.0, "step": 5107 }, { "epoch": 6.969531450029871, "grad_norm": 0.28930379676540474, "learning_rate": 5.5505255359625086e-06, "loss": 0.3085, "num_tokens": 3896684622.0, "step": 5108 }, { "epoch": 6.970896987283434, "grad_norm": 0.3005562122725083, "learning_rate": 5.546491395627378e-06, "loss": 0.2887, "num_tokens": 3897492108.0, "step": 5109 }, { "epoch": 6.972262524536998, "grad_norm": 0.2672886625787262, "learning_rate": 5.542462274545182e-06, "loss": 0.2959, "num_tokens": 3898236031.0, "step": 5110 }, { "epoch": 6.973628061790561, "grad_norm": 0.29790404699175804, "learning_rate": 5.538438173945032e-06, "loss": 0.3095, "num_tokens": 3898994040.0, "step": 5111 }, { "epoch": 6.974993599044124, "grad_norm": 0.3016388764491383, "learning_rate": 5.534419095054504e-06, "loss": 0.3005, "num_tokens": 3899722854.0, "step": 5112 }, { "epoch": 6.976359136297687, "grad_norm": 0.2820898377480697, "learning_rate": 5.53040503909965e-06, "loss": 0.2898, "num_tokens": 3900373100.0, "step": 5113 }, { "epoch": 6.977724673551251, "grad_norm": 0.2900778278489219, "learning_rate": 5.526396007304978e-06, "loss": 0.3071, "num_tokens": 3901170777.0, "step": 5114 }, { "epoch": 6.979090210804814, "grad_norm": 0.2810456694490665, "learning_rate": 5.522392000893473e-06, "loss": 0.3124, "num_tokens": 3901928346.0, "step": 5115 }, { "epoch": 6.980455748058377, "grad_norm": 0.29357630740096224, "learning_rate": 5.518393021086587e-06, "loss": 0.3077, "num_tokens": 3902697700.0, "step": 5116 }, { "epoch": 6.9818212853119395, "grad_norm": 0.26988239606738035, "learning_rate": 5.514399069104229e-06, "loss": 0.2818, "num_tokens": 3903458551.0, "step": 5117 }, { "epoch": 6.983186822565504, "grad_norm": 0.27166079605412957, "learning_rate": 5.510410146164784e-06, "loss": 0.3066, "num_tokens": 3904206584.0, "step": 5118 }, { "epoch": 6.984552359819066, "grad_norm": 0.28605779631916445, "learning_rate": 5.506426253485101e-06, "loss": 0.3079, "num_tokens": 3904903156.0, "step": 5119 }, { "epoch": 6.985917897072629, "grad_norm": 0.29074441922943633, "learning_rate": 5.502447392280493e-06, "loss": 0.3009, "num_tokens": 3905701816.0, "step": 5120 }, { "epoch": 6.9872834343261925, "grad_norm": 0.2799277368816346, "learning_rate": 5.498473563764734e-06, "loss": 0.31, "num_tokens": 3906416443.0, "step": 5121 }, { "epoch": 6.988648971579756, "grad_norm": 0.2974217631305312, "learning_rate": 5.494504769150076e-06, "loss": 0.2923, "num_tokens": 3907144273.0, "step": 5122 }, { "epoch": 6.990014508833319, "grad_norm": 0.2868114035418176, "learning_rate": 5.490541009647216e-06, "loss": 0.2971, "num_tokens": 3907852900.0, "step": 5123 }, { "epoch": 6.991380046086882, "grad_norm": 0.3000693456490604, "learning_rate": 5.486582286465333e-06, "loss": 0.3071, "num_tokens": 3908541402.0, "step": 5124 }, { "epoch": 6.9927455833404455, "grad_norm": 0.3030258016185588, "learning_rate": 5.48262860081206e-06, "loss": 0.3146, "num_tokens": 3909305930.0, "step": 5125 }, { "epoch": 6.994111120594009, "grad_norm": 0.2800573647268576, "learning_rate": 5.478679953893489e-06, "loss": 0.3119, "num_tokens": 3910040889.0, "step": 5126 }, { "epoch": 6.995476657847572, "grad_norm": 0.30560738256676, "learning_rate": 5.474736346914193e-06, "loss": 0.3094, "num_tokens": 3910802752.0, "step": 5127 }, { "epoch": 6.996842195101135, "grad_norm": 0.2918659325159573, "learning_rate": 5.470797781077184e-06, "loss": 0.322, "num_tokens": 3911606255.0, "step": 5128 }, { "epoch": 6.9982077323546985, "grad_norm": 0.29028834019163635, "learning_rate": 5.466864257583956e-06, "loss": 0.3021, "num_tokens": 3912293608.0, "step": 5129 }, { "epoch": 6.999573269608262, "grad_norm": 0.2884616959950561, "learning_rate": 5.4629357776344526e-06, "loss": 0.2999, "num_tokens": 3913065563.0, "step": 5130 }, { "epoch": 7.0, "grad_norm": 0.4182250018082081, "learning_rate": 5.4590123424270865e-06, "loss": 0.2799, "num_tokens": 3913294952.0, "step": 5131 }, { "epoch": 7.001365537253563, "grad_norm": 0.5076395347456364, "learning_rate": 5.455093953158723e-06, "loss": 0.2887, "num_tokens": 3914074205.0, "step": 5132 }, { "epoch": 7.0027310745071265, "grad_norm": 0.3879267311929935, "learning_rate": 5.451180611024695e-06, "loss": 0.2964, "num_tokens": 3914793807.0, "step": 5133 }, { "epoch": 7.00409661176069, "grad_norm": 0.37972389627367864, "learning_rate": 5.447272317218799e-06, "loss": 0.2873, "num_tokens": 3915502865.0, "step": 5134 }, { "epoch": 7.005462149014253, "grad_norm": 0.31386677301573185, "learning_rate": 5.4433690729332735e-06, "loss": 0.2946, "num_tokens": 3916305587.0, "step": 5135 }, { "epoch": 7.006827686267816, "grad_norm": 0.3022970137262877, "learning_rate": 5.439470879358842e-06, "loss": 0.2884, "num_tokens": 3917091738.0, "step": 5136 }, { "epoch": 7.0081932235213795, "grad_norm": 0.2857925051131968, "learning_rate": 5.435577737684668e-06, "loss": 0.2742, "num_tokens": 3917824523.0, "step": 5137 }, { "epoch": 7.009558760774943, "grad_norm": 0.30654713457563965, "learning_rate": 5.431689649098382e-06, "loss": 0.2957, "num_tokens": 3918580998.0, "step": 5138 }, { "epoch": 7.010924298028506, "grad_norm": 0.32190258447394676, "learning_rate": 5.427806614786075e-06, "loss": 0.2851, "num_tokens": 3919376389.0, "step": 5139 }, { "epoch": 7.012289835282068, "grad_norm": 0.32626465907670243, "learning_rate": 5.4239286359322865e-06, "loss": 0.2897, "num_tokens": 3920154551.0, "step": 5140 }, { "epoch": 7.013655372535632, "grad_norm": 0.3469814439307285, "learning_rate": 5.42005571372002e-06, "loss": 0.2957, "num_tokens": 3920890306.0, "step": 5141 }, { "epoch": 7.015020909789195, "grad_norm": 0.3512668478514299, "learning_rate": 5.41618784933074e-06, "loss": 0.2906, "num_tokens": 3921627925.0, "step": 5142 }, { "epoch": 7.016386447042758, "grad_norm": 0.3165941423627085, "learning_rate": 5.412325043944369e-06, "loss": 0.2898, "num_tokens": 3922396555.0, "step": 5143 }, { "epoch": 7.017751984296321, "grad_norm": 0.30495039704984656, "learning_rate": 5.408467298739268e-06, "loss": 0.2848, "num_tokens": 3923089937.0, "step": 5144 }, { "epoch": 7.019117521549885, "grad_norm": 0.30375948023135807, "learning_rate": 5.404614614892283e-06, "loss": 0.278, "num_tokens": 3923873116.0, "step": 5145 }, { "epoch": 7.020483058803448, "grad_norm": 0.3301652861824822, "learning_rate": 5.400766993578689e-06, "loss": 0.2736, "num_tokens": 3924601066.0, "step": 5146 }, { "epoch": 7.021848596057011, "grad_norm": 0.3021962011106856, "learning_rate": 5.396924435972233e-06, "loss": 0.2808, "num_tokens": 3925279444.0, "step": 5147 }, { "epoch": 7.023214133310574, "grad_norm": 0.30831630041221536, "learning_rate": 5.393086943245119e-06, "loss": 0.2675, "num_tokens": 3925967991.0, "step": 5148 }, { "epoch": 7.024579670564138, "grad_norm": 0.29539237716522393, "learning_rate": 5.389254516567991e-06, "loss": 0.3073, "num_tokens": 3926752285.0, "step": 5149 }, { "epoch": 7.025945207817701, "grad_norm": 0.3003673072678064, "learning_rate": 5.3854271571099596e-06, "loss": 0.2798, "num_tokens": 3927524338.0, "step": 5150 }, { "epoch": 7.027310745071264, "grad_norm": 0.3038080750149305, "learning_rate": 5.381604866038586e-06, "loss": 0.2915, "num_tokens": 3928286419.0, "step": 5151 }, { "epoch": 7.028676282324827, "grad_norm": 0.28371133221595757, "learning_rate": 5.377787644519889e-06, "loss": 0.2843, "num_tokens": 3929035407.0, "step": 5152 }, { "epoch": 7.030041819578391, "grad_norm": 0.2830185313717071, "learning_rate": 5.373975493718329e-06, "loss": 0.2796, "num_tokens": 3929756090.0, "step": 5153 }, { "epoch": 7.031407356831954, "grad_norm": 0.2879390879255694, "learning_rate": 5.370168414796839e-06, "loss": 0.2958, "num_tokens": 3930505896.0, "step": 5154 }, { "epoch": 7.032772894085517, "grad_norm": 0.3043257003038754, "learning_rate": 5.36636640891679e-06, "loss": 0.2917, "num_tokens": 3931276559.0, "step": 5155 }, { "epoch": 7.03413843133908, "grad_norm": 0.2905742591873813, "learning_rate": 5.362569477238002e-06, "loss": 0.2694, "num_tokens": 3931979868.0, "step": 5156 }, { "epoch": 7.035503968592643, "grad_norm": 0.29111038371121256, "learning_rate": 5.358777620918767e-06, "loss": 0.2883, "num_tokens": 3932781351.0, "step": 5157 }, { "epoch": 7.036869505846206, "grad_norm": 0.2790506324145955, "learning_rate": 5.354990841115808e-06, "loss": 0.2827, "num_tokens": 3933566298.0, "step": 5158 }, { "epoch": 7.038235043099769, "grad_norm": 0.27617793635001525, "learning_rate": 5.351209138984307e-06, "loss": 0.2857, "num_tokens": 3934355893.0, "step": 5159 }, { "epoch": 7.039600580353333, "grad_norm": 0.2985124202148911, "learning_rate": 5.347432515677902e-06, "loss": 0.2886, "num_tokens": 3935154702.0, "step": 5160 }, { "epoch": 7.040966117606896, "grad_norm": 0.2819773530659147, "learning_rate": 5.3436609723486765e-06, "loss": 0.2937, "num_tokens": 3935899687.0, "step": 5161 }, { "epoch": 7.042331654860459, "grad_norm": 0.2880712222972772, "learning_rate": 5.339894510147161e-06, "loss": 0.2751, "num_tokens": 3936679875.0, "step": 5162 }, { "epoch": 7.043697192114022, "grad_norm": 0.28046743818735814, "learning_rate": 5.336133130222343e-06, "loss": 0.2835, "num_tokens": 3937506676.0, "step": 5163 }, { "epoch": 7.045062729367586, "grad_norm": 0.283137482134194, "learning_rate": 5.332376833721659e-06, "loss": 0.3012, "num_tokens": 3938308883.0, "step": 5164 }, { "epoch": 7.046428266621149, "grad_norm": 0.28919328829153007, "learning_rate": 5.328625621790983e-06, "loss": 0.2846, "num_tokens": 3939097617.0, "step": 5165 }, { "epoch": 7.047793803874712, "grad_norm": 0.2695808177607949, "learning_rate": 5.324879495574662e-06, "loss": 0.2865, "num_tokens": 3939971176.0, "step": 5166 }, { "epoch": 7.049159341128275, "grad_norm": 0.31878735130239877, "learning_rate": 5.321138456215462e-06, "loss": 0.2835, "num_tokens": 3940689815.0, "step": 5167 }, { "epoch": 7.050524878381839, "grad_norm": 0.2848496995910331, "learning_rate": 5.317402504854622e-06, "loss": 0.2848, "num_tokens": 3941426599.0, "step": 5168 }, { "epoch": 7.051890415635402, "grad_norm": 0.30558614149286584, "learning_rate": 5.313671642631817e-06, "loss": 0.284, "num_tokens": 3942240856.0, "step": 5169 }, { "epoch": 7.053255952888965, "grad_norm": 0.28305023435619614, "learning_rate": 5.309945870685171e-06, "loss": 0.2806, "num_tokens": 3942975016.0, "step": 5170 }, { "epoch": 7.054621490142528, "grad_norm": 0.28787039616373344, "learning_rate": 5.3062251901512515e-06, "loss": 0.2831, "num_tokens": 3943661683.0, "step": 5171 }, { "epoch": 7.055987027396091, "grad_norm": 0.2721414256402305, "learning_rate": 5.302509602165081e-06, "loss": 0.2692, "num_tokens": 3944516478.0, "step": 5172 }, { "epoch": 7.057352564649654, "grad_norm": 0.30452282571675726, "learning_rate": 5.298799107860128e-06, "loss": 0.2899, "num_tokens": 3945337931.0, "step": 5173 }, { "epoch": 7.058718101903217, "grad_norm": 0.3040725028480979, "learning_rate": 5.295093708368294e-06, "loss": 0.2781, "num_tokens": 3946072265.0, "step": 5174 }, { "epoch": 7.0600836391567805, "grad_norm": 0.2883080784885704, "learning_rate": 5.2913934048199474e-06, "loss": 0.2808, "num_tokens": 3946816635.0, "step": 5175 }, { "epoch": 7.061449176410344, "grad_norm": 0.29150728177416096, "learning_rate": 5.287698198343882e-06, "loss": 0.2841, "num_tokens": 3947525486.0, "step": 5176 }, { "epoch": 7.062814713663907, "grad_norm": 0.2888842608139551, "learning_rate": 5.284008090067348e-06, "loss": 0.2739, "num_tokens": 3948230953.0, "step": 5177 }, { "epoch": 7.06418025091747, "grad_norm": 0.28851708395516706, "learning_rate": 5.280323081116039e-06, "loss": 0.3147, "num_tokens": 3949094737.0, "step": 5178 }, { "epoch": 7.0655457881710335, "grad_norm": 0.2830533465130103, "learning_rate": 5.276643172614089e-06, "loss": 0.278, "num_tokens": 3949846816.0, "step": 5179 }, { "epoch": 7.066911325424597, "grad_norm": 0.2663980586740756, "learning_rate": 5.272968365684078e-06, "loss": 0.288, "num_tokens": 3950649749.0, "step": 5180 }, { "epoch": 7.06827686267816, "grad_norm": 0.2919303541259642, "learning_rate": 5.269298661447034e-06, "loss": 0.2992, "num_tokens": 3951497077.0, "step": 5181 }, { "epoch": 7.069642399931723, "grad_norm": 0.2750843601900457, "learning_rate": 5.265634061022427e-06, "loss": 0.2937, "num_tokens": 3952241781.0, "step": 5182 }, { "epoch": 7.0710079371852865, "grad_norm": 0.3003688260825482, "learning_rate": 5.261974565528156e-06, "loss": 0.2644, "num_tokens": 3952888798.0, "step": 5183 }, { "epoch": 7.07237347443885, "grad_norm": 0.2908615626633064, "learning_rate": 5.258320176080591e-06, "loss": 0.2897, "num_tokens": 3953662463.0, "step": 5184 }, { "epoch": 7.073739011692413, "grad_norm": 0.3072473007370898, "learning_rate": 5.254670893794515e-06, "loss": 0.2851, "num_tokens": 3954346863.0, "step": 5185 }, { "epoch": 7.075104548945976, "grad_norm": 0.2872117264268303, "learning_rate": 5.251026719783173e-06, "loss": 0.2821, "num_tokens": 3955074556.0, "step": 5186 }, { "epoch": 7.0764700861995395, "grad_norm": 0.2938844015019141, "learning_rate": 5.2473876551582424e-06, "loss": 0.2857, "num_tokens": 3955813277.0, "step": 5187 }, { "epoch": 7.077835623453102, "grad_norm": 0.28071725390052893, "learning_rate": 5.243753701029844e-06, "loss": 0.2848, "num_tokens": 3956612099.0, "step": 5188 }, { "epoch": 7.079201160706665, "grad_norm": 0.27706303632879703, "learning_rate": 5.240124858506539e-06, "loss": 0.2828, "num_tokens": 3957356102.0, "step": 5189 }, { "epoch": 7.080566697960228, "grad_norm": 0.2815429036762741, "learning_rate": 5.236501128695331e-06, "loss": 0.2676, "num_tokens": 3958132376.0, "step": 5190 }, { "epoch": 7.081932235213792, "grad_norm": 0.2764894811410257, "learning_rate": 5.232882512701662e-06, "loss": 0.2892, "num_tokens": 3959046671.0, "step": 5191 }, { "epoch": 7.083297772467355, "grad_norm": 0.27771571429341, "learning_rate": 5.229269011629419e-06, "loss": 0.2843, "num_tokens": 3959795343.0, "step": 5192 }, { "epoch": 7.084663309720918, "grad_norm": 0.27126433183844106, "learning_rate": 5.225660626580924e-06, "loss": 0.2924, "num_tokens": 3960626269.0, "step": 5193 }, { "epoch": 7.086028846974481, "grad_norm": 0.2894603453251648, "learning_rate": 5.2220573586569326e-06, "loss": 0.2851, "num_tokens": 3961374886.0, "step": 5194 }, { "epoch": 7.087394384228045, "grad_norm": 0.3015832323688165, "learning_rate": 5.218459208956652e-06, "loss": 0.2855, "num_tokens": 3962085468.0, "step": 5195 }, { "epoch": 7.088759921481608, "grad_norm": 0.31914042952168226, "learning_rate": 5.214866178577721e-06, "loss": 0.2999, "num_tokens": 3962855172.0, "step": 5196 }, { "epoch": 7.090125458735171, "grad_norm": 0.29663373143944916, "learning_rate": 5.2112782686162155e-06, "loss": 0.2864, "num_tokens": 3963592741.0, "step": 5197 }, { "epoch": 7.0914909959887344, "grad_norm": 0.2938540336342747, "learning_rate": 5.207695480166651e-06, "loss": 0.2942, "num_tokens": 3964312617.0, "step": 5198 }, { "epoch": 7.092856533242298, "grad_norm": 0.2894641340687619, "learning_rate": 5.204117814321985e-06, "loss": 0.2939, "num_tokens": 3965119639.0, "step": 5199 }, { "epoch": 7.094222070495861, "grad_norm": 0.28137548491644004, "learning_rate": 5.200545272173604e-06, "loss": 0.2824, "num_tokens": 3965902789.0, "step": 5200 }, { "epoch": 7.095587607749424, "grad_norm": 0.30455965223033654, "learning_rate": 5.196977854811341e-06, "loss": 0.2899, "num_tokens": 3966652780.0, "step": 5201 }, { "epoch": 7.0969531450029875, "grad_norm": 0.2878274394371381, "learning_rate": 5.193415563323455e-06, "loss": 0.2836, "num_tokens": 3967399168.0, "step": 5202 }, { "epoch": 7.098318682256551, "grad_norm": 0.29991078762362594, "learning_rate": 5.189858398796648e-06, "loss": 0.2918, "num_tokens": 3968127319.0, "step": 5203 }, { "epoch": 7.099684219510113, "grad_norm": 0.2950388128834921, "learning_rate": 5.18630636231606e-06, "loss": 0.295, "num_tokens": 3968875999.0, "step": 5204 }, { "epoch": 7.101049756763676, "grad_norm": 0.3000393742365759, "learning_rate": 5.182759454965264e-06, "loss": 0.2978, "num_tokens": 3969653185.0, "step": 5205 }, { "epoch": 7.10241529401724, "grad_norm": 0.29911304149768314, "learning_rate": 5.179217677826263e-06, "loss": 0.2635, "num_tokens": 3970346946.0, "step": 5206 }, { "epoch": 7.103780831270803, "grad_norm": 0.2893914552625092, "learning_rate": 5.1756810319795e-06, "loss": 0.2822, "num_tokens": 3971090741.0, "step": 5207 }, { "epoch": 7.105146368524366, "grad_norm": 0.3001333880402455, "learning_rate": 5.172149518503859e-06, "loss": 0.2738, "num_tokens": 3971791525.0, "step": 5208 }, { "epoch": 7.106511905777929, "grad_norm": 0.2751905845144113, "learning_rate": 5.168623138476645e-06, "loss": 0.296, "num_tokens": 3972587498.0, "step": 5209 }, { "epoch": 7.107877443031493, "grad_norm": 0.31536786787370813, "learning_rate": 5.16510189297361e-06, "loss": 0.3014, "num_tokens": 3973395546.0, "step": 5210 }, { "epoch": 7.109242980285056, "grad_norm": 0.28704526819362747, "learning_rate": 5.161585783068926e-06, "loss": 0.278, "num_tokens": 3974158046.0, "step": 5211 }, { "epoch": 7.110608517538619, "grad_norm": 0.2939289754032879, "learning_rate": 5.158074809835212e-06, "loss": 0.2804, "num_tokens": 3974918711.0, "step": 5212 }, { "epoch": 7.111974054792182, "grad_norm": 0.2879472442385299, "learning_rate": 5.154568974343509e-06, "loss": 0.2762, "num_tokens": 3975619096.0, "step": 5213 }, { "epoch": 7.113339592045746, "grad_norm": 0.28681085179276183, "learning_rate": 5.151068277663303e-06, "loss": 0.279, "num_tokens": 3976411812.0, "step": 5214 }, { "epoch": 7.114705129299309, "grad_norm": 0.30154997269215406, "learning_rate": 5.147572720862494e-06, "loss": 0.2957, "num_tokens": 3977190015.0, "step": 5215 }, { "epoch": 7.116070666552872, "grad_norm": 0.27186607869697027, "learning_rate": 5.144082305007436e-06, "loss": 0.2778, "num_tokens": 3978011404.0, "step": 5216 }, { "epoch": 7.117436203806435, "grad_norm": 0.28834556934442845, "learning_rate": 5.140597031162901e-06, "loss": 0.3, "num_tokens": 3978749431.0, "step": 5217 }, { "epoch": 7.118801741059999, "grad_norm": 0.2949447659283517, "learning_rate": 5.1371169003920875e-06, "loss": 0.2829, "num_tokens": 3979440228.0, "step": 5218 }, { "epoch": 7.120167278313562, "grad_norm": 0.3198683311024918, "learning_rate": 5.133641913756643e-06, "loss": 0.2789, "num_tokens": 3980106663.0, "step": 5219 }, { "epoch": 7.121532815567125, "grad_norm": 0.29570135377897633, "learning_rate": 5.130172072316629e-06, "loss": 0.2886, "num_tokens": 3980890384.0, "step": 5220 }, { "epoch": 7.1228983528206875, "grad_norm": 0.2900220139340534, "learning_rate": 5.126707377130544e-06, "loss": 0.29, "num_tokens": 3981587170.0, "step": 5221 }, { "epoch": 7.124263890074251, "grad_norm": 0.29591408484773035, "learning_rate": 5.123247829255319e-06, "loss": 0.3068, "num_tokens": 3982355916.0, "step": 5222 }, { "epoch": 7.125629427327814, "grad_norm": 0.2904449678391113, "learning_rate": 5.119793429746313e-06, "loss": 0.2846, "num_tokens": 3983142109.0, "step": 5223 }, { "epoch": 7.126994964581377, "grad_norm": 0.29804369997896485, "learning_rate": 5.116344179657308e-06, "loss": 0.283, "num_tokens": 3983957122.0, "step": 5224 }, { "epoch": 7.1283605018349405, "grad_norm": 0.29202604503252055, "learning_rate": 5.112900080040529e-06, "loss": 0.2776, "num_tokens": 3984734339.0, "step": 5225 }, { "epoch": 7.129726039088504, "grad_norm": 0.2821422590492495, "learning_rate": 5.109461131946617e-06, "loss": 0.281, "num_tokens": 3985481119.0, "step": 5226 }, { "epoch": 7.131091576342067, "grad_norm": 0.3093735684482989, "learning_rate": 5.106027336424644e-06, "loss": 0.3059, "num_tokens": 3986131475.0, "step": 5227 }, { "epoch": 7.13245711359563, "grad_norm": 0.2816267939521894, "learning_rate": 5.1025986945221185e-06, "loss": 0.2914, "num_tokens": 3986911871.0, "step": 5228 }, { "epoch": 7.1338226508491935, "grad_norm": 0.2877529540089698, "learning_rate": 5.0991752072849645e-06, "loss": 0.28, "num_tokens": 3987664281.0, "step": 5229 }, { "epoch": 7.135188188102757, "grad_norm": 0.28220256683249356, "learning_rate": 5.095756875757543e-06, "loss": 0.2771, "num_tokens": 3988414397.0, "step": 5230 }, { "epoch": 7.13655372535632, "grad_norm": 0.2841968867738198, "learning_rate": 5.0923437009826345e-06, "loss": 0.2913, "num_tokens": 3989206827.0, "step": 5231 }, { "epoch": 7.137919262609883, "grad_norm": 0.2848182167631017, "learning_rate": 5.088935684001461e-06, "loss": 0.2926, "num_tokens": 3989997720.0, "step": 5232 }, { "epoch": 7.1392847998634466, "grad_norm": 0.2909136164838903, "learning_rate": 5.085532825853651e-06, "loss": 0.2783, "num_tokens": 3990729886.0, "step": 5233 }, { "epoch": 7.14065033711701, "grad_norm": 0.28339306722295715, "learning_rate": 5.0821351275772725e-06, "loss": 0.2848, "num_tokens": 3991510829.0, "step": 5234 }, { "epoch": 7.142015874370573, "grad_norm": 0.2962912119881301, "learning_rate": 5.078742590208818e-06, "loss": 0.2852, "num_tokens": 3992281046.0, "step": 5235 }, { "epoch": 7.143381411624135, "grad_norm": 0.30868518131631756, "learning_rate": 5.075355214783196e-06, "loss": 0.2994, "num_tokens": 3993014490.0, "step": 5236 }, { "epoch": 7.144746948877699, "grad_norm": 0.2799554162506697, "learning_rate": 5.071973002333761e-06, "loss": 0.2965, "num_tokens": 3993826218.0, "step": 5237 }, { "epoch": 7.146112486131262, "grad_norm": 0.2801780687792036, "learning_rate": 5.0685959538922705e-06, "loss": 0.2818, "num_tokens": 3994579875.0, "step": 5238 }, { "epoch": 7.147478023384825, "grad_norm": 0.2887235034650628, "learning_rate": 5.065224070488915e-06, "loss": 0.2889, "num_tokens": 3995277028.0, "step": 5239 }, { "epoch": 7.148843560638388, "grad_norm": 0.3094862101910607, "learning_rate": 5.061857353152317e-06, "loss": 0.3079, "num_tokens": 3996081399.0, "step": 5240 }, { "epoch": 7.150209097891952, "grad_norm": 0.30782305336818544, "learning_rate": 5.05849580290951e-06, "loss": 0.2821, "num_tokens": 3996773342.0, "step": 5241 }, { "epoch": 7.151574635145515, "grad_norm": 0.29602448045346424, "learning_rate": 5.055139420785958e-06, "loss": 0.2871, "num_tokens": 3997472013.0, "step": 5242 }, { "epoch": 7.152940172399078, "grad_norm": 0.3000959010499544, "learning_rate": 5.05178820780555e-06, "loss": 0.2875, "num_tokens": 3998229257.0, "step": 5243 }, { "epoch": 7.1543057096526415, "grad_norm": 0.2799979075542774, "learning_rate": 5.048442164990596e-06, "loss": 0.2924, "num_tokens": 3999018092.0, "step": 5244 }, { "epoch": 7.155671246906205, "grad_norm": 0.27139978473180626, "learning_rate": 5.045101293361825e-06, "loss": 0.2867, "num_tokens": 3999806287.0, "step": 5245 }, { "epoch": 7.157036784159768, "grad_norm": 0.2875806155258447, "learning_rate": 5.041765593938399e-06, "loss": 0.294, "num_tokens": 4000646441.0, "step": 5246 }, { "epoch": 7.158402321413331, "grad_norm": 0.27761778095205697, "learning_rate": 5.038435067737888e-06, "loss": 0.2949, "num_tokens": 4001419788.0, "step": 5247 }, { "epoch": 7.1597678586668945, "grad_norm": 0.2834141335237049, "learning_rate": 5.0351097157762964e-06, "loss": 0.2856, "num_tokens": 4002187722.0, "step": 5248 }, { "epoch": 7.161133395920458, "grad_norm": 0.2831866743623015, "learning_rate": 5.031789539068047e-06, "loss": 0.2825, "num_tokens": 4002967758.0, "step": 5249 }, { "epoch": 7.162498933174021, "grad_norm": 0.2893423932440752, "learning_rate": 5.028474538625975e-06, "loss": 0.3016, "num_tokens": 4003709428.0, "step": 5250 }, { "epoch": 7.163864470427584, "grad_norm": 0.279907851184894, "learning_rate": 5.025164715461347e-06, "loss": 0.2856, "num_tokens": 4004456712.0, "step": 5251 }, { "epoch": 7.1652300076811475, "grad_norm": 0.28939982518407714, "learning_rate": 5.0218600705838475e-06, "loss": 0.2775, "num_tokens": 4005232657.0, "step": 5252 }, { "epoch": 7.16659554493471, "grad_norm": 0.2760614944085921, "learning_rate": 5.018560605001581e-06, "loss": 0.2712, "num_tokens": 4005948120.0, "step": 5253 }, { "epoch": 7.167961082188273, "grad_norm": 0.2774397327803342, "learning_rate": 5.015266319721066e-06, "loss": 0.2797, "num_tokens": 4006749220.0, "step": 5254 }, { "epoch": 7.169326619441836, "grad_norm": 0.27979143916686244, "learning_rate": 5.0119772157472555e-06, "loss": 0.293, "num_tokens": 4007549096.0, "step": 5255 }, { "epoch": 7.1706921566954, "grad_norm": 0.2909393599450843, "learning_rate": 5.008693294083504e-06, "loss": 0.2921, "num_tokens": 4008362989.0, "step": 5256 }, { "epoch": 7.172057693948963, "grad_norm": 0.29339617245267285, "learning_rate": 5.005414555731598e-06, "loss": 0.3019, "num_tokens": 4009074208.0, "step": 5257 }, { "epoch": 7.173423231202526, "grad_norm": 0.29500968222094837, "learning_rate": 5.002141001691743e-06, "loss": 0.2932, "num_tokens": 4009863877.0, "step": 5258 }, { "epoch": 7.174788768456089, "grad_norm": 0.28260863643840256, "learning_rate": 4.99887263296255e-06, "loss": 0.2921, "num_tokens": 4010680283.0, "step": 5259 }, { "epoch": 7.176154305709653, "grad_norm": 0.29518935437231303, "learning_rate": 4.995609450541058e-06, "loss": 0.3059, "num_tokens": 4011489335.0, "step": 5260 }, { "epoch": 7.177519842963216, "grad_norm": 0.2796236592443488, "learning_rate": 4.992351455422727e-06, "loss": 0.2963, "num_tokens": 4012313455.0, "step": 5261 }, { "epoch": 7.178885380216779, "grad_norm": 0.28924274830701646, "learning_rate": 4.98909864860143e-06, "loss": 0.2783, "num_tokens": 4013077802.0, "step": 5262 }, { "epoch": 7.180250917470342, "grad_norm": 0.284379450980499, "learning_rate": 4.9858510310694515e-06, "loss": 0.295, "num_tokens": 4013828045.0, "step": 5263 }, { "epoch": 7.181616454723906, "grad_norm": 0.2950557612260734, "learning_rate": 4.982608603817509e-06, "loss": 0.2709, "num_tokens": 4014546381.0, "step": 5264 }, { "epoch": 7.182981991977469, "grad_norm": 0.28794018507139224, "learning_rate": 4.97937136783472e-06, "loss": 0.2885, "num_tokens": 4015265409.0, "step": 5265 }, { "epoch": 7.184347529231032, "grad_norm": 0.29037062322860746, "learning_rate": 4.976139324108622e-06, "loss": 0.2807, "num_tokens": 4016022785.0, "step": 5266 }, { "epoch": 7.185713066484595, "grad_norm": 0.2833540837856527, "learning_rate": 4.97291247362518e-06, "loss": 0.297, "num_tokens": 4016812196.0, "step": 5267 }, { "epoch": 7.187078603738159, "grad_norm": 0.31218945361237793, "learning_rate": 4.969690817368759e-06, "loss": 0.3087, "num_tokens": 4017542592.0, "step": 5268 }, { "epoch": 7.188444140991721, "grad_norm": 0.30050215262797797, "learning_rate": 4.96647435632215e-06, "loss": 0.3009, "num_tokens": 4018279113.0, "step": 5269 }, { "epoch": 7.189809678245284, "grad_norm": 0.3035160159373512, "learning_rate": 4.963263091466555e-06, "loss": 0.2838, "num_tokens": 4019025885.0, "step": 5270 }, { "epoch": 7.1911752154988475, "grad_norm": 0.2718422726801319, "learning_rate": 4.960057023781595e-06, "loss": 0.286, "num_tokens": 4019803771.0, "step": 5271 }, { "epoch": 7.192540752752411, "grad_norm": 0.298675063839605, "learning_rate": 4.956856154245297e-06, "loss": 0.3021, "num_tokens": 4020500025.0, "step": 5272 }, { "epoch": 7.193906290005974, "grad_norm": 0.29091363870387604, "learning_rate": 4.953660483834112e-06, "loss": 0.288, "num_tokens": 4021267421.0, "step": 5273 }, { "epoch": 7.195271827259537, "grad_norm": 0.31101222271876433, "learning_rate": 4.950470013522897e-06, "loss": 0.3083, "num_tokens": 4022055079.0, "step": 5274 }, { "epoch": 7.1966373645131005, "grad_norm": 0.28866187623697365, "learning_rate": 4.947284744284931e-06, "loss": 0.2797, "num_tokens": 4022786569.0, "step": 5275 }, { "epoch": 7.198002901766664, "grad_norm": 0.30463717862498735, "learning_rate": 4.9441046770919e-06, "loss": 0.3074, "num_tokens": 4023563415.0, "step": 5276 }, { "epoch": 7.199368439020227, "grad_norm": 0.276591226095406, "learning_rate": 4.940929812913901e-06, "loss": 0.2998, "num_tokens": 4024330662.0, "step": 5277 }, { "epoch": 7.20073397627379, "grad_norm": 0.308470211577835, "learning_rate": 4.937760152719452e-06, "loss": 0.2882, "num_tokens": 4025054989.0, "step": 5278 }, { "epoch": 7.202099513527354, "grad_norm": 0.3006637208506525, "learning_rate": 4.934595697475478e-06, "loss": 0.29, "num_tokens": 4025859833.0, "step": 5279 }, { "epoch": 7.203465050780917, "grad_norm": 0.28749701367710195, "learning_rate": 4.931436448147315e-06, "loss": 0.2851, "num_tokens": 4026618808.0, "step": 5280 }, { "epoch": 7.20483058803448, "grad_norm": 0.2975028081921011, "learning_rate": 4.928282405698719e-06, "loss": 0.283, "num_tokens": 4027365038.0, "step": 5281 }, { "epoch": 7.206196125288043, "grad_norm": 0.29199622156395993, "learning_rate": 4.925133571091845e-06, "loss": 0.2938, "num_tokens": 4028098243.0, "step": 5282 }, { "epoch": 7.207561662541607, "grad_norm": 0.3148739011151634, "learning_rate": 4.9219899452872694e-06, "loss": 0.2935, "num_tokens": 4028896408.0, "step": 5283 }, { "epoch": 7.20892719979517, "grad_norm": 0.2769914474264207, "learning_rate": 4.9188515292439746e-06, "loss": 0.2854, "num_tokens": 4029683429.0, "step": 5284 }, { "epoch": 7.210292737048732, "grad_norm": 0.2938448484673576, "learning_rate": 4.915718323919358e-06, "loss": 0.2875, "num_tokens": 4030399212.0, "step": 5285 }, { "epoch": 7.2116582743022954, "grad_norm": 0.2686388384967971, "learning_rate": 4.912590330269224e-06, "loss": 0.2913, "num_tokens": 4031200427.0, "step": 5286 }, { "epoch": 7.213023811555859, "grad_norm": 0.2953187823592184, "learning_rate": 4.909467549247786e-06, "loss": 0.2898, "num_tokens": 4031969134.0, "step": 5287 }, { "epoch": 7.214389348809422, "grad_norm": 0.30357414011969996, "learning_rate": 4.906349981807673e-06, "loss": 0.2834, "num_tokens": 4032688357.0, "step": 5288 }, { "epoch": 7.215754886062985, "grad_norm": 0.29156864564223767, "learning_rate": 4.903237628899912e-06, "loss": 0.2849, "num_tokens": 4033427241.0, "step": 5289 }, { "epoch": 7.2171204233165485, "grad_norm": 0.3012548143330721, "learning_rate": 4.900130491473958e-06, "loss": 0.2862, "num_tokens": 4034162179.0, "step": 5290 }, { "epoch": 7.218485960570112, "grad_norm": 0.2959209991370389, "learning_rate": 4.897028570477655e-06, "loss": 0.2874, "num_tokens": 4034930423.0, "step": 5291 }, { "epoch": 7.219851497823675, "grad_norm": 0.26746315718102914, "learning_rate": 4.89393186685727e-06, "loss": 0.283, "num_tokens": 4035742098.0, "step": 5292 }, { "epoch": 7.221217035077238, "grad_norm": 0.30850624982864794, "learning_rate": 4.890840381557473e-06, "loss": 0.2894, "num_tokens": 4036466032.0, "step": 5293 }, { "epoch": 7.2225825723308015, "grad_norm": 0.2765344172769319, "learning_rate": 4.887754115521341e-06, "loss": 0.3015, "num_tokens": 4037337559.0, "step": 5294 }, { "epoch": 7.223948109584365, "grad_norm": 0.28479033227206413, "learning_rate": 4.8846730696903586e-06, "loss": 0.28, "num_tokens": 4038085019.0, "step": 5295 }, { "epoch": 7.225313646837928, "grad_norm": 0.28575371697361646, "learning_rate": 4.8815972450044216e-06, "loss": 0.2959, "num_tokens": 4038864870.0, "step": 5296 }, { "epoch": 7.226679184091491, "grad_norm": 0.28999697091551446, "learning_rate": 4.878526642401829e-06, "loss": 0.2762, "num_tokens": 4039598186.0, "step": 5297 }, { "epoch": 7.2280447213450545, "grad_norm": 0.2806747265773839, "learning_rate": 4.875461262819288e-06, "loss": 0.2777, "num_tokens": 4040391786.0, "step": 5298 }, { "epoch": 7.229410258598618, "grad_norm": 0.28022556605555676, "learning_rate": 4.872401107191921e-06, "loss": 0.2865, "num_tokens": 4041200871.0, "step": 5299 }, { "epoch": 7.230775795852181, "grad_norm": 0.29574454318936816, "learning_rate": 4.869346176453237e-06, "loss": 0.2948, "num_tokens": 4041987374.0, "step": 5300 }, { "epoch": 7.232141333105743, "grad_norm": 0.2783519218119721, "learning_rate": 4.866296471535172e-06, "loss": 0.2877, "num_tokens": 4042730587.0, "step": 5301 }, { "epoch": 7.233506870359307, "grad_norm": 0.30015644833017335, "learning_rate": 4.863251993368053e-06, "loss": 0.2865, "num_tokens": 4043475368.0, "step": 5302 }, { "epoch": 7.23487240761287, "grad_norm": 0.2769118960226417, "learning_rate": 4.8602127428806275e-06, "loss": 0.2936, "num_tokens": 4044282821.0, "step": 5303 }, { "epoch": 7.236237944866433, "grad_norm": 0.29357525338872265, "learning_rate": 4.857178721000026e-06, "loss": 0.3007, "num_tokens": 4045066716.0, "step": 5304 }, { "epoch": 7.237603482119996, "grad_norm": 0.2816511634207548, "learning_rate": 4.854149928651809e-06, "loss": 0.272, "num_tokens": 4045781706.0, "step": 5305 }, { "epoch": 7.23896901937356, "grad_norm": 0.29790860051314294, "learning_rate": 4.851126366759924e-06, "loss": 0.2833, "num_tokens": 4046590802.0, "step": 5306 }, { "epoch": 7.240334556627123, "grad_norm": 0.2758035225748882, "learning_rate": 4.848108036246727e-06, "loss": 0.2776, "num_tokens": 4047347391.0, "step": 5307 }, { "epoch": 7.241700093880686, "grad_norm": 0.28087779454044887, "learning_rate": 4.845094938032988e-06, "loss": 0.2829, "num_tokens": 4048077602.0, "step": 5308 }, { "epoch": 7.243065631134249, "grad_norm": 0.27435778897465196, "learning_rate": 4.842087073037863e-06, "loss": 0.2783, "num_tokens": 4048817737.0, "step": 5309 }, { "epoch": 7.244431168387813, "grad_norm": 0.2825995562403925, "learning_rate": 4.839084442178928e-06, "loss": 0.2848, "num_tokens": 4049550182.0, "step": 5310 }, { "epoch": 7.245796705641376, "grad_norm": 0.284310455645346, "learning_rate": 4.836087046372156e-06, "loss": 0.276, "num_tokens": 4050365159.0, "step": 5311 }, { "epoch": 7.247162242894939, "grad_norm": 0.28292112474450776, "learning_rate": 4.833094886531918e-06, "loss": 0.3064, "num_tokens": 4051172523.0, "step": 5312 }, { "epoch": 7.248527780148502, "grad_norm": 0.29444106957375404, "learning_rate": 4.830107963570993e-06, "loss": 0.2877, "num_tokens": 4051956919.0, "step": 5313 }, { "epoch": 7.249893317402066, "grad_norm": 0.2686268358078965, "learning_rate": 4.827126278400565e-06, "loss": 0.2918, "num_tokens": 4052825881.0, "step": 5314 }, { "epoch": 7.251258854655629, "grad_norm": 0.2901860456595225, "learning_rate": 4.824149831930216e-06, "loss": 0.2682, "num_tokens": 4053491715.0, "step": 5315 }, { "epoch": 7.252624391909192, "grad_norm": 0.2760767359747712, "learning_rate": 4.821178625067928e-06, "loss": 0.3059, "num_tokens": 4054309973.0, "step": 5316 }, { "epoch": 7.2539899291627545, "grad_norm": 0.27861783625453085, "learning_rate": 4.818212658720093e-06, "loss": 0.2854, "num_tokens": 4055068646.0, "step": 5317 }, { "epoch": 7.255355466416318, "grad_norm": 0.28468360757298994, "learning_rate": 4.815251933791495e-06, "loss": 0.279, "num_tokens": 4055864372.0, "step": 5318 }, { "epoch": 7.256721003669881, "grad_norm": 0.27295043794451745, "learning_rate": 4.8122964511853245e-06, "loss": 0.3105, "num_tokens": 4056684677.0, "step": 5319 }, { "epoch": 7.258086540923444, "grad_norm": 0.28810639106422575, "learning_rate": 4.809346211803171e-06, "loss": 0.289, "num_tokens": 4057429568.0, "step": 5320 }, { "epoch": 7.2594520781770076, "grad_norm": 0.2736173992118674, "learning_rate": 4.806401216545022e-06, "loss": 0.2841, "num_tokens": 4058243318.0, "step": 5321 }, { "epoch": 7.260817615430571, "grad_norm": 0.28649005320478577, "learning_rate": 4.803461466309271e-06, "loss": 0.2678, "num_tokens": 4058996157.0, "step": 5322 }, { "epoch": 7.262183152684134, "grad_norm": 0.2927639846553351, "learning_rate": 4.800526961992707e-06, "loss": 0.2918, "num_tokens": 4059741185.0, "step": 5323 }, { "epoch": 7.263548689937697, "grad_norm": 0.28176074040583643, "learning_rate": 4.797597704490525e-06, "loss": 0.2861, "num_tokens": 4060489501.0, "step": 5324 }, { "epoch": 7.264914227191261, "grad_norm": 0.2897763478487487, "learning_rate": 4.794673694696306e-06, "loss": 0.2857, "num_tokens": 4061238913.0, "step": 5325 }, { "epoch": 7.266279764444824, "grad_norm": 0.2939665153786435, "learning_rate": 4.791754933502046e-06, "loss": 0.2815, "num_tokens": 4062009300.0, "step": 5326 }, { "epoch": 7.267645301698387, "grad_norm": 0.2778979691675216, "learning_rate": 4.788841421798131e-06, "loss": 0.2949, "num_tokens": 4062835571.0, "step": 5327 }, { "epoch": 7.26901083895195, "grad_norm": 0.2910252409582658, "learning_rate": 4.7859331604733426e-06, "loss": 0.2877, "num_tokens": 4063601960.0, "step": 5328 }, { "epoch": 7.270376376205514, "grad_norm": 0.2810923675628799, "learning_rate": 4.783030150414873e-06, "loss": 0.2889, "num_tokens": 4064380083.0, "step": 5329 }, { "epoch": 7.271741913459077, "grad_norm": 0.28339428199065303, "learning_rate": 4.7801323925082985e-06, "loss": 0.296, "num_tokens": 4065134852.0, "step": 5330 }, { "epoch": 7.27310745071264, "grad_norm": 0.28785500494083577, "learning_rate": 4.777239887637603e-06, "loss": 0.2872, "num_tokens": 4065922443.0, "step": 5331 }, { "epoch": 7.274472987966203, "grad_norm": 0.2931081741082885, "learning_rate": 4.774352636685162e-06, "loss": 0.2758, "num_tokens": 4066625748.0, "step": 5332 }, { "epoch": 7.275838525219767, "grad_norm": 0.28724543906123556, "learning_rate": 4.7714706405317565e-06, "loss": 0.2868, "num_tokens": 4067402938.0, "step": 5333 }, { "epoch": 7.277204062473329, "grad_norm": 0.28270358355778247, "learning_rate": 4.7685939000565495e-06, "loss": 0.2942, "num_tokens": 4068197958.0, "step": 5334 }, { "epoch": 7.278569599726892, "grad_norm": 0.3046976262970825, "learning_rate": 4.765722416137115e-06, "loss": 0.3047, "num_tokens": 4068973786.0, "step": 5335 }, { "epoch": 7.2799351369804555, "grad_norm": 0.26921608690045506, "learning_rate": 4.762856189649421e-06, "loss": 0.29, "num_tokens": 4069788410.0, "step": 5336 }, { "epoch": 7.281300674234019, "grad_norm": 0.2978500158654436, "learning_rate": 4.759995221467824e-06, "loss": 0.3018, "num_tokens": 4070518045.0, "step": 5337 }, { "epoch": 7.282666211487582, "grad_norm": 0.2836249245064509, "learning_rate": 4.757139512465086e-06, "loss": 0.2915, "num_tokens": 4071321170.0, "step": 5338 }, { "epoch": 7.284031748741145, "grad_norm": 0.29258448990405206, "learning_rate": 4.754289063512357e-06, "loss": 0.2901, "num_tokens": 4072146421.0, "step": 5339 }, { "epoch": 7.2853972859947085, "grad_norm": 0.2879291217965267, "learning_rate": 4.751443875479185e-06, "loss": 0.3038, "num_tokens": 4072943824.0, "step": 5340 }, { "epoch": 7.286762823248272, "grad_norm": 0.2771065232400092, "learning_rate": 4.748603949233516e-06, "loss": 0.2945, "num_tokens": 4073785930.0, "step": 5341 }, { "epoch": 7.288128360501835, "grad_norm": 0.30077316753106287, "learning_rate": 4.74576928564169e-06, "loss": 0.2889, "num_tokens": 4074511130.0, "step": 5342 }, { "epoch": 7.289493897755398, "grad_norm": 0.2832856889088096, "learning_rate": 4.742939885568436e-06, "loss": 0.2842, "num_tokens": 4075253713.0, "step": 5343 }, { "epoch": 7.2908594350089615, "grad_norm": 0.2917061403180716, "learning_rate": 4.7401157498768815e-06, "loss": 0.2823, "num_tokens": 4076045187.0, "step": 5344 }, { "epoch": 7.292224972262525, "grad_norm": 0.2890374663107977, "learning_rate": 4.737296879428551e-06, "loss": 0.2955, "num_tokens": 4076796876.0, "step": 5345 }, { "epoch": 7.293590509516088, "grad_norm": 0.30749701211082503, "learning_rate": 4.734483275083359e-06, "loss": 0.2934, "num_tokens": 4077549042.0, "step": 5346 }, { "epoch": 7.294956046769651, "grad_norm": 0.29135820537249696, "learning_rate": 4.731674937699615e-06, "loss": 0.2978, "num_tokens": 4078340311.0, "step": 5347 }, { "epoch": 7.2963215840232145, "grad_norm": 0.2909405293218808, "learning_rate": 4.728871868134019e-06, "loss": 0.2889, "num_tokens": 4079079403.0, "step": 5348 }, { "epoch": 7.297687121276777, "grad_norm": 0.2861870160720877, "learning_rate": 4.726074067241664e-06, "loss": 0.2957, "num_tokens": 4079852914.0, "step": 5349 }, { "epoch": 7.29905265853034, "grad_norm": 0.30353087467674406, "learning_rate": 4.723281535876047e-06, "loss": 0.2808, "num_tokens": 4080616859.0, "step": 5350 }, { "epoch": 7.300418195783903, "grad_norm": 0.2986373278916795, "learning_rate": 4.720494274889037e-06, "loss": 0.2805, "num_tokens": 4081351601.0, "step": 5351 }, { "epoch": 7.301783733037467, "grad_norm": 0.26943977837882344, "learning_rate": 4.7177122851309125e-06, "loss": 0.2875, "num_tokens": 4082144677.0, "step": 5352 }, { "epoch": 7.30314927029103, "grad_norm": 0.2929286808319261, "learning_rate": 4.714935567450339e-06, "loss": 0.2855, "num_tokens": 4082902894.0, "step": 5353 }, { "epoch": 7.304514807544593, "grad_norm": 0.297397383945972, "learning_rate": 4.712164122694372e-06, "loss": 0.2892, "num_tokens": 4083646842.0, "step": 5354 }, { "epoch": 7.305880344798156, "grad_norm": 0.29548776252071407, "learning_rate": 4.709397951708455e-06, "loss": 0.2906, "num_tokens": 4084426267.0, "step": 5355 }, { "epoch": 7.30724588205172, "grad_norm": 0.28549730153107333, "learning_rate": 4.706637055336437e-06, "loss": 0.2838, "num_tokens": 4085264453.0, "step": 5356 }, { "epoch": 7.308611419305283, "grad_norm": 0.2964084085794388, "learning_rate": 4.703881434420536e-06, "loss": 0.2947, "num_tokens": 4086013757.0, "step": 5357 }, { "epoch": 7.309976956558846, "grad_norm": 0.2845232540126285, "learning_rate": 4.701131089801381e-06, "loss": 0.2725, "num_tokens": 4086749464.0, "step": 5358 }, { "epoch": 7.311342493812409, "grad_norm": 0.2791045639443235, "learning_rate": 4.69838602231798e-06, "loss": 0.2956, "num_tokens": 4087581399.0, "step": 5359 }, { "epoch": 7.312708031065973, "grad_norm": 0.2927936451800904, "learning_rate": 4.6956462328077336e-06, "loss": 0.2931, "num_tokens": 4088355418.0, "step": 5360 }, { "epoch": 7.314073568319536, "grad_norm": 0.2859567702385211, "learning_rate": 4.692911722106433e-06, "loss": 0.2761, "num_tokens": 4089107344.0, "step": 5361 }, { "epoch": 7.315439105573099, "grad_norm": 0.28887138138099205, "learning_rate": 4.690182491048259e-06, "loss": 0.2868, "num_tokens": 4089891100.0, "step": 5362 }, { "epoch": 7.316804642826662, "grad_norm": 0.2630657522959853, "learning_rate": 4.687458540465785e-06, "loss": 0.2776, "num_tokens": 4090679412.0, "step": 5363 }, { "epoch": 7.318170180080226, "grad_norm": 0.2891017141238803, "learning_rate": 4.6847398711899655e-06, "loss": 0.3027, "num_tokens": 4091462342.0, "step": 5364 }, { "epoch": 7.319535717333789, "grad_norm": 0.29979248176564816, "learning_rate": 4.682026484050155e-06, "loss": 0.283, "num_tokens": 4092183924.0, "step": 5365 }, { "epoch": 7.320901254587351, "grad_norm": 0.3019528542038237, "learning_rate": 4.679318379874083e-06, "loss": 0.283, "num_tokens": 4092989254.0, "step": 5366 }, { "epoch": 7.322266791840915, "grad_norm": 0.3061231181270298, "learning_rate": 4.676615559487879e-06, "loss": 0.2811, "num_tokens": 4093707672.0, "step": 5367 }, { "epoch": 7.323632329094478, "grad_norm": 0.2809658501088766, "learning_rate": 4.673918023716059e-06, "loss": 0.2967, "num_tokens": 4094524170.0, "step": 5368 }, { "epoch": 7.324997866348041, "grad_norm": 0.2838164975328332, "learning_rate": 4.671225773381514e-06, "loss": 0.3045, "num_tokens": 4095282366.0, "step": 5369 }, { "epoch": 7.326363403601604, "grad_norm": 0.2774292946118217, "learning_rate": 4.668538809305547e-06, "loss": 0.2941, "num_tokens": 4096062714.0, "step": 5370 }, { "epoch": 7.327728940855168, "grad_norm": 0.283141160411257, "learning_rate": 4.665857132307824e-06, "loss": 0.2872, "num_tokens": 4096850799.0, "step": 5371 }, { "epoch": 7.329094478108731, "grad_norm": 0.2870863083292524, "learning_rate": 4.663180743206413e-06, "loss": 0.2939, "num_tokens": 4097628559.0, "step": 5372 }, { "epoch": 7.330460015362294, "grad_norm": 0.2879216317260683, "learning_rate": 4.660509642817765e-06, "loss": 0.2754, "num_tokens": 4098377219.0, "step": 5373 }, { "epoch": 7.331825552615857, "grad_norm": 0.27484122156369606, "learning_rate": 4.6578438319567135e-06, "loss": 0.285, "num_tokens": 4099111959.0, "step": 5374 }, { "epoch": 7.333191089869421, "grad_norm": 0.3029442408753387, "learning_rate": 4.6551833114364854e-06, "loss": 0.287, "num_tokens": 4099844682.0, "step": 5375 }, { "epoch": 7.334556627122984, "grad_norm": 0.3002949289008917, "learning_rate": 4.652528082068687e-06, "loss": 0.2812, "num_tokens": 4100552257.0, "step": 5376 }, { "epoch": 7.335922164376547, "grad_norm": 0.2791400170407446, "learning_rate": 4.649878144663317e-06, "loss": 0.2992, "num_tokens": 4101346822.0, "step": 5377 }, { "epoch": 7.33728770163011, "grad_norm": 0.3056349164278809, "learning_rate": 4.647233500028753e-06, "loss": 0.3045, "num_tokens": 4102137010.0, "step": 5378 }, { "epoch": 7.338653238883674, "grad_norm": 0.29870601492404814, "learning_rate": 4.644594148971767e-06, "loss": 0.3068, "num_tokens": 4102899674.0, "step": 5379 }, { "epoch": 7.340018776137237, "grad_norm": 0.30456291871009405, "learning_rate": 4.641960092297505e-06, "loss": 0.2803, "num_tokens": 4103651815.0, "step": 5380 }, { "epoch": 7.341384313390799, "grad_norm": 0.27333538078598185, "learning_rate": 4.6393313308095066e-06, "loss": 0.2808, "num_tokens": 4104452462.0, "step": 5381 }, { "epoch": 7.342749850644363, "grad_norm": 0.2959661463501412, "learning_rate": 4.636707865309695e-06, "loss": 0.2904, "num_tokens": 4105217468.0, "step": 5382 }, { "epoch": 7.344115387897926, "grad_norm": 0.2931112776246048, "learning_rate": 4.6340896965983705e-06, "loss": 0.2817, "num_tokens": 4106020919.0, "step": 5383 }, { "epoch": 7.345480925151489, "grad_norm": 0.3072629292939685, "learning_rate": 4.631476825474225e-06, "loss": 0.2941, "num_tokens": 4106748395.0, "step": 5384 }, { "epoch": 7.346846462405052, "grad_norm": 0.29028130080516557, "learning_rate": 4.628869252734334e-06, "loss": 0.2964, "num_tokens": 4107578130.0, "step": 5385 }, { "epoch": 7.3482119996586155, "grad_norm": 0.2950295062692248, "learning_rate": 4.6262669791741574e-06, "loss": 0.2945, "num_tokens": 4108323312.0, "step": 5386 }, { "epoch": 7.349577536912179, "grad_norm": 0.28502991285632956, "learning_rate": 4.623670005587528e-06, "loss": 0.2685, "num_tokens": 4109011541.0, "step": 5387 }, { "epoch": 7.350943074165742, "grad_norm": 0.30062746351682784, "learning_rate": 4.6210783327666794e-06, "loss": 0.293, "num_tokens": 4109776069.0, "step": 5388 }, { "epoch": 7.352308611419305, "grad_norm": 0.28202579754065105, "learning_rate": 4.618491961502214e-06, "loss": 0.2812, "num_tokens": 4110515018.0, "step": 5389 }, { "epoch": 7.3536741486728685, "grad_norm": 0.29189150164657673, "learning_rate": 4.615910892583118e-06, "loss": 0.2868, "num_tokens": 4111244827.0, "step": 5390 }, { "epoch": 7.355039685926432, "grad_norm": 0.2715797785975363, "learning_rate": 4.613335126796773e-06, "loss": 0.2801, "num_tokens": 4112066462.0, "step": 5391 }, { "epoch": 7.356405223179995, "grad_norm": 0.27964763982084956, "learning_rate": 4.610764664928924e-06, "loss": 0.2689, "num_tokens": 4112725106.0, "step": 5392 }, { "epoch": 7.357770760433558, "grad_norm": 0.28890728814337074, "learning_rate": 4.608199507763714e-06, "loss": 0.2849, "num_tokens": 4113472777.0, "step": 5393 }, { "epoch": 7.3591362976871215, "grad_norm": 0.3005250088305393, "learning_rate": 4.60563965608366e-06, "loss": 0.287, "num_tokens": 4114192929.0, "step": 5394 }, { "epoch": 7.360501834940685, "grad_norm": 0.26979262757335437, "learning_rate": 4.603085110669662e-06, "loss": 0.2859, "num_tokens": 4115004909.0, "step": 5395 }, { "epoch": 7.361867372194248, "grad_norm": 0.2801099151943942, "learning_rate": 4.600535872300995e-06, "loss": 0.2777, "num_tokens": 4115809696.0, "step": 5396 }, { "epoch": 7.363232909447811, "grad_norm": 0.2923394129422507, "learning_rate": 4.597991941755332e-06, "loss": 0.3005, "num_tokens": 4116574713.0, "step": 5397 }, { "epoch": 7.364598446701374, "grad_norm": 0.29478724081005436, "learning_rate": 4.595453319808711e-06, "loss": 0.3073, "num_tokens": 4117285435.0, "step": 5398 }, { "epoch": 7.365963983954937, "grad_norm": 0.25938367384964006, "learning_rate": 4.592920007235552e-06, "loss": 0.2741, "num_tokens": 4118112486.0, "step": 5399 }, { "epoch": 7.3673295212085, "grad_norm": 0.28956901090755266, "learning_rate": 4.590392004808664e-06, "loss": 0.312, "num_tokens": 4118988403.0, "step": 5400 }, { "epoch": 7.368695058462063, "grad_norm": 0.2836841318270409, "learning_rate": 4.5878693132992285e-06, "loss": 0.2935, "num_tokens": 4119800262.0, "step": 5401 }, { "epoch": 7.370060595715627, "grad_norm": 0.28261382563473336, "learning_rate": 4.585351933476811e-06, "loss": 0.2972, "num_tokens": 4120554871.0, "step": 5402 }, { "epoch": 7.37142613296919, "grad_norm": 0.29063790952507884, "learning_rate": 4.582839866109353e-06, "loss": 0.3005, "num_tokens": 4121337220.0, "step": 5403 }, { "epoch": 7.372791670222753, "grad_norm": 0.30380212369872217, "learning_rate": 4.580333111963183e-06, "loss": 0.2802, "num_tokens": 4122021848.0, "step": 5404 }, { "epoch": 7.374157207476316, "grad_norm": 0.28072421886130333, "learning_rate": 4.5778316718029955e-06, "loss": 0.2762, "num_tokens": 4122749179.0, "step": 5405 }, { "epoch": 7.37552274472988, "grad_norm": 0.28914502665892666, "learning_rate": 4.575335546391875e-06, "loss": 0.282, "num_tokens": 4123515932.0, "step": 5406 }, { "epoch": 7.376888281983443, "grad_norm": 0.2834168990295532, "learning_rate": 4.572844736491282e-06, "loss": 0.2879, "num_tokens": 4124270905.0, "step": 5407 }, { "epoch": 7.378253819237006, "grad_norm": 0.29944070053668886, "learning_rate": 4.57035924286105e-06, "loss": 0.2911, "num_tokens": 4124975602.0, "step": 5408 }, { "epoch": 7.379619356490569, "grad_norm": 0.30401226588967944, "learning_rate": 4.5678790662594055e-06, "loss": 0.2806, "num_tokens": 4125744891.0, "step": 5409 }, { "epoch": 7.380984893744133, "grad_norm": 0.27883886712362194, "learning_rate": 4.565404207442934e-06, "loss": 0.289, "num_tokens": 4126558346.0, "step": 5410 }, { "epoch": 7.382350430997696, "grad_norm": 0.2666229252863181, "learning_rate": 4.562934667166611e-06, "loss": 0.2803, "num_tokens": 4127364544.0, "step": 5411 }, { "epoch": 7.383715968251259, "grad_norm": 0.3066177998656175, "learning_rate": 4.560470446183785e-06, "loss": 0.2848, "num_tokens": 4128157771.0, "step": 5412 }, { "epoch": 7.3850815055048225, "grad_norm": 0.2958862083384003, "learning_rate": 4.558011545246186e-06, "loss": 0.2886, "num_tokens": 4128873959.0, "step": 5413 }, { "epoch": 7.386447042758386, "grad_norm": 0.2957865920348737, "learning_rate": 4.555557965103913e-06, "loss": 0.2892, "num_tokens": 4129575159.0, "step": 5414 }, { "epoch": 7.387812580011948, "grad_norm": 0.2995082645278457, "learning_rate": 4.5531097065054524e-06, "loss": 0.3042, "num_tokens": 4130343499.0, "step": 5415 }, { "epoch": 7.389178117265511, "grad_norm": 0.29348165537333626, "learning_rate": 4.550666770197662e-06, "loss": 0.2944, "num_tokens": 4131102787.0, "step": 5416 }, { "epoch": 7.390543654519075, "grad_norm": 0.2749776971462745, "learning_rate": 4.548229156925768e-06, "loss": 0.2884, "num_tokens": 4131898766.0, "step": 5417 }, { "epoch": 7.391909191772638, "grad_norm": 0.299268259014071, "learning_rate": 4.545796867433393e-06, "loss": 0.2806, "num_tokens": 4132656536.0, "step": 5418 }, { "epoch": 7.393274729026201, "grad_norm": 0.2996930051516492, "learning_rate": 4.543369902462511e-06, "loss": 0.2887, "num_tokens": 4133346237.0, "step": 5419 }, { "epoch": 7.394640266279764, "grad_norm": 0.2827204672565976, "learning_rate": 4.5409482627534925e-06, "loss": 0.2864, "num_tokens": 4134189582.0, "step": 5420 }, { "epoch": 7.396005803533328, "grad_norm": 0.26687686661184296, "learning_rate": 4.538531949045073e-06, "loss": 0.2702, "num_tokens": 4134989699.0, "step": 5421 }, { "epoch": 7.397371340786891, "grad_norm": 0.2868110647599363, "learning_rate": 4.536120962074359e-06, "loss": 0.2785, "num_tokens": 4135741480.0, "step": 5422 }, { "epoch": 7.398736878040454, "grad_norm": 0.3007148711540325, "learning_rate": 4.533715302576847e-06, "loss": 0.2856, "num_tokens": 4136500074.0, "step": 5423 }, { "epoch": 7.400102415294017, "grad_norm": 0.3027098256374047, "learning_rate": 4.531314971286395e-06, "loss": 0.2799, "num_tokens": 4137173636.0, "step": 5424 }, { "epoch": 7.401467952547581, "grad_norm": 0.27890606050152106, "learning_rate": 4.52891996893524e-06, "loss": 0.2854, "num_tokens": 4137924798.0, "step": 5425 }, { "epoch": 7.402833489801144, "grad_norm": 0.2924390987228382, "learning_rate": 4.526530296253994e-06, "loss": 0.2923, "num_tokens": 4138703041.0, "step": 5426 }, { "epoch": 7.404199027054707, "grad_norm": 0.28818100144783143, "learning_rate": 4.524145953971645e-06, "loss": 0.2906, "num_tokens": 4139501233.0, "step": 5427 }, { "epoch": 7.40556456430827, "grad_norm": 0.28259050255787055, "learning_rate": 4.521766942815548e-06, "loss": 0.2882, "num_tokens": 4140262974.0, "step": 5428 }, { "epoch": 7.406930101561834, "grad_norm": 0.29144418547316453, "learning_rate": 4.519393263511441e-06, "loss": 0.2859, "num_tokens": 4141023091.0, "step": 5429 }, { "epoch": 7.408295638815396, "grad_norm": 0.2906133966721517, "learning_rate": 4.517024916783429e-06, "loss": 0.2973, "num_tokens": 4141814281.0, "step": 5430 }, { "epoch": 7.409661176068959, "grad_norm": 0.2877986009837965, "learning_rate": 4.514661903353989e-06, "loss": 0.2907, "num_tokens": 4142606230.0, "step": 5431 }, { "epoch": 7.4110267133225225, "grad_norm": 0.293136215841417, "learning_rate": 4.512304223943978e-06, "loss": 0.2877, "num_tokens": 4143312583.0, "step": 5432 }, { "epoch": 7.412392250576086, "grad_norm": 0.26960896856460204, "learning_rate": 4.50995187927262e-06, "loss": 0.2923, "num_tokens": 4144215805.0, "step": 5433 }, { "epoch": 7.413757787829649, "grad_norm": 0.3017463115182708, "learning_rate": 4.5076048700575135e-06, "loss": 0.3036, "num_tokens": 4144951031.0, "step": 5434 }, { "epoch": 7.415123325083212, "grad_norm": 0.29322616911482696, "learning_rate": 4.505263197014632e-06, "loss": 0.2846, "num_tokens": 4145712658.0, "step": 5435 }, { "epoch": 7.4164888623367755, "grad_norm": 0.29520438677817057, "learning_rate": 4.502926860858317e-06, "loss": 0.2948, "num_tokens": 4146455701.0, "step": 5436 }, { "epoch": 7.417854399590339, "grad_norm": 0.2844249700415621, "learning_rate": 4.500595862301281e-06, "loss": 0.2854, "num_tokens": 4147205041.0, "step": 5437 }, { "epoch": 7.419219936843902, "grad_norm": 0.2816027958861822, "learning_rate": 4.498270202054612e-06, "loss": 0.2814, "num_tokens": 4147930925.0, "step": 5438 }, { "epoch": 7.420585474097465, "grad_norm": 0.2813772860169446, "learning_rate": 4.495949880827772e-06, "loss": 0.2824, "num_tokens": 4148658370.0, "step": 5439 }, { "epoch": 7.4219510113510285, "grad_norm": 0.28904547655420065, "learning_rate": 4.493634899328584e-06, "loss": 0.2962, "num_tokens": 4149549310.0, "step": 5440 }, { "epoch": 7.423316548604592, "grad_norm": 0.2893347232946484, "learning_rate": 4.491325258263255e-06, "loss": 0.2945, "num_tokens": 4150300638.0, "step": 5441 }, { "epoch": 7.424682085858155, "grad_norm": 0.28442629580279877, "learning_rate": 4.4890209583363506e-06, "loss": 0.3035, "num_tokens": 4151143648.0, "step": 5442 }, { "epoch": 7.426047623111718, "grad_norm": 0.29732613014493586, "learning_rate": 4.4867220002508166e-06, "loss": 0.2883, "num_tokens": 4151838169.0, "step": 5443 }, { "epoch": 7.4274131603652815, "grad_norm": 0.27540266614059844, "learning_rate": 4.4844283847079665e-06, "loss": 0.2894, "num_tokens": 4152620430.0, "step": 5444 }, { "epoch": 7.428778697618845, "grad_norm": 0.2958675159037956, "learning_rate": 4.482140112407479e-06, "loss": 0.2793, "num_tokens": 4153337195.0, "step": 5445 }, { "epoch": 7.430144234872408, "grad_norm": 0.29670590494807714, "learning_rate": 4.47985718404741e-06, "loss": 0.299, "num_tokens": 4154082976.0, "step": 5446 }, { "epoch": 7.43150977212597, "grad_norm": 0.29124564614460424, "learning_rate": 4.477579600324181e-06, "loss": 0.2816, "num_tokens": 4154858235.0, "step": 5447 }, { "epoch": 7.432875309379534, "grad_norm": 0.2891224825878074, "learning_rate": 4.475307361932587e-06, "loss": 0.2747, "num_tokens": 4155578727.0, "step": 5448 }, { "epoch": 7.434240846633097, "grad_norm": 0.2946016583627011, "learning_rate": 4.4730404695657845e-06, "loss": 0.2975, "num_tokens": 4156331529.0, "step": 5449 }, { "epoch": 7.43560638388666, "grad_norm": 0.2817997951601349, "learning_rate": 4.470778923915306e-06, "loss": 0.3014, "num_tokens": 4157164345.0, "step": 5450 }, { "epoch": 7.436971921140223, "grad_norm": 0.30133621528020327, "learning_rate": 4.468522725671053e-06, "loss": 0.2739, "num_tokens": 4157849679.0, "step": 5451 }, { "epoch": 7.438337458393787, "grad_norm": 0.29051768193275956, "learning_rate": 4.46627187552129e-06, "loss": 0.2906, "num_tokens": 4158598366.0, "step": 5452 }, { "epoch": 7.43970299564735, "grad_norm": 0.28992983318966803, "learning_rate": 4.464026374152661e-06, "loss": 0.2919, "num_tokens": 4159356430.0, "step": 5453 }, { "epoch": 7.441068532900913, "grad_norm": 0.30405138482806715, "learning_rate": 4.461786222250165e-06, "loss": 0.2937, "num_tokens": 4160090988.0, "step": 5454 }, { "epoch": 7.4424340701544764, "grad_norm": 0.2988886901941754, "learning_rate": 4.459551420497175e-06, "loss": 0.2935, "num_tokens": 4160914099.0, "step": 5455 }, { "epoch": 7.44379960740804, "grad_norm": 0.27287441650212546, "learning_rate": 4.457321969575438e-06, "loss": 0.2851, "num_tokens": 4161742068.0, "step": 5456 }, { "epoch": 7.445165144661603, "grad_norm": 0.281505357906846, "learning_rate": 4.455097870165058e-06, "loss": 0.2926, "num_tokens": 4162594640.0, "step": 5457 }, { "epoch": 7.446530681915166, "grad_norm": 0.26773772635060705, "learning_rate": 4.452879122944509e-06, "loss": 0.2804, "num_tokens": 4163356647.0, "step": 5458 }, { "epoch": 7.4478962191687295, "grad_norm": 0.2820849985267738, "learning_rate": 4.450665728590643e-06, "loss": 0.2783, "num_tokens": 4164146467.0, "step": 5459 }, { "epoch": 7.449261756422293, "grad_norm": 0.2935544251234138, "learning_rate": 4.4484576877786666e-06, "loss": 0.2917, "num_tokens": 4164915824.0, "step": 5460 }, { "epoch": 7.450627293675856, "grad_norm": 0.28017025782521854, "learning_rate": 4.4462550011821526e-06, "loss": 0.2997, "num_tokens": 4165695668.0, "step": 5461 }, { "epoch": 7.451992830929418, "grad_norm": 0.2854054802455859, "learning_rate": 4.444057669473054e-06, "loss": 0.287, "num_tokens": 4166426520.0, "step": 5462 }, { "epoch": 7.453358368182982, "grad_norm": 0.296247665502273, "learning_rate": 4.441865693321676e-06, "loss": 0.2914, "num_tokens": 4167262428.0, "step": 5463 }, { "epoch": 7.454723905436545, "grad_norm": 0.2943415002070058, "learning_rate": 4.439679073396696e-06, "loss": 0.2871, "num_tokens": 4167975944.0, "step": 5464 }, { "epoch": 7.456089442690108, "grad_norm": 0.28731576763923555, "learning_rate": 4.437497810365158e-06, "loss": 0.2871, "num_tokens": 4168729649.0, "step": 5465 }, { "epoch": 7.457454979943671, "grad_norm": 0.2808774893817491, "learning_rate": 4.435321904892471e-06, "loss": 0.2902, "num_tokens": 4169513202.0, "step": 5466 }, { "epoch": 7.458820517197235, "grad_norm": 0.28737276328136024, "learning_rate": 4.433151357642409e-06, "loss": 0.2624, "num_tokens": 4170232010.0, "step": 5467 }, { "epoch": 7.460186054450798, "grad_norm": 0.2772609120915931, "learning_rate": 4.430986169277112e-06, "loss": 0.2996, "num_tokens": 4170953220.0, "step": 5468 }, { "epoch": 7.461551591704361, "grad_norm": 0.28433392464255486, "learning_rate": 4.428826340457088e-06, "loss": 0.2996, "num_tokens": 4171707166.0, "step": 5469 }, { "epoch": 7.462917128957924, "grad_norm": 0.2995586437173122, "learning_rate": 4.4266718718412e-06, "loss": 0.2832, "num_tokens": 4172493230.0, "step": 5470 }, { "epoch": 7.464282666211488, "grad_norm": 0.29072756530230204, "learning_rate": 4.424522764086692e-06, "loss": 0.2869, "num_tokens": 4173224197.0, "step": 5471 }, { "epoch": 7.465648203465051, "grad_norm": 0.2739319834195488, "learning_rate": 4.422379017849157e-06, "loss": 0.29, "num_tokens": 4174001840.0, "step": 5472 }, { "epoch": 7.467013740718614, "grad_norm": 0.28048031579568855, "learning_rate": 4.420240633782562e-06, "loss": 0.28, "num_tokens": 4174752645.0, "step": 5473 }, { "epoch": 7.468379277972177, "grad_norm": 0.29224680447888185, "learning_rate": 4.418107612539235e-06, "loss": 0.2825, "num_tokens": 4175423825.0, "step": 5474 }, { "epoch": 7.469744815225741, "grad_norm": 0.2919603760427891, "learning_rate": 4.4159799547698704e-06, "loss": 0.298, "num_tokens": 4176225836.0, "step": 5475 }, { "epoch": 7.471110352479304, "grad_norm": 0.28241906603337735, "learning_rate": 4.413857661123519e-06, "loss": 0.2972, "num_tokens": 4176997683.0, "step": 5476 }, { "epoch": 7.472475889732867, "grad_norm": 0.29280680065855824, "learning_rate": 4.411740732247607e-06, "loss": 0.3017, "num_tokens": 4177852628.0, "step": 5477 }, { "epoch": 7.47384142698643, "grad_norm": 0.3007108965581469, "learning_rate": 4.4096291687879135e-06, "loss": 0.2861, "num_tokens": 4178509634.0, "step": 5478 }, { "epoch": 7.475206964239993, "grad_norm": 0.29726810968832174, "learning_rate": 4.407522971388586e-06, "loss": 0.2844, "num_tokens": 4179195843.0, "step": 5479 }, { "epoch": 7.476572501493556, "grad_norm": 0.2849076851085166, "learning_rate": 4.405422140692136e-06, "loss": 0.3072, "num_tokens": 4180004847.0, "step": 5480 }, { "epoch": 7.477938038747119, "grad_norm": 0.283002911138277, "learning_rate": 4.403326677339432e-06, "loss": 0.2781, "num_tokens": 4180756201.0, "step": 5481 }, { "epoch": 7.4793035760006825, "grad_norm": 0.31856669012983324, "learning_rate": 4.401236581969715e-06, "loss": 0.3016, "num_tokens": 4181439582.0, "step": 5482 }, { "epoch": 7.480669113254246, "grad_norm": 0.2696114329515583, "learning_rate": 4.399151855220577e-06, "loss": 0.2882, "num_tokens": 4182130928.0, "step": 5483 }, { "epoch": 7.482034650507809, "grad_norm": 0.2910818696094256, "learning_rate": 4.39707249772798e-06, "loss": 0.2786, "num_tokens": 4182890035.0, "step": 5484 }, { "epoch": 7.483400187761372, "grad_norm": 0.28710885898844735, "learning_rate": 4.394998510126246e-06, "loss": 0.2915, "num_tokens": 4183683743.0, "step": 5485 }, { "epoch": 7.4847657250149355, "grad_norm": 0.2789762179332812, "learning_rate": 4.39292989304806e-06, "loss": 0.2906, "num_tokens": 4184467586.0, "step": 5486 }, { "epoch": 7.486131262268499, "grad_norm": 0.28964844424437336, "learning_rate": 4.390866647124466e-06, "loss": 0.2977, "num_tokens": 4185241141.0, "step": 5487 }, { "epoch": 7.487496799522062, "grad_norm": 0.2909301086874611, "learning_rate": 4.388808772984868e-06, "loss": 0.2906, "num_tokens": 4186021640.0, "step": 5488 }, { "epoch": 7.488862336775625, "grad_norm": 0.2809242197441037, "learning_rate": 4.386756271257042e-06, "loss": 0.2665, "num_tokens": 4186717049.0, "step": 5489 }, { "epoch": 7.4902278740291885, "grad_norm": 0.27958575578736256, "learning_rate": 4.38470914256711e-06, "loss": 0.3071, "num_tokens": 4187583237.0, "step": 5490 }, { "epoch": 7.491593411282752, "grad_norm": 0.2805630739965468, "learning_rate": 4.3826673875395676e-06, "loss": 0.28, "num_tokens": 4188327879.0, "step": 5491 }, { "epoch": 7.492958948536315, "grad_norm": 0.2966114971903506, "learning_rate": 4.380631006797264e-06, "loss": 0.2996, "num_tokens": 4189066775.0, "step": 5492 }, { "epoch": 7.494324485789878, "grad_norm": 0.28199914436331985, "learning_rate": 4.378600000961408e-06, "loss": 0.281, "num_tokens": 4189906569.0, "step": 5493 }, { "epoch": 7.495690023043442, "grad_norm": 0.2773027767809688, "learning_rate": 4.376574370651572e-06, "loss": 0.2757, "num_tokens": 4190611056.0, "step": 5494 }, { "epoch": 7.497055560297005, "grad_norm": 0.2912880875555774, "learning_rate": 4.3745541164856925e-06, "loss": 0.2935, "num_tokens": 4191347172.0, "step": 5495 }, { "epoch": 7.498421097550567, "grad_norm": 0.30721167214579637, "learning_rate": 4.3725392390800605e-06, "loss": 0.3105, "num_tokens": 4192128320.0, "step": 5496 }, { "epoch": 7.49978663480413, "grad_norm": 0.2840696047018739, "learning_rate": 4.3705297390493205e-06, "loss": 0.2925, "num_tokens": 4192895087.0, "step": 5497 }, { "epoch": 7.501152172057694, "grad_norm": 0.29855904513258424, "learning_rate": 4.368525617006495e-06, "loss": 0.2981, "num_tokens": 4193620655.0, "step": 5498 }, { "epoch": 7.502517709311257, "grad_norm": 0.2780581013456812, "learning_rate": 4.3665268735629466e-06, "loss": 0.2931, "num_tokens": 4194457929.0, "step": 5499 }, { "epoch": 7.50388324656482, "grad_norm": 0.2831827253297952, "learning_rate": 4.3645335093284106e-06, "loss": 0.2916, "num_tokens": 4195198127.0, "step": 5500 }, { "epoch": 7.5052487838183835, "grad_norm": 0.29652417812078175, "learning_rate": 4.362545524910972e-06, "loss": 0.293, "num_tokens": 4195999689.0, "step": 5501 }, { "epoch": 7.506614321071947, "grad_norm": 0.2903475299054689, "learning_rate": 4.360562920917083e-06, "loss": 0.2867, "num_tokens": 4196746598.0, "step": 5502 }, { "epoch": 7.50797985832551, "grad_norm": 0.27701049690135304, "learning_rate": 4.358585697951544e-06, "loss": 0.305, "num_tokens": 4197526887.0, "step": 5503 }, { "epoch": 7.509345395579073, "grad_norm": 0.28601014033464456, "learning_rate": 4.356613856617526e-06, "loss": 0.2782, "num_tokens": 4198238171.0, "step": 5504 }, { "epoch": 7.5107109328326365, "grad_norm": 0.2676215892750706, "learning_rate": 4.354647397516551e-06, "loss": 0.2837, "num_tokens": 4199013631.0, "step": 5505 }, { "epoch": 7.5120764700862, "grad_norm": 0.28513794765555667, "learning_rate": 4.3526863212485e-06, "loss": 0.2814, "num_tokens": 4199743217.0, "step": 5506 }, { "epoch": 7.513442007339763, "grad_norm": 0.3116602037579748, "learning_rate": 4.350730628411609e-06, "loss": 0.2806, "num_tokens": 4200372510.0, "step": 5507 }, { "epoch": 7.514807544593326, "grad_norm": 0.29254686828244203, "learning_rate": 4.348780319602479e-06, "loss": 0.2819, "num_tokens": 4201109315.0, "step": 5508 }, { "epoch": 7.5161730818468895, "grad_norm": 0.2859825385517165, "learning_rate": 4.346835395416066e-06, "loss": 0.2866, "num_tokens": 4201822007.0, "step": 5509 }, { "epoch": 7.517538619100453, "grad_norm": 0.28953207233594186, "learning_rate": 4.344895856445682e-06, "loss": 0.286, "num_tokens": 4202609220.0, "step": 5510 }, { "epoch": 7.518904156354015, "grad_norm": 0.27019038074824275, "learning_rate": 4.3429617032829914e-06, "loss": 0.2925, "num_tokens": 4203406508.0, "step": 5511 }, { "epoch": 7.520269693607578, "grad_norm": 0.28756554142850227, "learning_rate": 4.3410329365180255e-06, "loss": 0.2887, "num_tokens": 4204161855.0, "step": 5512 }, { "epoch": 7.521635230861142, "grad_norm": 0.2730739821952042, "learning_rate": 4.339109556739164e-06, "loss": 0.3023, "num_tokens": 4204985656.0, "step": 5513 }, { "epoch": 7.523000768114705, "grad_norm": 0.29123947129605304, "learning_rate": 4.3371915645331505e-06, "loss": 0.2919, "num_tokens": 4205738691.0, "step": 5514 }, { "epoch": 7.524366305368268, "grad_norm": 0.288799835923388, "learning_rate": 4.3352789604850785e-06, "loss": 0.2867, "num_tokens": 4206429060.0, "step": 5515 }, { "epoch": 7.525731842621831, "grad_norm": 0.29072394345339003, "learning_rate": 4.333371745178402e-06, "loss": 0.2902, "num_tokens": 4207210208.0, "step": 5516 }, { "epoch": 7.527097379875395, "grad_norm": 0.28665856274878454, "learning_rate": 4.331469919194929e-06, "loss": 0.2971, "num_tokens": 4207975644.0, "step": 5517 }, { "epoch": 7.528462917128958, "grad_norm": 0.3084678699968341, "learning_rate": 4.329573483114826e-06, "loss": 0.2876, "num_tokens": 4208678975.0, "step": 5518 }, { "epoch": 7.529828454382521, "grad_norm": 0.27845116415319504, "learning_rate": 4.327682437516611e-06, "loss": 0.2787, "num_tokens": 4209415556.0, "step": 5519 }, { "epoch": 7.531193991636084, "grad_norm": 0.28988954916620907, "learning_rate": 4.325796782977161e-06, "loss": 0.2971, "num_tokens": 4210201705.0, "step": 5520 }, { "epoch": 7.532559528889648, "grad_norm": 0.28276079971550583, "learning_rate": 4.323916520071709e-06, "loss": 0.2953, "num_tokens": 4210976350.0, "step": 5521 }, { "epoch": 7.533925066143211, "grad_norm": 0.27057952098576005, "learning_rate": 4.3220416493738415e-06, "loss": 0.2949, "num_tokens": 4211788177.0, "step": 5522 }, { "epoch": 7.535290603396774, "grad_norm": 0.29108223408955247, "learning_rate": 4.320172171455495e-06, "loss": 0.2747, "num_tokens": 4212533016.0, "step": 5523 }, { "epoch": 7.536656140650337, "grad_norm": 0.29099681255453463, "learning_rate": 4.318308086886975e-06, "loss": 0.2796, "num_tokens": 4213246739.0, "step": 5524 }, { "epoch": 7.538021677903901, "grad_norm": 0.3058051845732115, "learning_rate": 4.316449396236928e-06, "loss": 0.2848, "num_tokens": 4213917748.0, "step": 5525 }, { "epoch": 7.539387215157463, "grad_norm": 0.2809537298023906, "learning_rate": 4.314596100072359e-06, "loss": 0.2922, "num_tokens": 4214723890.0, "step": 5526 }, { "epoch": 7.540752752411027, "grad_norm": 0.2857893892817634, "learning_rate": 4.312748198958631e-06, "loss": 0.2858, "num_tokens": 4215514935.0, "step": 5527 }, { "epoch": 7.5421182896645895, "grad_norm": 0.27373873180802494, "learning_rate": 4.310905693459459e-06, "loss": 0.2961, "num_tokens": 4216296505.0, "step": 5528 }, { "epoch": 7.543483826918153, "grad_norm": 0.28812834475966337, "learning_rate": 4.309068584136909e-06, "loss": 0.2905, "num_tokens": 4217112885.0, "step": 5529 }, { "epoch": 7.544849364171716, "grad_norm": 0.29160601417719884, "learning_rate": 4.3072368715514045e-06, "loss": 0.2916, "num_tokens": 4217865474.0, "step": 5530 }, { "epoch": 7.546214901425279, "grad_norm": 0.27068731190624346, "learning_rate": 4.305410556261724e-06, "loss": 0.2842, "num_tokens": 4218624778.0, "step": 5531 }, { "epoch": 7.5475804386788425, "grad_norm": 0.2804256980017736, "learning_rate": 4.303589638824993e-06, "loss": 0.2782, "num_tokens": 4219373320.0, "step": 5532 }, { "epoch": 7.548945975932406, "grad_norm": 0.2819790825977116, "learning_rate": 4.301774119796699e-06, "loss": 0.2735, "num_tokens": 4220088313.0, "step": 5533 }, { "epoch": 7.550311513185969, "grad_norm": 0.2949954492704244, "learning_rate": 4.299963999730674e-06, "loss": 0.295, "num_tokens": 4220917675.0, "step": 5534 }, { "epoch": 7.551677050439532, "grad_norm": 0.2685396369334709, "learning_rate": 4.29815927917911e-06, "loss": 0.2734, "num_tokens": 4221707119.0, "step": 5535 }, { "epoch": 7.5530425876930956, "grad_norm": 0.2854111784888608, "learning_rate": 4.296359958692548e-06, "loss": 0.2851, "num_tokens": 4222388781.0, "step": 5536 }, { "epoch": 7.554408124946659, "grad_norm": 0.29481328268266876, "learning_rate": 4.2945660388198866e-06, "loss": 0.2851, "num_tokens": 4223162294.0, "step": 5537 }, { "epoch": 7.555773662200222, "grad_norm": 0.280717880689564, "learning_rate": 4.292777520108366e-06, "loss": 0.2896, "num_tokens": 4223915655.0, "step": 5538 }, { "epoch": 7.557139199453785, "grad_norm": 0.2808737336024921, "learning_rate": 4.29099440310359e-06, "loss": 0.2935, "num_tokens": 4224703165.0, "step": 5539 }, { "epoch": 7.558504736707349, "grad_norm": 0.2778100119371076, "learning_rate": 4.289216688349511e-06, "loss": 0.2731, "num_tokens": 4225431541.0, "step": 5540 }, { "epoch": 7.559870273960912, "grad_norm": 0.2963283093502776, "learning_rate": 4.287444376388429e-06, "loss": 0.2896, "num_tokens": 4226139751.0, "step": 5541 }, { "epoch": 7.561235811214475, "grad_norm": 0.26083037718284535, "learning_rate": 4.285677467761003e-06, "loss": 0.2808, "num_tokens": 4226983884.0, "step": 5542 }, { "epoch": 7.562601348468037, "grad_norm": 0.2762354698569836, "learning_rate": 4.283915963006241e-06, "loss": 0.2797, "num_tokens": 4227715997.0, "step": 5543 }, { "epoch": 7.563966885721602, "grad_norm": 0.2977192538939932, "learning_rate": 4.2821598626615e-06, "loss": 0.2856, "num_tokens": 4228496400.0, "step": 5544 }, { "epoch": 7.565332422975164, "grad_norm": 0.30384073950203583, "learning_rate": 4.28040916726249e-06, "loss": 0.2899, "num_tokens": 4229234000.0, "step": 5545 }, { "epoch": 7.566697960228727, "grad_norm": 0.2826549874362286, "learning_rate": 4.278663877343272e-06, "loss": 0.2885, "num_tokens": 4230028081.0, "step": 5546 }, { "epoch": 7.5680634974822905, "grad_norm": 0.2931766777781895, "learning_rate": 4.27692399343626e-06, "loss": 0.2815, "num_tokens": 4230778419.0, "step": 5547 }, { "epoch": 7.569429034735854, "grad_norm": 0.30171615012799285, "learning_rate": 4.275189516072217e-06, "loss": 0.2907, "num_tokens": 4231480697.0, "step": 5548 }, { "epoch": 7.570794571989417, "grad_norm": 0.28255383194069533, "learning_rate": 4.2734604457802565e-06, "loss": 0.2966, "num_tokens": 4232197174.0, "step": 5549 }, { "epoch": 7.57216010924298, "grad_norm": 0.2979784943092048, "learning_rate": 4.271736783087842e-06, "loss": 0.2803, "num_tokens": 4232922912.0, "step": 5550 }, { "epoch": 7.5735256464965435, "grad_norm": 0.3102687532706693, "learning_rate": 4.270018528520788e-06, "loss": 0.2807, "num_tokens": 4233602241.0, "step": 5551 }, { "epoch": 7.574891183750107, "grad_norm": 0.3040270316970514, "learning_rate": 4.268305682603264e-06, "loss": 0.2878, "num_tokens": 4234287631.0, "step": 5552 }, { "epoch": 7.57625672100367, "grad_norm": 0.28486627588810326, "learning_rate": 4.2665982458577795e-06, "loss": 0.2823, "num_tokens": 4235094778.0, "step": 5553 }, { "epoch": 7.577622258257233, "grad_norm": 0.28562839915117755, "learning_rate": 4.264896218805202e-06, "loss": 0.3029, "num_tokens": 4235893463.0, "step": 5554 }, { "epoch": 7.5789877955107965, "grad_norm": 0.29093525572846557, "learning_rate": 4.263199601964745e-06, "loss": 0.3041, "num_tokens": 4236664179.0, "step": 5555 }, { "epoch": 7.58035333276436, "grad_norm": 0.28649628275244793, "learning_rate": 4.261508395853977e-06, "loss": 0.2895, "num_tokens": 4237448857.0, "step": 5556 }, { "epoch": 7.581718870017923, "grad_norm": 0.29023922360946164, "learning_rate": 4.259822600988807e-06, "loss": 0.2954, "num_tokens": 4238252299.0, "step": 5557 }, { "epoch": 7.583084407271486, "grad_norm": 0.28278970989241153, "learning_rate": 4.2581422178834995e-06, "loss": 0.2866, "num_tokens": 4238974110.0, "step": 5558 }, { "epoch": 7.5844499445250495, "grad_norm": 0.2861830168474966, "learning_rate": 4.256467247050665e-06, "loss": 0.2768, "num_tokens": 4239676460.0, "step": 5559 }, { "epoch": 7.585815481778612, "grad_norm": 0.29332306123655494, "learning_rate": 4.254797689001269e-06, "loss": 0.2926, "num_tokens": 4240442537.0, "step": 5560 }, { "epoch": 7.587181019032175, "grad_norm": 0.3077591932906425, "learning_rate": 4.253133544244619e-06, "loss": 0.2949, "num_tokens": 4241219742.0, "step": 5561 }, { "epoch": 7.588546556285738, "grad_norm": 0.29779476957200324, "learning_rate": 4.251474813288371e-06, "loss": 0.2751, "num_tokens": 4241935772.0, "step": 5562 }, { "epoch": 7.589912093539302, "grad_norm": 0.27927185495461426, "learning_rate": 4.249821496638535e-06, "loss": 0.2833, "num_tokens": 4242679511.0, "step": 5563 }, { "epoch": 7.591277630792865, "grad_norm": 0.30383668561181393, "learning_rate": 4.248173594799468e-06, "loss": 0.2806, "num_tokens": 4243388949.0, "step": 5564 }, { "epoch": 7.592643168046428, "grad_norm": 0.28839744419849883, "learning_rate": 4.2465311082738675e-06, "loss": 0.2842, "num_tokens": 4244127946.0, "step": 5565 }, { "epoch": 7.594008705299991, "grad_norm": 0.277575072511341, "learning_rate": 4.24489403756279e-06, "loss": 0.2825, "num_tokens": 4244860431.0, "step": 5566 }, { "epoch": 7.595374242553555, "grad_norm": 0.29566341301053495, "learning_rate": 4.243262383165634e-06, "loss": 0.28, "num_tokens": 4245602666.0, "step": 5567 }, { "epoch": 7.596739779807118, "grad_norm": 0.28428240592982523, "learning_rate": 4.241636145580143e-06, "loss": 0.2939, "num_tokens": 4246362040.0, "step": 5568 }, { "epoch": 7.598105317060681, "grad_norm": 0.28789800408726396, "learning_rate": 4.2400153253024175e-06, "loss": 0.2819, "num_tokens": 4247185471.0, "step": 5569 }, { "epoch": 7.599470854314244, "grad_norm": 0.2930875909478503, "learning_rate": 4.238399922826896e-06, "loss": 0.2775, "num_tokens": 4247869239.0, "step": 5570 }, { "epoch": 7.600836391567808, "grad_norm": 0.2908056950715098, "learning_rate": 4.236789938646369e-06, "loss": 0.2841, "num_tokens": 4248607679.0, "step": 5571 }, { "epoch": 7.602201928821371, "grad_norm": 0.29652707909859144, "learning_rate": 4.235185373251972e-06, "loss": 0.2925, "num_tokens": 4249340741.0, "step": 5572 }, { "epoch": 7.603567466074934, "grad_norm": 0.2752126696273574, "learning_rate": 4.233586227133189e-06, "loss": 0.2866, "num_tokens": 4250090826.0, "step": 5573 }, { "epoch": 7.604933003328497, "grad_norm": 0.2861011942724902, "learning_rate": 4.2319925007778505e-06, "loss": 0.2933, "num_tokens": 4250919262.0, "step": 5574 }, { "epoch": 7.60629854058206, "grad_norm": 0.28690765331863044, "learning_rate": 4.230404194672131e-06, "loss": 0.2915, "num_tokens": 4251735124.0, "step": 5575 }, { "epoch": 7.607664077835624, "grad_norm": 0.29350382402124076, "learning_rate": 4.228821309300559e-06, "loss": 0.2803, "num_tokens": 4252507199.0, "step": 5576 }, { "epoch": 7.609029615089186, "grad_norm": 0.28399524870067033, "learning_rate": 4.227243845146e-06, "loss": 0.2891, "num_tokens": 4253297497.0, "step": 5577 }, { "epoch": 7.6103951523427495, "grad_norm": 0.27670218163137045, "learning_rate": 4.2256718026896695e-06, "loss": 0.2913, "num_tokens": 4254082039.0, "step": 5578 }, { "epoch": 7.611760689596313, "grad_norm": 0.27959873459909595, "learning_rate": 4.224105182411134e-06, "loss": 0.298, "num_tokens": 4254872067.0, "step": 5579 }, { "epoch": 7.613126226849876, "grad_norm": 0.289110533520963, "learning_rate": 4.222543984788297e-06, "loss": 0.294, "num_tokens": 4255625883.0, "step": 5580 }, { "epoch": 7.614491764103439, "grad_norm": 0.2875918585518052, "learning_rate": 4.220988210297415e-06, "loss": 0.2797, "num_tokens": 4256397935.0, "step": 5581 }, { "epoch": 7.615857301357003, "grad_norm": 0.2885833225803041, "learning_rate": 4.219437859413086e-06, "loss": 0.2881, "num_tokens": 4257103990.0, "step": 5582 }, { "epoch": 7.617222838610566, "grad_norm": 0.2807919712751618, "learning_rate": 4.217892932608255e-06, "loss": 0.2934, "num_tokens": 4257898798.0, "step": 5583 }, { "epoch": 7.618588375864129, "grad_norm": 0.27233146679435655, "learning_rate": 4.216353430354214e-06, "loss": 0.2886, "num_tokens": 4258763320.0, "step": 5584 }, { "epoch": 7.619953913117692, "grad_norm": 0.27678346836043727, "learning_rate": 4.214819353120597e-06, "loss": 0.3019, "num_tokens": 4259544492.0, "step": 5585 }, { "epoch": 7.621319450371256, "grad_norm": 0.28357288683533344, "learning_rate": 4.213290701375383e-06, "loss": 0.2892, "num_tokens": 4260319941.0, "step": 5586 }, { "epoch": 7.622684987624819, "grad_norm": 0.2969830462293607, "learning_rate": 4.2117674755849e-06, "loss": 0.2932, "num_tokens": 4261094297.0, "step": 5587 }, { "epoch": 7.624050524878382, "grad_norm": 0.29330705603640955, "learning_rate": 4.2102496762138175e-06, "loss": 0.2892, "num_tokens": 4261875853.0, "step": 5588 }, { "epoch": 7.625416062131945, "grad_norm": 0.2773242667946396, "learning_rate": 4.20873730372515e-06, "loss": 0.2802, "num_tokens": 4262633431.0, "step": 5589 }, { "epoch": 7.626781599385509, "grad_norm": 0.2945353791017067, "learning_rate": 4.207230358580257e-06, "loss": 0.2947, "num_tokens": 4263366019.0, "step": 5590 }, { "epoch": 7.628147136639072, "grad_norm": 0.27776865731710343, "learning_rate": 4.205728841238842e-06, "loss": 0.2751, "num_tokens": 4264138548.0, "step": 5591 }, { "epoch": 7.629512673892634, "grad_norm": 0.29616086777250555, "learning_rate": 4.204232752158954e-06, "loss": 0.2859, "num_tokens": 4264915910.0, "step": 5592 }, { "epoch": 7.6308782111461975, "grad_norm": 0.2735719654932958, "learning_rate": 4.202742091796985e-06, "loss": 0.2847, "num_tokens": 4265694151.0, "step": 5593 }, { "epoch": 7.632243748399761, "grad_norm": 0.2984871332395343, "learning_rate": 4.201256860607668e-06, "loss": 0.2865, "num_tokens": 4266397777.0, "step": 5594 }, { "epoch": 7.633609285653324, "grad_norm": 0.2784116586507101, "learning_rate": 4.1997770590440854e-06, "loss": 0.289, "num_tokens": 4267159364.0, "step": 5595 }, { "epoch": 7.634974822906887, "grad_norm": 0.29172227336466916, "learning_rate": 4.198302687557661e-06, "loss": 0.2926, "num_tokens": 4267961966.0, "step": 5596 }, { "epoch": 7.6363403601604505, "grad_norm": 0.29729220344750784, "learning_rate": 4.196833746598159e-06, "loss": 0.2967, "num_tokens": 4268702128.0, "step": 5597 }, { "epoch": 7.637705897414014, "grad_norm": 0.29126644904931126, "learning_rate": 4.195370236613692e-06, "loss": 0.2914, "num_tokens": 4269436421.0, "step": 5598 }, { "epoch": 7.639071434667577, "grad_norm": 0.2773331782508314, "learning_rate": 4.1939121580507144e-06, "loss": 0.3001, "num_tokens": 4270255269.0, "step": 5599 }, { "epoch": 7.64043697192114, "grad_norm": 0.2851600327493055, "learning_rate": 4.1924595113540186e-06, "loss": 0.2927, "num_tokens": 4271045928.0, "step": 5600 }, { "epoch": 7.6418025091747035, "grad_norm": 0.27819997971549026, "learning_rate": 4.191012296966747e-06, "loss": 0.2878, "num_tokens": 4271813367.0, "step": 5601 }, { "epoch": 7.643168046428267, "grad_norm": 0.2788639024349937, "learning_rate": 4.189570515330386e-06, "loss": 0.2749, "num_tokens": 4272611906.0, "step": 5602 }, { "epoch": 7.64453358368183, "grad_norm": 0.2737489157702638, "learning_rate": 4.1881341668847516e-06, "loss": 0.2803, "num_tokens": 4273415167.0, "step": 5603 }, { "epoch": 7.645899120935393, "grad_norm": 0.29677393666284346, "learning_rate": 4.18670325206802e-06, "loss": 0.2873, "num_tokens": 4274183571.0, "step": 5604 }, { "epoch": 7.6472646581889565, "grad_norm": 0.291786795038338, "learning_rate": 4.185277771316695e-06, "loss": 0.2937, "num_tokens": 4274919812.0, "step": 5605 }, { "epoch": 7.64863019544252, "grad_norm": 0.2905570775204992, "learning_rate": 4.1838577250656355e-06, "loss": 0.2946, "num_tokens": 4275708311.0, "step": 5606 }, { "epoch": 7.649995732696082, "grad_norm": 0.2796443072726138, "learning_rate": 4.18244311374803e-06, "loss": 0.2727, "num_tokens": 4276429570.0, "step": 5607 }, { "epoch": 7.651361269949646, "grad_norm": 0.30247731724349003, "learning_rate": 4.181033937795419e-06, "loss": 0.2898, "num_tokens": 4277148659.0, "step": 5608 }, { "epoch": 7.652726807203209, "grad_norm": 0.2702087161194403, "learning_rate": 4.1796301976376785e-06, "loss": 0.3066, "num_tokens": 4278018018.0, "step": 5609 }, { "epoch": 7.654092344456772, "grad_norm": 0.29704377220075745, "learning_rate": 4.178231893703029e-06, "loss": 0.2841, "num_tokens": 4278707498.0, "step": 5610 }, { "epoch": 7.655457881710335, "grad_norm": 0.29404108630823184, "learning_rate": 4.176839026418037e-06, "loss": 0.2866, "num_tokens": 4279407365.0, "step": 5611 }, { "epoch": 7.656823418963898, "grad_norm": 0.2892265506576681, "learning_rate": 4.1754515962075996e-06, "loss": 0.3015, "num_tokens": 4280197335.0, "step": 5612 }, { "epoch": 7.658188956217462, "grad_norm": 0.3090509401399202, "learning_rate": 4.174069603494967e-06, "loss": 0.2827, "num_tokens": 4280878851.0, "step": 5613 }, { "epoch": 7.659554493471025, "grad_norm": 0.2717310731524901, "learning_rate": 4.172693048701722e-06, "loss": 0.2934, "num_tokens": 4281653166.0, "step": 5614 }, { "epoch": 7.660920030724588, "grad_norm": 0.2803100130306492, "learning_rate": 4.1713219322477916e-06, "loss": 0.289, "num_tokens": 4282414805.0, "step": 5615 }, { "epoch": 7.662285567978151, "grad_norm": 0.289653520703531, "learning_rate": 4.169956254551448e-06, "loss": 0.2927, "num_tokens": 4283220922.0, "step": 5616 }, { "epoch": 7.663651105231715, "grad_norm": 0.2844790787885613, "learning_rate": 4.168596016029295e-06, "loss": 0.2723, "num_tokens": 4283942755.0, "step": 5617 }, { "epoch": 7.665016642485278, "grad_norm": 0.31062336843051996, "learning_rate": 4.167241217096287e-06, "loss": 0.2935, "num_tokens": 4284729803.0, "step": 5618 }, { "epoch": 7.666382179738841, "grad_norm": 0.2978729868674374, "learning_rate": 4.165891858165709e-06, "loss": 0.2764, "num_tokens": 4285479439.0, "step": 5619 }, { "epoch": 7.667747716992404, "grad_norm": 0.28443624960506053, "learning_rate": 4.164547939649199e-06, "loss": 0.2927, "num_tokens": 4286246199.0, "step": 5620 }, { "epoch": 7.669113254245968, "grad_norm": 0.2838944677982194, "learning_rate": 4.1632094619567226e-06, "loss": 0.2959, "num_tokens": 4287012966.0, "step": 5621 }, { "epoch": 7.670478791499531, "grad_norm": 0.2921935260294416, "learning_rate": 4.161876425496596e-06, "loss": 0.297, "num_tokens": 4287795106.0, "step": 5622 }, { "epoch": 7.671844328753094, "grad_norm": 0.2992458366729758, "learning_rate": 4.160548830675468e-06, "loss": 0.2994, "num_tokens": 4288575740.0, "step": 5623 }, { "epoch": 7.6732098660066566, "grad_norm": 0.260789230178529, "learning_rate": 4.1592266778983284e-06, "loss": 0.2785, "num_tokens": 4289366514.0, "step": 5624 }, { "epoch": 7.67457540326022, "grad_norm": 0.28821751924095423, "learning_rate": 4.157909967568514e-06, "loss": 0.3004, "num_tokens": 4290145448.0, "step": 5625 }, { "epoch": 7.675940940513783, "grad_norm": 0.2854635207872895, "learning_rate": 4.156598700087693e-06, "loss": 0.29, "num_tokens": 4290939396.0, "step": 5626 }, { "epoch": 7.677306477767346, "grad_norm": 0.26353933511754135, "learning_rate": 4.155292875855876e-06, "loss": 0.279, "num_tokens": 4291772439.0, "step": 5627 }, { "epoch": 7.67867201502091, "grad_norm": 0.30037827887977936, "learning_rate": 4.153992495271414e-06, "loss": 0.3029, "num_tokens": 4292457538.0, "step": 5628 }, { "epoch": 7.680037552274473, "grad_norm": 0.29344820187611126, "learning_rate": 4.152697558730997e-06, "loss": 0.2953, "num_tokens": 4293227394.0, "step": 5629 }, { "epoch": 7.681403089528036, "grad_norm": 0.28252011766670954, "learning_rate": 4.151408066629651e-06, "loss": 0.2857, "num_tokens": 4293985898.0, "step": 5630 }, { "epoch": 7.682768626781599, "grad_norm": 0.2833665556272357, "learning_rate": 4.150124019360751e-06, "loss": 0.291, "num_tokens": 4294802281.0, "step": 5631 }, { "epoch": 7.684134164035163, "grad_norm": 0.2949207678945514, "learning_rate": 4.148845417316e-06, "loss": 0.2877, "num_tokens": 4295554157.0, "step": 5632 }, { "epoch": 7.685499701288726, "grad_norm": 0.2785784704655681, "learning_rate": 4.1475722608854405e-06, "loss": 0.2816, "num_tokens": 4296332249.0, "step": 5633 }, { "epoch": 7.686865238542289, "grad_norm": 0.29077538232275674, "learning_rate": 4.146304550457467e-06, "loss": 0.2895, "num_tokens": 4297064083.0, "step": 5634 }, { "epoch": 7.688230775795852, "grad_norm": 0.2858745577384408, "learning_rate": 4.145042286418794e-06, "loss": 0.2938, "num_tokens": 4297864897.0, "step": 5635 }, { "epoch": 7.689596313049416, "grad_norm": 0.2840246297939456, "learning_rate": 4.143785469154487e-06, "loss": 0.3032, "num_tokens": 4298641227.0, "step": 5636 }, { "epoch": 7.690961850302979, "grad_norm": 0.3080614061492053, "learning_rate": 4.142534099047946e-06, "loss": 0.3074, "num_tokens": 4299419364.0, "step": 5637 }, { "epoch": 7.692327387556542, "grad_norm": 0.3070333514702857, "learning_rate": 4.141288176480911e-06, "loss": 0.2984, "num_tokens": 4300134093.0, "step": 5638 }, { "epoch": 7.693692924810105, "grad_norm": 0.2852961364145434, "learning_rate": 4.1400477018334596e-06, "loss": 0.2868, "num_tokens": 4300913438.0, "step": 5639 }, { "epoch": 7.695058462063669, "grad_norm": 0.28157916858584026, "learning_rate": 4.138812675484003e-06, "loss": 0.2781, "num_tokens": 4301625451.0, "step": 5640 }, { "epoch": 7.696423999317231, "grad_norm": 0.28669481358688215, "learning_rate": 4.137583097809299e-06, "loss": 0.2922, "num_tokens": 4302348341.0, "step": 5641 }, { "epoch": 7.697789536570794, "grad_norm": 0.2961703637982039, "learning_rate": 4.136358969184436e-06, "loss": 0.2819, "num_tokens": 4303083122.0, "step": 5642 }, { "epoch": 7.6991550738243575, "grad_norm": 0.3023646652947815, "learning_rate": 4.135140289982842e-06, "loss": 0.3055, "num_tokens": 4303867621.0, "step": 5643 }, { "epoch": 7.700520611077921, "grad_norm": 0.29275264682148244, "learning_rate": 4.133927060576285e-06, "loss": 0.2933, "num_tokens": 4304628453.0, "step": 5644 }, { "epoch": 7.701886148331484, "grad_norm": 0.2815601753806125, "learning_rate": 4.132719281334867e-06, "loss": 0.2892, "num_tokens": 4305363500.0, "step": 5645 }, { "epoch": 7.703251685585047, "grad_norm": 0.27973668899406307, "learning_rate": 4.131516952627031e-06, "loss": 0.2925, "num_tokens": 4306134091.0, "step": 5646 }, { "epoch": 7.7046172228386105, "grad_norm": 0.29792929738376706, "learning_rate": 4.130320074819555e-06, "loss": 0.2961, "num_tokens": 4306888889.0, "step": 5647 }, { "epoch": 7.705982760092174, "grad_norm": 0.2797997554251041, "learning_rate": 4.129128648277553e-06, "loss": 0.2934, "num_tokens": 4307682161.0, "step": 5648 }, { "epoch": 7.707348297345737, "grad_norm": 0.28398941960501, "learning_rate": 4.127942673364479e-06, "loss": 0.2934, "num_tokens": 4308468276.0, "step": 5649 }, { "epoch": 7.7087138345993, "grad_norm": 0.27675560658832576, "learning_rate": 4.126762150442124e-06, "loss": 0.2716, "num_tokens": 4309211531.0, "step": 5650 }, { "epoch": 7.7100793718528635, "grad_norm": 0.2763742966607643, "learning_rate": 4.12558707987061e-06, "loss": 0.2894, "num_tokens": 4309938678.0, "step": 5651 }, { "epoch": 7.711444909106427, "grad_norm": 0.28194749026904875, "learning_rate": 4.124417462008406e-06, "loss": 0.302, "num_tokens": 4310728728.0, "step": 5652 }, { "epoch": 7.71281044635999, "grad_norm": 0.2918012726418778, "learning_rate": 4.123253297212306e-06, "loss": 0.2919, "num_tokens": 4311513003.0, "step": 5653 }, { "epoch": 7.714175983613553, "grad_norm": 0.28513260630479376, "learning_rate": 4.122094585837449e-06, "loss": 0.296, "num_tokens": 4312240353.0, "step": 5654 }, { "epoch": 7.7155415208671165, "grad_norm": 0.2986469640240304, "learning_rate": 4.12094132823731e-06, "loss": 0.2913, "num_tokens": 4312948725.0, "step": 5655 }, { "epoch": 7.716907058120679, "grad_norm": 0.2813346257239989, "learning_rate": 4.119793524763693e-06, "loss": 0.3016, "num_tokens": 4313671782.0, "step": 5656 }, { "epoch": 7.718272595374243, "grad_norm": 0.2901148117069326, "learning_rate": 4.118651175766747e-06, "loss": 0.277, "num_tokens": 4314409234.0, "step": 5657 }, { "epoch": 7.719638132627805, "grad_norm": 0.28243055689432267, "learning_rate": 4.117514281594951e-06, "loss": 0.2916, "num_tokens": 4315219812.0, "step": 5658 }, { "epoch": 7.721003669881369, "grad_norm": 0.2890663620089343, "learning_rate": 4.116382842595125e-06, "loss": 0.2847, "num_tokens": 4316004176.0, "step": 5659 }, { "epoch": 7.722369207134932, "grad_norm": 0.27800011314557493, "learning_rate": 4.115256859112418e-06, "loss": 0.2893, "num_tokens": 4316775897.0, "step": 5660 }, { "epoch": 7.723734744388495, "grad_norm": 0.29737135181559726, "learning_rate": 4.114136331490323e-06, "loss": 0.2834, "num_tokens": 4317434547.0, "step": 5661 }, { "epoch": 7.725100281642058, "grad_norm": 0.2946947426275553, "learning_rate": 4.113021260070659e-06, "loss": 0.2818, "num_tokens": 4318145943.0, "step": 5662 }, { "epoch": 7.726465818895622, "grad_norm": 0.2748707895748843, "learning_rate": 4.111911645193594e-06, "loss": 0.2922, "num_tokens": 4318935831.0, "step": 5663 }, { "epoch": 7.727831356149185, "grad_norm": 0.28793949098810445, "learning_rate": 4.110807487197615e-06, "loss": 0.2913, "num_tokens": 4319712715.0, "step": 5664 }, { "epoch": 7.729196893402748, "grad_norm": 0.28052865476676453, "learning_rate": 4.1097087864195576e-06, "loss": 0.2928, "num_tokens": 4320443316.0, "step": 5665 }, { "epoch": 7.730562430656311, "grad_norm": 0.293536570372057, "learning_rate": 4.108615543194588e-06, "loss": 0.288, "num_tokens": 4321094926.0, "step": 5666 }, { "epoch": 7.731927967909875, "grad_norm": 0.3054334499020877, "learning_rate": 4.107527757856205e-06, "loss": 0.2884, "num_tokens": 4321899490.0, "step": 5667 }, { "epoch": 7.733293505163438, "grad_norm": 0.2911865795184768, "learning_rate": 4.106445430736245e-06, "loss": 0.2967, "num_tokens": 4322661455.0, "step": 5668 }, { "epoch": 7.734659042417001, "grad_norm": 0.29681868127642025, "learning_rate": 4.105368562164882e-06, "loss": 0.2881, "num_tokens": 4323399130.0, "step": 5669 }, { "epoch": 7.7360245796705644, "grad_norm": 0.29407919864961446, "learning_rate": 4.10429715247062e-06, "loss": 0.2984, "num_tokens": 4324181042.0, "step": 5670 }, { "epoch": 7.737390116924128, "grad_norm": 0.2702996107839984, "learning_rate": 4.103231201980299e-06, "loss": 0.2749, "num_tokens": 4324963362.0, "step": 5671 }, { "epoch": 7.738755654177691, "grad_norm": 0.2792162925868388, "learning_rate": 4.102170711019096e-06, "loss": 0.2761, "num_tokens": 4325731492.0, "step": 5672 }, { "epoch": 7.740121191431253, "grad_norm": 0.2716953421083749, "learning_rate": 4.101115679910519e-06, "loss": 0.2884, "num_tokens": 4326579466.0, "step": 5673 }, { "epoch": 7.741486728684817, "grad_norm": 0.2982985560005272, "learning_rate": 4.100066108976413e-06, "loss": 0.2851, "num_tokens": 4327254928.0, "step": 5674 }, { "epoch": 7.74285226593838, "grad_norm": 0.29223802953557043, "learning_rate": 4.0990219985369575e-06, "loss": 0.2846, "num_tokens": 4328031796.0, "step": 5675 }, { "epoch": 7.744217803191943, "grad_norm": 0.2762158285253355, "learning_rate": 4.097983348910666e-06, "loss": 0.3007, "num_tokens": 4328884460.0, "step": 5676 }, { "epoch": 7.745583340445506, "grad_norm": 0.26344819073498865, "learning_rate": 4.096950160414382e-06, "loss": 0.2948, "num_tokens": 4329672937.0, "step": 5677 }, { "epoch": 7.74694887769907, "grad_norm": 0.3029999548741811, "learning_rate": 4.09592243336329e-06, "loss": 0.2988, "num_tokens": 4330525777.0, "step": 5678 }, { "epoch": 7.748314414952633, "grad_norm": 0.2574729676083253, "learning_rate": 4.094900168070904e-06, "loss": 0.2784, "num_tokens": 4331371549.0, "step": 5679 }, { "epoch": 7.749679952206196, "grad_norm": 0.28978936851445963, "learning_rate": 4.0938833648490736e-06, "loss": 0.2841, "num_tokens": 4332102751.0, "step": 5680 }, { "epoch": 7.751045489459759, "grad_norm": 0.27394720335762107, "learning_rate": 4.092872024007979e-06, "loss": 0.2901, "num_tokens": 4332826462.0, "step": 5681 }, { "epoch": 7.752411026713323, "grad_norm": 0.2943854869718327, "learning_rate": 4.091866145856139e-06, "loss": 0.2821, "num_tokens": 4333593586.0, "step": 5682 }, { "epoch": 7.753776563966886, "grad_norm": 0.2689048043612627, "learning_rate": 4.090865730700402e-06, "loss": 0.3014, "num_tokens": 4334394964.0, "step": 5683 }, { "epoch": 7.755142101220449, "grad_norm": 0.30297184452353093, "learning_rate": 4.089870778845954e-06, "loss": 0.2856, "num_tokens": 4335121580.0, "step": 5684 }, { "epoch": 7.756507638474012, "grad_norm": 0.28928289158736126, "learning_rate": 4.088881290596307e-06, "loss": 0.2932, "num_tokens": 4335891077.0, "step": 5685 }, { "epoch": 7.757873175727576, "grad_norm": 0.2778239517231112, "learning_rate": 4.087897266253314e-06, "loss": 0.2981, "num_tokens": 4336732907.0, "step": 5686 }, { "epoch": 7.759238712981139, "grad_norm": 0.2923397115796401, "learning_rate": 4.0869187061171596e-06, "loss": 0.2859, "num_tokens": 4337476574.0, "step": 5687 }, { "epoch": 7.760604250234701, "grad_norm": 0.28261656470290275, "learning_rate": 4.0859456104863555e-06, "loss": 0.2895, "num_tokens": 4338242953.0, "step": 5688 }, { "epoch": 7.761969787488265, "grad_norm": 0.3109357164235319, "learning_rate": 4.084977979657756e-06, "loss": 0.2826, "num_tokens": 4338940363.0, "step": 5689 }, { "epoch": 7.763335324741828, "grad_norm": 0.28812730242737244, "learning_rate": 4.0840158139265396e-06, "loss": 0.3038, "num_tokens": 4339717726.0, "step": 5690 }, { "epoch": 7.764700861995391, "grad_norm": 0.2957050107316875, "learning_rate": 4.083059113586225e-06, "loss": 0.2876, "num_tokens": 4340436587.0, "step": 5691 }, { "epoch": 7.766066399248954, "grad_norm": 0.28259392815373763, "learning_rate": 4.082107878928656e-06, "loss": 0.2974, "num_tokens": 4341200795.0, "step": 5692 }, { "epoch": 7.7674319365025175, "grad_norm": 0.2860400229375732, "learning_rate": 4.081162110244018e-06, "loss": 0.2841, "num_tokens": 4341984068.0, "step": 5693 }, { "epoch": 7.768797473756081, "grad_norm": 0.2881954394467521, "learning_rate": 4.080221807820822e-06, "loss": 0.2983, "num_tokens": 4342809273.0, "step": 5694 }, { "epoch": 7.770163011009644, "grad_norm": 0.2601884284844817, "learning_rate": 4.079286971945909e-06, "loss": 0.2791, "num_tokens": 4343622220.0, "step": 5695 }, { "epoch": 7.771528548263207, "grad_norm": 0.3151421814279109, "learning_rate": 4.0783576029044626e-06, "loss": 0.2959, "num_tokens": 4344358399.0, "step": 5696 }, { "epoch": 7.7728940855167705, "grad_norm": 0.2735214299707197, "learning_rate": 4.077433700979991e-06, "loss": 0.2944, "num_tokens": 4345143797.0, "step": 5697 }, { "epoch": 7.774259622770334, "grad_norm": 0.29720838169929975, "learning_rate": 4.0765152664543366e-06, "loss": 0.2946, "num_tokens": 4345873593.0, "step": 5698 }, { "epoch": 7.775625160023897, "grad_norm": 0.29464684036184585, "learning_rate": 4.0756022996076745e-06, "loss": 0.2899, "num_tokens": 4346677840.0, "step": 5699 }, { "epoch": 7.77699069727746, "grad_norm": 0.274030561854194, "learning_rate": 4.074694800718511e-06, "loss": 0.2906, "num_tokens": 4347518016.0, "step": 5700 }, { "epoch": 7.7783562345310235, "grad_norm": 0.27963882345393926, "learning_rate": 4.073792770063685e-06, "loss": 0.2973, "num_tokens": 4348242363.0, "step": 5701 }, { "epoch": 7.779721771784587, "grad_norm": 0.2965639055104474, "learning_rate": 4.072896207918366e-06, "loss": 0.2924, "num_tokens": 4349004666.0, "step": 5702 }, { "epoch": 7.78108730903815, "grad_norm": 0.2868089670448591, "learning_rate": 4.072005114556059e-06, "loss": 0.2917, "num_tokens": 4349746566.0, "step": 5703 }, { "epoch": 7.782452846291713, "grad_norm": 0.2999290221341298, "learning_rate": 4.071119490248594e-06, "loss": 0.2912, "num_tokens": 4350522403.0, "step": 5704 }, { "epoch": 7.783818383545276, "grad_norm": 0.2894473400231432, "learning_rate": 4.070239335266142e-06, "loss": 0.3056, "num_tokens": 4351323412.0, "step": 5705 }, { "epoch": 7.785183920798839, "grad_norm": 0.2857817434516926, "learning_rate": 4.069364649877193e-06, "loss": 0.2956, "num_tokens": 4352090319.0, "step": 5706 }, { "epoch": 7.786549458052402, "grad_norm": 0.2922095911880761, "learning_rate": 4.0684954343485806e-06, "loss": 0.3101, "num_tokens": 4352829996.0, "step": 5707 }, { "epoch": 7.787914995305965, "grad_norm": 0.2697047729983644, "learning_rate": 4.067631688945465e-06, "loss": 0.2862, "num_tokens": 4353605066.0, "step": 5708 }, { "epoch": 7.789280532559529, "grad_norm": 0.2790374891739669, "learning_rate": 4.066773413931336e-06, "loss": 0.2692, "num_tokens": 4354325949.0, "step": 5709 }, { "epoch": 7.790646069813092, "grad_norm": 0.26746915891614786, "learning_rate": 4.065920609568015e-06, "loss": 0.2864, "num_tokens": 4355152884.0, "step": 5710 }, { "epoch": 7.792011607066655, "grad_norm": 0.27189839984492, "learning_rate": 4.065073276115658e-06, "loss": 0.2949, "num_tokens": 4355961251.0, "step": 5711 }, { "epoch": 7.793377144320218, "grad_norm": 0.2793489055789107, "learning_rate": 4.064231413832749e-06, "loss": 0.291, "num_tokens": 4356759682.0, "step": 5712 }, { "epoch": 7.794742681573782, "grad_norm": 0.29537619159283723, "learning_rate": 4.063395022976103e-06, "loss": 0.2891, "num_tokens": 4357551264.0, "step": 5713 }, { "epoch": 7.796108218827345, "grad_norm": 0.28323520301289495, "learning_rate": 4.062564103800868e-06, "loss": 0.2887, "num_tokens": 4358305887.0, "step": 5714 }, { "epoch": 7.797473756080908, "grad_norm": 0.29860752858257184, "learning_rate": 4.06173865656052e-06, "loss": 0.2939, "num_tokens": 4359098670.0, "step": 5715 }, { "epoch": 7.7988392933344715, "grad_norm": 0.2832897854093337, "learning_rate": 4.060918681506868e-06, "loss": 0.2951, "num_tokens": 4359918950.0, "step": 5716 }, { "epoch": 7.800204830588035, "grad_norm": 0.29172401773612155, "learning_rate": 4.060104178890051e-06, "loss": 0.2888, "num_tokens": 4360609556.0, "step": 5717 }, { "epoch": 7.801570367841598, "grad_norm": 0.27575990500037184, "learning_rate": 4.059295148958537e-06, "loss": 0.2935, "num_tokens": 4361378677.0, "step": 5718 }, { "epoch": 7.802935905095161, "grad_norm": 0.29639793496765593, "learning_rate": 4.058491591959128e-06, "loss": 0.2792, "num_tokens": 4362154272.0, "step": 5719 }, { "epoch": 7.804301442348724, "grad_norm": 0.2741269977673043, "learning_rate": 4.057693508136954e-06, "loss": 0.2769, "num_tokens": 4362891551.0, "step": 5720 }, { "epoch": 7.805666979602288, "grad_norm": 0.3121930671501344, "learning_rate": 4.0569008977354756e-06, "loss": 0.2966, "num_tokens": 4363717693.0, "step": 5721 }, { "epoch": 7.80703251685585, "grad_norm": 0.27846042538719606, "learning_rate": 4.0561137609964825e-06, "loss": 0.2917, "num_tokens": 4364430068.0, "step": 5722 }, { "epoch": 7.808398054109413, "grad_norm": 0.29738561684802817, "learning_rate": 4.055332098160099e-06, "loss": 0.2943, "num_tokens": 4365146133.0, "step": 5723 }, { "epoch": 7.809763591362977, "grad_norm": 0.2970661197035514, "learning_rate": 4.054555909464774e-06, "loss": 0.2796, "num_tokens": 4365900939.0, "step": 5724 }, { "epoch": 7.81112912861654, "grad_norm": 0.27598602439490105, "learning_rate": 4.0537851951472895e-06, "loss": 0.2686, "num_tokens": 4366649632.0, "step": 5725 }, { "epoch": 7.812494665870103, "grad_norm": 0.2940502030658319, "learning_rate": 4.053019955442758e-06, "loss": 0.2973, "num_tokens": 4367449728.0, "step": 5726 }, { "epoch": 7.813860203123666, "grad_norm": 0.2801259469897299, "learning_rate": 4.0522601905846195e-06, "loss": 0.2787, "num_tokens": 4368233746.0, "step": 5727 }, { "epoch": 7.81522574037723, "grad_norm": 0.28237043786700367, "learning_rate": 4.051505900804647e-06, "loss": 0.2854, "num_tokens": 4368952106.0, "step": 5728 }, { "epoch": 7.816591277630793, "grad_norm": 0.29285613353182854, "learning_rate": 4.050757086332941e-06, "loss": 0.2962, "num_tokens": 4369733180.0, "step": 5729 }, { "epoch": 7.817956814884356, "grad_norm": 0.2824638453876789, "learning_rate": 4.050013747397932e-06, "loss": 0.2835, "num_tokens": 4370484183.0, "step": 5730 }, { "epoch": 7.819322352137919, "grad_norm": 0.2939180876049139, "learning_rate": 4.049275884226382e-06, "loss": 0.2831, "num_tokens": 4371231738.0, "step": 5731 }, { "epoch": 7.820687889391483, "grad_norm": 0.30442746723267083, "learning_rate": 4.04854349704338e-06, "loss": 0.2809, "num_tokens": 4371947466.0, "step": 5732 }, { "epoch": 7.822053426645046, "grad_norm": 0.29698199808032033, "learning_rate": 4.047816586072344e-06, "loss": 0.2958, "num_tokens": 4372706425.0, "step": 5733 }, { "epoch": 7.823418963898609, "grad_norm": 0.287525937086229, "learning_rate": 4.047095151535024e-06, "loss": 0.2857, "num_tokens": 4373479554.0, "step": 5734 }, { "epoch": 7.824784501152172, "grad_norm": 0.28534747162304286, "learning_rate": 4.046379193651501e-06, "loss": 0.2861, "num_tokens": 4374191308.0, "step": 5735 }, { "epoch": 7.826150038405736, "grad_norm": 0.2865302732428724, "learning_rate": 4.045668712640181e-06, "loss": 0.298, "num_tokens": 4374959117.0, "step": 5736 }, { "epoch": 7.827515575659298, "grad_norm": 0.2771678894138829, "learning_rate": 4.0449637087178e-06, "loss": 0.2814, "num_tokens": 4375759560.0, "step": 5737 }, { "epoch": 7.828881112912862, "grad_norm": 0.2905195704253538, "learning_rate": 4.044264182099423e-06, "loss": 0.2914, "num_tokens": 4376504922.0, "step": 5738 }, { "epoch": 7.8302466501664245, "grad_norm": 0.28166434794206374, "learning_rate": 4.0435701329984505e-06, "loss": 0.2913, "num_tokens": 4377284243.0, "step": 5739 }, { "epoch": 7.831612187419988, "grad_norm": 0.2839002365293098, "learning_rate": 4.042881561626601e-06, "loss": 0.2853, "num_tokens": 4378062143.0, "step": 5740 }, { "epoch": 7.832977724673551, "grad_norm": 0.27408148676293587, "learning_rate": 4.042198468193933e-06, "loss": 0.304, "num_tokens": 4378843580.0, "step": 5741 }, { "epoch": 7.834343261927114, "grad_norm": 0.3264181417815801, "learning_rate": 4.041520852908825e-06, "loss": 0.3035, "num_tokens": 4379566578.0, "step": 5742 }, { "epoch": 7.8357087991806775, "grad_norm": 0.2591309658908212, "learning_rate": 4.040848715977988e-06, "loss": 0.2872, "num_tokens": 4380375684.0, "step": 5743 }, { "epoch": 7.837074336434241, "grad_norm": 0.2822254275887289, "learning_rate": 4.040182057606466e-06, "loss": 0.2853, "num_tokens": 4381111621.0, "step": 5744 }, { "epoch": 7.838439873687804, "grad_norm": 0.2812070774631119, "learning_rate": 4.0395208779976204e-06, "loss": 0.2971, "num_tokens": 4381847102.0, "step": 5745 }, { "epoch": 7.839805410941367, "grad_norm": 0.2912499286782968, "learning_rate": 4.038865177353156e-06, "loss": 0.2868, "num_tokens": 4382647528.0, "step": 5746 }, { "epoch": 7.8411709481949305, "grad_norm": 0.3042419651690699, "learning_rate": 4.0382149558730935e-06, "loss": 0.2886, "num_tokens": 4383335820.0, "step": 5747 }, { "epoch": 7.842536485448494, "grad_norm": 0.2871704165390055, "learning_rate": 4.03757021375579e-06, "loss": 0.2988, "num_tokens": 4384130788.0, "step": 5748 }, { "epoch": 7.843902022702057, "grad_norm": 0.2985636518209616, "learning_rate": 4.0369309511979274e-06, "loss": 0.2917, "num_tokens": 4384836634.0, "step": 5749 }, { "epoch": 7.84526755995562, "grad_norm": 0.2867797668670429, "learning_rate": 4.036297168394516e-06, "loss": 0.2873, "num_tokens": 4385552495.0, "step": 5750 }, { "epoch": 7.846633097209184, "grad_norm": 0.29222958230106905, "learning_rate": 4.0356688655388995e-06, "loss": 0.2933, "num_tokens": 4386363784.0, "step": 5751 }, { "epoch": 7.847998634462747, "grad_norm": 0.27888229508836787, "learning_rate": 4.03504604282274e-06, "loss": 0.2808, "num_tokens": 4387160920.0, "step": 5752 }, { "epoch": 7.84936417171631, "grad_norm": 0.28296568822780443, "learning_rate": 4.03442870043604e-06, "loss": 0.2753, "num_tokens": 4387875521.0, "step": 5753 }, { "epoch": 7.850729708969872, "grad_norm": 0.2727978571375459, "learning_rate": 4.033816838567119e-06, "loss": 0.2931, "num_tokens": 4388702632.0, "step": 5754 }, { "epoch": 7.852095246223436, "grad_norm": 0.296181405280616, "learning_rate": 4.03321045740263e-06, "loss": 0.2929, "num_tokens": 4389482307.0, "step": 5755 }, { "epoch": 7.853460783476999, "grad_norm": 0.2760766923867831, "learning_rate": 4.032609557127557e-06, "loss": 0.2835, "num_tokens": 4390253423.0, "step": 5756 }, { "epoch": 7.854826320730562, "grad_norm": 0.28350697058080465, "learning_rate": 4.032014137925207e-06, "loss": 0.2899, "num_tokens": 4390994550.0, "step": 5757 }, { "epoch": 7.8561918579841254, "grad_norm": 0.2963497976871351, "learning_rate": 4.0314241999772146e-06, "loss": 0.2967, "num_tokens": 4391709670.0, "step": 5758 }, { "epoch": 7.857557395237689, "grad_norm": 0.2813489120094673, "learning_rate": 4.030839743463547e-06, "loss": 0.2881, "num_tokens": 4392487443.0, "step": 5759 }, { "epoch": 7.858922932491252, "grad_norm": 0.28617264920725094, "learning_rate": 4.0302607685624944e-06, "loss": 0.286, "num_tokens": 4393185793.0, "step": 5760 }, { "epoch": 7.860288469744815, "grad_norm": 0.28697268554709726, "learning_rate": 4.0296872754506785e-06, "loss": 0.2973, "num_tokens": 4393888622.0, "step": 5761 }, { "epoch": 7.8616540069983785, "grad_norm": 0.2808026860488771, "learning_rate": 4.029119264303046e-06, "loss": 0.2967, "num_tokens": 4394729601.0, "step": 5762 }, { "epoch": 7.863019544251942, "grad_norm": 0.3145244971651699, "learning_rate": 4.028556735292875e-06, "loss": 0.2728, "num_tokens": 4395423553.0, "step": 5763 }, { "epoch": 7.864385081505505, "grad_norm": 0.26545636966147607, "learning_rate": 4.027999688591767e-06, "loss": 0.2839, "num_tokens": 4396147564.0, "step": 5764 }, { "epoch": 7.865750618759068, "grad_norm": 0.2870094987575519, "learning_rate": 4.027448124369651e-06, "loss": 0.2863, "num_tokens": 4396908754.0, "step": 5765 }, { "epoch": 7.8671161560126315, "grad_norm": 0.2824426399590004, "learning_rate": 4.026902042794788e-06, "loss": 0.2879, "num_tokens": 4397668319.0, "step": 5766 }, { "epoch": 7.868481693266195, "grad_norm": 0.28650485988365004, "learning_rate": 4.026361444033764e-06, "loss": 0.2997, "num_tokens": 4398412988.0, "step": 5767 }, { "epoch": 7.869847230519758, "grad_norm": 0.27749996595598464, "learning_rate": 4.025826328251491e-06, "loss": 0.2925, "num_tokens": 4399222157.0, "step": 5768 }, { "epoch": 7.87121276777332, "grad_norm": 0.2898023035158995, "learning_rate": 4.02529669561121e-06, "loss": 0.2845, "num_tokens": 4399945292.0, "step": 5769 }, { "epoch": 7.8725783050268845, "grad_norm": 0.2851184656097859, "learning_rate": 4.024772546274488e-06, "loss": 0.2765, "num_tokens": 4400650315.0, "step": 5770 }, { "epoch": 7.873943842280447, "grad_norm": 0.2983143409084664, "learning_rate": 4.024253880401222e-06, "loss": 0.3, "num_tokens": 4401418763.0, "step": 5771 }, { "epoch": 7.87530937953401, "grad_norm": 0.2761512249121721, "learning_rate": 4.023740698149633e-06, "loss": 0.287, "num_tokens": 4402151992.0, "step": 5772 }, { "epoch": 7.876674916787573, "grad_norm": 0.3085869206707055, "learning_rate": 4.023232999676272e-06, "loss": 0.2972, "num_tokens": 4402951899.0, "step": 5773 }, { "epoch": 7.878040454041137, "grad_norm": 0.30723381292501895, "learning_rate": 4.0227307851360155e-06, "loss": 0.2898, "num_tokens": 4403722148.0, "step": 5774 }, { "epoch": 7.8794059912947, "grad_norm": 0.2737618480812446, "learning_rate": 4.022234054682067e-06, "loss": 0.2846, "num_tokens": 4404501409.0, "step": 5775 }, { "epoch": 7.880771528548263, "grad_norm": 0.2791450100985167, "learning_rate": 4.021742808465957e-06, "loss": 0.2893, "num_tokens": 4405223566.0, "step": 5776 }, { "epoch": 7.882137065801826, "grad_norm": 0.2867106725720856, "learning_rate": 4.021257046637543e-06, "loss": 0.3034, "num_tokens": 4406055533.0, "step": 5777 }, { "epoch": 7.88350260305539, "grad_norm": 0.2684597414096056, "learning_rate": 4.020776769345013e-06, "loss": 0.2836, "num_tokens": 4406827390.0, "step": 5778 }, { "epoch": 7.884868140308953, "grad_norm": 0.29479506620256224, "learning_rate": 4.0203019767348755e-06, "loss": 0.3191, "num_tokens": 4407627513.0, "step": 5779 }, { "epoch": 7.886233677562516, "grad_norm": 0.28582044233432796, "learning_rate": 4.019832668951971e-06, "loss": 0.2981, "num_tokens": 4408432536.0, "step": 5780 }, { "epoch": 7.887599214816079, "grad_norm": 0.2902851799229732, "learning_rate": 4.019368846139464e-06, "loss": 0.2911, "num_tokens": 4409200492.0, "step": 5781 }, { "epoch": 7.888964752069643, "grad_norm": 0.27962717139887777, "learning_rate": 4.018910508438847e-06, "loss": 0.2869, "num_tokens": 4410004029.0, "step": 5782 }, { "epoch": 7.890330289323206, "grad_norm": 0.28114370756986057, "learning_rate": 4.018457655989939e-06, "loss": 0.2785, "num_tokens": 4410748180.0, "step": 5783 }, { "epoch": 7.891695826576769, "grad_norm": 0.30197042888960984, "learning_rate": 4.018010288930886e-06, "loss": 0.2889, "num_tokens": 4411509424.0, "step": 5784 }, { "epoch": 7.893061363830332, "grad_norm": 0.28350914482942435, "learning_rate": 4.017568407398161e-06, "loss": 0.3119, "num_tokens": 4412307429.0, "step": 5785 }, { "epoch": 7.894426901083895, "grad_norm": 0.27500235634844095, "learning_rate": 4.01713201152656e-06, "loss": 0.2911, "num_tokens": 4413097684.0, "step": 5786 }, { "epoch": 7.895792438337458, "grad_norm": 0.26711244976020165, "learning_rate": 4.016701101449212e-06, "loss": 0.2981, "num_tokens": 4413939799.0, "step": 5787 }, { "epoch": 7.897157975591021, "grad_norm": 0.300171516550202, "learning_rate": 4.0162756772975675e-06, "loss": 0.2725, "num_tokens": 4414626930.0, "step": 5788 }, { "epoch": 7.8985235128445845, "grad_norm": 0.2926109926161823, "learning_rate": 4.0158557392014045e-06, "loss": 0.2871, "num_tokens": 4415347297.0, "step": 5789 }, { "epoch": 7.899889050098148, "grad_norm": 0.2822363174461127, "learning_rate": 4.015441287288829e-06, "loss": 0.2823, "num_tokens": 4416152337.0, "step": 5790 }, { "epoch": 7.901254587351711, "grad_norm": 0.2732238576030767, "learning_rate": 4.01503232168627e-06, "loss": 0.2845, "num_tokens": 4416914935.0, "step": 5791 }, { "epoch": 7.902620124605274, "grad_norm": 0.2825142646911895, "learning_rate": 4.01462884251849e-06, "loss": 0.3001, "num_tokens": 4417712092.0, "step": 5792 }, { "epoch": 7.9039856618588376, "grad_norm": 0.2849620783705486, "learning_rate": 4.014230849908567e-06, "loss": 0.2906, "num_tokens": 4418459100.0, "step": 5793 }, { "epoch": 7.905351199112401, "grad_norm": 0.28222699869109613, "learning_rate": 4.013838343977916e-06, "loss": 0.3083, "num_tokens": 4419199699.0, "step": 5794 }, { "epoch": 7.906716736365964, "grad_norm": 0.2742964583602357, "learning_rate": 4.013451324846271e-06, "loss": 0.2809, "num_tokens": 4419940643.0, "step": 5795 }, { "epoch": 7.908082273619527, "grad_norm": 0.30241666564506037, "learning_rate": 4.0130697926316954e-06, "loss": 0.2815, "num_tokens": 4420648349.0, "step": 5796 }, { "epoch": 7.909447810873091, "grad_norm": 0.28211575465218397, "learning_rate": 4.012693747450579e-06, "loss": 0.2815, "num_tokens": 4421479249.0, "step": 5797 }, { "epoch": 7.910813348126654, "grad_norm": 0.3017824190262245, "learning_rate": 4.012323189417635e-06, "loss": 0.3004, "num_tokens": 4422236236.0, "step": 5798 }, { "epoch": 7.912178885380217, "grad_norm": 0.2778063091758128, "learning_rate": 4.0119581186459064e-06, "loss": 0.2909, "num_tokens": 4423086163.0, "step": 5799 }, { "epoch": 7.91354442263378, "grad_norm": 0.2739341505342804, "learning_rate": 4.011598535246762e-06, "loss": 0.2793, "num_tokens": 4423859762.0, "step": 5800 }, { "epoch": 7.914909959887343, "grad_norm": 0.27908960522361237, "learning_rate": 4.01124443932989e-06, "loss": 0.286, "num_tokens": 4424599649.0, "step": 5801 }, { "epoch": 7.916275497140907, "grad_norm": 0.3011687067814772, "learning_rate": 4.010895831003314e-06, "loss": 0.2879, "num_tokens": 4425355237.0, "step": 5802 }, { "epoch": 7.917641034394469, "grad_norm": 0.284397585369148, "learning_rate": 4.01055271037338e-06, "loss": 0.2757, "num_tokens": 4426050404.0, "step": 5803 }, { "epoch": 7.9190065716480325, "grad_norm": 0.2783509898768114, "learning_rate": 4.010215077544754e-06, "loss": 0.2851, "num_tokens": 4426810280.0, "step": 5804 }, { "epoch": 7.920372108901596, "grad_norm": 0.27785500661721235, "learning_rate": 4.009882932620438e-06, "loss": 0.292, "num_tokens": 4427622329.0, "step": 5805 }, { "epoch": 7.921737646155159, "grad_norm": 0.2795358429594024, "learning_rate": 4.009556275701754e-06, "loss": 0.2952, "num_tokens": 4428385467.0, "step": 5806 }, { "epoch": 7.923103183408722, "grad_norm": 0.315485220444975, "learning_rate": 4.009235106888349e-06, "loss": 0.2939, "num_tokens": 4429176875.0, "step": 5807 }, { "epoch": 7.9244687206622855, "grad_norm": 0.2872812980410123, "learning_rate": 4.008919426278201e-06, "loss": 0.282, "num_tokens": 4429877116.0, "step": 5808 }, { "epoch": 7.925834257915849, "grad_norm": 0.2795381206547082, "learning_rate": 4.008609233967607e-06, "loss": 0.2941, "num_tokens": 4430638504.0, "step": 5809 }, { "epoch": 7.927199795169412, "grad_norm": 0.28235443126145787, "learning_rate": 4.008304530051197e-06, "loss": 0.3052, "num_tokens": 4431409379.0, "step": 5810 }, { "epoch": 7.928565332422975, "grad_norm": 0.2864351133924553, "learning_rate": 4.0080053146219195e-06, "loss": 0.2928, "num_tokens": 4432204821.0, "step": 5811 }, { "epoch": 7.9299308696765385, "grad_norm": 0.27145743781147463, "learning_rate": 4.007711587771051e-06, "loss": 0.3084, "num_tokens": 4432994112.0, "step": 5812 }, { "epoch": 7.931296406930102, "grad_norm": 0.2946744506171655, "learning_rate": 4.0074233495882e-06, "loss": 0.2892, "num_tokens": 4433710631.0, "step": 5813 }, { "epoch": 7.932661944183665, "grad_norm": 0.2798263839699945, "learning_rate": 4.007140600161294e-06, "loss": 0.2962, "num_tokens": 4434489370.0, "step": 5814 }, { "epoch": 7.934027481437228, "grad_norm": 0.2906686091014025, "learning_rate": 4.006863339576585e-06, "loss": 0.2994, "num_tokens": 4435291244.0, "step": 5815 }, { "epoch": 7.9353930186907915, "grad_norm": 0.2789702018334533, "learning_rate": 4.006591567918653e-06, "loss": 0.2928, "num_tokens": 4436063853.0, "step": 5816 }, { "epoch": 7.936758555944355, "grad_norm": 0.2903935359206614, "learning_rate": 4.006325285270407e-06, "loss": 0.2912, "num_tokens": 4436795008.0, "step": 5817 }, { "epoch": 7.938124093197917, "grad_norm": 0.2682207264184072, "learning_rate": 4.006064491713076e-06, "loss": 0.2924, "num_tokens": 4437604560.0, "step": 5818 }, { "epoch": 7.93948963045148, "grad_norm": 0.2846229429531823, "learning_rate": 4.005809187326218e-06, "loss": 0.2892, "num_tokens": 4438322936.0, "step": 5819 }, { "epoch": 7.940855167705044, "grad_norm": 0.2799340439330702, "learning_rate": 4.005559372187715e-06, "loss": 0.3019, "num_tokens": 4439182873.0, "step": 5820 }, { "epoch": 7.942220704958607, "grad_norm": 0.29173587375746646, "learning_rate": 4.005315046373775e-06, "loss": 0.2898, "num_tokens": 4439938860.0, "step": 5821 }, { "epoch": 7.94358624221217, "grad_norm": 0.2830728359356329, "learning_rate": 4.005076209958929e-06, "loss": 0.2857, "num_tokens": 4440665271.0, "step": 5822 }, { "epoch": 7.944951779465733, "grad_norm": 0.2804502227997749, "learning_rate": 4.004842863016039e-06, "loss": 0.2875, "num_tokens": 4441501480.0, "step": 5823 }, { "epoch": 7.946317316719297, "grad_norm": 0.2670316578619263, "learning_rate": 4.004615005616287e-06, "loss": 0.3064, "num_tokens": 4442324090.0, "step": 5824 }, { "epoch": 7.94768285397286, "grad_norm": 0.29829837032900314, "learning_rate": 4.004392637829182e-06, "loss": 0.3066, "num_tokens": 4443171301.0, "step": 5825 }, { "epoch": 7.949048391226423, "grad_norm": 0.29420571925816497, "learning_rate": 4.00417575972256e-06, "loss": 0.2936, "num_tokens": 4443929898.0, "step": 5826 }, { "epoch": 7.950413928479986, "grad_norm": 0.29498548176009187, "learning_rate": 4.003964371362582e-06, "loss": 0.3002, "num_tokens": 4444648349.0, "step": 5827 }, { "epoch": 7.95177946573355, "grad_norm": 0.29938555889387103, "learning_rate": 4.003758472813732e-06, "loss": 0.2923, "num_tokens": 4445342332.0, "step": 5828 }, { "epoch": 7.953145002987113, "grad_norm": 0.2871319519458771, "learning_rate": 4.003558064138821e-06, "loss": 0.2754, "num_tokens": 4446061984.0, "step": 5829 }, { "epoch": 7.954510540240676, "grad_norm": 0.2955788920031377, "learning_rate": 4.003363145398984e-06, "loss": 0.3015, "num_tokens": 4446847852.0, "step": 5830 }, { "epoch": 7.955876077494239, "grad_norm": 0.27560374011984, "learning_rate": 4.003173716653684e-06, "loss": 0.2997, "num_tokens": 4447628621.0, "step": 5831 }, { "epoch": 7.957241614747803, "grad_norm": 0.3330350913806228, "learning_rate": 4.0029897779607074e-06, "loss": 0.282, "num_tokens": 4448297652.0, "step": 5832 }, { "epoch": 7.958607152001366, "grad_norm": 0.284523448652836, "learning_rate": 4.002811329376164e-06, "loss": 0.2946, "num_tokens": 4449126333.0, "step": 5833 }, { "epoch": 7.959972689254929, "grad_norm": 0.28764673090016313, "learning_rate": 4.0026383709544914e-06, "loss": 0.2874, "num_tokens": 4449876491.0, "step": 5834 }, { "epoch": 7.9613382265084915, "grad_norm": 0.28825844346072066, "learning_rate": 4.002470902748455e-06, "loss": 0.2822, "num_tokens": 4450570027.0, "step": 5835 }, { "epoch": 7.962703763762055, "grad_norm": 0.31290539149921165, "learning_rate": 4.002308924809137e-06, "loss": 0.298, "num_tokens": 4451297840.0, "step": 5836 }, { "epoch": 7.964069301015618, "grad_norm": 0.2949349126760123, "learning_rate": 4.002152437185954e-06, "loss": 0.2931, "num_tokens": 4452001197.0, "step": 5837 }, { "epoch": 7.965434838269181, "grad_norm": 0.27386336694306923, "learning_rate": 4.002001439926642e-06, "loss": 0.2869, "num_tokens": 4452751462.0, "step": 5838 }, { "epoch": 7.966800375522745, "grad_norm": 0.29222251967749013, "learning_rate": 4.001855933077262e-06, "loss": 0.3008, "num_tokens": 4453564004.0, "step": 5839 }, { "epoch": 7.968165912776308, "grad_norm": 0.2959404055757522, "learning_rate": 4.001715916682204e-06, "loss": 0.2917, "num_tokens": 4454328075.0, "step": 5840 }, { "epoch": 7.969531450029871, "grad_norm": 0.28664776256847624, "learning_rate": 4.00158139078418e-06, "loss": 0.2747, "num_tokens": 4455088262.0, "step": 5841 }, { "epoch": 7.970896987283434, "grad_norm": 0.2819413001431054, "learning_rate": 4.0014523554242295e-06, "loss": 0.2979, "num_tokens": 4455918584.0, "step": 5842 }, { "epoch": 7.972262524536998, "grad_norm": 0.27489218299491625, "learning_rate": 4.001328810641713e-06, "loss": 0.2834, "num_tokens": 4456703635.0, "step": 5843 }, { "epoch": 7.973628061790561, "grad_norm": 0.2835335958126823, "learning_rate": 4.001210756474322e-06, "loss": 0.3059, "num_tokens": 4457505976.0, "step": 5844 }, { "epoch": 7.974993599044124, "grad_norm": 0.2738157028180583, "learning_rate": 4.001098192958068e-06, "loss": 0.2953, "num_tokens": 4458334172.0, "step": 5845 }, { "epoch": 7.976359136297687, "grad_norm": 0.28962613515496666, "learning_rate": 4.000991120127287e-06, "loss": 0.3004, "num_tokens": 4459129273.0, "step": 5846 }, { "epoch": 7.977724673551251, "grad_norm": 0.274110503819637, "learning_rate": 4.000889538014646e-06, "loss": 0.2901, "num_tokens": 4459890208.0, "step": 5847 }, { "epoch": 7.979090210804814, "grad_norm": 0.2972873065742093, "learning_rate": 4.000793446651132e-06, "loss": 0.2915, "num_tokens": 4460602020.0, "step": 5848 }, { "epoch": 7.980455748058377, "grad_norm": 0.2954945189856023, "learning_rate": 4.000702846066058e-06, "loss": 0.273, "num_tokens": 4461314684.0, "step": 5849 }, { "epoch": 7.9818212853119395, "grad_norm": 0.28441705385371885, "learning_rate": 4.000617736287061e-06, "loss": 0.2863, "num_tokens": 4462016477.0, "step": 5850 }, { "epoch": 7.983186822565504, "grad_norm": 0.28979756971597653, "learning_rate": 4.0005381173401085e-06, "loss": 0.2767, "num_tokens": 4462791454.0, "step": 5851 }, { "epoch": 7.984552359819066, "grad_norm": 0.2837786478863559, "learning_rate": 4.000463989249482e-06, "loss": 0.2913, "num_tokens": 4463584158.0, "step": 5852 }, { "epoch": 7.985917897072629, "grad_norm": 0.27640598364878316, "learning_rate": 4.000395352037802e-06, "loss": 0.3017, "num_tokens": 4464412790.0, "step": 5853 }, { "epoch": 7.9872834343261925, "grad_norm": 0.2810028650935519, "learning_rate": 4.000332205726002e-06, "loss": 0.2811, "num_tokens": 4465099606.0, "step": 5854 }, { "epoch": 7.988648971579756, "grad_norm": 0.3147393798381532, "learning_rate": 4.000274550333346e-06, "loss": 0.2865, "num_tokens": 4465810649.0, "step": 5855 }, { "epoch": 7.990014508833319, "grad_norm": 0.27607147104004, "learning_rate": 4.000222385877426e-06, "loss": 0.2808, "num_tokens": 4466574254.0, "step": 5856 }, { "epoch": 7.991380046086882, "grad_norm": 0.2931205649310897, "learning_rate": 4.000175712374149e-06, "loss": 0.2964, "num_tokens": 4467355861.0, "step": 5857 }, { "epoch": 7.9927455833404455, "grad_norm": 0.28167548012058613, "learning_rate": 4.0001345298377565e-06, "loss": 0.2899, "num_tokens": 4468156350.0, "step": 5858 }, { "epoch": 7.994111120594009, "grad_norm": 0.29093668752011925, "learning_rate": 4.000098838280813e-06, "loss": 0.2876, "num_tokens": 4468921301.0, "step": 5859 }, { "epoch": 7.995476657847572, "grad_norm": 0.2913248741533037, "learning_rate": 4.000068637714203e-06, "loss": 0.3054, "num_tokens": 4469761157.0, "step": 5860 }, { "epoch": 7.996842195101135, "grad_norm": 0.28466811133926073, "learning_rate": 4.0000439281471406e-06, "loss": 0.2897, "num_tokens": 4470542645.0, "step": 5861 }, { "epoch": 7.9982077323546985, "grad_norm": 0.2713903600169412, "learning_rate": 4.000024709587163e-06, "loss": 0.2729, "num_tokens": 4471318562.0, "step": 5862 }, { "epoch": 7.999573269608262, "grad_norm": 0.2879017648340606, "learning_rate": 4.000010982040136e-06, "loss": 0.2964, "num_tokens": 4472100076.0, "step": 5863 }, { "epoch": 8.0, "grad_norm": 0.2879017648340606, "learning_rate": 4.000002745510243e-06, "loss": 0.2823, "num_tokens": 4472337088.0, "step": 5864 }, { "epoch": 8.0, "step": 5864, "total_flos": 5741810367217664.0, "train_loss": 0.36987388751496236, "train_runtime": 693492.9653, "train_samples_per_second": 1.081, "train_steps_per_second": 0.008 } ], "logging_steps": 1, "max_steps": 5864, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5741810367217664.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }