{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 5864, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013655809586805073, "grad_norm": 3.3650412324693257, "learning_rate": 0.0, "loss": 0.5878, "num_tokens": 781862.0, "step": 1 }, { "epoch": 0.0027311619173610146, "grad_norm": 3.3569112201740188, "learning_rate": 1.1363636363636364e-07, "loss": 0.6167, "num_tokens": 1579241.0, "step": 2 }, { "epoch": 0.004096742876041522, "grad_norm": 3.41290845097348, "learning_rate": 2.2727272727272729e-07, "loss": 0.5823, "num_tokens": 2313777.0, "step": 3 }, { "epoch": 0.005462323834722029, "grad_norm": 3.4842079496751452, "learning_rate": 3.409090909090909e-07, "loss": 0.5846, "num_tokens": 2958332.0, "step": 4 }, { "epoch": 0.006827904793402537, "grad_norm": 3.306985245054534, "learning_rate": 4.5454545454545457e-07, "loss": 0.5782, "num_tokens": 3751162.0, "step": 5 }, { "epoch": 0.008193485752083044, "grad_norm": 3.2545411749396895, "learning_rate": 5.681818181818182e-07, "loss": 0.5903, "num_tokens": 4522410.0, "step": 6 }, { "epoch": 0.009559066710763552, "grad_norm": 3.3932860307909385, "learning_rate": 6.818181818181818e-07, "loss": 0.6029, "num_tokens": 5246142.0, "step": 7 }, { "epoch": 0.010924647669444058, "grad_norm": 3.196434844976779, "learning_rate": 7.954545454545455e-07, "loss": 0.6017, "num_tokens": 6047445.0, "step": 8 }, { "epoch": 0.012290228628124567, "grad_norm": 3.035147166240426, "learning_rate": 9.090909090909091e-07, "loss": 0.5895, "num_tokens": 6880372.0, "step": 9 }, { "epoch": 0.013655809586805075, "grad_norm": 2.843003416139641, "learning_rate": 1.0227272727272729e-06, "loss": 0.5795, "num_tokens": 7653932.0, "step": 10 }, { "epoch": 0.015021390545485581, "grad_norm": 2.7943692106453333, "learning_rate": 1.1363636363636364e-06, "loss": 0.5849, "num_tokens": 8367645.0, "step": 11 }, { "epoch": 0.016386971504166088, "grad_norm": 2.7811302763362304, "learning_rate": 1.25e-06, "loss": 0.5833, "num_tokens": 9111974.0, "step": 12 }, { "epoch": 0.017752552462846596, "grad_norm": 2.5984282822955747, "learning_rate": 1.3636363636363636e-06, "loss": 0.5705, "num_tokens": 9866272.0, "step": 13 }, { "epoch": 0.019118133421527104, "grad_norm": 2.5507012084013874, "learning_rate": 1.4772727272727275e-06, "loss": 0.569, "num_tokens": 10557085.0, "step": 14 }, { "epoch": 0.020483714380207612, "grad_norm": 2.4506414132525, "learning_rate": 1.590909090909091e-06, "loss": 0.5447, "num_tokens": 11350918.0, "step": 15 }, { "epoch": 0.021849295338888117, "grad_norm": 2.5816515429808624, "learning_rate": 1.7045454545454546e-06, "loss": 0.5798, "num_tokens": 11999096.0, "step": 16 }, { "epoch": 0.023214876297568625, "grad_norm": 2.2835123474475085, "learning_rate": 1.8181818181818183e-06, "loss": 0.5773, "num_tokens": 12834086.0, "step": 17 }, { "epoch": 0.024580457256249133, "grad_norm": 1.8632307461118707, "learning_rate": 1.931818181818182e-06, "loss": 0.5476, "num_tokens": 13567107.0, "step": 18 }, { "epoch": 0.02594603821492964, "grad_norm": 1.5552707560509897, "learning_rate": 2.0454545454545457e-06, "loss": 0.5386, "num_tokens": 14306544.0, "step": 19 }, { "epoch": 0.02731161917361015, "grad_norm": 1.644543794826326, "learning_rate": 2.1590909090909092e-06, "loss": 0.5354, "num_tokens": 15097194.0, "step": 20 }, { "epoch": 0.028677200132290654, "grad_norm": 1.8247718212508763, "learning_rate": 2.2727272727272728e-06, "loss": 0.5614, "num_tokens": 15881981.0, "step": 21 }, { "epoch": 0.030042781090971162, "grad_norm": 1.6412491816592796, "learning_rate": 2.3863636363636367e-06, "loss": 0.5362, "num_tokens": 16690050.0, "step": 22 }, { "epoch": 0.03140836204965167, "grad_norm": 1.454364307832489, "learning_rate": 2.5e-06, "loss": 0.5293, "num_tokens": 17379495.0, "step": 23 }, { "epoch": 0.032773943008332175, "grad_norm": 1.2788991989632146, "learning_rate": 2.6136363636363637e-06, "loss": 0.5364, "num_tokens": 18147974.0, "step": 24 }, { "epoch": 0.03413952396701268, "grad_norm": 0.9842957385383875, "learning_rate": 2.7272727272727272e-06, "loss": 0.4996, "num_tokens": 18882258.0, "step": 25 }, { "epoch": 0.03550510492569319, "grad_norm": 1.0863291341078234, "learning_rate": 2.8409090909090916e-06, "loss": 0.5123, "num_tokens": 19635638.0, "step": 26 }, { "epoch": 0.0368706858843737, "grad_norm": 1.0832881567839654, "learning_rate": 2.954545454545455e-06, "loss": 0.4951, "num_tokens": 20425283.0, "step": 27 }, { "epoch": 0.03823626684305421, "grad_norm": 1.0430380267995263, "learning_rate": 3.0681818181818186e-06, "loss": 0.4877, "num_tokens": 21143674.0, "step": 28 }, { "epoch": 0.039601847801734716, "grad_norm": 0.9411792061633784, "learning_rate": 3.181818181818182e-06, "loss": 0.4962, "num_tokens": 21875193.0, "step": 29 }, { "epoch": 0.040967428760415224, "grad_norm": 0.9226867197014109, "learning_rate": 3.2954545454545456e-06, "loss": 0.5071, "num_tokens": 22612249.0, "step": 30 }, { "epoch": 0.04233300971909573, "grad_norm": 0.763361721959445, "learning_rate": 3.409090909090909e-06, "loss": 0.4786, "num_tokens": 23359661.0, "step": 31 }, { "epoch": 0.04369859067777623, "grad_norm": 0.8183401383069621, "learning_rate": 3.522727272727273e-06, "loss": 0.5099, "num_tokens": 24201458.0, "step": 32 }, { "epoch": 0.04506417163645674, "grad_norm": 0.7674655250701875, "learning_rate": 3.6363636363636366e-06, "loss": 0.4873, "num_tokens": 24973015.0, "step": 33 }, { "epoch": 0.04642975259513725, "grad_norm": 0.7844309274110001, "learning_rate": 3.7500000000000005e-06, "loss": 0.491, "num_tokens": 25790103.0, "step": 34 }, { "epoch": 0.04779533355381776, "grad_norm": 0.8652269151802866, "learning_rate": 3.863636363636364e-06, "loss": 0.4748, "num_tokens": 26605078.0, "step": 35 }, { "epoch": 0.049160914512498266, "grad_norm": 0.8500735677733969, "learning_rate": 3.9772727272727275e-06, "loss": 0.4538, "num_tokens": 27311426.0, "step": 36 }, { "epoch": 0.050526495471178774, "grad_norm": 0.7676854129895507, "learning_rate": 4.0909090909090915e-06, "loss": 0.456, "num_tokens": 28028209.0, "step": 37 }, { "epoch": 0.05189207642985928, "grad_norm": 0.6904805048255496, "learning_rate": 4.204545454545455e-06, "loss": 0.4645, "num_tokens": 28813214.0, "step": 38 }, { "epoch": 0.05325765738853979, "grad_norm": 0.674443339309719, "learning_rate": 4.3181818181818185e-06, "loss": 0.4597, "num_tokens": 29649897.0, "step": 39 }, { "epoch": 0.0546232383472203, "grad_norm": 0.6754724402818755, "learning_rate": 4.4318181818181824e-06, "loss": 0.4776, "num_tokens": 30381412.0, "step": 40 }, { "epoch": 0.0559888193059008, "grad_norm": 0.6489534214594035, "learning_rate": 4.5454545454545455e-06, "loss": 0.4611, "num_tokens": 31138037.0, "step": 41 }, { "epoch": 0.05735440026458131, "grad_norm": 0.5478698172314034, "learning_rate": 4.6590909090909095e-06, "loss": 0.4509, "num_tokens": 31904977.0, "step": 42 }, { "epoch": 0.058719981223261816, "grad_norm": 0.4412777204971197, "learning_rate": 4.772727272727273e-06, "loss": 0.4757, "num_tokens": 32705553.0, "step": 43 }, { "epoch": 0.060085562181942324, "grad_norm": 0.37314697259810586, "learning_rate": 4.8863636363636365e-06, "loss": 0.4645, "num_tokens": 33448701.0, "step": 44 }, { "epoch": 0.06145114314062283, "grad_norm": 0.4397963511117393, "learning_rate": 5e-06, "loss": 0.4453, "num_tokens": 34188110.0, "step": 45 }, { "epoch": 0.06281672409930333, "grad_norm": 0.4066256038354766, "learning_rate": 5.113636363636364e-06, "loss": 0.4544, "num_tokens": 34917083.0, "step": 46 }, { "epoch": 0.06418230505798385, "grad_norm": 0.38878423635846254, "learning_rate": 5.2272727272727274e-06, "loss": 0.4579, "num_tokens": 35648165.0, "step": 47 }, { "epoch": 0.06554788601666435, "grad_norm": 0.3934226660553369, "learning_rate": 5.340909090909091e-06, "loss": 0.454, "num_tokens": 36382540.0, "step": 48 }, { "epoch": 0.06691346697534487, "grad_norm": 0.35334436807302505, "learning_rate": 5.4545454545454545e-06, "loss": 0.4767, "num_tokens": 37185624.0, "step": 49 }, { "epoch": 0.06827904793402537, "grad_norm": 0.35676201510597416, "learning_rate": 5.568181818181818e-06, "loss": 0.4321, "num_tokens": 37911531.0, "step": 50 }, { "epoch": 0.06964462889270588, "grad_norm": 0.424957874194271, "learning_rate": 5.681818181818183e-06, "loss": 0.4723, "num_tokens": 38686844.0, "step": 51 }, { "epoch": 0.07101020985138638, "grad_norm": 0.35470760532123313, "learning_rate": 5.795454545454546e-06, "loss": 0.4352, "num_tokens": 39376780.0, "step": 52 }, { "epoch": 0.0723757908100669, "grad_norm": 0.34097398547737073, "learning_rate": 5.90909090909091e-06, "loss": 0.4567, "num_tokens": 40177770.0, "step": 53 }, { "epoch": 0.0737413717687474, "grad_norm": 0.3656613446075414, "learning_rate": 6.022727272727273e-06, "loss": 0.4393, "num_tokens": 40922233.0, "step": 54 }, { "epoch": 0.0751069527274279, "grad_norm": 0.4177074254300217, "learning_rate": 6.136363636363637e-06, "loss": 0.461, "num_tokens": 41776457.0, "step": 55 }, { "epoch": 0.07647253368610842, "grad_norm": 0.3837454441911233, "learning_rate": 6.25e-06, "loss": 0.441, "num_tokens": 42492497.0, "step": 56 }, { "epoch": 0.07783811464478892, "grad_norm": 0.39451332321583593, "learning_rate": 6.363636363636364e-06, "loss": 0.4398, "num_tokens": 43228499.0, "step": 57 }, { "epoch": 0.07920369560346943, "grad_norm": 0.3996252633332505, "learning_rate": 6.477272727272727e-06, "loss": 0.4595, "num_tokens": 43953622.0, "step": 58 }, { "epoch": 0.08056927656214993, "grad_norm": 0.38498824581477703, "learning_rate": 6.590909090909091e-06, "loss": 0.444, "num_tokens": 44703923.0, "step": 59 }, { "epoch": 0.08193485752083045, "grad_norm": 0.3278378106882008, "learning_rate": 6.704545454545454e-06, "loss": 0.4363, "num_tokens": 45481633.0, "step": 60 }, { "epoch": 0.08330043847951095, "grad_norm": 0.3637884232990754, "learning_rate": 6.818181818181818e-06, "loss": 0.4466, "num_tokens": 46179700.0, "step": 61 }, { "epoch": 0.08466601943819146, "grad_norm": 0.3309280983512684, "learning_rate": 6.931818181818183e-06, "loss": 0.4367, "num_tokens": 46989012.0, "step": 62 }, { "epoch": 0.08603160039687197, "grad_norm": 0.33422459706305213, "learning_rate": 7.045454545454546e-06, "loss": 0.4571, "num_tokens": 47792009.0, "step": 63 }, { "epoch": 0.08739718135555247, "grad_norm": 0.3440576954239099, "learning_rate": 7.15909090909091e-06, "loss": 0.434, "num_tokens": 48535837.0, "step": 64 }, { "epoch": 0.08876276231423298, "grad_norm": 0.3248374753617506, "learning_rate": 7.272727272727273e-06, "loss": 0.4393, "num_tokens": 49300416.0, "step": 65 }, { "epoch": 0.09012834327291348, "grad_norm": 0.32380014393623346, "learning_rate": 7.386363636363637e-06, "loss": 0.4405, "num_tokens": 50027704.0, "step": 66 }, { "epoch": 0.091493924231594, "grad_norm": 0.3297272175543842, "learning_rate": 7.500000000000001e-06, "loss": 0.4242, "num_tokens": 50738298.0, "step": 67 }, { "epoch": 0.0928595051902745, "grad_norm": 0.36000657652843704, "learning_rate": 7.613636363636364e-06, "loss": 0.4433, "num_tokens": 51518049.0, "step": 68 }, { "epoch": 0.09422508614895501, "grad_norm": 0.32613650163068914, "learning_rate": 7.727272727272727e-06, "loss": 0.4264, "num_tokens": 52202527.0, "step": 69 }, { "epoch": 0.09559066710763552, "grad_norm": 0.3433725924095902, "learning_rate": 7.840909090909091e-06, "loss": 0.4448, "num_tokens": 52936090.0, "step": 70 }, { "epoch": 0.09695624806631603, "grad_norm": 0.3251222538582668, "learning_rate": 7.954545454545455e-06, "loss": 0.4232, "num_tokens": 53700006.0, "step": 71 }, { "epoch": 0.09832182902499653, "grad_norm": 0.3658669972345525, "learning_rate": 8.068181818181819e-06, "loss": 0.4288, "num_tokens": 54469611.0, "step": 72 }, { "epoch": 0.09968740998367703, "grad_norm": 0.3232970419517625, "learning_rate": 8.181818181818183e-06, "loss": 0.4197, "num_tokens": 55233428.0, "step": 73 }, { "epoch": 0.10105299094235755, "grad_norm": 0.3597752744038255, "learning_rate": 8.295454545454547e-06, "loss": 0.4548, "num_tokens": 56002125.0, "step": 74 }, { "epoch": 0.10241857190103805, "grad_norm": 0.35190549612706723, "learning_rate": 8.40909090909091e-06, "loss": 0.4447, "num_tokens": 56831886.0, "step": 75 }, { "epoch": 0.10378415285971856, "grad_norm": 0.3770860001910229, "learning_rate": 8.522727272727273e-06, "loss": 0.4075, "num_tokens": 57576284.0, "step": 76 }, { "epoch": 0.10514973381839907, "grad_norm": 0.3947836413692913, "learning_rate": 8.636363636363637e-06, "loss": 0.4224, "num_tokens": 58350348.0, "step": 77 }, { "epoch": 0.10651531477707958, "grad_norm": 0.3697600082612356, "learning_rate": 8.750000000000001e-06, "loss": 0.4312, "num_tokens": 59061243.0, "step": 78 }, { "epoch": 0.10788089573576008, "grad_norm": 0.3391868653260943, "learning_rate": 8.863636363636365e-06, "loss": 0.4111, "num_tokens": 59804038.0, "step": 79 }, { "epoch": 0.1092464766944406, "grad_norm": 0.37185165832650896, "learning_rate": 8.977272727272727e-06, "loss": 0.4361, "num_tokens": 60563985.0, "step": 80 }, { "epoch": 0.1106120576531211, "grad_norm": 0.3328828068083075, "learning_rate": 9.090909090909091e-06, "loss": 0.411, "num_tokens": 61329157.0, "step": 81 }, { "epoch": 0.1119776386118016, "grad_norm": 0.3104215777046083, "learning_rate": 9.204545454545455e-06, "loss": 0.4083, "num_tokens": 62190537.0, "step": 82 }, { "epoch": 0.11334321957048211, "grad_norm": 0.34714189341720686, "learning_rate": 9.318181818181819e-06, "loss": 0.4184, "num_tokens": 62999763.0, "step": 83 }, { "epoch": 0.11470880052916262, "grad_norm": 0.3290620622380869, "learning_rate": 9.431818181818183e-06, "loss": 0.4126, "num_tokens": 63729924.0, "step": 84 }, { "epoch": 0.11607438148784313, "grad_norm": 0.3805923225981605, "learning_rate": 9.545454545454547e-06, "loss": 0.4216, "num_tokens": 64474072.0, "step": 85 }, { "epoch": 0.11743996244652363, "grad_norm": 0.30722391916270153, "learning_rate": 9.65909090909091e-06, "loss": 0.4189, "num_tokens": 65266387.0, "step": 86 }, { "epoch": 0.11880554340520415, "grad_norm": 0.3120134564429968, "learning_rate": 9.772727272727273e-06, "loss": 0.4291, "num_tokens": 66001793.0, "step": 87 }, { "epoch": 0.12017112436388465, "grad_norm": 0.32663742684678004, "learning_rate": 9.886363636363637e-06, "loss": 0.4225, "num_tokens": 66749495.0, "step": 88 }, { "epoch": 0.12153670532256516, "grad_norm": 0.31168083838187166, "learning_rate": 1e-05, "loss": 0.4386, "num_tokens": 67564234.0, "step": 89 }, { "epoch": 0.12290228628124567, "grad_norm": 0.29981938869033276, "learning_rate": 1.0113636363636365e-05, "loss": 0.4244, "num_tokens": 68330993.0, "step": 90 }, { "epoch": 0.12426786723992617, "grad_norm": 0.31406004011736194, "learning_rate": 1.0227272727272729e-05, "loss": 0.4226, "num_tokens": 69125869.0, "step": 91 }, { "epoch": 0.12563344819860667, "grad_norm": 0.30936526645695506, "learning_rate": 1.0340909090909093e-05, "loss": 0.4245, "num_tokens": 69891218.0, "step": 92 }, { "epoch": 0.1269990291572872, "grad_norm": 0.36236097496678255, "learning_rate": 1.0454545454545455e-05, "loss": 0.4241, "num_tokens": 70631922.0, "step": 93 }, { "epoch": 0.1283646101159677, "grad_norm": 0.3318515908322732, "learning_rate": 1.056818181818182e-05, "loss": 0.4437, "num_tokens": 71369020.0, "step": 94 }, { "epoch": 0.1297301910746482, "grad_norm": 0.3461896686234585, "learning_rate": 1.0681818181818183e-05, "loss": 0.4027, "num_tokens": 72145740.0, "step": 95 }, { "epoch": 0.1310957720333287, "grad_norm": 0.3860835709036509, "learning_rate": 1.0795454545454547e-05, "loss": 0.3974, "num_tokens": 72920824.0, "step": 96 }, { "epoch": 0.13246135299200923, "grad_norm": 0.3643110918215684, "learning_rate": 1.0909090909090909e-05, "loss": 0.4189, "num_tokens": 73771316.0, "step": 97 }, { "epoch": 0.13382693395068973, "grad_norm": 0.3353257666171098, "learning_rate": 1.1022727272727275e-05, "loss": 0.404, "num_tokens": 74515867.0, "step": 98 }, { "epoch": 0.13519251490937023, "grad_norm": 0.35881084614551506, "learning_rate": 1.1136363636363637e-05, "loss": 0.4258, "num_tokens": 75280980.0, "step": 99 }, { "epoch": 0.13655809586805073, "grad_norm": 0.3290476032876705, "learning_rate": 1.125e-05, "loss": 0.4251, "num_tokens": 76045199.0, "step": 100 }, { "epoch": 0.13792367682673123, "grad_norm": 0.35757146172347526, "learning_rate": 1.1363636363636366e-05, "loss": 0.4042, "num_tokens": 76836546.0, "step": 101 }, { "epoch": 0.13928925778541176, "grad_norm": 0.3635084532240097, "learning_rate": 1.1477272727272729e-05, "loss": 0.4231, "num_tokens": 77610906.0, "step": 102 }, { "epoch": 0.14065483874409226, "grad_norm": 0.306860085328856, "learning_rate": 1.1590909090909093e-05, "loss": 0.4162, "num_tokens": 78465370.0, "step": 103 }, { "epoch": 0.14202041970277277, "grad_norm": 0.33073707281264003, "learning_rate": 1.1704545454545455e-05, "loss": 0.4122, "num_tokens": 79261238.0, "step": 104 }, { "epoch": 0.14338600066145327, "grad_norm": 0.3099173658517829, "learning_rate": 1.181818181818182e-05, "loss": 0.4174, "num_tokens": 80056201.0, "step": 105 }, { "epoch": 0.1447515816201338, "grad_norm": 0.3279392298917046, "learning_rate": 1.1931818181818183e-05, "loss": 0.4032, "num_tokens": 80790155.0, "step": 106 }, { "epoch": 0.1461171625788143, "grad_norm": 0.3264212075539135, "learning_rate": 1.2045454545454547e-05, "loss": 0.4213, "num_tokens": 81520104.0, "step": 107 }, { "epoch": 0.1474827435374948, "grad_norm": 0.37445493702708793, "learning_rate": 1.2159090909090909e-05, "loss": 0.4302, "num_tokens": 82261359.0, "step": 108 }, { "epoch": 0.1488483244961753, "grad_norm": 0.34114998615379194, "learning_rate": 1.2272727272727274e-05, "loss": 0.4196, "num_tokens": 83051862.0, "step": 109 }, { "epoch": 0.1502139054548558, "grad_norm": 0.3541077945442501, "learning_rate": 1.2386363636363637e-05, "loss": 0.4126, "num_tokens": 83800921.0, "step": 110 }, { "epoch": 0.15157948641353633, "grad_norm": 0.3665298642316797, "learning_rate": 1.25e-05, "loss": 0.399, "num_tokens": 84463933.0, "step": 111 }, { "epoch": 0.15294506737221683, "grad_norm": 0.37794199681419804, "learning_rate": 1.2613636363636366e-05, "loss": 0.3977, "num_tokens": 85234195.0, "step": 112 }, { "epoch": 0.15431064833089733, "grad_norm": 0.4150871065411515, "learning_rate": 1.2727272727272728e-05, "loss": 0.4207, "num_tokens": 85990575.0, "step": 113 }, { "epoch": 0.15567622928957783, "grad_norm": 0.41004065320799266, "learning_rate": 1.2840909090909092e-05, "loss": 0.4441, "num_tokens": 86796478.0, "step": 114 }, { "epoch": 0.15704181024825836, "grad_norm": 0.43529548672767665, "learning_rate": 1.2954545454545455e-05, "loss": 0.4206, "num_tokens": 87587191.0, "step": 115 }, { "epoch": 0.15840739120693886, "grad_norm": 0.4172472835772534, "learning_rate": 1.306818181818182e-05, "loss": 0.4079, "num_tokens": 88318757.0, "step": 116 }, { "epoch": 0.15977297216561936, "grad_norm": 0.38521023276269134, "learning_rate": 1.3181818181818183e-05, "loss": 0.4008, "num_tokens": 89117031.0, "step": 117 }, { "epoch": 0.16113855312429987, "grad_norm": 0.45228331579152686, "learning_rate": 1.3295454545454546e-05, "loss": 0.4206, "num_tokens": 89863782.0, "step": 118 }, { "epoch": 0.16250413408298037, "grad_norm": 0.3186124048150442, "learning_rate": 1.3409090909090909e-05, "loss": 0.3922, "num_tokens": 90626599.0, "step": 119 }, { "epoch": 0.1638697150416609, "grad_norm": 0.341038521251306, "learning_rate": 1.3522727272727274e-05, "loss": 0.4171, "num_tokens": 91426268.0, "step": 120 }, { "epoch": 0.1652352960003414, "grad_norm": 0.3119171227854974, "learning_rate": 1.3636363636363637e-05, "loss": 0.4053, "num_tokens": 92168484.0, "step": 121 }, { "epoch": 0.1666008769590219, "grad_norm": 0.3215289788156116, "learning_rate": 1.375e-05, "loss": 0.4179, "num_tokens": 93022732.0, "step": 122 }, { "epoch": 0.1679664579177024, "grad_norm": 0.3503658529028788, "learning_rate": 1.3863636363636366e-05, "loss": 0.3958, "num_tokens": 93780799.0, "step": 123 }, { "epoch": 0.16933203887638293, "grad_norm": 0.36630314169023653, "learning_rate": 1.3977272727272728e-05, "loss": 0.4338, "num_tokens": 94536625.0, "step": 124 }, { "epoch": 0.17069761983506343, "grad_norm": 0.37764181000226554, "learning_rate": 1.4090909090909092e-05, "loss": 0.4195, "num_tokens": 95331936.0, "step": 125 }, { "epoch": 0.17206320079374393, "grad_norm": 0.40094960882392333, "learning_rate": 1.4204545454545455e-05, "loss": 0.4008, "num_tokens": 96013577.0, "step": 126 }, { "epoch": 0.17342878175242443, "grad_norm": 0.4224329999385295, "learning_rate": 1.431818181818182e-05, "loss": 0.4352, "num_tokens": 96777177.0, "step": 127 }, { "epoch": 0.17479436271110493, "grad_norm": 0.3659015714456921, "learning_rate": 1.4431818181818182e-05, "loss": 0.405, "num_tokens": 97649984.0, "step": 128 }, { "epoch": 0.17615994366978546, "grad_norm": 0.3847777214467326, "learning_rate": 1.4545454545454546e-05, "loss": 0.4165, "num_tokens": 98426131.0, "step": 129 }, { "epoch": 0.17752552462846596, "grad_norm": 0.32688609754166364, "learning_rate": 1.465909090909091e-05, "loss": 0.3959, "num_tokens": 99280265.0, "step": 130 }, { "epoch": 0.17889110558714647, "grad_norm": 0.44502582613703623, "learning_rate": 1.4772727272727274e-05, "loss": 0.4202, "num_tokens": 100068369.0, "step": 131 }, { "epoch": 0.18025668654582697, "grad_norm": 0.3718517597229538, "learning_rate": 1.4886363636363636e-05, "loss": 0.4075, "num_tokens": 100866816.0, "step": 132 }, { "epoch": 0.1816222675045075, "grad_norm": 0.4033353268247993, "learning_rate": 1.5000000000000002e-05, "loss": 0.4122, "num_tokens": 101644474.0, "step": 133 }, { "epoch": 0.182987848463188, "grad_norm": 0.41126550211732105, "learning_rate": 1.5113636363636366e-05, "loss": 0.4096, "num_tokens": 102393514.0, "step": 134 }, { "epoch": 0.1843534294218685, "grad_norm": 0.3712967075606849, "learning_rate": 1.5227272727272728e-05, "loss": 0.4032, "num_tokens": 103111951.0, "step": 135 }, { "epoch": 0.185719010380549, "grad_norm": 0.3767775569249854, "learning_rate": 1.5340909090909094e-05, "loss": 0.416, "num_tokens": 103864022.0, "step": 136 }, { "epoch": 0.1870845913392295, "grad_norm": 0.375238907768676, "learning_rate": 1.5454545454545454e-05, "loss": 0.4221, "num_tokens": 104563581.0, "step": 137 }, { "epoch": 0.18845017229791003, "grad_norm": 0.39224381524332996, "learning_rate": 1.5568181818181822e-05, "loss": 0.4163, "num_tokens": 105282648.0, "step": 138 }, { "epoch": 0.18981575325659053, "grad_norm": 0.36362163949154414, "learning_rate": 1.5681818181818182e-05, "loss": 0.4107, "num_tokens": 105977886.0, "step": 139 }, { "epoch": 0.19118133421527103, "grad_norm": 0.44442549741258364, "learning_rate": 1.5795454545454546e-05, "loss": 0.4309, "num_tokens": 106783806.0, "step": 140 }, { "epoch": 0.19254691517395153, "grad_norm": 0.3348040948116041, "learning_rate": 1.590909090909091e-05, "loss": 0.3798, "num_tokens": 107499922.0, "step": 141 }, { "epoch": 0.19391249613263206, "grad_norm": 0.4054092517318534, "learning_rate": 1.6022727272727274e-05, "loss": 0.4052, "num_tokens": 108277422.0, "step": 142 }, { "epoch": 0.19527807709131256, "grad_norm": 0.36332864858879105, "learning_rate": 1.6136363636363638e-05, "loss": 0.4028, "num_tokens": 109114032.0, "step": 143 }, { "epoch": 0.19664365804999306, "grad_norm": 0.36734020195853057, "learning_rate": 1.6250000000000002e-05, "loss": 0.3969, "num_tokens": 109899718.0, "step": 144 }, { "epoch": 0.19800923900867357, "grad_norm": 0.411722441842276, "learning_rate": 1.6363636363636366e-05, "loss": 0.4087, "num_tokens": 110717613.0, "step": 145 }, { "epoch": 0.19937481996735407, "grad_norm": 0.3638291017386694, "learning_rate": 1.647727272727273e-05, "loss": 0.408, "num_tokens": 111439912.0, "step": 146 }, { "epoch": 0.2007404009260346, "grad_norm": 0.39153546417238255, "learning_rate": 1.6590909090909094e-05, "loss": 0.412, "num_tokens": 112180564.0, "step": 147 }, { "epoch": 0.2021059818847151, "grad_norm": 0.3979231399520793, "learning_rate": 1.6704545454545454e-05, "loss": 0.4266, "num_tokens": 112954449.0, "step": 148 }, { "epoch": 0.2034715628433956, "grad_norm": 0.35474285289086577, "learning_rate": 1.681818181818182e-05, "loss": 0.4032, "num_tokens": 113701483.0, "step": 149 }, { "epoch": 0.2048371438020761, "grad_norm": 0.41960532235754483, "learning_rate": 1.6931818181818182e-05, "loss": 0.4121, "num_tokens": 114385292.0, "step": 150 }, { "epoch": 0.20620272476075663, "grad_norm": 0.3208323074924627, "learning_rate": 1.7045454545454546e-05, "loss": 0.4124, "num_tokens": 115144805.0, "step": 151 }, { "epoch": 0.20756830571943713, "grad_norm": 0.3918541853413258, "learning_rate": 1.715909090909091e-05, "loss": 0.3933, "num_tokens": 115906063.0, "step": 152 }, { "epoch": 0.20893388667811763, "grad_norm": 0.38256450822772764, "learning_rate": 1.7272727272727274e-05, "loss": 0.3957, "num_tokens": 116680256.0, "step": 153 }, { "epoch": 0.21029946763679813, "grad_norm": 0.3926981510841826, "learning_rate": 1.7386363636363638e-05, "loss": 0.4037, "num_tokens": 117453273.0, "step": 154 }, { "epoch": 0.21166504859547863, "grad_norm": 0.35981922549246803, "learning_rate": 1.7500000000000002e-05, "loss": 0.3919, "num_tokens": 118305135.0, "step": 155 }, { "epoch": 0.21303062955415916, "grad_norm": 0.393283477728591, "learning_rate": 1.7613636363636366e-05, "loss": 0.417, "num_tokens": 119052182.0, "step": 156 }, { "epoch": 0.21439621051283966, "grad_norm": 0.4037922402082293, "learning_rate": 1.772727272727273e-05, "loss": 0.3872, "num_tokens": 119755320.0, "step": 157 }, { "epoch": 0.21576179147152016, "grad_norm": 0.39561764727128945, "learning_rate": 1.7840909090909094e-05, "loss": 0.4259, "num_tokens": 120582682.0, "step": 158 }, { "epoch": 0.21712737243020067, "grad_norm": 0.35894057299289617, "learning_rate": 1.7954545454545454e-05, "loss": 0.412, "num_tokens": 121414797.0, "step": 159 }, { "epoch": 0.2184929533888812, "grad_norm": 0.39124239469182126, "learning_rate": 1.806818181818182e-05, "loss": 0.3916, "num_tokens": 122173384.0, "step": 160 }, { "epoch": 0.2198585343475617, "grad_norm": 0.3770835121096991, "learning_rate": 1.8181818181818182e-05, "loss": 0.4095, "num_tokens": 122918408.0, "step": 161 }, { "epoch": 0.2212241153062422, "grad_norm": 0.38063325493115563, "learning_rate": 1.8295454545454546e-05, "loss": 0.4097, "num_tokens": 123669596.0, "step": 162 }, { "epoch": 0.2225896962649227, "grad_norm": 0.37518397307666107, "learning_rate": 1.840909090909091e-05, "loss": 0.4244, "num_tokens": 124429571.0, "step": 163 }, { "epoch": 0.2239552772236032, "grad_norm": 0.3588695560474301, "learning_rate": 1.8522727272727274e-05, "loss": 0.4104, "num_tokens": 125196963.0, "step": 164 }, { "epoch": 0.22532085818228373, "grad_norm": 0.36889996137627357, "learning_rate": 1.8636363636363638e-05, "loss": 0.3946, "num_tokens": 125949052.0, "step": 165 }, { "epoch": 0.22668643914096423, "grad_norm": 0.3942017445331087, "learning_rate": 1.8750000000000002e-05, "loss": 0.3907, "num_tokens": 126742770.0, "step": 166 }, { "epoch": 0.22805202009964473, "grad_norm": 0.35061375730916733, "learning_rate": 1.8863636363636366e-05, "loss": 0.3958, "num_tokens": 127558819.0, "step": 167 }, { "epoch": 0.22941760105832523, "grad_norm": 0.4652802192938087, "learning_rate": 1.897727272727273e-05, "loss": 0.4087, "num_tokens": 128286834.0, "step": 168 }, { "epoch": 0.23078318201700576, "grad_norm": 0.4081948054772298, "learning_rate": 1.9090909090909094e-05, "loss": 0.4024, "num_tokens": 129132344.0, "step": 169 }, { "epoch": 0.23214876297568626, "grad_norm": 0.4175284220879797, "learning_rate": 1.9204545454545454e-05, "loss": 0.4243, "num_tokens": 129824101.0, "step": 170 }, { "epoch": 0.23351434393436676, "grad_norm": 0.45332666037123026, "learning_rate": 1.931818181818182e-05, "loss": 0.4015, "num_tokens": 130556473.0, "step": 171 }, { "epoch": 0.23487992489304726, "grad_norm": 0.4152289920667755, "learning_rate": 1.9431818181818182e-05, "loss": 0.4122, "num_tokens": 131291044.0, "step": 172 }, { "epoch": 0.23624550585172777, "grad_norm": 0.49231460645116015, "learning_rate": 1.9545454545454546e-05, "loss": 0.4377, "num_tokens": 132084966.0, "step": 173 }, { "epoch": 0.2376110868104083, "grad_norm": 0.32369078846343474, "learning_rate": 1.965909090909091e-05, "loss": 0.4092, "num_tokens": 132902353.0, "step": 174 }, { "epoch": 0.2389766677690888, "grad_norm": 0.5161325874763318, "learning_rate": 1.9772727272727274e-05, "loss": 0.4215, "num_tokens": 133622462.0, "step": 175 }, { "epoch": 0.2403422487277693, "grad_norm": 0.33193016199489667, "learning_rate": 1.9886363636363638e-05, "loss": 0.3971, "num_tokens": 134319877.0, "step": 176 }, { "epoch": 0.2417078296864498, "grad_norm": 0.4798974266530614, "learning_rate": 2e-05, "loss": 0.396, "num_tokens": 135041196.0, "step": 177 }, { "epoch": 0.24307341064513033, "grad_norm": 0.384269231947528, "learning_rate": 1.999999862724488e-05, "loss": 0.3957, "num_tokens": 135748184.0, "step": 178 }, { "epoch": 0.24443899160381083, "grad_norm": 0.4673196178393515, "learning_rate": 1.9999994508979935e-05, "loss": 0.394, "num_tokens": 136581030.0, "step": 179 }, { "epoch": 0.24580457256249133, "grad_norm": 0.37523509594136417, "learning_rate": 1.9999987645206422e-05, "loss": 0.422, "num_tokens": 137373989.0, "step": 180 }, { "epoch": 0.24717015352117183, "grad_norm": 0.4605013266444706, "learning_rate": 1.999997803592643e-05, "loss": 0.3974, "num_tokens": 138117615.0, "step": 181 }, { "epoch": 0.24853573447985233, "grad_norm": 0.35013465535003613, "learning_rate": 1.9999965681142902e-05, "loss": 0.395, "num_tokens": 138849889.0, "step": 182 }, { "epoch": 0.24990131543853286, "grad_norm": 0.4634961786265312, "learning_rate": 1.9999950580859594e-05, "loss": 0.3974, "num_tokens": 139648257.0, "step": 183 }, { "epoch": 0.25126689639721334, "grad_norm": 0.3564681330312269, "learning_rate": 1.9999932735081123e-05, "loss": 0.3939, "num_tokens": 140300071.0, "step": 184 }, { "epoch": 0.2526324773558939, "grad_norm": 0.5212925010332135, "learning_rate": 1.9999912143812927e-05, "loss": 0.4056, "num_tokens": 141035858.0, "step": 185 }, { "epoch": 0.2539980583145744, "grad_norm": 0.4182428311993074, "learning_rate": 1.9999888807061288e-05, "loss": 0.3914, "num_tokens": 141908782.0, "step": 186 }, { "epoch": 0.2553636392732549, "grad_norm": 0.49147903642173646, "learning_rate": 1.9999862724833328e-05, "loss": 0.4409, "num_tokens": 142717216.0, "step": 187 }, { "epoch": 0.2567292202319354, "grad_norm": 0.34481147969504883, "learning_rate": 1.9999833897137e-05, "loss": 0.4095, "num_tokens": 143499035.0, "step": 188 }, { "epoch": 0.2580948011906159, "grad_norm": 0.42755706915596964, "learning_rate": 1.99998023239811e-05, "loss": 0.3927, "num_tokens": 144262477.0, "step": 189 }, { "epoch": 0.2594603821492964, "grad_norm": 0.4180818729161203, "learning_rate": 1.999976800537526e-05, "loss": 0.3963, "num_tokens": 144972290.0, "step": 190 }, { "epoch": 0.2608259631079769, "grad_norm": 0.3959702147826673, "learning_rate": 1.9999730941329947e-05, "loss": 0.3945, "num_tokens": 145734994.0, "step": 191 }, { "epoch": 0.2621915440666574, "grad_norm": 0.39347136364939717, "learning_rate": 1.999969113185647e-05, "loss": 0.3933, "num_tokens": 146518634.0, "step": 192 }, { "epoch": 0.2635571250253379, "grad_norm": 0.3561674216343085, "learning_rate": 1.9999648576966974e-05, "loss": 0.3969, "num_tokens": 147199487.0, "step": 193 }, { "epoch": 0.26492270598401846, "grad_norm": 0.41008506362832686, "learning_rate": 1.9999603276674434e-05, "loss": 0.3924, "num_tokens": 147964517.0, "step": 194 }, { "epoch": 0.26628828694269896, "grad_norm": 0.39271386473573855, "learning_rate": 1.999955523099268e-05, "loss": 0.4091, "num_tokens": 148768364.0, "step": 195 }, { "epoch": 0.26765386790137946, "grad_norm": 0.4448205584781951, "learning_rate": 1.9999504439936358e-05, "loss": 0.4145, "num_tokens": 149581041.0, "step": 196 }, { "epoch": 0.26901944886005996, "grad_norm": 0.3820873204035089, "learning_rate": 1.999945090352097e-05, "loss": 0.4024, "num_tokens": 150376574.0, "step": 197 }, { "epoch": 0.27038502981874046, "grad_norm": 0.4655397720104624, "learning_rate": 1.999939462176284e-05, "loss": 0.3974, "num_tokens": 151084782.0, "step": 198 }, { "epoch": 0.27175061077742096, "grad_norm": 0.3880409470247186, "learning_rate": 1.9999335594679144e-05, "loss": 0.4097, "num_tokens": 151869753.0, "step": 199 }, { "epoch": 0.27311619173610147, "grad_norm": 0.43087037053967353, "learning_rate": 1.999927382228789e-05, "loss": 0.4001, "num_tokens": 152631222.0, "step": 200 }, { "epoch": 0.27448177269478197, "grad_norm": 0.38461100559019773, "learning_rate": 1.9999209304607913e-05, "loss": 0.3819, "num_tokens": 153384826.0, "step": 201 }, { "epoch": 0.27584735365346247, "grad_norm": 0.38468162299126785, "learning_rate": 1.9999142041658898e-05, "loss": 0.3933, "num_tokens": 154123081.0, "step": 202 }, { "epoch": 0.277212934612143, "grad_norm": 0.4111234140427244, "learning_rate": 1.9999072033461372e-05, "loss": 0.4122, "num_tokens": 154974829.0, "step": 203 }, { "epoch": 0.2785785155708235, "grad_norm": 0.32232552516136564, "learning_rate": 1.9998999280036682e-05, "loss": 0.3984, "num_tokens": 155689814.0, "step": 204 }, { "epoch": 0.279944096529504, "grad_norm": 0.38159043710187907, "learning_rate": 1.9998923781407025e-05, "loss": 0.409, "num_tokens": 156498336.0, "step": 205 }, { "epoch": 0.28130967748818453, "grad_norm": 0.3231936218372313, "learning_rate": 1.9998845537595432e-05, "loss": 0.409, "num_tokens": 157211030.0, "step": 206 }, { "epoch": 0.28267525844686503, "grad_norm": 0.39701233066766234, "learning_rate": 1.999876454862577e-05, "loss": 0.3868, "num_tokens": 157935970.0, "step": 207 }, { "epoch": 0.28404083940554553, "grad_norm": 0.3473286689561777, "learning_rate": 1.9998680814522756e-05, "loss": 0.385, "num_tokens": 158711701.0, "step": 208 }, { "epoch": 0.28540642036422603, "grad_norm": 0.39949594661313825, "learning_rate": 1.999859433531192e-05, "loss": 0.4084, "num_tokens": 159464623.0, "step": 209 }, { "epoch": 0.28677200132290653, "grad_norm": 0.39437860073351505, "learning_rate": 1.9998505111019648e-05, "loss": 0.4153, "num_tokens": 160241725.0, "step": 210 }, { "epoch": 0.28813758228158703, "grad_norm": 0.3562930703448643, "learning_rate": 1.9998413141673162e-05, "loss": 0.3916, "num_tokens": 160978940.0, "step": 211 }, { "epoch": 0.2895031632402676, "grad_norm": 0.3659820926431436, "learning_rate": 1.999831842730051e-05, "loss": 0.3983, "num_tokens": 161779321.0, "step": 212 }, { "epoch": 0.2908687441989481, "grad_norm": 0.3575333121641269, "learning_rate": 1.999822096793059e-05, "loss": 0.4104, "num_tokens": 162530909.0, "step": 213 }, { "epoch": 0.2922343251576286, "grad_norm": 0.33338314476453984, "learning_rate": 1.9998120763593137e-05, "loss": 0.419, "num_tokens": 163320760.0, "step": 214 }, { "epoch": 0.2935999061163091, "grad_norm": 0.397913734510872, "learning_rate": 1.9998017814318712e-05, "loss": 0.3949, "num_tokens": 164174798.0, "step": 215 }, { "epoch": 0.2949654870749896, "grad_norm": 0.27986609445997696, "learning_rate": 1.999791212013872e-05, "loss": 0.3827, "num_tokens": 164897363.0, "step": 216 }, { "epoch": 0.2963310680336701, "grad_norm": 0.3450032237823481, "learning_rate": 1.9997803681085412e-05, "loss": 0.3858, "num_tokens": 165686427.0, "step": 217 }, { "epoch": 0.2976966489923506, "grad_norm": 0.32254629341332614, "learning_rate": 1.999769249719186e-05, "loss": 0.3877, "num_tokens": 166498716.0, "step": 218 }, { "epoch": 0.2990622299510311, "grad_norm": 0.2791852188392648, "learning_rate": 1.9997578568491983e-05, "loss": 0.3764, "num_tokens": 167265170.0, "step": 219 }, { "epoch": 0.3004278109097116, "grad_norm": 0.4004359968618145, "learning_rate": 1.9997461895020538e-05, "loss": 0.4077, "num_tokens": 168029418.0, "step": 220 }, { "epoch": 0.30179339186839216, "grad_norm": 0.3049469446124067, "learning_rate": 1.9997342476813116e-05, "loss": 0.4008, "num_tokens": 168831970.0, "step": 221 }, { "epoch": 0.30315897282707266, "grad_norm": 0.41221492444530605, "learning_rate": 1.9997220313906145e-05, "loss": 0.4171, "num_tokens": 169643664.0, "step": 222 }, { "epoch": 0.30452455378575316, "grad_norm": 0.3516336492924699, "learning_rate": 1.9997095406336892e-05, "loss": 0.4036, "num_tokens": 170389390.0, "step": 223 }, { "epoch": 0.30589013474443366, "grad_norm": 0.375968127066635, "learning_rate": 1.9996967754143467e-05, "loss": 0.3901, "num_tokens": 171120690.0, "step": 224 }, { "epoch": 0.30725571570311416, "grad_norm": 0.3183575086829415, "learning_rate": 1.99968373573648e-05, "loss": 0.3828, "num_tokens": 171934188.0, "step": 225 }, { "epoch": 0.30862129666179466, "grad_norm": 0.29167742320642454, "learning_rate": 1.9996704216040675e-05, "loss": 0.3902, "num_tokens": 172755877.0, "step": 226 }, { "epoch": 0.30998687762047517, "grad_norm": 0.3745152571978657, "learning_rate": 1.999656833021171e-05, "loss": 0.4057, "num_tokens": 173513956.0, "step": 227 }, { "epoch": 0.31135245857915567, "grad_norm": 0.3155968331719428, "learning_rate": 1.9996429699919353e-05, "loss": 0.4003, "num_tokens": 174298361.0, "step": 228 }, { "epoch": 0.31271803953783617, "grad_norm": 0.30620235017275976, "learning_rate": 1.99962883252059e-05, "loss": 0.3998, "num_tokens": 175161082.0, "step": 229 }, { "epoch": 0.3140836204965167, "grad_norm": 0.2993754779819447, "learning_rate": 1.9996144206114477e-05, "loss": 0.3838, "num_tokens": 175888547.0, "step": 230 }, { "epoch": 0.3154492014551972, "grad_norm": 0.3042805556881561, "learning_rate": 1.9995997342689044e-05, "loss": 0.3916, "num_tokens": 176701182.0, "step": 231 }, { "epoch": 0.3168147824138777, "grad_norm": 0.3338530354898371, "learning_rate": 1.9995847734974403e-05, "loss": 0.3734, "num_tokens": 177456235.0, "step": 232 }, { "epoch": 0.31818036337255823, "grad_norm": 0.325114394134763, "learning_rate": 1.99956953830162e-05, "loss": 0.3873, "num_tokens": 178193394.0, "step": 233 }, { "epoch": 0.31954594433123873, "grad_norm": 0.42273079346990633, "learning_rate": 1.9995540286860902e-05, "loss": 0.3915, "num_tokens": 178913206.0, "step": 234 }, { "epoch": 0.32091152528991923, "grad_norm": 0.38867419304556305, "learning_rate": 1.9995382446555826e-05, "loss": 0.403, "num_tokens": 179715440.0, "step": 235 }, { "epoch": 0.32227710624859973, "grad_norm": 0.35126682720363056, "learning_rate": 1.9995221862149126e-05, "loss": 0.4096, "num_tokens": 180486851.0, "step": 236 }, { "epoch": 0.32364268720728023, "grad_norm": 0.3540006171419841, "learning_rate": 1.9995058533689784e-05, "loss": 0.4178, "num_tokens": 181295035.0, "step": 237 }, { "epoch": 0.32500826816596073, "grad_norm": 0.3249230249417242, "learning_rate": 1.9994892461227626e-05, "loss": 0.3768, "num_tokens": 182036842.0, "step": 238 }, { "epoch": 0.3263738491246413, "grad_norm": 0.31634248166236556, "learning_rate": 1.999472364481331e-05, "loss": 0.4005, "num_tokens": 182764703.0, "step": 239 }, { "epoch": 0.3277394300833218, "grad_norm": 0.316816872669518, "learning_rate": 1.9994552084498345e-05, "loss": 0.3954, "num_tokens": 183540023.0, "step": 240 }, { "epoch": 0.3291050110420023, "grad_norm": 0.3804437991638129, "learning_rate": 1.9994377780335056e-05, "loss": 0.3962, "num_tokens": 184271977.0, "step": 241 }, { "epoch": 0.3304705920006828, "grad_norm": 0.3163469468649419, "learning_rate": 1.9994200732376622e-05, "loss": 0.4046, "num_tokens": 185074936.0, "step": 242 }, { "epoch": 0.3318361729593633, "grad_norm": 0.3880085468452102, "learning_rate": 1.9994020940677047e-05, "loss": 0.4155, "num_tokens": 185912258.0, "step": 243 }, { "epoch": 0.3332017539180438, "grad_norm": 0.3540815713190658, "learning_rate": 1.9993838405291183e-05, "loss": 0.3935, "num_tokens": 186708027.0, "step": 244 }, { "epoch": 0.3345673348767243, "grad_norm": 0.32729358132477676, "learning_rate": 1.9993653126274712e-05, "loss": 0.4094, "num_tokens": 187484562.0, "step": 245 }, { "epoch": 0.3359329158354048, "grad_norm": 0.33996263809705835, "learning_rate": 1.9993465103684153e-05, "loss": 0.3937, "num_tokens": 188214559.0, "step": 246 }, { "epoch": 0.3372984967940853, "grad_norm": 0.3072902200874743, "learning_rate": 1.9993274337576866e-05, "loss": 0.3921, "num_tokens": 189003396.0, "step": 247 }, { "epoch": 0.33866407775276586, "grad_norm": 0.3046749015339264, "learning_rate": 1.9993080828011046e-05, "loss": 0.3799, "num_tokens": 189712206.0, "step": 248 }, { "epoch": 0.34002965871144636, "grad_norm": 0.32137805243013307, "learning_rate": 1.9992884575045717e-05, "loss": 0.4022, "num_tokens": 190557446.0, "step": 249 }, { "epoch": 0.34139523967012686, "grad_norm": 0.32728603097870307, "learning_rate": 1.9992685578740755e-05, "loss": 0.3932, "num_tokens": 191296974.0, "step": 250 }, { "epoch": 0.34276082062880736, "grad_norm": 0.3099730220704177, "learning_rate": 1.9992483839156868e-05, "loss": 0.3867, "num_tokens": 192090517.0, "step": 251 }, { "epoch": 0.34412640158748786, "grad_norm": 0.2842483644080127, "learning_rate": 1.9992279356355585e-05, "loss": 0.392, "num_tokens": 192824923.0, "step": 252 }, { "epoch": 0.34549198254616836, "grad_norm": 0.3238047793809841, "learning_rate": 1.9992072130399297e-05, "loss": 0.3912, "num_tokens": 193592604.0, "step": 253 }, { "epoch": 0.34685756350484886, "grad_norm": 0.28721131927558613, "learning_rate": 1.9991862161351218e-05, "loss": 0.4012, "num_tokens": 194343790.0, "step": 254 }, { "epoch": 0.34822314446352937, "grad_norm": 0.3394688073397443, "learning_rate": 1.9991649449275397e-05, "loss": 0.3942, "num_tokens": 195144936.0, "step": 255 }, { "epoch": 0.34958872542220987, "grad_norm": 0.26264904539351003, "learning_rate": 1.9991433994236723e-05, "loss": 0.3799, "num_tokens": 195911042.0, "step": 256 }, { "epoch": 0.3509543063808904, "grad_norm": 0.3291469523238052, "learning_rate": 1.999121579630092e-05, "loss": 0.3852, "num_tokens": 196669972.0, "step": 257 }, { "epoch": 0.3523198873395709, "grad_norm": 0.3094973846307219, "learning_rate": 1.999099485553456e-05, "loss": 0.3987, "num_tokens": 197435742.0, "step": 258 }, { "epoch": 0.3536854682982514, "grad_norm": 0.324981801089362, "learning_rate": 1.999077117200503e-05, "loss": 0.3895, "num_tokens": 198199606.0, "step": 259 }, { "epoch": 0.3550510492569319, "grad_norm": 0.31470510510405425, "learning_rate": 1.999054474578058e-05, "loss": 0.3751, "num_tokens": 198987046.0, "step": 260 }, { "epoch": 0.35641663021561243, "grad_norm": 0.32930223251356155, "learning_rate": 1.999031557693027e-05, "loss": 0.3911, "num_tokens": 199762693.0, "step": 261 }, { "epoch": 0.35778221117429293, "grad_norm": 0.317962820115311, "learning_rate": 1.9990083665524016e-05, "loss": 0.4247, "num_tokens": 200504594.0, "step": 262 }, { "epoch": 0.35914779213297343, "grad_norm": 0.32831751675616766, "learning_rate": 1.998984901163256e-05, "loss": 0.3966, "num_tokens": 201206950.0, "step": 263 }, { "epoch": 0.36051337309165393, "grad_norm": 0.337537269391906, "learning_rate": 1.9989611615327497e-05, "loss": 0.4067, "num_tokens": 201958474.0, "step": 264 }, { "epoch": 0.36187895405033443, "grad_norm": 0.304514178353785, "learning_rate": 1.998937147668123e-05, "loss": 0.385, "num_tokens": 202668583.0, "step": 265 }, { "epoch": 0.363244535009015, "grad_norm": 0.27368560229670585, "learning_rate": 1.9989128595767025e-05, "loss": 0.3888, "num_tokens": 203335965.0, "step": 266 }, { "epoch": 0.3646101159676955, "grad_norm": 0.32944612825367875, "learning_rate": 1.9988882972658967e-05, "loss": 0.3933, "num_tokens": 204113580.0, "step": 267 }, { "epoch": 0.365975696926376, "grad_norm": 0.28819118290481627, "learning_rate": 1.9988634607431995e-05, "loss": 0.3946, "num_tokens": 204930237.0, "step": 268 }, { "epoch": 0.3673412778850565, "grad_norm": 0.3140182585732985, "learning_rate": 1.9988383500161866e-05, "loss": 0.388, "num_tokens": 205713593.0, "step": 269 }, { "epoch": 0.368706858843737, "grad_norm": 0.28508464158957064, "learning_rate": 1.9988129650925188e-05, "loss": 0.3914, "num_tokens": 206518217.0, "step": 270 }, { "epoch": 0.3700724398024175, "grad_norm": 0.30894837924638346, "learning_rate": 1.9987873059799393e-05, "loss": 0.3901, "num_tokens": 207298768.0, "step": 271 }, { "epoch": 0.371438020761098, "grad_norm": 0.30752980076574654, "learning_rate": 1.9987613726862757e-05, "loss": 0.3909, "num_tokens": 208115943.0, "step": 272 }, { "epoch": 0.3728036017197785, "grad_norm": 0.3078288935321802, "learning_rate": 1.9987351652194394e-05, "loss": 0.3857, "num_tokens": 208902284.0, "step": 273 }, { "epoch": 0.374169182678459, "grad_norm": 0.301593513697497, "learning_rate": 1.9987086835874256e-05, "loss": 0.3961, "num_tokens": 209613804.0, "step": 274 }, { "epoch": 0.37553476363713956, "grad_norm": 0.280767654185102, "learning_rate": 1.998681927798312e-05, "loss": 0.3837, "num_tokens": 210340571.0, "step": 275 }, { "epoch": 0.37690034459582006, "grad_norm": 0.308310030043621, "learning_rate": 1.9986548978602606e-05, "loss": 0.3991, "num_tokens": 211039158.0, "step": 276 }, { "epoch": 0.37826592555450056, "grad_norm": 0.2664909217167294, "learning_rate": 1.9986275937815178e-05, "loss": 0.3934, "num_tokens": 211825565.0, "step": 277 }, { "epoch": 0.37963150651318106, "grad_norm": 0.2962525295222797, "learning_rate": 1.998600015570412e-05, "loss": 0.3918, "num_tokens": 212582971.0, "step": 278 }, { "epoch": 0.38099708747186156, "grad_norm": 0.2552381969232553, "learning_rate": 1.9985721632353566e-05, "loss": 0.3844, "num_tokens": 213266102.0, "step": 279 }, { "epoch": 0.38236266843054206, "grad_norm": 0.2883797413981686, "learning_rate": 1.998544036784848e-05, "loss": 0.4028, "num_tokens": 214010120.0, "step": 280 }, { "epoch": 0.38372824938922256, "grad_norm": 0.2860005852024724, "learning_rate": 1.9985156362274663e-05, "loss": 0.3889, "num_tokens": 214823555.0, "step": 281 }, { "epoch": 0.38509383034790307, "grad_norm": 0.29463479099759743, "learning_rate": 1.9984869615718757e-05, "loss": 0.4181, "num_tokens": 215565525.0, "step": 282 }, { "epoch": 0.38645941130658357, "grad_norm": 0.2739628123157226, "learning_rate": 1.998458012826823e-05, "loss": 0.3965, "num_tokens": 216359979.0, "step": 283 }, { "epoch": 0.3878249922652641, "grad_norm": 0.29734792567019613, "learning_rate": 1.9984287900011398e-05, "loss": 0.3927, "num_tokens": 217119930.0, "step": 284 }, { "epoch": 0.3891905732239446, "grad_norm": 0.27376649183184126, "learning_rate": 1.9983992931037398e-05, "loss": 0.3912, "num_tokens": 217887396.0, "step": 285 }, { "epoch": 0.3905561541826251, "grad_norm": 0.3030156285828892, "learning_rate": 1.998369522143622e-05, "loss": 0.3999, "num_tokens": 218687938.0, "step": 286 }, { "epoch": 0.3919217351413056, "grad_norm": 0.26357741774904286, "learning_rate": 1.9983394771298687e-05, "loss": 0.3819, "num_tokens": 219500101.0, "step": 287 }, { "epoch": 0.39328731609998613, "grad_norm": 0.27773932035001925, "learning_rate": 1.998309158071644e-05, "loss": 0.3865, "num_tokens": 220242171.0, "step": 288 }, { "epoch": 0.39465289705866663, "grad_norm": 0.30344000448105085, "learning_rate": 1.9982785649781983e-05, "loss": 0.396, "num_tokens": 221055384.0, "step": 289 }, { "epoch": 0.39601847801734713, "grad_norm": 0.2726114378102743, "learning_rate": 1.998247697858863e-05, "loss": 0.406, "num_tokens": 221792382.0, "step": 290 }, { "epoch": 0.39738405897602763, "grad_norm": 0.5283030074048782, "learning_rate": 1.9982165567230552e-05, "loss": 0.401, "num_tokens": 222572810.0, "step": 291 }, { "epoch": 0.39874963993470813, "grad_norm": 0.30188348683730365, "learning_rate": 1.9981851415802743e-05, "loss": 0.3855, "num_tokens": 223262344.0, "step": 292 }, { "epoch": 0.4001152208933887, "grad_norm": 0.31300225912713664, "learning_rate": 1.998153452440104e-05, "loss": 0.3905, "num_tokens": 224015030.0, "step": 293 }, { "epoch": 0.4014808018520692, "grad_norm": 0.2798364017827532, "learning_rate": 1.9981214893122107e-05, "loss": 0.3852, "num_tokens": 224756295.0, "step": 294 }, { "epoch": 0.4028463828107497, "grad_norm": 0.31661515877008606, "learning_rate": 1.9980892522063457e-05, "loss": 0.3884, "num_tokens": 225558938.0, "step": 295 }, { "epoch": 0.4042119637694302, "grad_norm": 0.2831359311396393, "learning_rate": 1.9980567411323427e-05, "loss": 0.3707, "num_tokens": 226245787.0, "step": 296 }, { "epoch": 0.4055775447281107, "grad_norm": 0.272849610936483, "learning_rate": 1.9980239561001192e-05, "loss": 0.4126, "num_tokens": 227039605.0, "step": 297 }, { "epoch": 0.4069431256867912, "grad_norm": 0.2902834845698562, "learning_rate": 1.997990897119677e-05, "loss": 0.3811, "num_tokens": 227815552.0, "step": 298 }, { "epoch": 0.4083087066454717, "grad_norm": 0.26960678467819793, "learning_rate": 1.997957564201101e-05, "loss": 0.3931, "num_tokens": 228659573.0, "step": 299 }, { "epoch": 0.4096742876041522, "grad_norm": 0.25950510396300264, "learning_rate": 1.997923957354559e-05, "loss": 0.3887, "num_tokens": 229433563.0, "step": 300 }, { "epoch": 0.4110398685628327, "grad_norm": 0.29351318438149404, "learning_rate": 1.9978900765903037e-05, "loss": 0.3802, "num_tokens": 230233993.0, "step": 301 }, { "epoch": 0.41240544952151326, "grad_norm": 0.2787262385318486, "learning_rate": 1.9978559219186702e-05, "loss": 0.4058, "num_tokens": 230960817.0, "step": 302 }, { "epoch": 0.41377103048019376, "grad_norm": 0.2650837790346735, "learning_rate": 1.9978214933500777e-05, "loss": 0.3963, "num_tokens": 231745708.0, "step": 303 }, { "epoch": 0.41513661143887426, "grad_norm": 0.27633166340896387, "learning_rate": 1.997786790895029e-05, "loss": 0.4047, "num_tokens": 232542634.0, "step": 304 }, { "epoch": 0.41650219239755476, "grad_norm": 0.2820431872850607, "learning_rate": 1.99775181456411e-05, "loss": 0.4083, "num_tokens": 233333236.0, "step": 305 }, { "epoch": 0.41786777335623526, "grad_norm": 0.26189315975193533, "learning_rate": 1.9977165643679913e-05, "loss": 0.3849, "num_tokens": 234089042.0, "step": 306 }, { "epoch": 0.41923335431491576, "grad_norm": 0.28671882177057284, "learning_rate": 1.997681040317425e-05, "loss": 0.4107, "num_tokens": 234830707.0, "step": 307 }, { "epoch": 0.42059893527359626, "grad_norm": 0.27643270280565047, "learning_rate": 1.997645242423249e-05, "loss": 0.3835, "num_tokens": 235545529.0, "step": 308 }, { "epoch": 0.42196451623227677, "grad_norm": 0.2791557944426623, "learning_rate": 1.997609170696383e-05, "loss": 0.3986, "num_tokens": 236370933.0, "step": 309 }, { "epoch": 0.42333009719095727, "grad_norm": 0.3031548819088689, "learning_rate": 1.997572825147831e-05, "loss": 0.4048, "num_tokens": 237102492.0, "step": 310 }, { "epoch": 0.4246956781496378, "grad_norm": 0.2662342314014747, "learning_rate": 1.9975362057886812e-05, "loss": 0.3834, "num_tokens": 237880756.0, "step": 311 }, { "epoch": 0.4260612591083183, "grad_norm": 0.27593203948273765, "learning_rate": 1.9974993126301037e-05, "loss": 0.4045, "num_tokens": 238626595.0, "step": 312 }, { "epoch": 0.4274268400669988, "grad_norm": 0.3344909718483932, "learning_rate": 1.9974621456833533e-05, "loss": 0.3893, "num_tokens": 239367594.0, "step": 313 }, { "epoch": 0.4287924210256793, "grad_norm": 0.27152942672280966, "learning_rate": 1.997424704959768e-05, "loss": 0.3942, "num_tokens": 240116791.0, "step": 314 }, { "epoch": 0.43015800198435983, "grad_norm": 0.35187788812986587, "learning_rate": 1.9973869904707692e-05, "loss": 0.4018, "num_tokens": 240817505.0, "step": 315 }, { "epoch": 0.43152358294304033, "grad_norm": 0.29379276078832656, "learning_rate": 1.9973490022278624e-05, "loss": 0.4099, "num_tokens": 241564572.0, "step": 316 }, { "epoch": 0.43288916390172083, "grad_norm": 0.3272274069124779, "learning_rate": 1.9973107402426356e-05, "loss": 0.4107, "num_tokens": 242315939.0, "step": 317 }, { "epoch": 0.43425474486040133, "grad_norm": 0.2799499428114526, "learning_rate": 1.9972722045267615e-05, "loss": 0.3942, "num_tokens": 243121592.0, "step": 318 }, { "epoch": 0.43562032581908183, "grad_norm": 0.3132778483317372, "learning_rate": 1.997233395091995e-05, "loss": 0.3893, "num_tokens": 243893474.0, "step": 319 }, { "epoch": 0.4369859067777624, "grad_norm": 0.2736232558207145, "learning_rate": 1.997194311950176e-05, "loss": 0.3676, "num_tokens": 244667700.0, "step": 320 }, { "epoch": 0.4383514877364429, "grad_norm": 0.31393048004474117, "learning_rate": 1.9971549551132264e-05, "loss": 0.4038, "num_tokens": 245524894.0, "step": 321 }, { "epoch": 0.4397170686951234, "grad_norm": 0.27953373187517455, "learning_rate": 1.9971153245931525e-05, "loss": 0.4187, "num_tokens": 246318623.0, "step": 322 }, { "epoch": 0.4410826496538039, "grad_norm": 0.3208878683747047, "learning_rate": 1.9970754204020438e-05, "loss": 0.3979, "num_tokens": 247063112.0, "step": 323 }, { "epoch": 0.4424482306124844, "grad_norm": 0.28182071526957775, "learning_rate": 1.9970352425520733e-05, "loss": 0.3803, "num_tokens": 247731589.0, "step": 324 }, { "epoch": 0.4438138115711649, "grad_norm": 0.31897862190934717, "learning_rate": 1.9969947910554976e-05, "loss": 0.3944, "num_tokens": 248444602.0, "step": 325 }, { "epoch": 0.4451793925298454, "grad_norm": 0.28908743458874975, "learning_rate": 1.9969540659246568e-05, "loss": 0.393, "num_tokens": 249230640.0, "step": 326 }, { "epoch": 0.4465449734885259, "grad_norm": 0.2760586145185695, "learning_rate": 1.9969130671719743e-05, "loss": 0.3989, "num_tokens": 249979267.0, "step": 327 }, { "epoch": 0.4479105544472064, "grad_norm": 0.30622359342047717, "learning_rate": 1.9968717948099567e-05, "loss": 0.3879, "num_tokens": 250713432.0, "step": 328 }, { "epoch": 0.44927613540588696, "grad_norm": 0.2891241067300086, "learning_rate": 1.996830248851195e-05, "loss": 0.4007, "num_tokens": 251499076.0, "step": 329 }, { "epoch": 0.45064171636456746, "grad_norm": 0.25680471264854765, "learning_rate": 1.9967884293083627e-05, "loss": 0.3704, "num_tokens": 252294504.0, "step": 330 }, { "epoch": 0.45200729732324796, "grad_norm": 0.27342123129651363, "learning_rate": 1.9967463361942173e-05, "loss": 0.4062, "num_tokens": 253098070.0, "step": 331 }, { "epoch": 0.45337287828192846, "grad_norm": 0.2638329152757139, "learning_rate": 1.9967039695215994e-05, "loss": 0.3846, "num_tokens": 253773906.0, "step": 332 }, { "epoch": 0.45473845924060896, "grad_norm": 0.2657793566799468, "learning_rate": 1.9966613293034333e-05, "loss": 0.3918, "num_tokens": 254497202.0, "step": 333 }, { "epoch": 0.45610404019928946, "grad_norm": 0.28704836815028045, "learning_rate": 1.9966184155527272e-05, "loss": 0.3872, "num_tokens": 255311898.0, "step": 334 }, { "epoch": 0.45746962115796996, "grad_norm": 0.2513708559824376, "learning_rate": 1.9965752282825714e-05, "loss": 0.4045, "num_tokens": 256101427.0, "step": 335 }, { "epoch": 0.45883520211665046, "grad_norm": 0.29544118773578576, "learning_rate": 1.9965317675061403e-05, "loss": 0.3803, "num_tokens": 256854702.0, "step": 336 }, { "epoch": 0.46020078307533097, "grad_norm": 0.30074858818631434, "learning_rate": 1.996488033236693e-05, "loss": 0.3926, "num_tokens": 257665665.0, "step": 337 }, { "epoch": 0.4615663640340115, "grad_norm": 0.2774812609610397, "learning_rate": 1.9964440254875705e-05, "loss": 0.3756, "num_tokens": 258421412.0, "step": 338 }, { "epoch": 0.462931944992692, "grad_norm": 0.36005669021995146, "learning_rate": 1.9963997442721972e-05, "loss": 0.393, "num_tokens": 259281491.0, "step": 339 }, { "epoch": 0.4642975259513725, "grad_norm": 0.32279468577531245, "learning_rate": 1.996355189604082e-05, "loss": 0.4021, "num_tokens": 260132318.0, "step": 340 }, { "epoch": 0.465663106910053, "grad_norm": 0.27780022276604843, "learning_rate": 1.996310361496816e-05, "loss": 0.3821, "num_tokens": 260859367.0, "step": 341 }, { "epoch": 0.4670286878687335, "grad_norm": 0.30291829349542976, "learning_rate": 1.996265259964075e-05, "loss": 0.3815, "num_tokens": 261592897.0, "step": 342 }, { "epoch": 0.46839426882741403, "grad_norm": 0.2876814464513085, "learning_rate": 1.9962198850196166e-05, "loss": 0.3783, "num_tokens": 262343138.0, "step": 343 }, { "epoch": 0.46975984978609453, "grad_norm": 0.32155905506701327, "learning_rate": 1.9961742366772832e-05, "loss": 0.3776, "num_tokens": 263134383.0, "step": 344 }, { "epoch": 0.47112543074477503, "grad_norm": 0.2954037842357005, "learning_rate": 1.9961283149510007e-05, "loss": 0.4046, "num_tokens": 263964936.0, "step": 345 }, { "epoch": 0.47249101170345553, "grad_norm": 0.2557493428932082, "learning_rate": 1.996082119854777e-05, "loss": 0.3872, "num_tokens": 264686760.0, "step": 346 }, { "epoch": 0.4738565926621361, "grad_norm": 0.3154717167445236, "learning_rate": 1.996035651402705e-05, "loss": 0.3961, "num_tokens": 265443028.0, "step": 347 }, { "epoch": 0.4752221736208166, "grad_norm": 0.29346388965250597, "learning_rate": 1.9959889096089594e-05, "loss": 0.4049, "num_tokens": 266231883.0, "step": 348 }, { "epoch": 0.4765877545794971, "grad_norm": 0.2795639359573214, "learning_rate": 1.9959418944877992e-05, "loss": 0.3776, "num_tokens": 267006260.0, "step": 349 }, { "epoch": 0.4779533355381776, "grad_norm": 0.28019482619663694, "learning_rate": 1.9958946060535675e-05, "loss": 0.3934, "num_tokens": 267769431.0, "step": 350 }, { "epoch": 0.4793189164968581, "grad_norm": 0.3286925047098573, "learning_rate": 1.9958470443206886e-05, "loss": 0.4057, "num_tokens": 268531278.0, "step": 351 }, { "epoch": 0.4806844974555386, "grad_norm": 0.271067937301479, "learning_rate": 1.9957992093036733e-05, "loss": 0.3981, "num_tokens": 269329197.0, "step": 352 }, { "epoch": 0.4820500784142191, "grad_norm": 0.2793519046266766, "learning_rate": 1.9957511010171124e-05, "loss": 0.3948, "num_tokens": 270106226.0, "step": 353 }, { "epoch": 0.4834156593728996, "grad_norm": 0.3101396768130941, "learning_rate": 1.9957027194756825e-05, "loss": 0.4024, "num_tokens": 270869973.0, "step": 354 }, { "epoch": 0.4847812403315801, "grad_norm": 0.24183674814782896, "learning_rate": 1.995654064694142e-05, "loss": 0.3987, "num_tokens": 271641818.0, "step": 355 }, { "epoch": 0.48614682129026066, "grad_norm": 0.336563986256493, "learning_rate": 1.9956051366873344e-05, "loss": 0.4046, "num_tokens": 272466578.0, "step": 356 }, { "epoch": 0.48751240224894116, "grad_norm": 0.2850997465618145, "learning_rate": 1.9955559354701847e-05, "loss": 0.401, "num_tokens": 273263173.0, "step": 357 }, { "epoch": 0.48887798320762166, "grad_norm": 0.341007592208121, "learning_rate": 1.9955064610577025e-05, "loss": 0.3991, "num_tokens": 274072129.0, "step": 358 }, { "epoch": 0.49024356416630216, "grad_norm": 0.3034384213011678, "learning_rate": 1.9954567134649802e-05, "loss": 0.3929, "num_tokens": 274788487.0, "step": 359 }, { "epoch": 0.49160914512498266, "grad_norm": 0.34700266089629217, "learning_rate": 1.9954066927071933e-05, "loss": 0.4025, "num_tokens": 275517372.0, "step": 360 }, { "epoch": 0.49297472608366316, "grad_norm": 0.24559917447059515, "learning_rate": 1.995356398799601e-05, "loss": 0.3756, "num_tokens": 276221573.0, "step": 361 }, { "epoch": 0.49434030704234366, "grad_norm": 0.29820127258741186, "learning_rate": 1.9953058317575466e-05, "loss": 0.3716, "num_tokens": 276933000.0, "step": 362 }, { "epoch": 0.49570588800102416, "grad_norm": 0.29464656103684145, "learning_rate": 1.995254991596455e-05, "loss": 0.3908, "num_tokens": 277707476.0, "step": 363 }, { "epoch": 0.49707146895970467, "grad_norm": 0.3108825389264355, "learning_rate": 1.9952038783318355e-05, "loss": 0.3873, "num_tokens": 278432728.0, "step": 364 }, { "epoch": 0.4984370499183852, "grad_norm": 0.2695543241455866, "learning_rate": 1.995152491979281e-05, "loss": 0.3964, "num_tokens": 279161715.0, "step": 365 }, { "epoch": 0.4998026308770657, "grad_norm": 0.26050177831941734, "learning_rate": 1.995100832554467e-05, "loss": 0.3848, "num_tokens": 279998957.0, "step": 366 }, { "epoch": 0.5011682118357462, "grad_norm": 0.26686976133695306, "learning_rate": 1.9950489000731523e-05, "loss": 0.3806, "num_tokens": 280724323.0, "step": 367 }, { "epoch": 0.5025337927944267, "grad_norm": 0.2784986859560697, "learning_rate": 1.9949966945511797e-05, "loss": 0.3835, "num_tokens": 281517033.0, "step": 368 }, { "epoch": 0.5038993737531072, "grad_norm": 0.2758146927110324, "learning_rate": 1.994944216004474e-05, "loss": 0.4011, "num_tokens": 282370345.0, "step": 369 }, { "epoch": 0.5052649547117878, "grad_norm": 0.2698387260904606, "learning_rate": 1.9948914644490456e-05, "loss": 0.3784, "num_tokens": 283115713.0, "step": 370 }, { "epoch": 0.5066305356704682, "grad_norm": 0.27838457135525596, "learning_rate": 1.9948384399009852e-05, "loss": 0.3679, "num_tokens": 283864860.0, "step": 371 }, { "epoch": 0.5079961166291488, "grad_norm": 0.2727942574219679, "learning_rate": 1.9947851423764693e-05, "loss": 0.3941, "num_tokens": 284665733.0, "step": 372 }, { "epoch": 0.5093616975878292, "grad_norm": 0.2573338521711438, "learning_rate": 1.994731571891756e-05, "loss": 0.4076, "num_tokens": 285469958.0, "step": 373 }, { "epoch": 0.5107272785465098, "grad_norm": 0.2863594498607459, "learning_rate": 1.9946777284631877e-05, "loss": 0.3828, "num_tokens": 286222903.0, "step": 374 }, { "epoch": 0.5120928595051902, "grad_norm": 0.2989772798339145, "learning_rate": 1.99462361210719e-05, "loss": 0.4123, "num_tokens": 287019550.0, "step": 375 }, { "epoch": 0.5134584404638708, "grad_norm": 0.24819277826666683, "learning_rate": 1.9945692228402708e-05, "loss": 0.3713, "num_tokens": 287752277.0, "step": 376 }, { "epoch": 0.5148240214225512, "grad_norm": 0.3151599320480343, "learning_rate": 1.9945145606790222e-05, "loss": 0.3725, "num_tokens": 288496766.0, "step": 377 }, { "epoch": 0.5161896023812318, "grad_norm": 0.27129002688225495, "learning_rate": 1.9944596256401198e-05, "loss": 0.3949, "num_tokens": 289277216.0, "step": 378 }, { "epoch": 0.5175551833399124, "grad_norm": 0.37934420516247214, "learning_rate": 1.9944044177403205e-05, "loss": 0.387, "num_tokens": 290000884.0, "step": 379 }, { "epoch": 0.5189207642985928, "grad_norm": 0.3090685932126554, "learning_rate": 1.9943489369964672e-05, "loss": 0.3952, "num_tokens": 290694042.0, "step": 380 }, { "epoch": 0.5202863452572734, "grad_norm": 0.28055184622500373, "learning_rate": 1.994293183425484e-05, "loss": 0.4098, "num_tokens": 291467310.0, "step": 381 }, { "epoch": 0.5216519262159538, "grad_norm": 0.2558143703975956, "learning_rate": 1.9942371570443793e-05, "loss": 0.3811, "num_tokens": 292263453.0, "step": 382 }, { "epoch": 0.5230175071746344, "grad_norm": 0.28700899851986844, "learning_rate": 1.9941808578702438e-05, "loss": 0.3936, "num_tokens": 293045342.0, "step": 383 }, { "epoch": 0.5243830881333148, "grad_norm": 0.2701965433820782, "learning_rate": 1.9941242859202527e-05, "loss": 0.3855, "num_tokens": 293752202.0, "step": 384 }, { "epoch": 0.5257486690919954, "grad_norm": 0.2539671988876104, "learning_rate": 1.994067441211663e-05, "loss": 0.3772, "num_tokens": 294509326.0, "step": 385 }, { "epoch": 0.5271142500506758, "grad_norm": 0.2722733623938688, "learning_rate": 1.9940103237618153e-05, "loss": 0.3986, "num_tokens": 295286096.0, "step": 386 }, { "epoch": 0.5284798310093564, "grad_norm": 0.2696900803864628, "learning_rate": 1.9939529335881348e-05, "loss": 0.3779, "num_tokens": 296024065.0, "step": 387 }, { "epoch": 0.5298454119680369, "grad_norm": 0.24862256946366604, "learning_rate": 1.9938952707081277e-05, "loss": 0.3992, "num_tokens": 296814855.0, "step": 388 }, { "epoch": 0.5312109929267174, "grad_norm": 0.23943991538031187, "learning_rate": 1.9938373351393846e-05, "loss": 0.3865, "num_tokens": 297601966.0, "step": 389 }, { "epoch": 0.5325765738853979, "grad_norm": 0.27228591211162795, "learning_rate": 1.99377912689958e-05, "loss": 0.393, "num_tokens": 298298993.0, "step": 390 }, { "epoch": 0.5339421548440784, "grad_norm": 0.23819815267255628, "learning_rate": 1.9937206460064698e-05, "loss": 0.3922, "num_tokens": 299084362.0, "step": 391 }, { "epoch": 0.5353077358027589, "grad_norm": 0.2510574264021747, "learning_rate": 1.993661892477894e-05, "loss": 0.3864, "num_tokens": 299866430.0, "step": 392 }, { "epoch": 0.5366733167614394, "grad_norm": 0.2523478907599862, "learning_rate": 1.9936028663317762e-05, "loss": 0.3831, "num_tokens": 300572297.0, "step": 393 }, { "epoch": 0.5380388977201199, "grad_norm": 0.272754024461173, "learning_rate": 1.9935435675861227e-05, "loss": 0.3742, "num_tokens": 301280163.0, "step": 394 }, { "epoch": 0.5394044786788004, "grad_norm": 0.27892282948839753, "learning_rate": 1.9934839962590224e-05, "loss": 0.3674, "num_tokens": 302007061.0, "step": 395 }, { "epoch": 0.5407700596374809, "grad_norm": 0.2767111228811789, "learning_rate": 1.9934241523686487e-05, "loss": 0.3911, "num_tokens": 302793860.0, "step": 396 }, { "epoch": 0.5421356405961615, "grad_norm": 0.27940877557372473, "learning_rate": 1.993364035933257e-05, "loss": 0.4072, "num_tokens": 303539228.0, "step": 397 }, { "epoch": 0.5435012215548419, "grad_norm": 0.2692479564607352, "learning_rate": 1.993303646971186e-05, "loss": 0.3853, "num_tokens": 304271393.0, "step": 398 }, { "epoch": 0.5448668025135225, "grad_norm": 0.25916129072101557, "learning_rate": 1.993242985500858e-05, "loss": 0.3981, "num_tokens": 305007034.0, "step": 399 }, { "epoch": 0.5462323834722029, "grad_norm": 0.3091265460204507, "learning_rate": 1.9931820515407784e-05, "loss": 0.388, "num_tokens": 305738401.0, "step": 400 }, { "epoch": 0.5475979644308835, "grad_norm": 0.27150239937933734, "learning_rate": 1.9931208451095353e-05, "loss": 0.3994, "num_tokens": 306532200.0, "step": 401 }, { "epoch": 0.5489635453895639, "grad_norm": 0.3194026900410218, "learning_rate": 1.9930593662258e-05, "loss": 0.407, "num_tokens": 307244307.0, "step": 402 }, { "epoch": 0.5503291263482445, "grad_norm": 0.2650284159872338, "learning_rate": 1.9929976149083272e-05, "loss": 0.3992, "num_tokens": 308006043.0, "step": 403 }, { "epoch": 0.5516947073069249, "grad_norm": 0.25728837792031095, "learning_rate": 1.9929355911759545e-05, "loss": 0.3824, "num_tokens": 308785558.0, "step": 404 }, { "epoch": 0.5530602882656055, "grad_norm": 0.2572433565874476, "learning_rate": 1.992873295047603e-05, "loss": 0.3829, "num_tokens": 309553286.0, "step": 405 }, { "epoch": 0.554425869224286, "grad_norm": 0.2650468920377569, "learning_rate": 1.9928107265422757e-05, "loss": 0.376, "num_tokens": 310326933.0, "step": 406 }, { "epoch": 0.5557914501829665, "grad_norm": 0.2624003719990711, "learning_rate": 1.9927478856790606e-05, "loss": 0.4008, "num_tokens": 311116677.0, "step": 407 }, { "epoch": 0.557157031141647, "grad_norm": 0.3375733066147591, "learning_rate": 1.992684772477127e-05, "loss": 0.3759, "num_tokens": 311856460.0, "step": 408 }, { "epoch": 0.5585226121003275, "grad_norm": 0.28472978510710845, "learning_rate": 1.992621386955728e-05, "loss": 0.3907, "num_tokens": 312526419.0, "step": 409 }, { "epoch": 0.559888193059008, "grad_norm": 0.23641115855611305, "learning_rate": 1.9925577291342005e-05, "loss": 0.3824, "num_tokens": 313315050.0, "step": 410 }, { "epoch": 0.5612537740176885, "grad_norm": 0.26283645892730956, "learning_rate": 1.9924937990319627e-05, "loss": 0.3902, "num_tokens": 314118654.0, "step": 411 }, { "epoch": 0.5626193549763691, "grad_norm": 0.23665553287273927, "learning_rate": 1.9924295966685175e-05, "loss": 0.3784, "num_tokens": 314876435.0, "step": 412 }, { "epoch": 0.5639849359350495, "grad_norm": 0.26426203107354346, "learning_rate": 1.9923651220634505e-05, "loss": 0.3986, "num_tokens": 315647483.0, "step": 413 }, { "epoch": 0.5653505168937301, "grad_norm": 0.25774901326287525, "learning_rate": 1.9923003752364297e-05, "loss": 0.3939, "num_tokens": 316454197.0, "step": 414 }, { "epoch": 0.5667160978524106, "grad_norm": 0.2768510430148426, "learning_rate": 1.9922353562072062e-05, "loss": 0.381, "num_tokens": 317113360.0, "step": 415 }, { "epoch": 0.5680816788110911, "grad_norm": 0.2618735623570518, "learning_rate": 1.9921700649956156e-05, "loss": 0.3916, "num_tokens": 317893602.0, "step": 416 }, { "epoch": 0.5694472597697716, "grad_norm": 0.23339769271155472, "learning_rate": 1.9921045016215745e-05, "loss": 0.3957, "num_tokens": 318737749.0, "step": 417 }, { "epoch": 0.5708128407284521, "grad_norm": 0.2696881913596961, "learning_rate": 1.992038666105084e-05, "loss": 0.3673, "num_tokens": 319398186.0, "step": 418 }, { "epoch": 0.5721784216871326, "grad_norm": 0.2534714084698837, "learning_rate": 1.991972558466227e-05, "loss": 0.4052, "num_tokens": 320235448.0, "step": 419 }, { "epoch": 0.5735440026458131, "grad_norm": 0.3020992437635151, "learning_rate": 1.9919061787251703e-05, "loss": 0.3798, "num_tokens": 320994453.0, "step": 420 }, { "epoch": 0.5749095836044936, "grad_norm": 0.24054166227400872, "learning_rate": 1.991839526902164e-05, "loss": 0.3712, "num_tokens": 321722682.0, "step": 421 }, { "epoch": 0.5762751645631741, "grad_norm": 0.29600481633822495, "learning_rate": 1.99177260301754e-05, "loss": 0.3874, "num_tokens": 322491853.0, "step": 422 }, { "epoch": 0.5776407455218546, "grad_norm": 0.30018586208778225, "learning_rate": 1.991705407091715e-05, "loss": 0.3839, "num_tokens": 323230647.0, "step": 423 }, { "epoch": 0.5790063264805352, "grad_norm": 0.2803108550097222, "learning_rate": 1.991637939145186e-05, "loss": 0.4046, "num_tokens": 323998089.0, "step": 424 }, { "epoch": 0.5803719074392156, "grad_norm": 0.26754754121716146, "learning_rate": 1.9915701991985356e-05, "loss": 0.375, "num_tokens": 324679142.0, "step": 425 }, { "epoch": 0.5817374883978962, "grad_norm": 0.25236905102013724, "learning_rate": 1.991502187272428e-05, "loss": 0.3669, "num_tokens": 325360054.0, "step": 426 }, { "epoch": 0.5831030693565766, "grad_norm": 0.2681885553058281, "learning_rate": 1.9914339033876107e-05, "loss": 0.4014, "num_tokens": 326095180.0, "step": 427 }, { "epoch": 0.5844686503152572, "grad_norm": 0.25280579864144903, "learning_rate": 1.991365347564914e-05, "loss": 0.3909, "num_tokens": 326905415.0, "step": 428 }, { "epoch": 0.5858342312739376, "grad_norm": 0.2747701278453264, "learning_rate": 1.9912965198252518e-05, "loss": 0.3967, "num_tokens": 327711008.0, "step": 429 }, { "epoch": 0.5871998122326182, "grad_norm": 0.25525879442005694, "learning_rate": 1.9912274201896203e-05, "loss": 0.3816, "num_tokens": 328403684.0, "step": 430 }, { "epoch": 0.5885653931912986, "grad_norm": 0.2643107138017229, "learning_rate": 1.9911580486790983e-05, "loss": 0.3973, "num_tokens": 329242640.0, "step": 431 }, { "epoch": 0.5899309741499792, "grad_norm": 0.256201704365424, "learning_rate": 1.9910884053148487e-05, "loss": 0.3746, "num_tokens": 329985268.0, "step": 432 }, { "epoch": 0.5912965551086597, "grad_norm": 0.24903051883371624, "learning_rate": 1.9910184901181162e-05, "loss": 0.3802, "num_tokens": 330815185.0, "step": 433 }, { "epoch": 0.5926621360673402, "grad_norm": 0.24385500266499738, "learning_rate": 1.990948303110229e-05, "loss": 0.3582, "num_tokens": 331567995.0, "step": 434 }, { "epoch": 0.5940277170260208, "grad_norm": 0.24529074527901873, "learning_rate": 1.9908778443125986e-05, "loss": 0.3956, "num_tokens": 332334214.0, "step": 435 }, { "epoch": 0.5953932979847012, "grad_norm": 0.2832748422070591, "learning_rate": 1.9908071137467183e-05, "loss": 0.3901, "num_tokens": 333131199.0, "step": 436 }, { "epoch": 0.5967588789433818, "grad_norm": 0.24354259019370247, "learning_rate": 1.9907361114341654e-05, "loss": 0.3805, "num_tokens": 333900299.0, "step": 437 }, { "epoch": 0.5981244599020622, "grad_norm": 0.2572670626553479, "learning_rate": 1.990664837396599e-05, "loss": 0.3828, "num_tokens": 334642801.0, "step": 438 }, { "epoch": 0.5994900408607428, "grad_norm": 0.2777162274871795, "learning_rate": 1.9905932916557624e-05, "loss": 0.3736, "num_tokens": 335423971.0, "step": 439 }, { "epoch": 0.6008556218194232, "grad_norm": 0.23616167881302969, "learning_rate": 1.990521474233481e-05, "loss": 0.3585, "num_tokens": 336156368.0, "step": 440 }, { "epoch": 0.6022212027781038, "grad_norm": 0.24672370189004839, "learning_rate": 1.9904493851516628e-05, "loss": 0.37, "num_tokens": 336842360.0, "step": 441 }, { "epoch": 0.6035867837367843, "grad_norm": 0.2567303053916664, "learning_rate": 1.9903770244322993e-05, "loss": 0.3968, "num_tokens": 337607796.0, "step": 442 }, { "epoch": 0.6049523646954648, "grad_norm": 0.25557272386307955, "learning_rate": 1.9903043920974644e-05, "loss": 0.403, "num_tokens": 338378413.0, "step": 443 }, { "epoch": 0.6063179456541453, "grad_norm": 0.23869729520466798, "learning_rate": 1.9902314881693155e-05, "loss": 0.3872, "num_tokens": 339163790.0, "step": 444 }, { "epoch": 0.6076835266128258, "grad_norm": 0.258158730017596, "learning_rate": 1.9901583126700922e-05, "loss": 0.3833, "num_tokens": 339913682.0, "step": 445 }, { "epoch": 0.6090491075715063, "grad_norm": 0.2785739301599366, "learning_rate": 1.9900848656221173e-05, "loss": 0.4001, "num_tokens": 340667280.0, "step": 446 }, { "epoch": 0.6104146885301868, "grad_norm": 0.27475036069638153, "learning_rate": 1.990011147047796e-05, "loss": 0.3947, "num_tokens": 341402387.0, "step": 447 }, { "epoch": 0.6117802694888673, "grad_norm": 0.258511209003639, "learning_rate": 1.9899371569696168e-05, "loss": 0.3737, "num_tokens": 342126377.0, "step": 448 }, { "epoch": 0.6131458504475478, "grad_norm": 0.28808180786894944, "learning_rate": 1.989862895410151e-05, "loss": 0.3681, "num_tokens": 342944624.0, "step": 449 }, { "epoch": 0.6145114314062283, "grad_norm": 0.2709620428767766, "learning_rate": 1.9897883623920523e-05, "loss": 0.3958, "num_tokens": 343793644.0, "step": 450 }, { "epoch": 0.6158770123649089, "grad_norm": 0.24149618865623834, "learning_rate": 1.989713557938058e-05, "loss": 0.3844, "num_tokens": 344553381.0, "step": 451 }, { "epoch": 0.6172425933235893, "grad_norm": 0.2838899023279151, "learning_rate": 1.989638482070987e-05, "loss": 0.4093, "num_tokens": 345386637.0, "step": 452 }, { "epoch": 0.6186081742822699, "grad_norm": 0.2737458195788977, "learning_rate": 1.9895631348137427e-05, "loss": 0.3931, "num_tokens": 346134370.0, "step": 453 }, { "epoch": 0.6199737552409503, "grad_norm": 0.22723145417246904, "learning_rate": 1.9894875161893092e-05, "loss": 0.3764, "num_tokens": 346892468.0, "step": 454 }, { "epoch": 0.6213393361996309, "grad_norm": 0.2491686892612959, "learning_rate": 1.9894116262207553e-05, "loss": 0.3875, "num_tokens": 347629735.0, "step": 455 }, { "epoch": 0.6227049171583113, "grad_norm": 0.2471805251070627, "learning_rate": 1.989335464931231e-05, "loss": 0.4011, "num_tokens": 348412290.0, "step": 456 }, { "epoch": 0.6240704981169919, "grad_norm": 0.25489337619836044, "learning_rate": 1.9892590323439703e-05, "loss": 0.3828, "num_tokens": 349089958.0, "step": 457 }, { "epoch": 0.6254360790756723, "grad_norm": 0.22881235723740262, "learning_rate": 1.9891823284822893e-05, "loss": 0.3792, "num_tokens": 349823834.0, "step": 458 }, { "epoch": 0.6268016600343529, "grad_norm": 0.23315263734184938, "learning_rate": 1.9891053533695875e-05, "loss": 0.3966, "num_tokens": 350632107.0, "step": 459 }, { "epoch": 0.6281672409930334, "grad_norm": 0.23079513648155103, "learning_rate": 1.989028107029346e-05, "loss": 0.362, "num_tokens": 351357486.0, "step": 460 }, { "epoch": 0.6295328219517139, "grad_norm": 0.2827631709395634, "learning_rate": 1.9889505894851295e-05, "loss": 0.3816, "num_tokens": 352152235.0, "step": 461 }, { "epoch": 0.6308984029103945, "grad_norm": 0.23472627965073545, "learning_rate": 1.9888728007605856e-05, "loss": 0.3866, "num_tokens": 352899563.0, "step": 462 }, { "epoch": 0.6322639838690749, "grad_norm": 0.2763922513968407, "learning_rate": 1.9887947408794436e-05, "loss": 0.386, "num_tokens": 353641255.0, "step": 463 }, { "epoch": 0.6336295648277555, "grad_norm": 0.2325647245087109, "learning_rate": 1.9887164098655167e-05, "loss": 0.3747, "num_tokens": 354413544.0, "step": 464 }, { "epoch": 0.6349951457864359, "grad_norm": 0.2659446044254964, "learning_rate": 1.9886378077427004e-05, "loss": 0.3834, "num_tokens": 355172169.0, "step": 465 }, { "epoch": 0.6363607267451165, "grad_norm": 0.24388278827787876, "learning_rate": 1.988558934534972e-05, "loss": 0.3722, "num_tokens": 355974796.0, "step": 466 }, { "epoch": 0.6377263077037969, "grad_norm": 0.25243421298958635, "learning_rate": 1.9884797902663935e-05, "loss": 0.392, "num_tokens": 356653202.0, "step": 467 }, { "epoch": 0.6390918886624775, "grad_norm": 0.2626999112356006, "learning_rate": 1.9884003749611076e-05, "loss": 0.4143, "num_tokens": 357453524.0, "step": 468 }, { "epoch": 0.640457469621158, "grad_norm": 0.25183527749776646, "learning_rate": 1.988320688643341e-05, "loss": 0.3994, "num_tokens": 358205430.0, "step": 469 }, { "epoch": 0.6418230505798385, "grad_norm": 0.27717369348191173, "learning_rate": 1.988240731337402e-05, "loss": 0.3932, "num_tokens": 358969175.0, "step": 470 }, { "epoch": 0.643188631538519, "grad_norm": 0.22146088432606734, "learning_rate": 1.9881605030676816e-05, "loss": 0.3621, "num_tokens": 359732045.0, "step": 471 }, { "epoch": 0.6445542124971995, "grad_norm": 0.27802956769561044, "learning_rate": 1.9880800038586553e-05, "loss": 0.3833, "num_tokens": 360503104.0, "step": 472 }, { "epoch": 0.64591979345588, "grad_norm": 0.2733803995679551, "learning_rate": 1.9879992337348792e-05, "loss": 0.3796, "num_tokens": 361190644.0, "step": 473 }, { "epoch": 0.6472853744145605, "grad_norm": 0.2810144410561898, "learning_rate": 1.987918192720993e-05, "loss": 0.3873, "num_tokens": 361964681.0, "step": 474 }, { "epoch": 0.648650955373241, "grad_norm": 0.2265414791985924, "learning_rate": 1.9878368808417185e-05, "loss": 0.3795, "num_tokens": 362712524.0, "step": 475 }, { "epoch": 0.6500165363319215, "grad_norm": 0.3060506099122097, "learning_rate": 1.9877552981218605e-05, "loss": 0.3947, "num_tokens": 363444809.0, "step": 476 }, { "epoch": 0.651382117290602, "grad_norm": 0.23865118502594165, "learning_rate": 1.9876734445863065e-05, "loss": 0.3824, "num_tokens": 364223553.0, "step": 477 }, { "epoch": 0.6527476982492826, "grad_norm": 0.28593938491522236, "learning_rate": 1.987591320260027e-05, "loss": 0.3716, "num_tokens": 364930413.0, "step": 478 }, { "epoch": 0.654113279207963, "grad_norm": 0.2242331191091773, "learning_rate": 1.9875089251680735e-05, "loss": 0.3774, "num_tokens": 365627313.0, "step": 479 }, { "epoch": 0.6554788601666436, "grad_norm": 0.2732859231465138, "learning_rate": 1.9874262593355815e-05, "loss": 0.3632, "num_tokens": 366390560.0, "step": 480 }, { "epoch": 0.656844441125324, "grad_norm": 0.22229610323101276, "learning_rate": 1.9873433227877693e-05, "loss": 0.3824, "num_tokens": 367203785.0, "step": 481 }, { "epoch": 0.6582100220840046, "grad_norm": 0.283970200130262, "learning_rate": 1.987260115549937e-05, "loss": 0.3982, "num_tokens": 367972346.0, "step": 482 }, { "epoch": 0.659575603042685, "grad_norm": 0.26638015326160996, "learning_rate": 1.9871766376474668e-05, "loss": 0.3933, "num_tokens": 368704810.0, "step": 483 }, { "epoch": 0.6609411840013656, "grad_norm": 0.2826046729667879, "learning_rate": 1.9870928891058253e-05, "loss": 0.3922, "num_tokens": 369463250.0, "step": 484 }, { "epoch": 0.662306764960046, "grad_norm": 0.25397644275180864, "learning_rate": 1.98700886995056e-05, "loss": 0.3831, "num_tokens": 370240745.0, "step": 485 }, { "epoch": 0.6636723459187266, "grad_norm": 0.23163295447995425, "learning_rate": 1.9869245802073014e-05, "loss": 0.3642, "num_tokens": 370918463.0, "step": 486 }, { "epoch": 0.6650379268774071, "grad_norm": 0.23269281096405695, "learning_rate": 1.986840019901763e-05, "loss": 0.3706, "num_tokens": 371669825.0, "step": 487 }, { "epoch": 0.6664035078360876, "grad_norm": 0.21497843188130977, "learning_rate": 1.9867551890597402e-05, "loss": 0.3746, "num_tokens": 372455157.0, "step": 488 }, { "epoch": 0.6677690887947682, "grad_norm": 0.21332532471343174, "learning_rate": 1.986670087707111e-05, "loss": 0.3915, "num_tokens": 373292474.0, "step": 489 }, { "epoch": 0.6691346697534486, "grad_norm": 0.21758228015114192, "learning_rate": 1.9865847158698373e-05, "loss": 0.3833, "num_tokens": 374081092.0, "step": 490 }, { "epoch": 0.6705002507121292, "grad_norm": 0.22771642866648495, "learning_rate": 1.9864990735739607e-05, "loss": 0.3798, "num_tokens": 374866303.0, "step": 491 }, { "epoch": 0.6718658316708096, "grad_norm": 0.27961910298625503, "learning_rate": 1.9864131608456082e-05, "loss": 0.3697, "num_tokens": 375564177.0, "step": 492 }, { "epoch": 0.6732314126294902, "grad_norm": 0.23494724323530727, "learning_rate": 1.9863269777109875e-05, "loss": 0.3643, "num_tokens": 376352567.0, "step": 493 }, { "epoch": 0.6745969935881706, "grad_norm": 0.2690806115631594, "learning_rate": 1.9862405241963894e-05, "loss": 0.3628, "num_tokens": 377105995.0, "step": 494 }, { "epoch": 0.6759625745468512, "grad_norm": 0.26966208227775673, "learning_rate": 1.986153800328187e-05, "loss": 0.3953, "num_tokens": 377839271.0, "step": 495 }, { "epoch": 0.6773281555055317, "grad_norm": 0.27724799806112765, "learning_rate": 1.9860668061328364e-05, "loss": 0.3781, "num_tokens": 378614568.0, "step": 496 }, { "epoch": 0.6786937364642122, "grad_norm": 0.2435848556850448, "learning_rate": 1.9859795416368758e-05, "loss": 0.3504, "num_tokens": 379355838.0, "step": 497 }, { "epoch": 0.6800593174228927, "grad_norm": 0.27485323256623506, "learning_rate": 1.985892006866925e-05, "loss": 0.3855, "num_tokens": 380106924.0, "step": 498 }, { "epoch": 0.6814248983815732, "grad_norm": 0.25543341775917644, "learning_rate": 1.9858042018496882e-05, "loss": 0.4035, "num_tokens": 380873032.0, "step": 499 }, { "epoch": 0.6827904793402537, "grad_norm": 0.23291356363517804, "learning_rate": 1.98571612661195e-05, "loss": 0.3689, "num_tokens": 381611002.0, "step": 500 }, { "epoch": 0.6841560602989342, "grad_norm": 0.24105329409525156, "learning_rate": 1.9856277811805788e-05, "loss": 0.3972, "num_tokens": 382420963.0, "step": 501 }, { "epoch": 0.6855216412576147, "grad_norm": 0.22876948749965997, "learning_rate": 1.9855391655825246e-05, "loss": 0.3928, "num_tokens": 383164139.0, "step": 502 }, { "epoch": 0.6868872222162952, "grad_norm": 0.2579673005328045, "learning_rate": 1.9854502798448208e-05, "loss": 0.3745, "num_tokens": 383873544.0, "step": 503 }, { "epoch": 0.6882528031749757, "grad_norm": 0.2461310513626293, "learning_rate": 1.985361123994582e-05, "loss": 0.3738, "num_tokens": 384672596.0, "step": 504 }, { "epoch": 0.6896183841336563, "grad_norm": 0.28338509927491723, "learning_rate": 1.9852716980590057e-05, "loss": 0.3828, "num_tokens": 385339695.0, "step": 505 }, { "epoch": 0.6909839650923367, "grad_norm": 0.2338473085333619, "learning_rate": 1.985182002065373e-05, "loss": 0.3868, "num_tokens": 386117976.0, "step": 506 }, { "epoch": 0.6923495460510173, "grad_norm": 0.3058844564979638, "learning_rate": 1.9850920360410447e-05, "loss": 0.3895, "num_tokens": 386905846.0, "step": 507 }, { "epoch": 0.6937151270096977, "grad_norm": 0.24881858920143227, "learning_rate": 1.9850018000134665e-05, "loss": 0.3702, "num_tokens": 387557365.0, "step": 508 }, { "epoch": 0.6950807079683783, "grad_norm": 0.26122840195750624, "learning_rate": 1.984911294010165e-05, "loss": 0.383, "num_tokens": 388355725.0, "step": 509 }, { "epoch": 0.6964462889270587, "grad_norm": 0.2528416838151127, "learning_rate": 1.9848205180587507e-05, "loss": 0.3731, "num_tokens": 389089982.0, "step": 510 }, { "epoch": 0.6978118698857393, "grad_norm": 0.25373699637052777, "learning_rate": 1.984729472186914e-05, "loss": 0.3695, "num_tokens": 389842970.0, "step": 511 }, { "epoch": 0.6991774508444197, "grad_norm": 0.25595011310613536, "learning_rate": 1.98463815642243e-05, "loss": 0.4078, "num_tokens": 390674832.0, "step": 512 }, { "epoch": 0.7005430318031003, "grad_norm": 0.2283173779633846, "learning_rate": 1.984546570793155e-05, "loss": 0.3953, "num_tokens": 391477961.0, "step": 513 }, { "epoch": 0.7019086127617808, "grad_norm": 0.24127946688575574, "learning_rate": 1.984454715327027e-05, "loss": 0.3735, "num_tokens": 392221351.0, "step": 514 }, { "epoch": 0.7032741937204613, "grad_norm": 0.22195864373925833, "learning_rate": 1.9843625900520685e-05, "loss": 0.3784, "num_tokens": 392928524.0, "step": 515 }, { "epoch": 0.7046397746791419, "grad_norm": 0.2628662633547868, "learning_rate": 1.9842701949963824e-05, "loss": 0.3979, "num_tokens": 393638153.0, "step": 516 }, { "epoch": 0.7060053556378223, "grad_norm": 0.2193066546143698, "learning_rate": 1.9841775301881538e-05, "loss": 0.3805, "num_tokens": 394382313.0, "step": 517 }, { "epoch": 0.7073709365965029, "grad_norm": 0.23903929222407663, "learning_rate": 1.9840845956556514e-05, "loss": 0.395, "num_tokens": 395182565.0, "step": 518 }, { "epoch": 0.7087365175551833, "grad_norm": 0.22097089974659156, "learning_rate": 1.9839913914272254e-05, "loss": 0.3783, "num_tokens": 395904917.0, "step": 519 }, { "epoch": 0.7101020985138639, "grad_norm": 0.2470922722796958, "learning_rate": 1.9838979175313083e-05, "loss": 0.4086, "num_tokens": 396652902.0, "step": 520 }, { "epoch": 0.7114676794725443, "grad_norm": 0.22726727816183342, "learning_rate": 1.983804173996415e-05, "loss": 0.3769, "num_tokens": 397450311.0, "step": 521 }, { "epoch": 0.7128332604312249, "grad_norm": 0.23057022631657506, "learning_rate": 1.9837101608511423e-05, "loss": 0.3654, "num_tokens": 398205440.0, "step": 522 }, { "epoch": 0.7141988413899054, "grad_norm": 0.2530875570489181, "learning_rate": 1.9836158781241696e-05, "loss": 0.3836, "num_tokens": 398927088.0, "step": 523 }, { "epoch": 0.7155644223485859, "grad_norm": 0.27484578253896275, "learning_rate": 1.9835213258442588e-05, "loss": 0.3907, "num_tokens": 399686370.0, "step": 524 }, { "epoch": 0.7169300033072664, "grad_norm": 0.25455842237924364, "learning_rate": 1.9834265040402538e-05, "loss": 0.3943, "num_tokens": 400413458.0, "step": 525 }, { "epoch": 0.7182955842659469, "grad_norm": 0.2817596902691573, "learning_rate": 1.98333141274108e-05, "loss": 0.3838, "num_tokens": 401149225.0, "step": 526 }, { "epoch": 0.7196611652246274, "grad_norm": 0.24331185460597055, "learning_rate": 1.9832360519757462e-05, "loss": 0.3667, "num_tokens": 401892171.0, "step": 527 }, { "epoch": 0.7210267461833079, "grad_norm": 0.23574338178878287, "learning_rate": 1.9831404217733426e-05, "loss": 0.3882, "num_tokens": 402646606.0, "step": 528 }, { "epoch": 0.7223923271419884, "grad_norm": 0.24004211424765443, "learning_rate": 1.9830445221630418e-05, "loss": 0.3699, "num_tokens": 403408801.0, "step": 529 }, { "epoch": 0.7237579081006689, "grad_norm": 0.24192801205690154, "learning_rate": 1.982948353174099e-05, "loss": 0.3858, "num_tokens": 404167900.0, "step": 530 }, { "epoch": 0.7251234890593494, "grad_norm": 0.2341700165648956, "learning_rate": 1.9828519148358505e-05, "loss": 0.3721, "num_tokens": 404939438.0, "step": 531 }, { "epoch": 0.72648907001803, "grad_norm": 0.23697310002511854, "learning_rate": 1.982755207177716e-05, "loss": 0.3798, "num_tokens": 405642199.0, "step": 532 }, { "epoch": 0.7278546509767104, "grad_norm": 0.25967280968484235, "learning_rate": 1.982658230229197e-05, "loss": 0.4015, "num_tokens": 406455367.0, "step": 533 }, { "epoch": 0.729220231935391, "grad_norm": 0.23755660874203546, "learning_rate": 1.9825609840198764e-05, "loss": 0.3809, "num_tokens": 407269972.0, "step": 534 }, { "epoch": 0.7305858128940714, "grad_norm": 0.23348182751311877, "learning_rate": 1.9824634685794198e-05, "loss": 0.3811, "num_tokens": 408061462.0, "step": 535 }, { "epoch": 0.731951393852752, "grad_norm": 0.24165279724074662, "learning_rate": 1.9823656839375753e-05, "loss": 0.3955, "num_tokens": 408786224.0, "step": 536 }, { "epoch": 0.7333169748114324, "grad_norm": 0.2468350845226071, "learning_rate": 1.9822676301241728e-05, "loss": 0.378, "num_tokens": 409563687.0, "step": 537 }, { "epoch": 0.734682555770113, "grad_norm": 0.2478622719683164, "learning_rate": 1.9821693071691237e-05, "loss": 0.3936, "num_tokens": 410341422.0, "step": 538 }, { "epoch": 0.7360481367287934, "grad_norm": 0.25420915420800777, "learning_rate": 1.982070715102423e-05, "loss": 0.3862, "num_tokens": 411058975.0, "step": 539 }, { "epoch": 0.737413717687474, "grad_norm": 0.22002184202626918, "learning_rate": 1.9819718539541463e-05, "loss": 0.3839, "num_tokens": 411855286.0, "step": 540 }, { "epoch": 0.7387792986461545, "grad_norm": 0.3001776094826476, "learning_rate": 1.9818727237544516e-05, "loss": 0.3986, "num_tokens": 412665147.0, "step": 541 }, { "epoch": 0.740144879604835, "grad_norm": 0.25786898262664704, "learning_rate": 1.9817733245335797e-05, "loss": 0.3947, "num_tokens": 413455885.0, "step": 542 }, { "epoch": 0.7415104605635155, "grad_norm": 0.2603732577565379, "learning_rate": 1.9816736563218527e-05, "loss": 0.3846, "num_tokens": 414187350.0, "step": 543 }, { "epoch": 0.742876041522196, "grad_norm": 0.23779622601478156, "learning_rate": 1.9815737191496757e-05, "loss": 0.3763, "num_tokens": 414928387.0, "step": 544 }, { "epoch": 0.7442416224808766, "grad_norm": 0.22553059306561224, "learning_rate": 1.9814735130475343e-05, "loss": 0.3907, "num_tokens": 415837678.0, "step": 545 }, { "epoch": 0.745607203439557, "grad_norm": 0.24740338366171932, "learning_rate": 1.981373038045997e-05, "loss": 0.3903, "num_tokens": 416571596.0, "step": 546 }, { "epoch": 0.7469727843982376, "grad_norm": 0.217503603083374, "learning_rate": 1.9812722941757158e-05, "loss": 0.3765, "num_tokens": 417331981.0, "step": 547 }, { "epoch": 0.748338365356918, "grad_norm": 0.24288815763146587, "learning_rate": 1.9811712814674217e-05, "loss": 0.3647, "num_tokens": 418077619.0, "step": 548 }, { "epoch": 0.7497039463155986, "grad_norm": 0.26163525869359044, "learning_rate": 1.98106999995193e-05, "loss": 0.3692, "num_tokens": 418753245.0, "step": 549 }, { "epoch": 0.7510695272742791, "grad_norm": 0.23163428275016085, "learning_rate": 1.980968449660137e-05, "loss": 0.386, "num_tokens": 419512346.0, "step": 550 }, { "epoch": 0.7524351082329596, "grad_norm": 0.24983584864385303, "learning_rate": 1.9808666306230216e-05, "loss": 0.3726, "num_tokens": 420240679.0, "step": 551 }, { "epoch": 0.7538006891916401, "grad_norm": 0.25231398309944786, "learning_rate": 1.9807645428716446e-05, "loss": 0.3762, "num_tokens": 420969087.0, "step": 552 }, { "epoch": 0.7551662701503206, "grad_norm": 0.2296403351380135, "learning_rate": 1.980662186437148e-05, "loss": 0.3851, "num_tokens": 421664482.0, "step": 553 }, { "epoch": 0.7565318511090011, "grad_norm": 0.24567345732314982, "learning_rate": 1.9805595613507568e-05, "loss": 0.3705, "num_tokens": 422390017.0, "step": 554 }, { "epoch": 0.7578974320676816, "grad_norm": 0.23182413087172507, "learning_rate": 1.980456667643777e-05, "loss": 0.3799, "num_tokens": 423141101.0, "step": 555 }, { "epoch": 0.7592630130263621, "grad_norm": 0.2351678365619299, "learning_rate": 1.9803535053475972e-05, "loss": 0.3801, "num_tokens": 423911241.0, "step": 556 }, { "epoch": 0.7606285939850426, "grad_norm": 0.25878287215060225, "learning_rate": 1.980250074493688e-05, "loss": 0.3635, "num_tokens": 424629204.0, "step": 557 }, { "epoch": 0.7619941749437231, "grad_norm": 0.23582082867021878, "learning_rate": 1.9801463751136013e-05, "loss": 0.3842, "num_tokens": 425332262.0, "step": 558 }, { "epoch": 0.7633597559024037, "grad_norm": 0.24861234568698637, "learning_rate": 1.9800424072389716e-05, "loss": 0.3716, "num_tokens": 426135873.0, "step": 559 }, { "epoch": 0.7647253368610841, "grad_norm": 0.24766074717936187, "learning_rate": 1.9799381709015143e-05, "loss": 0.3725, "num_tokens": 426854958.0, "step": 560 }, { "epoch": 0.7660909178197647, "grad_norm": 0.23657253194854444, "learning_rate": 1.9798336661330286e-05, "loss": 0.3744, "num_tokens": 427580781.0, "step": 561 }, { "epoch": 0.7674564987784451, "grad_norm": 0.2423998623700624, "learning_rate": 1.9797288929653935e-05, "loss": 0.3772, "num_tokens": 428332677.0, "step": 562 }, { "epoch": 0.7688220797371257, "grad_norm": 0.23620336491932659, "learning_rate": 1.979623851430571e-05, "loss": 0.3767, "num_tokens": 429196780.0, "step": 563 }, { "epoch": 0.7701876606958061, "grad_norm": 0.24285190155311534, "learning_rate": 1.9795185415606045e-05, "loss": 0.3739, "num_tokens": 429959680.0, "step": 564 }, { "epoch": 0.7715532416544867, "grad_norm": 0.2188096444122681, "learning_rate": 1.9794129633876198e-05, "loss": 0.3791, "num_tokens": 430737865.0, "step": 565 }, { "epoch": 0.7729188226131671, "grad_norm": 0.2291050916443397, "learning_rate": 1.9793071169438244e-05, "loss": 0.378, "num_tokens": 431481363.0, "step": 566 }, { "epoch": 0.7742844035718477, "grad_norm": 0.2223169199335638, "learning_rate": 1.979201002261507e-05, "loss": 0.3816, "num_tokens": 432280602.0, "step": 567 }, { "epoch": 0.7756499845305282, "grad_norm": 0.2277022540324355, "learning_rate": 1.9790946193730384e-05, "loss": 0.3835, "num_tokens": 433046763.0, "step": 568 }, { "epoch": 0.7770155654892087, "grad_norm": 0.2212733199414894, "learning_rate": 1.9789879683108722e-05, "loss": 0.3799, "num_tokens": 433830679.0, "step": 569 }, { "epoch": 0.7783811464478892, "grad_norm": 0.22542859032477075, "learning_rate": 1.9788810491075425e-05, "loss": 0.359, "num_tokens": 434524050.0, "step": 570 }, { "epoch": 0.7797467274065697, "grad_norm": 0.2342874963785997, "learning_rate": 1.978773861795666e-05, "loss": 0.3815, "num_tokens": 435330086.0, "step": 571 }, { "epoch": 0.7811123083652503, "grad_norm": 0.22814192216046725, "learning_rate": 1.97866640640794e-05, "loss": 0.3798, "num_tokens": 436139120.0, "step": 572 }, { "epoch": 0.7824778893239307, "grad_norm": 0.2331823325460082, "learning_rate": 1.978558682977146e-05, "loss": 0.3682, "num_tokens": 436860759.0, "step": 573 }, { "epoch": 0.7838434702826113, "grad_norm": 0.2557923886981176, "learning_rate": 1.9784506915361445e-05, "loss": 0.3755, "num_tokens": 437661182.0, "step": 574 }, { "epoch": 0.7852090512412917, "grad_norm": 0.2348809627974478, "learning_rate": 1.9783424321178797e-05, "loss": 0.3795, "num_tokens": 438449444.0, "step": 575 }, { "epoch": 0.7865746321999723, "grad_norm": 0.2784741532632914, "learning_rate": 1.9782339047553767e-05, "loss": 0.3903, "num_tokens": 439327369.0, "step": 576 }, { "epoch": 0.7879402131586528, "grad_norm": 0.23968630750825912, "learning_rate": 1.9781251094817423e-05, "loss": 0.3851, "num_tokens": 440150905.0, "step": 577 }, { "epoch": 0.7893057941173333, "grad_norm": 0.23687336796140596, "learning_rate": 1.9780160463301653e-05, "loss": 0.3814, "num_tokens": 440966144.0, "step": 578 }, { "epoch": 0.7906713750760138, "grad_norm": 0.2134502546757845, "learning_rate": 1.9779067153339165e-05, "loss": 0.391, "num_tokens": 441775252.0, "step": 579 }, { "epoch": 0.7920369560346943, "grad_norm": 0.23513431515710911, "learning_rate": 1.9777971165263477e-05, "loss": 0.3865, "num_tokens": 442486015.0, "step": 580 }, { "epoch": 0.7934025369933748, "grad_norm": 0.24181196565377724, "learning_rate": 1.9776872499408925e-05, "loss": 0.3738, "num_tokens": 443232818.0, "step": 581 }, { "epoch": 0.7947681179520553, "grad_norm": 0.22997897649246346, "learning_rate": 1.977577115611067e-05, "loss": 0.3797, "num_tokens": 443987483.0, "step": 582 }, { "epoch": 0.7961336989107358, "grad_norm": 0.2593450511775391, "learning_rate": 1.977466713570468e-05, "loss": 0.3859, "num_tokens": 444714986.0, "step": 583 }, { "epoch": 0.7974992798694163, "grad_norm": 0.22856268800651694, "learning_rate": 1.9773560438527748e-05, "loss": 0.3831, "num_tokens": 445546278.0, "step": 584 }, { "epoch": 0.7988648608280968, "grad_norm": 0.24982362205340583, "learning_rate": 1.9772451064917473e-05, "loss": 0.3666, "num_tokens": 446283839.0, "step": 585 }, { "epoch": 0.8002304417867774, "grad_norm": 0.3205232125146528, "learning_rate": 1.9771339015212283e-05, "loss": 0.3756, "num_tokens": 446984657.0, "step": 586 }, { "epoch": 0.8015960227454578, "grad_norm": 0.22394185488731852, "learning_rate": 1.9770224289751413e-05, "loss": 0.3862, "num_tokens": 447723211.0, "step": 587 }, { "epoch": 0.8029616037041384, "grad_norm": 0.24549860293359288, "learning_rate": 1.976910688887492e-05, "loss": 0.3808, "num_tokens": 448502325.0, "step": 588 }, { "epoch": 0.8043271846628188, "grad_norm": 0.2465950684973369, "learning_rate": 1.9767986812923672e-05, "loss": 0.4037, "num_tokens": 449250620.0, "step": 589 }, { "epoch": 0.8056927656214994, "grad_norm": 0.24912159289975766, "learning_rate": 1.9766864062239354e-05, "loss": 0.3951, "num_tokens": 450121084.0, "step": 590 }, { "epoch": 0.8070583465801798, "grad_norm": 0.26089058276887034, "learning_rate": 1.976573863716448e-05, "loss": 0.3776, "num_tokens": 450902615.0, "step": 591 }, { "epoch": 0.8084239275388604, "grad_norm": 0.2883150556050347, "learning_rate": 1.976461053804235e-05, "loss": 0.3832, "num_tokens": 451655810.0, "step": 592 }, { "epoch": 0.8097895084975408, "grad_norm": 0.2504770907786986, "learning_rate": 1.976347976521711e-05, "loss": 0.3701, "num_tokens": 452387810.0, "step": 593 }, { "epoch": 0.8111550894562214, "grad_norm": 0.23527347768674112, "learning_rate": 1.976234631903371e-05, "loss": 0.3681, "num_tokens": 453195850.0, "step": 594 }, { "epoch": 0.812520670414902, "grad_norm": 0.22360428801954593, "learning_rate": 1.976121019983791e-05, "loss": 0.3856, "num_tokens": 453924672.0, "step": 595 }, { "epoch": 0.8138862513735824, "grad_norm": 0.23516510280380343, "learning_rate": 1.9760071407976297e-05, "loss": 0.3728, "num_tokens": 454674914.0, "step": 596 }, { "epoch": 0.815251832332263, "grad_norm": 0.22256579676769378, "learning_rate": 1.9758929943796264e-05, "loss": 0.3623, "num_tokens": 455425901.0, "step": 597 }, { "epoch": 0.8166174132909434, "grad_norm": 0.21090538292293234, "learning_rate": 1.9757785807646018e-05, "loss": 0.3738, "num_tokens": 456187892.0, "step": 598 }, { "epoch": 0.817982994249624, "grad_norm": 0.2118197093454376, "learning_rate": 1.9756638999874595e-05, "loss": 0.3859, "num_tokens": 456952090.0, "step": 599 }, { "epoch": 0.8193485752083044, "grad_norm": 0.23993957159349516, "learning_rate": 1.975548952083183e-05, "loss": 0.3903, "num_tokens": 457759460.0, "step": 600 }, { "epoch": 0.820714156166985, "grad_norm": 0.22474858226223499, "learning_rate": 1.9754337370868375e-05, "loss": 0.3886, "num_tokens": 458501714.0, "step": 601 }, { "epoch": 0.8220797371256654, "grad_norm": 0.23954931191966175, "learning_rate": 1.975318255033571e-05, "loss": 0.3879, "num_tokens": 459274331.0, "step": 602 }, { "epoch": 0.823445318084346, "grad_norm": 0.22972783258852786, "learning_rate": 1.9752025059586117e-05, "loss": 0.3868, "num_tokens": 459976740.0, "step": 603 }, { "epoch": 0.8248108990430265, "grad_norm": 0.24829074202883777, "learning_rate": 1.9750864898972695e-05, "loss": 0.3732, "num_tokens": 460698062.0, "step": 604 }, { "epoch": 0.826176480001707, "grad_norm": 0.21473849989869662, "learning_rate": 1.9749702068849363e-05, "loss": 0.3686, "num_tokens": 461482739.0, "step": 605 }, { "epoch": 0.8275420609603875, "grad_norm": 0.22919416220071162, "learning_rate": 1.9748536569570843e-05, "loss": 0.3845, "num_tokens": 462328274.0, "step": 606 }, { "epoch": 0.828907641919068, "grad_norm": 0.23072253824361572, "learning_rate": 1.9747368401492684e-05, "loss": 0.3635, "num_tokens": 463065850.0, "step": 607 }, { "epoch": 0.8302732228777485, "grad_norm": 0.2356488836200443, "learning_rate": 1.9746197564971243e-05, "loss": 0.379, "num_tokens": 463809036.0, "step": 608 }, { "epoch": 0.831638803836429, "grad_norm": 0.23259246905184328, "learning_rate": 1.974502406036369e-05, "loss": 0.3766, "num_tokens": 464564339.0, "step": 609 }, { "epoch": 0.8330043847951095, "grad_norm": 0.24030443665738932, "learning_rate": 1.9743847888028015e-05, "loss": 0.3835, "num_tokens": 465398163.0, "step": 610 }, { "epoch": 0.83436996575379, "grad_norm": 0.2277197308433794, "learning_rate": 1.9742669048323008e-05, "loss": 0.3853, "num_tokens": 466189270.0, "step": 611 }, { "epoch": 0.8357355467124705, "grad_norm": 0.21942654499212572, "learning_rate": 1.9741487541608288e-05, "loss": 0.3826, "num_tokens": 467007726.0, "step": 612 }, { "epoch": 0.8371011276711511, "grad_norm": 0.2330821982686077, "learning_rate": 1.9740303368244284e-05, "loss": 0.3514, "num_tokens": 467673955.0, "step": 613 }, { "epoch": 0.8384667086298315, "grad_norm": 0.23616162427208337, "learning_rate": 1.9739116528592228e-05, "loss": 0.3742, "num_tokens": 468434142.0, "step": 614 }, { "epoch": 0.8398322895885121, "grad_norm": 0.22004329045624535, "learning_rate": 1.973792702301418e-05, "loss": 0.3819, "num_tokens": 469207037.0, "step": 615 }, { "epoch": 0.8411978705471925, "grad_norm": 0.2310741474815828, "learning_rate": 1.9736734851873005e-05, "loss": 0.3692, "num_tokens": 469900771.0, "step": 616 }, { "epoch": 0.8425634515058731, "grad_norm": 0.22679386651007433, "learning_rate": 1.9735540015532382e-05, "loss": 0.3742, "num_tokens": 470685956.0, "step": 617 }, { "epoch": 0.8439290324645535, "grad_norm": 0.21893249318705613, "learning_rate": 1.9734342514356803e-05, "loss": 0.3837, "num_tokens": 471399655.0, "step": 618 }, { "epoch": 0.8452946134232341, "grad_norm": 0.24953887721403875, "learning_rate": 1.973314234871158e-05, "loss": 0.3782, "num_tokens": 472232989.0, "step": 619 }, { "epoch": 0.8466601943819145, "grad_norm": 0.22796048696586324, "learning_rate": 1.9731939518962823e-05, "loss": 0.3645, "num_tokens": 472988964.0, "step": 620 }, { "epoch": 0.8480257753405951, "grad_norm": 0.22821522242862166, "learning_rate": 1.9730734025477467e-05, "loss": 0.3868, "num_tokens": 473815786.0, "step": 621 }, { "epoch": 0.8493913562992756, "grad_norm": 0.24489662423830855, "learning_rate": 1.9729525868623254e-05, "loss": 0.3817, "num_tokens": 474550755.0, "step": 622 }, { "epoch": 0.8507569372579561, "grad_norm": 0.23760887157420488, "learning_rate": 1.972831504876875e-05, "loss": 0.3932, "num_tokens": 475315571.0, "step": 623 }, { "epoch": 0.8521225182166366, "grad_norm": 0.22876048170493865, "learning_rate": 1.9727101566283307e-05, "loss": 0.3736, "num_tokens": 476008579.0, "step": 624 }, { "epoch": 0.8534880991753171, "grad_norm": 0.23494991281974495, "learning_rate": 1.9725885421537116e-05, "loss": 0.3911, "num_tokens": 476811527.0, "step": 625 }, { "epoch": 0.8548536801339977, "grad_norm": 0.2316856448414772, "learning_rate": 1.972466661490117e-05, "loss": 0.3832, "num_tokens": 477563397.0, "step": 626 }, { "epoch": 0.8562192610926781, "grad_norm": 0.21441754690128814, "learning_rate": 1.9723445146747277e-05, "loss": 0.3772, "num_tokens": 478362280.0, "step": 627 }, { "epoch": 0.8575848420513587, "grad_norm": 0.23297456924908996, "learning_rate": 1.9722221017448045e-05, "loss": 0.3908, "num_tokens": 479230529.0, "step": 628 }, { "epoch": 0.8589504230100391, "grad_norm": 0.22225694243110378, "learning_rate": 1.9720994227376908e-05, "loss": 0.3836, "num_tokens": 480010501.0, "step": 629 }, { "epoch": 0.8603160039687197, "grad_norm": 0.2125115149147555, "learning_rate": 1.971976477690811e-05, "loss": 0.3812, "num_tokens": 480747299.0, "step": 630 }, { "epoch": 0.8616815849274002, "grad_norm": 0.20654861882721307, "learning_rate": 1.9718532666416695e-05, "loss": 0.3688, "num_tokens": 481482682.0, "step": 631 }, { "epoch": 0.8630471658860807, "grad_norm": 0.2416448641446861, "learning_rate": 1.9717297896278535e-05, "loss": 0.3778, "num_tokens": 482249063.0, "step": 632 }, { "epoch": 0.8644127468447612, "grad_norm": 0.22426248538017754, "learning_rate": 1.97160604668703e-05, "loss": 0.3814, "num_tokens": 482979943.0, "step": 633 }, { "epoch": 0.8657783278034417, "grad_norm": 0.2552756736492648, "learning_rate": 1.9714820378569474e-05, "loss": 0.3955, "num_tokens": 483814857.0, "step": 634 }, { "epoch": 0.8671439087621222, "grad_norm": 0.22092178064559376, "learning_rate": 1.971357763175436e-05, "loss": 0.3956, "num_tokens": 484562127.0, "step": 635 }, { "epoch": 0.8685094897208027, "grad_norm": 0.21609906284675354, "learning_rate": 1.9712332226804065e-05, "loss": 0.3758, "num_tokens": 485331121.0, "step": 636 }, { "epoch": 0.8698750706794832, "grad_norm": 0.21126693038566016, "learning_rate": 1.9711084164098506e-05, "loss": 0.3702, "num_tokens": 486141888.0, "step": 637 }, { "epoch": 0.8712406516381637, "grad_norm": 0.21407447765340132, "learning_rate": 1.970983344401841e-05, "loss": 0.3778, "num_tokens": 486843415.0, "step": 638 }, { "epoch": 0.8726062325968442, "grad_norm": 0.22318803674207643, "learning_rate": 1.9708580066945325e-05, "loss": 0.37, "num_tokens": 487664048.0, "step": 639 }, { "epoch": 0.8739718135555248, "grad_norm": 0.26782782092336077, "learning_rate": 1.9707324033261594e-05, "loss": 0.366, "num_tokens": 488443278.0, "step": 640 }, { "epoch": 0.8753373945142052, "grad_norm": 0.2278862813802831, "learning_rate": 1.970606534335039e-05, "loss": 0.374, "num_tokens": 489205290.0, "step": 641 }, { "epoch": 0.8767029754728858, "grad_norm": 0.23677205947476207, "learning_rate": 1.970480399759567e-05, "loss": 0.37, "num_tokens": 489948809.0, "step": 642 }, { "epoch": 0.8780685564315662, "grad_norm": 0.2308773989115437, "learning_rate": 1.9703539996382225e-05, "loss": 0.3733, "num_tokens": 490776253.0, "step": 643 }, { "epoch": 0.8794341373902468, "grad_norm": 0.22638371427570686, "learning_rate": 1.9702273340095647e-05, "loss": 0.3738, "num_tokens": 491574723.0, "step": 644 }, { "epoch": 0.8807997183489272, "grad_norm": 0.2302236984102126, "learning_rate": 1.970100402912234e-05, "loss": 0.3799, "num_tokens": 492268808.0, "step": 645 }, { "epoch": 0.8821652993076078, "grad_norm": 0.2143052225501384, "learning_rate": 1.9699732063849504e-05, "loss": 0.3789, "num_tokens": 493102054.0, "step": 646 }, { "epoch": 0.8835308802662882, "grad_norm": 0.22439245687976675, "learning_rate": 1.9698457444665172e-05, "loss": 0.3978, "num_tokens": 493896107.0, "step": 647 }, { "epoch": 0.8848964612249688, "grad_norm": 0.21033479780085776, "learning_rate": 1.969718017195817e-05, "loss": 0.3878, "num_tokens": 494753487.0, "step": 648 }, { "epoch": 0.8862620421836493, "grad_norm": 0.22935056891003955, "learning_rate": 1.9695900246118144e-05, "loss": 0.3569, "num_tokens": 495525209.0, "step": 649 }, { "epoch": 0.8876276231423298, "grad_norm": 0.237107426362609, "learning_rate": 1.9694617667535542e-05, "loss": 0.3804, "num_tokens": 496280567.0, "step": 650 }, { "epoch": 0.8889932041010103, "grad_norm": 0.22000645836477845, "learning_rate": 1.9693332436601616e-05, "loss": 0.3819, "num_tokens": 497059019.0, "step": 651 }, { "epoch": 0.8903587850596908, "grad_norm": 0.20370084472449873, "learning_rate": 1.9692044553708444e-05, "loss": 0.385, "num_tokens": 497862274.0, "step": 652 }, { "epoch": 0.8917243660183714, "grad_norm": 0.2508176624909547, "learning_rate": 1.9690754019248897e-05, "loss": 0.3723, "num_tokens": 498687395.0, "step": 653 }, { "epoch": 0.8930899469770518, "grad_norm": 0.20011022460278874, "learning_rate": 1.968946083361666e-05, "loss": 0.3784, "num_tokens": 499457325.0, "step": 654 }, { "epoch": 0.8944555279357324, "grad_norm": 0.21259949229219932, "learning_rate": 1.9688164997206238e-05, "loss": 0.3706, "num_tokens": 500233706.0, "step": 655 }, { "epoch": 0.8958211088944128, "grad_norm": 0.21057484774782864, "learning_rate": 1.9686866510412926e-05, "loss": 0.3855, "num_tokens": 501092806.0, "step": 656 }, { "epoch": 0.8971866898530934, "grad_norm": 0.23216787577618175, "learning_rate": 1.9685565373632834e-05, "loss": 0.3804, "num_tokens": 501834992.0, "step": 657 }, { "epoch": 0.8985522708117739, "grad_norm": 0.20614841009838017, "learning_rate": 1.9684261587262888e-05, "loss": 0.3742, "num_tokens": 502690794.0, "step": 658 }, { "epoch": 0.8999178517704544, "grad_norm": 0.20136177360525598, "learning_rate": 1.9682955151700817e-05, "loss": 0.3806, "num_tokens": 503426763.0, "step": 659 }, { "epoch": 0.9012834327291349, "grad_norm": 0.22228762928573298, "learning_rate": 1.9681646067345158e-05, "loss": 0.369, "num_tokens": 504173303.0, "step": 660 }, { "epoch": 0.9026490136878154, "grad_norm": 0.21876503589304166, "learning_rate": 1.9680334334595248e-05, "loss": 0.3737, "num_tokens": 504997886.0, "step": 661 }, { "epoch": 0.9040145946464959, "grad_norm": 0.24491104747026152, "learning_rate": 1.9679019953851247e-05, "loss": 0.3825, "num_tokens": 505734378.0, "step": 662 }, { "epoch": 0.9053801756051764, "grad_norm": 0.2017257147123203, "learning_rate": 1.967770292551412e-05, "loss": 0.3763, "num_tokens": 506499275.0, "step": 663 }, { "epoch": 0.9067457565638569, "grad_norm": 0.2403346270691312, "learning_rate": 1.9676383249985624e-05, "loss": 0.3758, "num_tokens": 507255021.0, "step": 664 }, { "epoch": 0.9081113375225374, "grad_norm": 0.23358497250163435, "learning_rate": 1.9675060927668347e-05, "loss": 0.375, "num_tokens": 507963839.0, "step": 665 }, { "epoch": 0.9094769184812179, "grad_norm": 0.2357773088876595, "learning_rate": 1.9673735958965658e-05, "loss": 0.3777, "num_tokens": 508688312.0, "step": 666 }, { "epoch": 0.9108424994398985, "grad_norm": 0.23783557968906405, "learning_rate": 1.967240834428176e-05, "loss": 0.3776, "num_tokens": 509488948.0, "step": 667 }, { "epoch": 0.9122080803985789, "grad_norm": 0.22183797973535546, "learning_rate": 1.9671078084021645e-05, "loss": 0.3818, "num_tokens": 510270165.0, "step": 668 }, { "epoch": 0.9135736613572595, "grad_norm": 0.21231185572697747, "learning_rate": 1.966974517859112e-05, "loss": 0.3781, "num_tokens": 511054964.0, "step": 669 }, { "epoch": 0.9149392423159399, "grad_norm": 0.2324792670616, "learning_rate": 1.9668409628396797e-05, "loss": 0.4018, "num_tokens": 511833692.0, "step": 670 }, { "epoch": 0.9163048232746205, "grad_norm": 0.1879591669283904, "learning_rate": 1.966707143384609e-05, "loss": 0.3747, "num_tokens": 512558816.0, "step": 671 }, { "epoch": 0.9176704042333009, "grad_norm": 0.22183142427459596, "learning_rate": 1.966573059534723e-05, "loss": 0.3536, "num_tokens": 513311507.0, "step": 672 }, { "epoch": 0.9190359851919815, "grad_norm": 0.2253608194948316, "learning_rate": 1.9664387113309243e-05, "loss": 0.3712, "num_tokens": 514067137.0, "step": 673 }, { "epoch": 0.9204015661506619, "grad_norm": 0.2361307673881122, "learning_rate": 1.9663040988141973e-05, "loss": 0.3688, "num_tokens": 514786276.0, "step": 674 }, { "epoch": 0.9217671471093425, "grad_norm": 0.2541250846126281, "learning_rate": 1.9661692220256064e-05, "loss": 0.4066, "num_tokens": 515568565.0, "step": 675 }, { "epoch": 0.923132728068023, "grad_norm": 0.21298254805290392, "learning_rate": 1.966034081006296e-05, "loss": 0.3722, "num_tokens": 516327301.0, "step": 676 }, { "epoch": 0.9244983090267035, "grad_norm": 0.2562403494200992, "learning_rate": 1.9658986757974925e-05, "loss": 0.3993, "num_tokens": 517075995.0, "step": 677 }, { "epoch": 0.925863889985384, "grad_norm": 0.21201433865362707, "learning_rate": 1.965763006440502e-05, "loss": 0.3815, "num_tokens": 517845488.0, "step": 678 }, { "epoch": 0.9272294709440645, "grad_norm": 0.24872730840373092, "learning_rate": 1.9656270729767112e-05, "loss": 0.3696, "num_tokens": 518561677.0, "step": 679 }, { "epoch": 0.928595051902745, "grad_norm": 0.23794106185666544, "learning_rate": 1.965490875447587e-05, "loss": 0.3925, "num_tokens": 519310106.0, "step": 680 }, { "epoch": 0.9299606328614255, "grad_norm": 0.24094389389057022, "learning_rate": 1.9653544138946784e-05, "loss": 0.3647, "num_tokens": 520024498.0, "step": 681 }, { "epoch": 0.931326213820106, "grad_norm": 0.2338332993880323, "learning_rate": 1.9652176883596136e-05, "loss": 0.3731, "num_tokens": 520781116.0, "step": 682 }, { "epoch": 0.9326917947787865, "grad_norm": 0.22942082959991267, "learning_rate": 1.9650806988841013e-05, "loss": 0.3712, "num_tokens": 521535126.0, "step": 683 }, { "epoch": 0.934057375737467, "grad_norm": 0.2051553856267546, "learning_rate": 1.964943445509931e-05, "loss": 0.3552, "num_tokens": 522310489.0, "step": 684 }, { "epoch": 0.9354229566961476, "grad_norm": 0.2695420606518578, "learning_rate": 1.9648059282789736e-05, "loss": 0.3921, "num_tokens": 523133176.0, "step": 685 }, { "epoch": 0.9367885376548281, "grad_norm": 0.20953155093853865, "learning_rate": 1.9646681472331786e-05, "loss": 0.3806, "num_tokens": 523874078.0, "step": 686 }, { "epoch": 0.9381541186135086, "grad_norm": 0.2598467679287756, "learning_rate": 1.9645301024145774e-05, "loss": 0.3981, "num_tokens": 524699884.0, "step": 687 }, { "epoch": 0.9395196995721891, "grad_norm": 0.21649808202531495, "learning_rate": 1.9643917938652818e-05, "loss": 0.3729, "num_tokens": 525514167.0, "step": 688 }, { "epoch": 0.9408852805308696, "grad_norm": 0.246397584115257, "learning_rate": 1.9642532216274835e-05, "loss": 0.3738, "num_tokens": 526284972.0, "step": 689 }, { "epoch": 0.9422508614895501, "grad_norm": 0.25137200087968886, "learning_rate": 1.9641143857434544e-05, "loss": 0.3829, "num_tokens": 527011742.0, "step": 690 }, { "epoch": 0.9436164424482306, "grad_norm": 0.24349238916573324, "learning_rate": 1.9639752862555483e-05, "loss": 0.3704, "num_tokens": 527839006.0, "step": 691 }, { "epoch": 0.9449820234069111, "grad_norm": 0.22784975772650592, "learning_rate": 1.963835923206198e-05, "loss": 0.3932, "num_tokens": 528615716.0, "step": 692 }, { "epoch": 0.9463476043655916, "grad_norm": 0.2592418301980075, "learning_rate": 1.963696296637917e-05, "loss": 0.3766, "num_tokens": 529357369.0, "step": 693 }, { "epoch": 0.9477131853242722, "grad_norm": 0.226785712908802, "learning_rate": 1.9635564065932994e-05, "loss": 0.3874, "num_tokens": 530095535.0, "step": 694 }, { "epoch": 0.9490787662829526, "grad_norm": 0.2242018039260005, "learning_rate": 1.9634162531150196e-05, "loss": 0.37, "num_tokens": 530804954.0, "step": 695 }, { "epoch": 0.9504443472416332, "grad_norm": 0.24557588286173263, "learning_rate": 1.9632758362458325e-05, "loss": 0.3827, "num_tokens": 531557175.0, "step": 696 }, { "epoch": 0.9518099282003136, "grad_norm": 0.24643526597426926, "learning_rate": 1.9631351560285724e-05, "loss": 0.3916, "num_tokens": 532314504.0, "step": 697 }, { "epoch": 0.9531755091589942, "grad_norm": 0.22644645539793357, "learning_rate": 1.9629942125061563e-05, "loss": 0.3815, "num_tokens": 533088218.0, "step": 698 }, { "epoch": 0.9545410901176746, "grad_norm": 0.2812692190074564, "learning_rate": 1.9628530057215784e-05, "loss": 0.3751, "num_tokens": 533834601.0, "step": 699 }, { "epoch": 0.9559066710763552, "grad_norm": 0.21426000524672442, "learning_rate": 1.9627115357179155e-05, "loss": 0.3648, "num_tokens": 534588437.0, "step": 700 }, { "epoch": 0.9572722520350356, "grad_norm": 0.22514162881816935, "learning_rate": 1.9625698025383245e-05, "loss": 0.3536, "num_tokens": 535343643.0, "step": 701 }, { "epoch": 0.9586378329937162, "grad_norm": 0.2021493298704689, "learning_rate": 1.962427806226041e-05, "loss": 0.3765, "num_tokens": 536171406.0, "step": 702 }, { "epoch": 0.9600034139523967, "grad_norm": 0.2205587910484086, "learning_rate": 1.9622855468243823e-05, "loss": 0.363, "num_tokens": 536964518.0, "step": 703 }, { "epoch": 0.9613689949110772, "grad_norm": 0.21325227571656347, "learning_rate": 1.962143024376746e-05, "loss": 0.3878, "num_tokens": 537756351.0, "step": 704 }, { "epoch": 0.9627345758697577, "grad_norm": 0.21954134424156602, "learning_rate": 1.962000238926609e-05, "loss": 0.3583, "num_tokens": 538470208.0, "step": 705 }, { "epoch": 0.9641001568284382, "grad_norm": 0.1887864355795394, "learning_rate": 1.961857190517529e-05, "loss": 0.3812, "num_tokens": 539278841.0, "step": 706 }, { "epoch": 0.9654657377871187, "grad_norm": 0.2272762567364451, "learning_rate": 1.9617138791931444e-05, "loss": 0.366, "num_tokens": 539923394.0, "step": 707 }, { "epoch": 0.9668313187457992, "grad_norm": 0.22447526075519114, "learning_rate": 1.961570304997173e-05, "loss": 0.3907, "num_tokens": 540659009.0, "step": 708 }, { "epoch": 0.9681968997044798, "grad_norm": 0.19008200326565305, "learning_rate": 1.9614264679734126e-05, "loss": 0.3717, "num_tokens": 541383570.0, "step": 709 }, { "epoch": 0.9695624806631602, "grad_norm": 0.22127452924775257, "learning_rate": 1.9612823681657424e-05, "loss": 0.374, "num_tokens": 542145576.0, "step": 710 }, { "epoch": 0.9709280616218408, "grad_norm": 0.22868424422903078, "learning_rate": 1.96113800561812e-05, "loss": 0.3755, "num_tokens": 542939007.0, "step": 711 }, { "epoch": 0.9722936425805213, "grad_norm": 0.22089845681080716, "learning_rate": 1.9609933803745854e-05, "loss": 0.4115, "num_tokens": 543712051.0, "step": 712 }, { "epoch": 0.9736592235392018, "grad_norm": 0.20968855843561338, "learning_rate": 1.9608484924792567e-05, "loss": 0.3653, "num_tokens": 544447877.0, "step": 713 }, { "epoch": 0.9750248044978823, "grad_norm": 0.23661560227078382, "learning_rate": 1.960703341976333e-05, "loss": 0.3818, "num_tokens": 545124888.0, "step": 714 }, { "epoch": 0.9763903854565628, "grad_norm": 0.23160933958305577, "learning_rate": 1.9605579289100937e-05, "loss": 0.3901, "num_tokens": 545890233.0, "step": 715 }, { "epoch": 0.9777559664152433, "grad_norm": 0.23554837368433398, "learning_rate": 1.960412253324898e-05, "loss": 0.368, "num_tokens": 546603773.0, "step": 716 }, { "epoch": 0.9791215473739238, "grad_norm": 0.2286169550517569, "learning_rate": 1.9602663152651847e-05, "loss": 0.3786, "num_tokens": 547453280.0, "step": 717 }, { "epoch": 0.9804871283326043, "grad_norm": 0.22218825095315792, "learning_rate": 1.960120114775474e-05, "loss": 0.3813, "num_tokens": 548172445.0, "step": 718 }, { "epoch": 0.9818527092912848, "grad_norm": 0.2275151337830258, "learning_rate": 1.9599736519003646e-05, "loss": 0.3813, "num_tokens": 548891111.0, "step": 719 }, { "epoch": 0.9832182902499653, "grad_norm": 0.2187378857657384, "learning_rate": 1.9598269266845367e-05, "loss": 0.3694, "num_tokens": 549684049.0, "step": 720 }, { "epoch": 0.9845838712086459, "grad_norm": 0.2112126150058604, "learning_rate": 1.9596799391727492e-05, "loss": 0.3646, "num_tokens": 550381777.0, "step": 721 }, { "epoch": 0.9859494521673263, "grad_norm": 0.2437941576809699, "learning_rate": 1.9595326894098417e-05, "loss": 0.3871, "num_tokens": 551248951.0, "step": 722 }, { "epoch": 0.9873150331260069, "grad_norm": 0.2286558699225236, "learning_rate": 1.959385177440734e-05, "loss": 0.3701, "num_tokens": 551973185.0, "step": 723 }, { "epoch": 0.9886806140846873, "grad_norm": 0.2461869569615954, "learning_rate": 1.9592374033104254e-05, "loss": 0.3641, "num_tokens": 552740161.0, "step": 724 }, { "epoch": 0.9900461950433679, "grad_norm": 0.2073019114118674, "learning_rate": 1.9590893670639955e-05, "loss": 0.3709, "num_tokens": 553526563.0, "step": 725 }, { "epoch": 0.9914117760020483, "grad_norm": 0.2474048144401556, "learning_rate": 1.9589410687466038e-05, "loss": 0.3701, "num_tokens": 554245398.0, "step": 726 }, { "epoch": 0.9927773569607289, "grad_norm": 0.21628458821804883, "learning_rate": 1.9587925084034895e-05, "loss": 0.387, "num_tokens": 554985005.0, "step": 727 }, { "epoch": 0.9941429379194093, "grad_norm": 0.2523858627994551, "learning_rate": 1.9586436860799717e-05, "loss": 0.3958, "num_tokens": 555748935.0, "step": 728 }, { "epoch": 0.9955085188780899, "grad_norm": 0.2186895191900311, "learning_rate": 1.9584946018214507e-05, "loss": 0.3715, "num_tokens": 556485927.0, "step": 729 }, { "epoch": 0.9968740998367704, "grad_norm": 0.2242641076845794, "learning_rate": 1.9583452556734043e-05, "loss": 0.3698, "num_tokens": 557294696.0, "step": 730 }, { "epoch": 0.9982396807954509, "grad_norm": 0.2131695371398388, "learning_rate": 1.9581956476813927e-05, "loss": 0.3805, "num_tokens": 558033148.0, "step": 731 }, { "epoch": 0.9996052617541314, "grad_norm": 0.23320356040938617, "learning_rate": 1.958045777891054e-05, "loss": 0.3756, "num_tokens": 558794487.0, "step": 732 }, { "epoch": 1.0, "grad_norm": 0.23320356040938617, "learning_rate": 1.957895646348107e-05, "loss": 0.3803, "num_tokens": 559042136.0, "step": 733 }, { "epoch": 1.0013655809586806, "grad_norm": 0.3825135940407043, "learning_rate": 1.957745253098351e-05, "loss": 0.3416, "num_tokens": 559798424.0, "step": 734 }, { "epoch": 1.0027311619173611, "grad_norm": 0.2897650651273311, "learning_rate": 1.9575945981876635e-05, "loss": 0.3536, "num_tokens": 560570310.0, "step": 735 }, { "epoch": 1.0040967428760414, "grad_norm": 0.2290980247840941, "learning_rate": 1.9574436816620038e-05, "loss": 0.3438, "num_tokens": 561257328.0, "step": 736 }, { "epoch": 1.005462323834722, "grad_norm": 0.2908496468928532, "learning_rate": 1.95729250356741e-05, "loss": 0.3561, "num_tokens": 562035083.0, "step": 737 }, { "epoch": 1.0068279047934026, "grad_norm": 0.24474109613460202, "learning_rate": 1.957141063949999e-05, "loss": 0.34, "num_tokens": 562778400.0, "step": 738 }, { "epoch": 1.0081934857520831, "grad_norm": 0.25421021892130446, "learning_rate": 1.9569893628559692e-05, "loss": 0.3387, "num_tokens": 563508421.0, "step": 739 }, { "epoch": 1.0095590667107635, "grad_norm": 0.22870380389667097, "learning_rate": 1.9568374003315974e-05, "loss": 0.3512, "num_tokens": 564286882.0, "step": 740 }, { "epoch": 1.010924647669444, "grad_norm": 0.2645570751433239, "learning_rate": 1.9566851764232414e-05, "loss": 0.3539, "num_tokens": 565065347.0, "step": 741 }, { "epoch": 1.0122902286281246, "grad_norm": 0.2174071442958411, "learning_rate": 1.9565326911773385e-05, "loss": 0.3322, "num_tokens": 565906059.0, "step": 742 }, { "epoch": 1.0136558095868051, "grad_norm": 0.2551581290015055, "learning_rate": 1.956379944640404e-05, "loss": 0.3734, "num_tokens": 566665307.0, "step": 743 }, { "epoch": 1.0150213905454857, "grad_norm": 0.2653134988662466, "learning_rate": 1.9562269368590357e-05, "loss": 0.3442, "num_tokens": 567409795.0, "step": 744 }, { "epoch": 1.016386971504166, "grad_norm": 0.24862266844549816, "learning_rate": 1.9560736678799088e-05, "loss": 0.3419, "num_tokens": 568121859.0, "step": 745 }, { "epoch": 1.0177525524628466, "grad_norm": 0.2398461782965882, "learning_rate": 1.955920137749779e-05, "loss": 0.3355, "num_tokens": 568949621.0, "step": 746 }, { "epoch": 1.0191181334215271, "grad_norm": 0.2271463646011032, "learning_rate": 1.9557663465154824e-05, "loss": 0.3459, "num_tokens": 569716202.0, "step": 747 }, { "epoch": 1.0204837143802077, "grad_norm": 0.2374417496076064, "learning_rate": 1.955612294223933e-05, "loss": 0.3514, "num_tokens": 570522506.0, "step": 748 }, { "epoch": 1.021849295338888, "grad_norm": 0.25175071136036276, "learning_rate": 1.9554579809221264e-05, "loss": 0.36, "num_tokens": 571313681.0, "step": 749 }, { "epoch": 1.0232148762975686, "grad_norm": 0.21937228293588593, "learning_rate": 1.9553034066571366e-05, "loss": 0.3505, "num_tokens": 572099039.0, "step": 750 }, { "epoch": 1.0245804572562491, "grad_norm": 0.2252337411630276, "learning_rate": 1.9551485714761173e-05, "loss": 0.3406, "num_tokens": 572867223.0, "step": 751 }, { "epoch": 1.0259460382149297, "grad_norm": 0.23169573422516954, "learning_rate": 1.9549934754263023e-05, "loss": 0.3312, "num_tokens": 573561839.0, "step": 752 }, { "epoch": 1.0273116191736102, "grad_norm": 0.21392985642691065, "learning_rate": 1.9548381185550047e-05, "loss": 0.3419, "num_tokens": 574292688.0, "step": 753 }, { "epoch": 1.0286772001322906, "grad_norm": 0.2243638225576261, "learning_rate": 1.9546825009096167e-05, "loss": 0.356, "num_tokens": 575096073.0, "step": 754 }, { "epoch": 1.0300427810909711, "grad_norm": 0.21332838620710934, "learning_rate": 1.9545266225376112e-05, "loss": 0.3389, "num_tokens": 575916595.0, "step": 755 }, { "epoch": 1.0314083620496517, "grad_norm": 0.1992051714308279, "learning_rate": 1.9543704834865388e-05, "loss": 0.3444, "num_tokens": 576712138.0, "step": 756 }, { "epoch": 1.0327739430083323, "grad_norm": 0.21746986145790495, "learning_rate": 1.9542140838040324e-05, "loss": 0.3399, "num_tokens": 577507085.0, "step": 757 }, { "epoch": 1.0341395239670126, "grad_norm": 0.22343420622840984, "learning_rate": 1.9540574235378014e-05, "loss": 0.3586, "num_tokens": 578262098.0, "step": 758 }, { "epoch": 1.0355051049256931, "grad_norm": 0.22772194947028826, "learning_rate": 1.9539005027356366e-05, "loss": 0.334, "num_tokens": 579137978.0, "step": 759 }, { "epoch": 1.0368706858843737, "grad_norm": 0.19720526744651076, "learning_rate": 1.953743321445408e-05, "loss": 0.343, "num_tokens": 579892754.0, "step": 760 }, { "epoch": 1.0382362668430543, "grad_norm": 0.236702180067579, "learning_rate": 1.953585879715064e-05, "loss": 0.3508, "num_tokens": 580625847.0, "step": 761 }, { "epoch": 1.0396018478017348, "grad_norm": 0.23934596109582543, "learning_rate": 1.9534281775926346e-05, "loss": 0.3619, "num_tokens": 581386690.0, "step": 762 }, { "epoch": 1.0409674287604151, "grad_norm": 0.2131402454693042, "learning_rate": 1.9532702151262262e-05, "loss": 0.3375, "num_tokens": 582155426.0, "step": 763 }, { "epoch": 1.0423330097190957, "grad_norm": 0.19336758783053334, "learning_rate": 1.9531119923640276e-05, "loss": 0.3336, "num_tokens": 582902124.0, "step": 764 }, { "epoch": 1.0436985906777763, "grad_norm": 0.2114730972701699, "learning_rate": 1.952953509354305e-05, "loss": 0.3445, "num_tokens": 583659563.0, "step": 765 }, { "epoch": 1.0450641716364568, "grad_norm": 0.21433503240186524, "learning_rate": 1.9527947661454054e-05, "loss": 0.3481, "num_tokens": 584429340.0, "step": 766 }, { "epoch": 1.0464297525951372, "grad_norm": 0.21369663933442637, "learning_rate": 1.9526357627857537e-05, "loss": 0.3624, "num_tokens": 585223552.0, "step": 767 }, { "epoch": 1.0477953335538177, "grad_norm": 0.2200008386516188, "learning_rate": 1.9524764993238553e-05, "loss": 0.3526, "num_tokens": 585982078.0, "step": 768 }, { "epoch": 1.0491609145124983, "grad_norm": 0.20975499602522088, "learning_rate": 1.9523169758082946e-05, "loss": 0.3361, "num_tokens": 586703202.0, "step": 769 }, { "epoch": 1.0505264954711788, "grad_norm": 0.2009931673157653, "learning_rate": 1.9521571922877355e-05, "loss": 0.3611, "num_tokens": 587497570.0, "step": 770 }, { "epoch": 1.0518920764298594, "grad_norm": 0.22128574621686295, "learning_rate": 1.9519971488109207e-05, "loss": 0.334, "num_tokens": 588184932.0, "step": 771 }, { "epoch": 1.0532576573885397, "grad_norm": 0.21869235992327593, "learning_rate": 1.9518368454266726e-05, "loss": 0.3405, "num_tokens": 588950129.0, "step": 772 }, { "epoch": 1.0546232383472203, "grad_norm": 0.225898003741059, "learning_rate": 1.9516762821838927e-05, "loss": 0.3382, "num_tokens": 589714638.0, "step": 773 }, { "epoch": 1.0559888193059008, "grad_norm": 0.24083446279434642, "learning_rate": 1.951515459131562e-05, "loss": 0.3335, "num_tokens": 590509231.0, "step": 774 }, { "epoch": 1.0573544002645814, "grad_norm": 0.2238857327015622, "learning_rate": 1.9513543763187415e-05, "loss": 0.3405, "num_tokens": 591277832.0, "step": 775 }, { "epoch": 1.0587199812232617, "grad_norm": 0.2304878398059193, "learning_rate": 1.951193033794569e-05, "loss": 0.3265, "num_tokens": 591950717.0, "step": 776 }, { "epoch": 1.0600855621819423, "grad_norm": 0.2106306513372773, "learning_rate": 1.951031431608264e-05, "loss": 0.3675, "num_tokens": 592801890.0, "step": 777 }, { "epoch": 1.0614511431406228, "grad_norm": 0.24626933009606206, "learning_rate": 1.9508695698091248e-05, "loss": 0.3401, "num_tokens": 593614528.0, "step": 778 }, { "epoch": 1.0628167240993034, "grad_norm": 0.2036973708429303, "learning_rate": 1.9507074484465275e-05, "loss": 0.3273, "num_tokens": 594406685.0, "step": 779 }, { "epoch": 1.064182305057984, "grad_norm": 0.27014717985201614, "learning_rate": 1.9505450675699288e-05, "loss": 0.3469, "num_tokens": 595158188.0, "step": 780 }, { "epoch": 1.0655478860166643, "grad_norm": 0.21963088425190025, "learning_rate": 1.9503824272288638e-05, "loss": 0.3545, "num_tokens": 595927943.0, "step": 781 }, { "epoch": 1.0669134669753448, "grad_norm": 0.2106585797718594, "learning_rate": 1.9502195274729472e-05, "loss": 0.3483, "num_tokens": 596795301.0, "step": 782 }, { "epoch": 1.0682790479340254, "grad_norm": 0.23246998475077077, "learning_rate": 1.950056368351873e-05, "loss": 0.3425, "num_tokens": 597598411.0, "step": 783 }, { "epoch": 1.069644628892706, "grad_norm": 0.24328956963721618, "learning_rate": 1.949892949915413e-05, "loss": 0.3529, "num_tokens": 598380624.0, "step": 784 }, { "epoch": 1.0710102098513863, "grad_norm": 0.23043651709543356, "learning_rate": 1.9497292722134204e-05, "loss": 0.332, "num_tokens": 599130592.0, "step": 785 }, { "epoch": 1.0723757908100668, "grad_norm": 0.3203703356203683, "learning_rate": 1.949565335295825e-05, "loss": 0.3387, "num_tokens": 599807665.0, "step": 786 }, { "epoch": 1.0737413717687474, "grad_norm": 0.26005769731686473, "learning_rate": 1.9494011392126375e-05, "loss": 0.3343, "num_tokens": 600571353.0, "step": 787 }, { "epoch": 1.075106952727428, "grad_norm": 0.21478417178316983, "learning_rate": 1.9492366840139468e-05, "loss": 0.3556, "num_tokens": 601282480.0, "step": 788 }, { "epoch": 1.0764725336861085, "grad_norm": 0.22354619987300667, "learning_rate": 1.9490719697499213e-05, "loss": 0.3534, "num_tokens": 602059188.0, "step": 789 }, { "epoch": 1.0778381146447888, "grad_norm": 0.23341858936439303, "learning_rate": 1.9489069964708078e-05, "loss": 0.363, "num_tokens": 602849615.0, "step": 790 }, { "epoch": 1.0792036956034694, "grad_norm": 0.21369895982916975, "learning_rate": 1.9487417642269328e-05, "loss": 0.3432, "num_tokens": 603610326.0, "step": 791 }, { "epoch": 1.08056927656215, "grad_norm": 0.23214500777604785, "learning_rate": 1.9485762730687014e-05, "loss": 0.3395, "num_tokens": 604338380.0, "step": 792 }, { "epoch": 1.0819348575208305, "grad_norm": 0.20705994014107917, "learning_rate": 1.9484105230465978e-05, "loss": 0.3524, "num_tokens": 605077064.0, "step": 793 }, { "epoch": 1.0833004384795109, "grad_norm": 0.24173710866201498, "learning_rate": 1.9482445142111852e-05, "loss": 0.3496, "num_tokens": 605809585.0, "step": 794 }, { "epoch": 1.0846660194381914, "grad_norm": 0.20929193210864164, "learning_rate": 1.9480782466131056e-05, "loss": 0.3354, "num_tokens": 606598541.0, "step": 795 }, { "epoch": 1.086031600396872, "grad_norm": 0.24458946452512642, "learning_rate": 1.9479117203030803e-05, "loss": 0.3373, "num_tokens": 607302298.0, "step": 796 }, { "epoch": 1.0873971813555525, "grad_norm": 0.21853750631580782, "learning_rate": 1.947744935331909e-05, "loss": 0.3541, "num_tokens": 608046682.0, "step": 797 }, { "epoch": 1.088762762314233, "grad_norm": 0.28036874146332763, "learning_rate": 1.9475778917504703e-05, "loss": 0.3607, "num_tokens": 608810756.0, "step": 798 }, { "epoch": 1.0901283432729134, "grad_norm": 0.2127303801483816, "learning_rate": 1.947410589609723e-05, "loss": 0.3544, "num_tokens": 609603922.0, "step": 799 }, { "epoch": 1.091493924231594, "grad_norm": 0.2359714588934887, "learning_rate": 1.9472430289607025e-05, "loss": 0.3314, "num_tokens": 610306230.0, "step": 800 }, { "epoch": 1.0928595051902745, "grad_norm": 0.2433090672983506, "learning_rate": 1.947075209854525e-05, "loss": 0.3377, "num_tokens": 611015690.0, "step": 801 }, { "epoch": 1.094225086148955, "grad_norm": 0.23014334300961076, "learning_rate": 1.9469071323423844e-05, "loss": 0.3365, "num_tokens": 611790112.0, "step": 802 }, { "epoch": 1.0955906671076354, "grad_norm": 0.28148534782836104, "learning_rate": 1.9467387964755545e-05, "loss": 0.3454, "num_tokens": 612578023.0, "step": 803 }, { "epoch": 1.096956248066316, "grad_norm": 0.23569966182226046, "learning_rate": 1.9465702023053866e-05, "loss": 0.3623, "num_tokens": 613357473.0, "step": 804 }, { "epoch": 1.0983218290249965, "grad_norm": 0.206852317821413, "learning_rate": 1.9464013498833123e-05, "loss": 0.3355, "num_tokens": 614079911.0, "step": 805 }, { "epoch": 1.099687409983677, "grad_norm": 0.21388520928879326, "learning_rate": 1.9462322392608402e-05, "loss": 0.3447, "num_tokens": 614976527.0, "step": 806 }, { "epoch": 1.1010529909423576, "grad_norm": 0.18739995316002714, "learning_rate": 1.9460628704895595e-05, "loss": 0.3242, "num_tokens": 615739303.0, "step": 807 }, { "epoch": 1.102418571901038, "grad_norm": 0.23377448662966474, "learning_rate": 1.9458932436211365e-05, "loss": 0.3433, "num_tokens": 616511728.0, "step": 808 }, { "epoch": 1.1037841528597185, "grad_norm": 0.22987354440488406, "learning_rate": 1.9457233587073177e-05, "loss": 0.3582, "num_tokens": 617274800.0, "step": 809 }, { "epoch": 1.105149733818399, "grad_norm": 0.23123100282429546, "learning_rate": 1.945553215799927e-05, "loss": 0.3418, "num_tokens": 618008106.0, "step": 810 }, { "epoch": 1.1065153147770797, "grad_norm": 0.21625497188314524, "learning_rate": 1.9453828149508684e-05, "loss": 0.3403, "num_tokens": 618700596.0, "step": 811 }, { "epoch": 1.10788089573576, "grad_norm": 0.22647218761980423, "learning_rate": 1.9452121562121232e-05, "loss": 0.3526, "num_tokens": 619527542.0, "step": 812 }, { "epoch": 1.1092464766944405, "grad_norm": 0.21575319555420466, "learning_rate": 1.945041239635752e-05, "loss": 0.3368, "num_tokens": 620334333.0, "step": 813 }, { "epoch": 1.110612057653121, "grad_norm": 0.20076794897715036, "learning_rate": 1.9448700652738943e-05, "loss": 0.3388, "num_tokens": 621031584.0, "step": 814 }, { "epoch": 1.1119776386118017, "grad_norm": 0.3121453839366858, "learning_rate": 1.944698633178768e-05, "loss": 0.3399, "num_tokens": 621769147.0, "step": 815 }, { "epoch": 1.1133432195704822, "grad_norm": 0.2289933751086313, "learning_rate": 1.9445269434026696e-05, "loss": 0.3337, "num_tokens": 622519658.0, "step": 816 }, { "epoch": 1.1147088005291625, "grad_norm": 0.241690252259585, "learning_rate": 1.9443549959979738e-05, "loss": 0.363, "num_tokens": 623293585.0, "step": 817 }, { "epoch": 1.116074381487843, "grad_norm": 0.20991941188165925, "learning_rate": 1.9441827910171347e-05, "loss": 0.3464, "num_tokens": 624144270.0, "step": 818 }, { "epoch": 1.1174399624465237, "grad_norm": 0.2410147117893179, "learning_rate": 1.9440103285126847e-05, "loss": 0.3541, "num_tokens": 624944787.0, "step": 819 }, { "epoch": 1.1188055434052042, "grad_norm": 0.2051297873721244, "learning_rate": 1.943837608537234e-05, "loss": 0.3507, "num_tokens": 625748395.0, "step": 820 }, { "epoch": 1.1201711243638846, "grad_norm": 0.22664534948103088, "learning_rate": 1.943664631143473e-05, "loss": 0.3352, "num_tokens": 626474429.0, "step": 821 }, { "epoch": 1.121536705322565, "grad_norm": 0.21670359113765952, "learning_rate": 1.943491396384169e-05, "loss": 0.3497, "num_tokens": 627243813.0, "step": 822 }, { "epoch": 1.1229022862812457, "grad_norm": 0.20899784816306777, "learning_rate": 1.9433179043121683e-05, "loss": 0.3451, "num_tokens": 628020340.0, "step": 823 }, { "epoch": 1.1242678672399262, "grad_norm": 0.22411570922905472, "learning_rate": 1.943144154980396e-05, "loss": 0.3441, "num_tokens": 628762821.0, "step": 824 }, { "epoch": 1.1256334481986068, "grad_norm": 0.22009284476576324, "learning_rate": 1.9429701484418553e-05, "loss": 0.3323, "num_tokens": 629539587.0, "step": 825 }, { "epoch": 1.1269990291572871, "grad_norm": 0.21429558195937343, "learning_rate": 1.942795884749628e-05, "loss": 0.3612, "num_tokens": 630258493.0, "step": 826 }, { "epoch": 1.1283646101159677, "grad_norm": 0.22840451785751004, "learning_rate": 1.9426213639568753e-05, "loss": 0.3431, "num_tokens": 631009914.0, "step": 827 }, { "epoch": 1.1297301910746482, "grad_norm": 0.21718260945448883, "learning_rate": 1.9424465861168353e-05, "loss": 0.3448, "num_tokens": 631759759.0, "step": 828 }, { "epoch": 1.1310957720333288, "grad_norm": 0.20919764440849264, "learning_rate": 1.9422715512828248e-05, "loss": 0.3331, "num_tokens": 632434149.0, "step": 829 }, { "epoch": 1.1324613529920091, "grad_norm": 0.23389433499368334, "learning_rate": 1.9420962595082396e-05, "loss": 0.3164, "num_tokens": 633144885.0, "step": 830 }, { "epoch": 1.1338269339506897, "grad_norm": 0.2295696284022184, "learning_rate": 1.9419207108465538e-05, "loss": 0.3306, "num_tokens": 633828564.0, "step": 831 }, { "epoch": 1.1351925149093702, "grad_norm": 0.22530533159356056, "learning_rate": 1.94174490535132e-05, "loss": 0.3394, "num_tokens": 634554740.0, "step": 832 }, { "epoch": 1.1365580958680508, "grad_norm": 0.20058206707021065, "learning_rate": 1.941568843076168e-05, "loss": 0.3435, "num_tokens": 635271883.0, "step": 833 }, { "epoch": 1.1379236768267313, "grad_norm": 0.23175101701616693, "learning_rate": 1.9413925240748074e-05, "loss": 0.353, "num_tokens": 635992085.0, "step": 834 }, { "epoch": 1.1392892577854117, "grad_norm": 0.22536288869732607, "learning_rate": 1.9412159484010254e-05, "loss": 0.3542, "num_tokens": 636717912.0, "step": 835 }, { "epoch": 1.1406548387440922, "grad_norm": 0.22346931543715234, "learning_rate": 1.941039116108687e-05, "loss": 0.3402, "num_tokens": 637491230.0, "step": 836 }, { "epoch": 1.1420204197027728, "grad_norm": 0.21597589067749015, "learning_rate": 1.940862027251737e-05, "loss": 0.3432, "num_tokens": 638285530.0, "step": 837 }, { "epoch": 1.1433860006614533, "grad_norm": 0.21226927129186632, "learning_rate": 1.940684681884197e-05, "loss": 0.3394, "num_tokens": 639051804.0, "step": 838 }, { "epoch": 1.1447515816201337, "grad_norm": 0.21502298436100398, "learning_rate": 1.9405070800601677e-05, "loss": 0.351, "num_tokens": 639875837.0, "step": 839 }, { "epoch": 1.1461171625788142, "grad_norm": 0.22064724264056018, "learning_rate": 1.9403292218338276e-05, "loss": 0.3503, "num_tokens": 640615235.0, "step": 840 }, { "epoch": 1.1474827435374948, "grad_norm": 0.2026653755473225, "learning_rate": 1.940151107259433e-05, "loss": 0.3435, "num_tokens": 641400053.0, "step": 841 }, { "epoch": 1.1488483244961754, "grad_norm": 0.22108893109365524, "learning_rate": 1.93997273639132e-05, "loss": 0.3396, "num_tokens": 642184918.0, "step": 842 }, { "epoch": 1.150213905454856, "grad_norm": 0.1960884338704318, "learning_rate": 1.939794109283901e-05, "loss": 0.3438, "num_tokens": 642971503.0, "step": 843 }, { "epoch": 1.1515794864135362, "grad_norm": 0.2257429508864646, "learning_rate": 1.9396152259916676e-05, "loss": 0.3593, "num_tokens": 643755043.0, "step": 844 }, { "epoch": 1.1529450673722168, "grad_norm": 0.21499626799680963, "learning_rate": 1.9394360865691896e-05, "loss": 0.3539, "num_tokens": 644582058.0, "step": 845 }, { "epoch": 1.1543106483308974, "grad_norm": 0.2304590106792569, "learning_rate": 1.939256691071114e-05, "loss": 0.3326, "num_tokens": 645381828.0, "step": 846 }, { "epoch": 1.155676229289578, "grad_norm": 0.21592350712455738, "learning_rate": 1.9390770395521674e-05, "loss": 0.338, "num_tokens": 646206644.0, "step": 847 }, { "epoch": 1.1570418102482583, "grad_norm": 0.2529983356660823, "learning_rate": 1.9388971320671533e-05, "loss": 0.3424, "num_tokens": 646917687.0, "step": 848 }, { "epoch": 1.1584073912069388, "grad_norm": 0.2241649505814211, "learning_rate": 1.938716968670954e-05, "loss": 0.3486, "num_tokens": 647662144.0, "step": 849 }, { "epoch": 1.1597729721656194, "grad_norm": 0.2163814578235409, "learning_rate": 1.938536549418529e-05, "loss": 0.3494, "num_tokens": 648424916.0, "step": 850 }, { "epoch": 1.1611385531243, "grad_norm": 0.24955187239088902, "learning_rate": 1.9383558743649168e-05, "loss": 0.3584, "num_tokens": 649156600.0, "step": 851 }, { "epoch": 1.1625041340829805, "grad_norm": 0.23446575168126618, "learning_rate": 1.9381749435652337e-05, "loss": 0.3536, "num_tokens": 649917916.0, "step": 852 }, { "epoch": 1.1638697150416608, "grad_norm": 0.22634812733227788, "learning_rate": 1.9379937570746733e-05, "loss": 0.3409, "num_tokens": 650642497.0, "step": 853 }, { "epoch": 1.1652352960003414, "grad_norm": 0.2307793578805773, "learning_rate": 1.9378123149485082e-05, "loss": 0.3461, "num_tokens": 651386219.0, "step": 854 }, { "epoch": 1.166600876959022, "grad_norm": 0.23303330781466192, "learning_rate": 1.937630617242088e-05, "loss": 0.3594, "num_tokens": 652102809.0, "step": 855 }, { "epoch": 1.1679664579177025, "grad_norm": 0.21770567500218296, "learning_rate": 1.9374486640108416e-05, "loss": 0.346, "num_tokens": 652873368.0, "step": 856 }, { "epoch": 1.1693320388763828, "grad_norm": 0.2132618376781528, "learning_rate": 1.9372664553102743e-05, "loss": 0.361, "num_tokens": 653648340.0, "step": 857 }, { "epoch": 1.1706976198350634, "grad_norm": 0.20701407389920137, "learning_rate": 1.9370839911959708e-05, "loss": 0.3343, "num_tokens": 654442924.0, "step": 858 }, { "epoch": 1.172063200793744, "grad_norm": 0.2758451261437571, "learning_rate": 1.9369012717235922e-05, "loss": 0.3398, "num_tokens": 655195472.0, "step": 859 }, { "epoch": 1.1734287817524245, "grad_norm": 0.20518300542704662, "learning_rate": 1.9367182969488792e-05, "loss": 0.356, "num_tokens": 655973657.0, "step": 860 }, { "epoch": 1.174794362711105, "grad_norm": 0.23288096668252745, "learning_rate": 1.9365350669276482e-05, "loss": 0.3655, "num_tokens": 656753462.0, "step": 861 }, { "epoch": 1.1761599436697854, "grad_norm": 0.2257984443735613, "learning_rate": 1.9363515817157963e-05, "loss": 0.3396, "num_tokens": 657577877.0, "step": 862 }, { "epoch": 1.177525524628466, "grad_norm": 0.2094542616694453, "learning_rate": 1.936167841369296e-05, "loss": 0.3403, "num_tokens": 658374789.0, "step": 863 }, { "epoch": 1.1788911055871465, "grad_norm": 0.18789618185999296, "learning_rate": 1.9359838459441985e-05, "loss": 0.3248, "num_tokens": 659115923.0, "step": 864 }, { "epoch": 1.180256686545827, "grad_norm": 0.21942954207411852, "learning_rate": 1.935799595496633e-05, "loss": 0.3385, "num_tokens": 659873530.0, "step": 865 }, { "epoch": 1.1816222675045074, "grad_norm": 0.2079256007257021, "learning_rate": 1.935615090082806e-05, "loss": 0.363, "num_tokens": 660687369.0, "step": 866 }, { "epoch": 1.182987848463188, "grad_norm": 0.22514594796201975, "learning_rate": 1.935430329759003e-05, "loss": 0.3467, "num_tokens": 661457954.0, "step": 867 }, { "epoch": 1.1843534294218685, "grad_norm": 0.18731723865554684, "learning_rate": 1.9352453145815854e-05, "loss": 0.3626, "num_tokens": 662210300.0, "step": 868 }, { "epoch": 1.185719010380549, "grad_norm": 0.22170893892455693, "learning_rate": 1.935060044606994e-05, "loss": 0.3322, "num_tokens": 662931119.0, "step": 869 }, { "epoch": 1.1870845913392296, "grad_norm": 0.1992673749908861, "learning_rate": 1.934874519891746e-05, "loss": 0.3487, "num_tokens": 663663455.0, "step": 870 }, { "epoch": 1.18845017229791, "grad_norm": 0.20808166381530568, "learning_rate": 1.934688740492438e-05, "loss": 0.342, "num_tokens": 664534798.0, "step": 871 }, { "epoch": 1.1898157532565905, "grad_norm": 0.22087488348609283, "learning_rate": 1.934502706465742e-05, "loss": 0.3452, "num_tokens": 665300874.0, "step": 872 }, { "epoch": 1.191181334215271, "grad_norm": 0.23784556953514013, "learning_rate": 1.9343164178684093e-05, "loss": 0.3692, "num_tokens": 666005267.0, "step": 873 }, { "epoch": 1.1925469151739516, "grad_norm": 0.20292370579501118, "learning_rate": 1.934129874757269e-05, "loss": 0.337, "num_tokens": 666734740.0, "step": 874 }, { "epoch": 1.193912496132632, "grad_norm": 0.2217411028595786, "learning_rate": 1.933943077189227e-05, "loss": 0.3448, "num_tokens": 667492731.0, "step": 875 }, { "epoch": 1.1952780770913125, "grad_norm": 0.21870724777646677, "learning_rate": 1.9337560252212673e-05, "loss": 0.3277, "num_tokens": 668243503.0, "step": 876 }, { "epoch": 1.196643658049993, "grad_norm": 0.20457576767489188, "learning_rate": 1.933568718910451e-05, "loss": 0.3522, "num_tokens": 669092752.0, "step": 877 }, { "epoch": 1.1980092390086736, "grad_norm": 0.22226510666272514, "learning_rate": 1.9333811583139173e-05, "loss": 0.3582, "num_tokens": 669851085.0, "step": 878 }, { "epoch": 1.1993748199673542, "grad_norm": 0.21376895191728973, "learning_rate": 1.933193343488883e-05, "loss": 0.3332, "num_tokens": 670602523.0, "step": 879 }, { "epoch": 1.2007404009260345, "grad_norm": 0.1937378493811625, "learning_rate": 1.9330052744926424e-05, "loss": 0.3436, "num_tokens": 671411552.0, "step": 880 }, { "epoch": 1.202105981884715, "grad_norm": 0.20017633469468055, "learning_rate": 1.9328169513825664e-05, "loss": 0.3446, "num_tokens": 672193651.0, "step": 881 }, { "epoch": 1.2034715628433956, "grad_norm": 0.2200322586900562, "learning_rate": 1.932628374216105e-05, "loss": 0.3523, "num_tokens": 672976492.0, "step": 882 }, { "epoch": 1.2048371438020762, "grad_norm": 0.21024693808266928, "learning_rate": 1.9324395430507847e-05, "loss": 0.3384, "num_tokens": 673659643.0, "step": 883 }, { "epoch": 1.2062027247607565, "grad_norm": 0.2082678406004933, "learning_rate": 1.9322504579442098e-05, "loss": 0.3336, "num_tokens": 674453313.0, "step": 884 }, { "epoch": 1.207568305719437, "grad_norm": 0.22541917650661503, "learning_rate": 1.9320611189540616e-05, "loss": 0.3376, "num_tokens": 675212051.0, "step": 885 }, { "epoch": 1.2089338866781176, "grad_norm": 0.2054400878769126, "learning_rate": 1.9318715261381e-05, "loss": 0.3645, "num_tokens": 675930971.0, "step": 886 }, { "epoch": 1.2102994676367982, "grad_norm": 0.22583871319409823, "learning_rate": 1.931681679554161e-05, "loss": 0.3591, "num_tokens": 676632984.0, "step": 887 }, { "epoch": 1.2116650485954787, "grad_norm": 0.20963858343378733, "learning_rate": 1.931491579260158e-05, "loss": 0.3383, "num_tokens": 677333340.0, "step": 888 }, { "epoch": 1.213030629554159, "grad_norm": 0.20935971520820942, "learning_rate": 1.9313012253140833e-05, "loss": 0.3571, "num_tokens": 678129113.0, "step": 889 }, { "epoch": 1.2143962105128396, "grad_norm": 0.18888913938880342, "learning_rate": 1.931110617774006e-05, "loss": 0.3483, "num_tokens": 678869608.0, "step": 890 }, { "epoch": 1.2157617914715202, "grad_norm": 0.2263753363047279, "learning_rate": 1.930919756698071e-05, "loss": 0.3512, "num_tokens": 679553554.0, "step": 891 }, { "epoch": 1.2171273724302007, "grad_norm": 0.20681892978243827, "learning_rate": 1.9307286421445023e-05, "loss": 0.345, "num_tokens": 680422050.0, "step": 892 }, { "epoch": 1.218492953388881, "grad_norm": 0.20107974222576824, "learning_rate": 1.9305372741716008e-05, "loss": 0.3525, "num_tokens": 681202885.0, "step": 893 }, { "epoch": 1.2198585343475616, "grad_norm": 0.20455955055154176, "learning_rate": 1.9303456528377444e-05, "loss": 0.3405, "num_tokens": 681955579.0, "step": 894 }, { "epoch": 1.2212241153062422, "grad_norm": 0.20056820706659426, "learning_rate": 1.9301537782013884e-05, "loss": 0.3393, "num_tokens": 682668266.0, "step": 895 }, { "epoch": 1.2225896962649228, "grad_norm": 0.2287317027634672, "learning_rate": 1.9299616503210657e-05, "loss": 0.3487, "num_tokens": 683406434.0, "step": 896 }, { "epoch": 1.2239552772236033, "grad_norm": 0.2135978474830208, "learning_rate": 1.929769269255386e-05, "loss": 0.3473, "num_tokens": 684209879.0, "step": 897 }, { "epoch": 1.2253208581822836, "grad_norm": 0.21593332622963474, "learning_rate": 1.929576635063037e-05, "loss": 0.3464, "num_tokens": 684902408.0, "step": 898 }, { "epoch": 1.2266864391409642, "grad_norm": 0.20679704084756928, "learning_rate": 1.929383747802782e-05, "loss": 0.3487, "num_tokens": 685761392.0, "step": 899 }, { "epoch": 1.2280520200996448, "grad_norm": 0.21172760667058732, "learning_rate": 1.929190607533463e-05, "loss": 0.3286, "num_tokens": 686535283.0, "step": 900 }, { "epoch": 1.2294176010583253, "grad_norm": 0.21559225156061718, "learning_rate": 1.9289972143139993e-05, "loss": 0.3484, "num_tokens": 687252380.0, "step": 901 }, { "epoch": 1.2307831820170057, "grad_norm": 0.206461435190598, "learning_rate": 1.928803568203386e-05, "loss": 0.344, "num_tokens": 688013226.0, "step": 902 }, { "epoch": 1.2321487629756862, "grad_norm": 0.249320720317246, "learning_rate": 1.9286096692606966e-05, "loss": 0.3525, "num_tokens": 688722910.0, "step": 903 }, { "epoch": 1.2335143439343668, "grad_norm": 0.22298578762211163, "learning_rate": 1.928415517545081e-05, "loss": 0.3632, "num_tokens": 689480695.0, "step": 904 }, { "epoch": 1.2348799248930473, "grad_norm": 0.21398907317964638, "learning_rate": 1.9282211131157668e-05, "loss": 0.3529, "num_tokens": 690231910.0, "step": 905 }, { "epoch": 1.2362455058517279, "grad_norm": 0.2159969296053859, "learning_rate": 1.928026456032058e-05, "loss": 0.3539, "num_tokens": 691017158.0, "step": 906 }, { "epoch": 1.2376110868104082, "grad_norm": 0.2015170949291365, "learning_rate": 1.9278315463533365e-05, "loss": 0.3472, "num_tokens": 691701734.0, "step": 907 }, { "epoch": 1.2389766677690888, "grad_norm": 0.24649108499207192, "learning_rate": 1.9276363841390603e-05, "loss": 0.3396, "num_tokens": 692576491.0, "step": 908 }, { "epoch": 1.2403422487277693, "grad_norm": 0.2026259469053315, "learning_rate": 1.9274409694487654e-05, "loss": 0.3529, "num_tokens": 693393118.0, "step": 909 }, { "epoch": 1.2417078296864499, "grad_norm": 0.19428982705829625, "learning_rate": 1.927245302342064e-05, "loss": 0.3341, "num_tokens": 694149888.0, "step": 910 }, { "epoch": 1.2430734106451302, "grad_norm": 0.22577262738699272, "learning_rate": 1.9270493828786457e-05, "loss": 0.3489, "num_tokens": 694908938.0, "step": 911 }, { "epoch": 1.2444389916038108, "grad_norm": 0.1982661779929654, "learning_rate": 1.9268532111182772e-05, "loss": 0.34, "num_tokens": 695717201.0, "step": 912 }, { "epoch": 1.2458045725624913, "grad_norm": 0.19791120584715394, "learning_rate": 1.9266567871208022e-05, "loss": 0.3331, "num_tokens": 696518422.0, "step": 913 }, { "epoch": 1.2471701535211719, "grad_norm": 0.21571988790206187, "learning_rate": 1.9264601109461412e-05, "loss": 0.3339, "num_tokens": 697253730.0, "step": 914 }, { "epoch": 1.2485357344798524, "grad_norm": 0.2126983603731703, "learning_rate": 1.9262631826542904e-05, "loss": 0.3313, "num_tokens": 698006536.0, "step": 915 }, { "epoch": 1.2499013154385328, "grad_norm": 0.21342653764012828, "learning_rate": 1.9260660023053256e-05, "loss": 0.3295, "num_tokens": 698712459.0, "step": 916 }, { "epoch": 1.2512668963972133, "grad_norm": 0.218368896243392, "learning_rate": 1.9258685699593972e-05, "loss": 0.3273, "num_tokens": 699421995.0, "step": 917 }, { "epoch": 1.252632477355894, "grad_norm": 0.22264291582529575, "learning_rate": 1.9256708856767338e-05, "loss": 0.3572, "num_tokens": 700254656.0, "step": 918 }, { "epoch": 1.2539980583145744, "grad_norm": 0.21611845504912808, "learning_rate": 1.9254729495176393e-05, "loss": 0.3241, "num_tokens": 700967683.0, "step": 919 }, { "epoch": 1.2553636392732548, "grad_norm": 0.18863129681747487, "learning_rate": 1.9252747615424966e-05, "loss": 0.3277, "num_tokens": 701725427.0, "step": 920 }, { "epoch": 1.2567292202319353, "grad_norm": 0.22201479546361255, "learning_rate": 1.9250763218117636e-05, "loss": 0.3341, "num_tokens": 702515753.0, "step": 921 }, { "epoch": 1.258094801190616, "grad_norm": 0.2161054952344156, "learning_rate": 1.9248776303859757e-05, "loss": 0.3445, "num_tokens": 703312570.0, "step": 922 }, { "epoch": 1.2594603821492965, "grad_norm": 0.18451489888415928, "learning_rate": 1.924678687325745e-05, "loss": 0.345, "num_tokens": 704075881.0, "step": 923 }, { "epoch": 1.260825963107977, "grad_norm": 0.237986494280731, "learning_rate": 1.924479492691761e-05, "loss": 0.3517, "num_tokens": 704881821.0, "step": 924 }, { "epoch": 1.2621915440666573, "grad_norm": 0.20009995110294146, "learning_rate": 1.924280046544789e-05, "loss": 0.3589, "num_tokens": 705636460.0, "step": 925 }, { "epoch": 1.263557125025338, "grad_norm": 0.21559373028265322, "learning_rate": 1.9240803489456713e-05, "loss": 0.3387, "num_tokens": 706282807.0, "step": 926 }, { "epoch": 1.2649227059840185, "grad_norm": 0.2460565066724444, "learning_rate": 1.923880399955327e-05, "loss": 0.3506, "num_tokens": 707084446.0, "step": 927 }, { "epoch": 1.266288286942699, "grad_norm": 0.22862353852431203, "learning_rate": 1.9236801996347513e-05, "loss": 0.334, "num_tokens": 707811973.0, "step": 928 }, { "epoch": 1.2676538679013793, "grad_norm": 0.22059963119861575, "learning_rate": 1.923479748045018e-05, "loss": 0.3487, "num_tokens": 708637101.0, "step": 929 }, { "epoch": 1.26901944886006, "grad_norm": 0.19517680300458531, "learning_rate": 1.923279045247275e-05, "loss": 0.3437, "num_tokens": 709439690.0, "step": 930 }, { "epoch": 1.2703850298187405, "grad_norm": 0.2479375954572112, "learning_rate": 1.9230780913027484e-05, "loss": 0.3368, "num_tokens": 710157194.0, "step": 931 }, { "epoch": 1.271750610777421, "grad_norm": 0.21771506885320027, "learning_rate": 1.922876886272741e-05, "loss": 0.3519, "num_tokens": 710876038.0, "step": 932 }, { "epoch": 1.2731161917361016, "grad_norm": 0.20079152511242526, "learning_rate": 1.9226754302186313e-05, "loss": 0.325, "num_tokens": 711629721.0, "step": 933 }, { "epoch": 1.274481772694782, "grad_norm": 0.24386219741255047, "learning_rate": 1.922473723201875e-05, "loss": 0.3414, "num_tokens": 712408387.0, "step": 934 }, { "epoch": 1.2758473536534625, "grad_norm": 0.24175223401582388, "learning_rate": 1.922271765284004e-05, "loss": 0.3529, "num_tokens": 713231205.0, "step": 935 }, { "epoch": 1.277212934612143, "grad_norm": 0.21176233406611686, "learning_rate": 1.9220695565266265e-05, "loss": 0.3357, "num_tokens": 714025276.0, "step": 936 }, { "epoch": 1.2785785155708236, "grad_norm": 0.19314126072160737, "learning_rate": 1.9218670969914284e-05, "loss": 0.3482, "num_tokens": 714757577.0, "step": 937 }, { "epoch": 1.279944096529504, "grad_norm": 0.22150036668126125, "learning_rate": 1.921664386740171e-05, "loss": 0.3379, "num_tokens": 715553807.0, "step": 938 }, { "epoch": 1.2813096774881845, "grad_norm": 0.2111040354418008, "learning_rate": 1.9214614258346928e-05, "loss": 0.3417, "num_tokens": 716320648.0, "step": 939 }, { "epoch": 1.282675258446865, "grad_norm": 0.2303164328764197, "learning_rate": 1.9212582143369078e-05, "loss": 0.3398, "num_tokens": 717066607.0, "step": 940 }, { "epoch": 1.2840408394055456, "grad_norm": 0.20879361256930948, "learning_rate": 1.9210547523088073e-05, "loss": 0.3603, "num_tokens": 717798341.0, "step": 941 }, { "epoch": 1.2854064203642261, "grad_norm": 0.23176279347249062, "learning_rate": 1.920851039812459e-05, "loss": 0.3432, "num_tokens": 718655059.0, "step": 942 }, { "epoch": 1.2867720013229065, "grad_norm": 0.20531457263298838, "learning_rate": 1.9206470769100063e-05, "loss": 0.3231, "num_tokens": 719398983.0, "step": 943 }, { "epoch": 1.288137582281587, "grad_norm": 0.20914640582929006, "learning_rate": 1.9204428636636694e-05, "loss": 0.349, "num_tokens": 720137181.0, "step": 944 }, { "epoch": 1.2895031632402676, "grad_norm": 0.21045564925200708, "learning_rate": 1.9202384001357452e-05, "loss": 0.3357, "num_tokens": 720887396.0, "step": 945 }, { "epoch": 1.2908687441989481, "grad_norm": 0.19777279937695008, "learning_rate": 1.920033686388607e-05, "loss": 0.3413, "num_tokens": 721650294.0, "step": 946 }, { "epoch": 1.2922343251576285, "grad_norm": 0.23003136386994227, "learning_rate": 1.9198287224847034e-05, "loss": 0.3347, "num_tokens": 722358014.0, "step": 947 }, { "epoch": 1.293599906116309, "grad_norm": 0.21724007875928364, "learning_rate": 1.9196235084865603e-05, "loss": 0.3644, "num_tokens": 723174884.0, "step": 948 }, { "epoch": 1.2949654870749896, "grad_norm": 0.22242812289006225, "learning_rate": 1.9194180444567796e-05, "loss": 0.3425, "num_tokens": 723980683.0, "step": 949 }, { "epoch": 1.2963310680336702, "grad_norm": 0.24235339752294088, "learning_rate": 1.9192123304580398e-05, "loss": 0.3631, "num_tokens": 724784950.0, "step": 950 }, { "epoch": 1.2976966489923507, "grad_norm": 0.2072722317309631, "learning_rate": 1.9190063665530947e-05, "loss": 0.3522, "num_tokens": 725552248.0, "step": 951 }, { "epoch": 1.299062229951031, "grad_norm": 0.2394038013464679, "learning_rate": 1.918800152804776e-05, "loss": 0.3562, "num_tokens": 726376218.0, "step": 952 }, { "epoch": 1.3004278109097116, "grad_norm": 0.22565292148704347, "learning_rate": 1.9185936892759893e-05, "loss": 0.3597, "num_tokens": 727105372.0, "step": 953 }, { "epoch": 1.3017933918683922, "grad_norm": 0.22320333296989056, "learning_rate": 1.918386976029718e-05, "loss": 0.3499, "num_tokens": 727818025.0, "step": 954 }, { "epoch": 1.3031589728270727, "grad_norm": 0.242177705754565, "learning_rate": 1.9181800131290222e-05, "loss": 0.3555, "num_tokens": 728555653.0, "step": 955 }, { "epoch": 1.304524553785753, "grad_norm": 0.20226389795968736, "learning_rate": 1.9179728006370367e-05, "loss": 0.3341, "num_tokens": 729381507.0, "step": 956 }, { "epoch": 1.3058901347444336, "grad_norm": 0.2153484183374385, "learning_rate": 1.917765338616973e-05, "loss": 0.3456, "num_tokens": 730115528.0, "step": 957 }, { "epoch": 1.3072557157031142, "grad_norm": 0.23322247288989956, "learning_rate": 1.9175576271321188e-05, "loss": 0.343, "num_tokens": 730878062.0, "step": 958 }, { "epoch": 1.3086212966617947, "grad_norm": 0.1991822899683223, "learning_rate": 1.917349666245838e-05, "loss": 0.3565, "num_tokens": 731664199.0, "step": 959 }, { "epoch": 1.3099868776204753, "grad_norm": 0.22924913081574538, "learning_rate": 1.9171414560215705e-05, "loss": 0.3272, "num_tokens": 732329125.0, "step": 960 }, { "epoch": 1.3113524585791556, "grad_norm": 0.20915980221914657, "learning_rate": 1.9169329965228323e-05, "loss": 0.3505, "num_tokens": 733128714.0, "step": 961 }, { "epoch": 1.3127180395378362, "grad_norm": 0.2223105490801981, "learning_rate": 1.916724287813215e-05, "loss": 0.3593, "num_tokens": 733937293.0, "step": 962 }, { "epoch": 1.3140836204965167, "grad_norm": 0.20308503885377469, "learning_rate": 1.916515329956387e-05, "loss": 0.3552, "num_tokens": 734734607.0, "step": 963 }, { "epoch": 1.3154492014551973, "grad_norm": 0.20820254378488895, "learning_rate": 1.9163061230160918e-05, "loss": 0.3377, "num_tokens": 735442588.0, "step": 964 }, { "epoch": 1.3168147824138776, "grad_norm": 0.21645143501370412, "learning_rate": 1.9160966670561502e-05, "loss": 0.3546, "num_tokens": 736237349.0, "step": 965 }, { "epoch": 1.3181803633725582, "grad_norm": 0.21580788857459463, "learning_rate": 1.9158869621404572e-05, "loss": 0.3551, "num_tokens": 736953126.0, "step": 966 }, { "epoch": 1.3195459443312387, "grad_norm": 0.20345921802707528, "learning_rate": 1.915677008332985e-05, "loss": 0.3447, "num_tokens": 737782878.0, "step": 967 }, { "epoch": 1.3209115252899193, "grad_norm": 0.21736050900413706, "learning_rate": 1.915466805697782e-05, "loss": 0.352, "num_tokens": 738571143.0, "step": 968 }, { "epoch": 1.3222771062485998, "grad_norm": 0.20380288687700202, "learning_rate": 1.915256354298971e-05, "loss": 0.3648, "num_tokens": 739373505.0, "step": 969 }, { "epoch": 1.3236426872072802, "grad_norm": 0.2187041565793758, "learning_rate": 1.915045654200752e-05, "loss": 0.3337, "num_tokens": 740070006.0, "step": 970 }, { "epoch": 1.3250082681659607, "grad_norm": 0.22980616847624716, "learning_rate": 1.914834705467401e-05, "loss": 0.3597, "num_tokens": 740862451.0, "step": 971 }, { "epoch": 1.3263738491246413, "grad_norm": 0.205813085401544, "learning_rate": 1.9146235081632687e-05, "loss": 0.3218, "num_tokens": 741583865.0, "step": 972 }, { "epoch": 1.3277394300833218, "grad_norm": 0.20012872057025288, "learning_rate": 1.9144120623527824e-05, "loss": 0.3373, "num_tokens": 742408521.0, "step": 973 }, { "epoch": 1.3291050110420022, "grad_norm": 0.21608061460844172, "learning_rate": 1.914200368100445e-05, "loss": 0.3481, "num_tokens": 743160985.0, "step": 974 }, { "epoch": 1.3304705920006827, "grad_norm": 0.2352023540578385, "learning_rate": 1.9139884254708353e-05, "loss": 0.3564, "num_tokens": 743904992.0, "step": 975 }, { "epoch": 1.3318361729593633, "grad_norm": 0.19265647665416, "learning_rate": 1.9137762345286077e-05, "loss": 0.3449, "num_tokens": 744663505.0, "step": 976 }, { "epoch": 1.3332017539180439, "grad_norm": 0.20377383426491888, "learning_rate": 1.913563795338493e-05, "loss": 0.3551, "num_tokens": 745491376.0, "step": 977 }, { "epoch": 1.3345673348767244, "grad_norm": 0.21588129828653063, "learning_rate": 1.9133511079652965e-05, "loss": 0.3298, "num_tokens": 746254832.0, "step": 978 }, { "epoch": 1.3359329158354047, "grad_norm": 0.21577923760467208, "learning_rate": 1.9131381724739e-05, "loss": 0.3469, "num_tokens": 747070818.0, "step": 979 }, { "epoch": 1.3372984967940853, "grad_norm": 0.20218301388184165, "learning_rate": 1.9129249889292614e-05, "loss": 0.3385, "num_tokens": 747768715.0, "step": 980 }, { "epoch": 1.3386640777527659, "grad_norm": 0.1903518976197912, "learning_rate": 1.912711557396413e-05, "loss": 0.3688, "num_tokens": 748595355.0, "step": 981 }, { "epoch": 1.3400296587114464, "grad_norm": 0.2088139640674936, "learning_rate": 1.9124978779404648e-05, "loss": 0.3405, "num_tokens": 749387982.0, "step": 982 }, { "epoch": 1.3413952396701267, "grad_norm": 0.21099392343115447, "learning_rate": 1.9122839506265998e-05, "loss": 0.3317, "num_tokens": 750111930.0, "step": 983 }, { "epoch": 1.3427608206288073, "grad_norm": 0.2200938314581359, "learning_rate": 1.9120697755200786e-05, "loss": 0.3436, "num_tokens": 750837799.0, "step": 984 }, { "epoch": 1.3441264015874879, "grad_norm": 0.1921585652015767, "learning_rate": 1.9118553526862365e-05, "loss": 0.3425, "num_tokens": 751591219.0, "step": 985 }, { "epoch": 1.3454919825461684, "grad_norm": 0.2108152644594403, "learning_rate": 1.911640682190485e-05, "loss": 0.3397, "num_tokens": 752313245.0, "step": 986 }, { "epoch": 1.346857563504849, "grad_norm": 0.1991539069981807, "learning_rate": 1.9114257640983106e-05, "loss": 0.3636, "num_tokens": 753064329.0, "step": 987 }, { "epoch": 1.3482231444635293, "grad_norm": 0.18906600873651339, "learning_rate": 1.9112105984752752e-05, "loss": 0.3561, "num_tokens": 753952656.0, "step": 988 }, { "epoch": 1.3495887254222099, "grad_norm": 0.19434232533149676, "learning_rate": 1.9109951853870168e-05, "loss": 0.3243, "num_tokens": 754612682.0, "step": 989 }, { "epoch": 1.3509543063808904, "grad_norm": 0.20198763197861377, "learning_rate": 1.9107795248992484e-05, "loss": 0.3485, "num_tokens": 755433330.0, "step": 990 }, { "epoch": 1.352319887339571, "grad_norm": 0.18145603909795255, "learning_rate": 1.910563617077759e-05, "loss": 0.3357, "num_tokens": 756189698.0, "step": 991 }, { "epoch": 1.3536854682982513, "grad_norm": 0.20287739259347587, "learning_rate": 1.9103474619884125e-05, "loss": 0.3333, "num_tokens": 756913299.0, "step": 992 }, { "epoch": 1.3550510492569319, "grad_norm": 0.21332552684792763, "learning_rate": 1.9101310596971488e-05, "loss": 0.3438, "num_tokens": 757647161.0, "step": 993 }, { "epoch": 1.3564166302156124, "grad_norm": 0.19123283867132806, "learning_rate": 1.9099144102699823e-05, "loss": 0.3431, "num_tokens": 758342215.0, "step": 994 }, { "epoch": 1.357782211174293, "grad_norm": 0.210306784581258, "learning_rate": 1.909697513773004e-05, "loss": 0.3438, "num_tokens": 759153206.0, "step": 995 }, { "epoch": 1.3591477921329735, "grad_norm": 0.2104569929114126, "learning_rate": 1.9094803702723785e-05, "loss": 0.3366, "num_tokens": 759885538.0, "step": 996 }, { "epoch": 1.3605133730916539, "grad_norm": 0.20557835447448367, "learning_rate": 1.9092629798343484e-05, "loss": 0.3439, "num_tokens": 760625195.0, "step": 997 }, { "epoch": 1.3618789540503344, "grad_norm": 0.21615516994462022, "learning_rate": 1.909045342525229e-05, "loss": 0.3548, "num_tokens": 761329022.0, "step": 998 }, { "epoch": 1.363244535009015, "grad_norm": 0.2108311085038829, "learning_rate": 1.9088274584114124e-05, "loss": 0.3402, "num_tokens": 762100819.0, "step": 999 }, { "epoch": 1.3646101159676955, "grad_norm": 0.20812136037000667, "learning_rate": 1.9086093275593656e-05, "loss": 0.3534, "num_tokens": 762939361.0, "step": 1000 }, { "epoch": 1.3659756969263759, "grad_norm": 0.19308185592020102, "learning_rate": 1.9083909500356307e-05, "loss": 0.354, "num_tokens": 763711552.0, "step": 1001 }, { "epoch": 1.3673412778850564, "grad_norm": 0.20355380923101798, "learning_rate": 1.9081723259068256e-05, "loss": 0.3248, "num_tokens": 764443102.0, "step": 1002 }, { "epoch": 1.368706858843737, "grad_norm": 0.19772939267999595, "learning_rate": 1.907953455239643e-05, "loss": 0.3506, "num_tokens": 765222188.0, "step": 1003 }, { "epoch": 1.3700724398024176, "grad_norm": 0.19291879892121955, "learning_rate": 1.9077343381008503e-05, "loss": 0.3478, "num_tokens": 766059302.0, "step": 1004 }, { "epoch": 1.371438020761098, "grad_norm": 0.20708226359133314, "learning_rate": 1.9075149745572912e-05, "loss": 0.3436, "num_tokens": 766772992.0, "step": 1005 }, { "epoch": 1.3728036017197784, "grad_norm": 0.20762474121703964, "learning_rate": 1.9072953646758837e-05, "loss": 0.3449, "num_tokens": 767544810.0, "step": 1006 }, { "epoch": 1.374169182678459, "grad_norm": 0.20778799814333088, "learning_rate": 1.9070755085236218e-05, "loss": 0.3533, "num_tokens": 768293750.0, "step": 1007 }, { "epoch": 1.3755347636371396, "grad_norm": 0.19904617759650783, "learning_rate": 1.9068554061675735e-05, "loss": 0.3471, "num_tokens": 769080589.0, "step": 1008 }, { "epoch": 1.3769003445958201, "grad_norm": 0.2067066225634614, "learning_rate": 1.9066350576748827e-05, "loss": 0.3417, "num_tokens": 769795210.0, "step": 1009 }, { "epoch": 1.3782659255545004, "grad_norm": 0.18363191098514786, "learning_rate": 1.9064144631127684e-05, "loss": 0.3418, "num_tokens": 770598994.0, "step": 1010 }, { "epoch": 1.379631506513181, "grad_norm": 0.21060996646622115, "learning_rate": 1.906193622548524e-05, "loss": 0.3523, "num_tokens": 771444771.0, "step": 1011 }, { "epoch": 1.3809970874718616, "grad_norm": 0.200401063192149, "learning_rate": 1.9059725360495188e-05, "loss": 0.3641, "num_tokens": 772224971.0, "step": 1012 }, { "epoch": 1.3823626684305421, "grad_norm": 0.20553157568060929, "learning_rate": 1.9057512036831962e-05, "loss": 0.345, "num_tokens": 773053413.0, "step": 1013 }, { "epoch": 1.3837282493892227, "grad_norm": 0.21239282639640547, "learning_rate": 1.905529625517076e-05, "loss": 0.3489, "num_tokens": 773815635.0, "step": 1014 }, { "epoch": 1.385093830347903, "grad_norm": 0.20468928264581807, "learning_rate": 1.9053078016187513e-05, "loss": 0.3415, "num_tokens": 774577434.0, "step": 1015 }, { "epoch": 1.3864594113065836, "grad_norm": 0.19637590038453787, "learning_rate": 1.905085732055891e-05, "loss": 0.3402, "num_tokens": 775300403.0, "step": 1016 }, { "epoch": 1.3878249922652641, "grad_norm": 0.20795524875098276, "learning_rate": 1.9048634168962394e-05, "loss": 0.3496, "num_tokens": 776072863.0, "step": 1017 }, { "epoch": 1.3891905732239447, "grad_norm": 0.18297877011054348, "learning_rate": 1.9046408562076152e-05, "loss": 0.332, "num_tokens": 776809411.0, "step": 1018 }, { "epoch": 1.390556154182625, "grad_norm": 0.20619295381677805, "learning_rate": 1.9044180500579116e-05, "loss": 0.3413, "num_tokens": 777602078.0, "step": 1019 }, { "epoch": 1.3919217351413056, "grad_norm": 0.4318734834665932, "learning_rate": 1.9041949985150972e-05, "loss": 0.3437, "num_tokens": 778411563.0, "step": 1020 }, { "epoch": 1.3932873160999861, "grad_norm": 0.19705087454282383, "learning_rate": 1.9039717016472158e-05, "loss": 0.3424, "num_tokens": 779214139.0, "step": 1021 }, { "epoch": 1.3946528970586667, "grad_norm": 0.18491408596375214, "learning_rate": 1.903748159522385e-05, "loss": 0.3417, "num_tokens": 779924509.0, "step": 1022 }, { "epoch": 1.3960184780173472, "grad_norm": 0.21652287688885463, "learning_rate": 1.9035243722087982e-05, "loss": 0.3441, "num_tokens": 780712634.0, "step": 1023 }, { "epoch": 1.3973840589760276, "grad_norm": 0.20746999614545147, "learning_rate": 1.9033003397747228e-05, "loss": 0.3437, "num_tokens": 781535885.0, "step": 1024 }, { "epoch": 1.3987496399347081, "grad_norm": 0.20234658547269846, "learning_rate": 1.9030760622885022e-05, "loss": 0.347, "num_tokens": 782325858.0, "step": 1025 }, { "epoch": 1.4001152208933887, "grad_norm": 0.2095417444728844, "learning_rate": 1.902851539818553e-05, "loss": 0.3676, "num_tokens": 783162812.0, "step": 1026 }, { "epoch": 1.4014808018520692, "grad_norm": 0.23199469962397398, "learning_rate": 1.9026267724333676e-05, "loss": 0.3359, "num_tokens": 783981985.0, "step": 1027 }, { "epoch": 1.4028463828107496, "grad_norm": 0.20818371341430414, "learning_rate": 1.9024017602015128e-05, "loss": 0.3412, "num_tokens": 784679182.0, "step": 1028 }, { "epoch": 1.4042119637694301, "grad_norm": 0.20278938365081214, "learning_rate": 1.90217650319163e-05, "loss": 0.3461, "num_tokens": 785526390.0, "step": 1029 }, { "epoch": 1.4055775447281107, "grad_norm": 0.20608082457805832, "learning_rate": 1.901951001472435e-05, "loss": 0.3339, "num_tokens": 786326316.0, "step": 1030 }, { "epoch": 1.4069431256867913, "grad_norm": 0.1951729453129531, "learning_rate": 1.9017252551127195e-05, "loss": 0.3512, "num_tokens": 787084834.0, "step": 1031 }, { "epoch": 1.4083087066454718, "grad_norm": 0.21828481355306958, "learning_rate": 1.9014992641813484e-05, "loss": 0.3513, "num_tokens": 787885803.0, "step": 1032 }, { "epoch": 1.4096742876041521, "grad_norm": 0.19173953789797196, "learning_rate": 1.901273028747262e-05, "loss": 0.3365, "num_tokens": 788689073.0, "step": 1033 }, { "epoch": 1.4110398685628327, "grad_norm": 0.19112014861705404, "learning_rate": 1.901046548879474e-05, "loss": 0.3385, "num_tokens": 789398548.0, "step": 1034 }, { "epoch": 1.4124054495215133, "grad_norm": 0.26585117217622123, "learning_rate": 1.9008198246470748e-05, "loss": 0.3572, "num_tokens": 790140215.0, "step": 1035 }, { "epoch": 1.4137710304801938, "grad_norm": 0.22627088805654563, "learning_rate": 1.9005928561192274e-05, "loss": 0.3487, "num_tokens": 790841488.0, "step": 1036 }, { "epoch": 1.4151366114388741, "grad_norm": 0.23083922239152327, "learning_rate": 1.900365643365171e-05, "loss": 0.3398, "num_tokens": 791541324.0, "step": 1037 }, { "epoch": 1.4165021923975547, "grad_norm": 0.22632226794019863, "learning_rate": 1.9001381864542167e-05, "loss": 0.3459, "num_tokens": 792334607.0, "step": 1038 }, { "epoch": 1.4178677733562353, "grad_norm": 0.20692448370949737, "learning_rate": 1.8999104854557534e-05, "loss": 0.3537, "num_tokens": 793048243.0, "step": 1039 }, { "epoch": 1.4192333543149158, "grad_norm": 0.2005373224082317, "learning_rate": 1.899682540439242e-05, "loss": 0.3445, "num_tokens": 793734926.0, "step": 1040 }, { "epoch": 1.4205989352735964, "grad_norm": 0.22857768108250684, "learning_rate": 1.8994543514742186e-05, "loss": 0.363, "num_tokens": 794421277.0, "step": 1041 }, { "epoch": 1.4219645162322767, "grad_norm": 0.18133469861516482, "learning_rate": 1.8992259186302944e-05, "loss": 0.3398, "num_tokens": 795229793.0, "step": 1042 }, { "epoch": 1.4233300971909573, "grad_norm": 0.2041516211927476, "learning_rate": 1.8989972419771537e-05, "loss": 0.3482, "num_tokens": 795997880.0, "step": 1043 }, { "epoch": 1.4246956781496378, "grad_norm": 0.1820872206051627, "learning_rate": 1.8987683215845562e-05, "loss": 0.3359, "num_tokens": 796782476.0, "step": 1044 }, { "epoch": 1.4260612591083184, "grad_norm": 0.19818268218630272, "learning_rate": 1.898539157522336e-05, "loss": 0.338, "num_tokens": 797568445.0, "step": 1045 }, { "epoch": 1.4274268400669987, "grad_norm": 0.18863219153517585, "learning_rate": 1.8983097498603996e-05, "loss": 0.3268, "num_tokens": 798369710.0, "step": 1046 }, { "epoch": 1.4287924210256793, "grad_norm": 0.19205616586937002, "learning_rate": 1.898080098668731e-05, "loss": 0.3424, "num_tokens": 799170218.0, "step": 1047 }, { "epoch": 1.4301580019843598, "grad_norm": 0.1998242361005097, "learning_rate": 1.897850204017386e-05, "loss": 0.3414, "num_tokens": 799962100.0, "step": 1048 }, { "epoch": 1.4315235829430404, "grad_norm": 0.22636009043250874, "learning_rate": 1.897620065976496e-05, "loss": 0.3512, "num_tokens": 800736291.0, "step": 1049 }, { "epoch": 1.432889163901721, "grad_norm": 0.19773457209180703, "learning_rate": 1.8973896846162655e-05, "loss": 0.3237, "num_tokens": 801479180.0, "step": 1050 }, { "epoch": 1.4342547448604013, "grad_norm": 0.21245997240300052, "learning_rate": 1.8971590600069743e-05, "loss": 0.3399, "num_tokens": 802317711.0, "step": 1051 }, { "epoch": 1.4356203258190818, "grad_norm": 0.18548143963073324, "learning_rate": 1.8969281922189763e-05, "loss": 0.3331, "num_tokens": 803208138.0, "step": 1052 }, { "epoch": 1.4369859067777624, "grad_norm": 0.24584593132830695, "learning_rate": 1.8966970813226983e-05, "loss": 0.35, "num_tokens": 803986681.0, "step": 1053 }, { "epoch": 1.438351487736443, "grad_norm": 0.25225116012958304, "learning_rate": 1.896465727388643e-05, "loss": 0.3638, "num_tokens": 804736757.0, "step": 1054 }, { "epoch": 1.4397170686951233, "grad_norm": 0.21938336427996333, "learning_rate": 1.8962341304873865e-05, "loss": 0.3373, "num_tokens": 805457947.0, "step": 1055 }, { "epoch": 1.4410826496538038, "grad_norm": 0.23853847380707593, "learning_rate": 1.8960022906895786e-05, "loss": 0.3424, "num_tokens": 806170211.0, "step": 1056 }, { "epoch": 1.4424482306124844, "grad_norm": 0.22913980031888978, "learning_rate": 1.8957702080659436e-05, "loss": 0.3597, "num_tokens": 806919908.0, "step": 1057 }, { "epoch": 1.443813811571165, "grad_norm": 0.21253234599811305, "learning_rate": 1.8955378826872805e-05, "loss": 0.3536, "num_tokens": 807742448.0, "step": 1058 }, { "epoch": 1.4451793925298455, "grad_norm": 0.22252222173831698, "learning_rate": 1.8953053146244607e-05, "loss": 0.3266, "num_tokens": 808524618.0, "step": 1059 }, { "epoch": 1.4465449734885258, "grad_norm": 0.21971631808056, "learning_rate": 1.8950725039484315e-05, "loss": 0.3448, "num_tokens": 809284663.0, "step": 1060 }, { "epoch": 1.4479105544472064, "grad_norm": 0.20295447511599457, "learning_rate": 1.8948394507302127e-05, "loss": 0.3526, "num_tokens": 810034681.0, "step": 1061 }, { "epoch": 1.449276135405887, "grad_norm": 0.20275623953096, "learning_rate": 1.894606155040899e-05, "loss": 0.3361, "num_tokens": 810838475.0, "step": 1062 }, { "epoch": 1.4506417163645675, "grad_norm": 0.18406433706742742, "learning_rate": 1.8943726169516593e-05, "loss": 0.3339, "num_tokens": 811591952.0, "step": 1063 }, { "epoch": 1.4520072973232478, "grad_norm": 0.21222076328260056, "learning_rate": 1.8941388365337353e-05, "loss": 0.3458, "num_tokens": 812338606.0, "step": 1064 }, { "epoch": 1.4533728782819284, "grad_norm": 0.19844414388804144, "learning_rate": 1.8939048138584433e-05, "loss": 0.3479, "num_tokens": 813084600.0, "step": 1065 }, { "epoch": 1.454738459240609, "grad_norm": 0.23105250796925433, "learning_rate": 1.893670548997174e-05, "loss": 0.3429, "num_tokens": 813846474.0, "step": 1066 }, { "epoch": 1.4561040401992895, "grad_norm": 0.1746600741739495, "learning_rate": 1.893436042021391e-05, "loss": 0.3333, "num_tokens": 814593720.0, "step": 1067 }, { "epoch": 1.45746962115797, "grad_norm": 0.2272698091045512, "learning_rate": 1.893201293002632e-05, "loss": 0.357, "num_tokens": 815413507.0, "step": 1068 }, { "epoch": 1.4588352021166504, "grad_norm": 0.17944151383076679, "learning_rate": 1.8929663020125098e-05, "loss": 0.3456, "num_tokens": 816145472.0, "step": 1069 }, { "epoch": 1.460200783075331, "grad_norm": 0.2252011436909475, "learning_rate": 1.8927310691227086e-05, "loss": 0.3416, "num_tokens": 816887616.0, "step": 1070 }, { "epoch": 1.4615663640340115, "grad_norm": 0.20737394287463223, "learning_rate": 1.8924955944049895e-05, "loss": 0.3521, "num_tokens": 817636306.0, "step": 1071 }, { "epoch": 1.462931944992692, "grad_norm": 0.19180997158059387, "learning_rate": 1.8922598779311835e-05, "loss": 0.3249, "num_tokens": 818354838.0, "step": 1072 }, { "epoch": 1.4642975259513724, "grad_norm": 0.20760733410420126, "learning_rate": 1.8920239197731994e-05, "loss": 0.3272, "num_tokens": 819108556.0, "step": 1073 }, { "epoch": 1.465663106910053, "grad_norm": 0.20399193667365068, "learning_rate": 1.8917877200030165e-05, "loss": 0.3393, "num_tokens": 819931541.0, "step": 1074 }, { "epoch": 1.4670286878687335, "grad_norm": 0.20370843557232557, "learning_rate": 1.8915512786926898e-05, "loss": 0.3688, "num_tokens": 820754766.0, "step": 1075 }, { "epoch": 1.468394268827414, "grad_norm": 0.22237871067208817, "learning_rate": 1.8913145959143474e-05, "loss": 0.3393, "num_tokens": 821496785.0, "step": 1076 }, { "epoch": 1.4697598497860946, "grad_norm": 0.20609229354037506, "learning_rate": 1.8910776717401903e-05, "loss": 0.3624, "num_tokens": 822266906.0, "step": 1077 }, { "epoch": 1.471125430744775, "grad_norm": 0.20138522886539814, "learning_rate": 1.890840506242495e-05, "loss": 0.3269, "num_tokens": 822963253.0, "step": 1078 }, { "epoch": 1.4724910117034555, "grad_norm": 0.22859993003978582, "learning_rate": 1.8906030994936088e-05, "loss": 0.3387, "num_tokens": 823680894.0, "step": 1079 }, { "epoch": 1.473856592662136, "grad_norm": 0.18945100145312388, "learning_rate": 1.890365451565955e-05, "loss": 0.3399, "num_tokens": 824416461.0, "step": 1080 }, { "epoch": 1.4752221736208166, "grad_norm": 0.21215026118066474, "learning_rate": 1.8901275625320304e-05, "loss": 0.3433, "num_tokens": 825187052.0, "step": 1081 }, { "epoch": 1.476587754579497, "grad_norm": 0.20247565763683667, "learning_rate": 1.8898894324644035e-05, "loss": 0.3568, "num_tokens": 825948377.0, "step": 1082 }, { "epoch": 1.4779533355381775, "grad_norm": 0.20125433042539995, "learning_rate": 1.8896510614357183e-05, "loss": 0.3303, "num_tokens": 826675665.0, "step": 1083 }, { "epoch": 1.479318916496858, "grad_norm": 0.21527018032206333, "learning_rate": 1.8894124495186908e-05, "loss": 0.3406, "num_tokens": 827442330.0, "step": 1084 }, { "epoch": 1.4806844974555387, "grad_norm": 0.1924507722243901, "learning_rate": 1.8891735967861116e-05, "loss": 0.3468, "num_tokens": 828225665.0, "step": 1085 }, { "epoch": 1.4820500784142192, "grad_norm": 0.18722681766944713, "learning_rate": 1.8889345033108443e-05, "loss": 0.3646, "num_tokens": 829039240.0, "step": 1086 }, { "epoch": 1.4834156593728995, "grad_norm": 0.7483727239434015, "learning_rate": 1.8886951691658258e-05, "loss": 0.3586, "num_tokens": 829803133.0, "step": 1087 }, { "epoch": 1.48478124033158, "grad_norm": 0.24493737838005014, "learning_rate": 1.8884555944240666e-05, "loss": 0.3334, "num_tokens": 830477988.0, "step": 1088 }, { "epoch": 1.4861468212902607, "grad_norm": 0.20772852706927325, "learning_rate": 1.888215779158651e-05, "loss": 0.3259, "num_tokens": 831142418.0, "step": 1089 }, { "epoch": 1.4875124022489412, "grad_norm": 0.22001610076871084, "learning_rate": 1.8879757234427357e-05, "loss": 0.3506, "num_tokens": 831952350.0, "step": 1090 }, { "epoch": 1.4888779832076215, "grad_norm": 0.20751587118764553, "learning_rate": 1.8877354273495515e-05, "loss": 0.3511, "num_tokens": 832718894.0, "step": 1091 }, { "epoch": 1.490243564166302, "grad_norm": 0.21955051975737266, "learning_rate": 1.8874948909524022e-05, "loss": 0.3425, "num_tokens": 833518145.0, "step": 1092 }, { "epoch": 1.4916091451249827, "grad_norm": 0.20125206273827248, "learning_rate": 1.887254114324665e-05, "loss": 0.3382, "num_tokens": 834282263.0, "step": 1093 }, { "epoch": 1.4929747260836632, "grad_norm": 0.24605774509975437, "learning_rate": 1.887013097539791e-05, "loss": 0.3557, "num_tokens": 835071186.0, "step": 1094 }, { "epoch": 1.4943403070423438, "grad_norm": 0.21094922352229073, "learning_rate": 1.8867718406713036e-05, "loss": 0.3697, "num_tokens": 835844139.0, "step": 1095 }, { "epoch": 1.495705888001024, "grad_norm": 0.2196305845390831, "learning_rate": 1.8865303437927996e-05, "loss": 0.3444, "num_tokens": 836667359.0, "step": 1096 }, { "epoch": 1.4970714689597047, "grad_norm": 0.19318856311555563, "learning_rate": 1.88628860697795e-05, "loss": 0.3484, "num_tokens": 837465077.0, "step": 1097 }, { "epoch": 1.4984370499183852, "grad_norm": 0.22536676182141221, "learning_rate": 1.8860466303004967e-05, "loss": 0.3413, "num_tokens": 838278586.0, "step": 1098 }, { "epoch": 1.4998026308770658, "grad_norm": 0.19910609890134676, "learning_rate": 1.885804413834258e-05, "loss": 0.346, "num_tokens": 839039986.0, "step": 1099 }, { "epoch": 1.5011682118357461, "grad_norm": 0.2139337796925142, "learning_rate": 1.8855619576531225e-05, "loss": 0.3523, "num_tokens": 839892474.0, "step": 1100 }, { "epoch": 1.5025337927944267, "grad_norm": 0.18923142678416963, "learning_rate": 1.8853192618310537e-05, "loss": 0.3508, "num_tokens": 840661420.0, "step": 1101 }, { "epoch": 1.5038993737531072, "grad_norm": 0.23337713830177587, "learning_rate": 1.885076326442088e-05, "loss": 0.3359, "num_tokens": 841366700.0, "step": 1102 }, { "epoch": 1.5052649547117878, "grad_norm": 0.22920671166483242, "learning_rate": 1.8848331515603326e-05, "loss": 0.3474, "num_tokens": 842157945.0, "step": 1103 }, { "epoch": 1.5066305356704683, "grad_norm": 0.18249487649221832, "learning_rate": 1.8845897372599718e-05, "loss": 0.3372, "num_tokens": 842903525.0, "step": 1104 }, { "epoch": 1.507996116629149, "grad_norm": 0.21917797777206327, "learning_rate": 1.8843460836152596e-05, "loss": 0.3383, "num_tokens": 843642274.0, "step": 1105 }, { "epoch": 1.5093616975878292, "grad_norm": 0.21165685698432415, "learning_rate": 1.8841021907005244e-05, "loss": 0.3447, "num_tokens": 844456405.0, "step": 1106 }, { "epoch": 1.5107272785465098, "grad_norm": 0.18257209608361372, "learning_rate": 1.8838580585901673e-05, "loss": 0.3531, "num_tokens": 845222915.0, "step": 1107 }, { "epoch": 1.5120928595051901, "grad_norm": 0.20182479734907893, "learning_rate": 1.8836136873586626e-05, "loss": 0.3335, "num_tokens": 846018937.0, "step": 1108 }, { "epoch": 1.5134584404638707, "grad_norm": 0.2029057512845665, "learning_rate": 1.8833690770805577e-05, "loss": 0.3553, "num_tokens": 846726028.0, "step": 1109 }, { "epoch": 1.5148240214225512, "grad_norm": 0.1944718838078091, "learning_rate": 1.883124227830472e-05, "loss": 0.3481, "num_tokens": 847427544.0, "step": 1110 }, { "epoch": 1.5161896023812318, "grad_norm": 0.21190164218005594, "learning_rate": 1.8828791396830986e-05, "loss": 0.3376, "num_tokens": 848120260.0, "step": 1111 }, { "epoch": 1.5175551833399124, "grad_norm": 0.2178713649665666, "learning_rate": 1.882633812713203e-05, "loss": 0.3429, "num_tokens": 848934638.0, "step": 1112 }, { "epoch": 1.518920764298593, "grad_norm": 0.20334855244097014, "learning_rate": 1.8823882469956246e-05, "loss": 0.3467, "num_tokens": 849700977.0, "step": 1113 }, { "epoch": 1.5202863452572735, "grad_norm": 0.1994604802778267, "learning_rate": 1.8821424426052742e-05, "loss": 0.3426, "num_tokens": 850492645.0, "step": 1114 }, { "epoch": 1.5216519262159538, "grad_norm": 0.19556196442663648, "learning_rate": 1.881896399617136e-05, "loss": 0.3519, "num_tokens": 851235954.0, "step": 1115 }, { "epoch": 1.5230175071746344, "grad_norm": 0.1928520166616452, "learning_rate": 1.8816501181062675e-05, "loss": 0.3491, "num_tokens": 852020802.0, "step": 1116 }, { "epoch": 1.5243830881333147, "grad_norm": 0.20890518102693073, "learning_rate": 1.8814035981477986e-05, "loss": 0.3475, "num_tokens": 852794546.0, "step": 1117 }, { "epoch": 1.5257486690919952, "grad_norm": 0.20233755448265625, "learning_rate": 1.8811568398169315e-05, "loss": 0.3462, "num_tokens": 853507662.0, "step": 1118 }, { "epoch": 1.5271142500506758, "grad_norm": 0.22472413730464402, "learning_rate": 1.880909843188941e-05, "loss": 0.351, "num_tokens": 854281205.0, "step": 1119 }, { "epoch": 1.5284798310093564, "grad_norm": 0.19290522461879728, "learning_rate": 1.8806626083391764e-05, "loss": 0.3316, "num_tokens": 854976425.0, "step": 1120 }, { "epoch": 1.529845411968037, "grad_norm": 0.20610572963560608, "learning_rate": 1.8804151353430577e-05, "loss": 0.3485, "num_tokens": 855758022.0, "step": 1121 }, { "epoch": 1.5312109929267175, "grad_norm": 0.19095717657589478, "learning_rate": 1.8801674242760773e-05, "loss": 0.3547, "num_tokens": 856591959.0, "step": 1122 }, { "epoch": 1.532576573885398, "grad_norm": 0.20434419682633065, "learning_rate": 1.8799194752138028e-05, "loss": 0.3486, "num_tokens": 857389990.0, "step": 1123 }, { "epoch": 1.5339421548440784, "grad_norm": 0.19653369565495807, "learning_rate": 1.8796712882318716e-05, "loss": 0.3311, "num_tokens": 858127252.0, "step": 1124 }, { "epoch": 1.535307735802759, "grad_norm": 0.2053778040183756, "learning_rate": 1.879422863405995e-05, "loss": 0.363, "num_tokens": 858900327.0, "step": 1125 }, { "epoch": 1.5366733167614393, "grad_norm": 0.20878497674557545, "learning_rate": 1.8791742008119566e-05, "loss": 0.3368, "num_tokens": 859666194.0, "step": 1126 }, { "epoch": 1.5380388977201198, "grad_norm": 0.19527098390817324, "learning_rate": 1.878925300525613e-05, "loss": 0.3621, "num_tokens": 860532261.0, "step": 1127 }, { "epoch": 1.5394044786788004, "grad_norm": 0.1962551433692768, "learning_rate": 1.878676162622892e-05, "loss": 0.336, "num_tokens": 861225131.0, "step": 1128 }, { "epoch": 1.540770059637481, "grad_norm": 0.20587609452158792, "learning_rate": 1.878426787179796e-05, "loss": 0.3386, "num_tokens": 861989442.0, "step": 1129 }, { "epoch": 1.5421356405961615, "grad_norm": 0.21105967587191177, "learning_rate": 1.8781771742723978e-05, "loss": 0.3561, "num_tokens": 862774724.0, "step": 1130 }, { "epoch": 1.543501221554842, "grad_norm": 0.19049136855894255, "learning_rate": 1.8779273239768435e-05, "loss": 0.3347, "num_tokens": 863576784.0, "step": 1131 }, { "epoch": 1.5448668025135226, "grad_norm": 0.1886926352982969, "learning_rate": 1.8776772363693524e-05, "loss": 0.3458, "num_tokens": 864374330.0, "step": 1132 }, { "epoch": 1.546232383472203, "grad_norm": 0.1998432914446766, "learning_rate": 1.8774269115262142e-05, "loss": 0.3523, "num_tokens": 865134067.0, "step": 1133 }, { "epoch": 1.5475979644308835, "grad_norm": 0.19117886297509182, "learning_rate": 1.8771763495237932e-05, "loss": 0.3419, "num_tokens": 865912923.0, "step": 1134 }, { "epoch": 1.5489635453895638, "grad_norm": 0.2204979353558171, "learning_rate": 1.8769255504385244e-05, "loss": 0.3534, "num_tokens": 866644649.0, "step": 1135 }, { "epoch": 1.5503291263482444, "grad_norm": 0.18700041818170185, "learning_rate": 1.8766745143469157e-05, "loss": 0.3677, "num_tokens": 867427608.0, "step": 1136 }, { "epoch": 1.551694707306925, "grad_norm": 0.1966972070122868, "learning_rate": 1.8764232413255483e-05, "loss": 0.3417, "num_tokens": 868137649.0, "step": 1137 }, { "epoch": 1.5530602882656055, "grad_norm": 0.200052173154243, "learning_rate": 1.8761717314510733e-05, "loss": 0.3666, "num_tokens": 868969239.0, "step": 1138 }, { "epoch": 1.554425869224286, "grad_norm": 0.19201935034043988, "learning_rate": 1.8759199848002162e-05, "loss": 0.3445, "num_tokens": 869732977.0, "step": 1139 }, { "epoch": 1.5557914501829666, "grad_norm": 0.18934592209977974, "learning_rate": 1.8756680014497746e-05, "loss": 0.3536, "num_tokens": 870544496.0, "step": 1140 }, { "epoch": 1.5571570311416472, "grad_norm": 0.17151486465293897, "learning_rate": 1.875415781476617e-05, "loss": 0.3348, "num_tokens": 871386951.0, "step": 1141 }, { "epoch": 1.5585226121003275, "grad_norm": 0.18729295378207766, "learning_rate": 1.875163324957684e-05, "loss": 0.3576, "num_tokens": 872218398.0, "step": 1142 }, { "epoch": 1.559888193059008, "grad_norm": 0.1896623817171892, "learning_rate": 1.8749106319699907e-05, "loss": 0.342, "num_tokens": 872948013.0, "step": 1143 }, { "epoch": 1.5612537740176884, "grad_norm": 0.18068651369781916, "learning_rate": 1.874657702590622e-05, "loss": 0.3334, "num_tokens": 873685991.0, "step": 1144 }, { "epoch": 1.562619354976369, "grad_norm": 0.1923847741040314, "learning_rate": 1.8744045368967357e-05, "loss": 0.3599, "num_tokens": 874395948.0, "step": 1145 }, { "epoch": 1.5639849359350495, "grad_norm": 0.2074784431283741, "learning_rate": 1.874151134965562e-05, "loss": 0.3605, "num_tokens": 875178449.0, "step": 1146 }, { "epoch": 1.56535051689373, "grad_norm": 0.19223932976945451, "learning_rate": 1.8738974968744026e-05, "loss": 0.3493, "num_tokens": 875987020.0, "step": 1147 }, { "epoch": 1.5667160978524106, "grad_norm": 0.20618517115820317, "learning_rate": 1.8736436227006318e-05, "loss": 0.3395, "num_tokens": 876714590.0, "step": 1148 }, { "epoch": 1.5680816788110912, "grad_norm": 0.18571391805661056, "learning_rate": 1.8733895125216952e-05, "loss": 0.3334, "num_tokens": 877502963.0, "step": 1149 }, { "epoch": 1.5694472597697717, "grad_norm": 0.19203583444164035, "learning_rate": 1.8731351664151108e-05, "loss": 0.3502, "num_tokens": 878295248.0, "step": 1150 }, { "epoch": 1.570812840728452, "grad_norm": 0.1962647150714445, "learning_rate": 1.8728805844584686e-05, "loss": 0.3364, "num_tokens": 878948915.0, "step": 1151 }, { "epoch": 1.5721784216871326, "grad_norm": 0.19577049825559556, "learning_rate": 1.8726257667294314e-05, "loss": 0.3569, "num_tokens": 879733990.0, "step": 1152 }, { "epoch": 1.573544002645813, "grad_norm": 0.1853618764564945, "learning_rate": 1.872370713305732e-05, "loss": 0.3534, "num_tokens": 880509444.0, "step": 1153 }, { "epoch": 1.5749095836044935, "grad_norm": 0.19611455524324298, "learning_rate": 1.872115424265176e-05, "loss": 0.343, "num_tokens": 881299960.0, "step": 1154 }, { "epoch": 1.576275164563174, "grad_norm": 0.18776152638683133, "learning_rate": 1.871859899685642e-05, "loss": 0.352, "num_tokens": 882061339.0, "step": 1155 }, { "epoch": 1.5776407455218546, "grad_norm": 0.20845681625621995, "learning_rate": 1.871604139645079e-05, "loss": 0.3644, "num_tokens": 882765973.0, "step": 1156 }, { "epoch": 1.5790063264805352, "grad_norm": 0.2269135163191663, "learning_rate": 1.8713481442215086e-05, "loss": 0.3638, "num_tokens": 883570192.0, "step": 1157 }, { "epoch": 1.5803719074392157, "grad_norm": 0.20735092918046732, "learning_rate": 1.871091913493023e-05, "loss": 0.3476, "num_tokens": 884336114.0, "step": 1158 }, { "epoch": 1.5817374883978963, "grad_norm": 0.22174595292888044, "learning_rate": 1.870835447537788e-05, "loss": 0.3656, "num_tokens": 885059560.0, "step": 1159 }, { "epoch": 1.5831030693565766, "grad_norm": 0.21263915398200806, "learning_rate": 1.8705787464340403e-05, "loss": 0.3529, "num_tokens": 885846501.0, "step": 1160 }, { "epoch": 1.5844686503152572, "grad_norm": 0.1917925369663659, "learning_rate": 1.8703218102600877e-05, "loss": 0.3301, "num_tokens": 886561332.0, "step": 1161 }, { "epoch": 1.5858342312739375, "grad_norm": 0.219807446828307, "learning_rate": 1.8700646390943108e-05, "loss": 0.3379, "num_tokens": 887321622.0, "step": 1162 }, { "epoch": 1.587199812232618, "grad_norm": 0.19917040847015707, "learning_rate": 1.869807233015161e-05, "loss": 0.3392, "num_tokens": 888137265.0, "step": 1163 }, { "epoch": 1.5885653931912986, "grad_norm": 0.19655964204785956, "learning_rate": 1.869549592101162e-05, "loss": 0.3393, "num_tokens": 888917315.0, "step": 1164 }, { "epoch": 1.5899309741499792, "grad_norm": 0.19800604857768825, "learning_rate": 1.8692917164309086e-05, "loss": 0.3437, "num_tokens": 889663794.0, "step": 1165 }, { "epoch": 1.5912965551086597, "grad_norm": 0.23491109315563793, "learning_rate": 1.8690336060830682e-05, "loss": 0.3451, "num_tokens": 890416078.0, "step": 1166 }, { "epoch": 1.5926621360673403, "grad_norm": 0.18949255260315612, "learning_rate": 1.868775261136378e-05, "loss": 0.345, "num_tokens": 891292450.0, "step": 1167 }, { "epoch": 1.5940277170260209, "grad_norm": 0.218979769704418, "learning_rate": 1.8685166816696488e-05, "loss": 0.3409, "num_tokens": 892049989.0, "step": 1168 }, { "epoch": 1.5953932979847012, "grad_norm": 0.19301382124159722, "learning_rate": 1.8682578677617616e-05, "loss": 0.333, "num_tokens": 892786184.0, "step": 1169 }, { "epoch": 1.5967588789433818, "grad_norm": 0.22371489773819192, "learning_rate": 1.8679988194916692e-05, "loss": 0.35, "num_tokens": 893542058.0, "step": 1170 }, { "epoch": 1.598124459902062, "grad_norm": 0.20017402667137996, "learning_rate": 1.8677395369383965e-05, "loss": 0.3373, "num_tokens": 894276344.0, "step": 1171 }, { "epoch": 1.5994900408607426, "grad_norm": 0.20623063190784222, "learning_rate": 1.867480020181039e-05, "loss": 0.3434, "num_tokens": 895049576.0, "step": 1172 }, { "epoch": 1.6008556218194232, "grad_norm": 0.19802655498706712, "learning_rate": 1.867220269298764e-05, "loss": 0.347, "num_tokens": 895841118.0, "step": 1173 }, { "epoch": 1.6022212027781038, "grad_norm": 0.23050160751605783, "learning_rate": 1.86696028437081e-05, "loss": 0.3484, "num_tokens": 896636430.0, "step": 1174 }, { "epoch": 1.6035867837367843, "grad_norm": 0.19769972154016946, "learning_rate": 1.8667000654764877e-05, "loss": 0.3414, "num_tokens": 897414539.0, "step": 1175 }, { "epoch": 1.6049523646954649, "grad_norm": 0.2094126137513376, "learning_rate": 1.8664396126951786e-05, "loss": 0.3391, "num_tokens": 898158005.0, "step": 1176 }, { "epoch": 1.6063179456541454, "grad_norm": 0.19675319801751903, "learning_rate": 1.8661789261063352e-05, "loss": 0.3446, "num_tokens": 898883706.0, "step": 1177 }, { "epoch": 1.6076835266128258, "grad_norm": 0.20624693602336422, "learning_rate": 1.8659180057894818e-05, "loss": 0.3547, "num_tokens": 899662132.0, "step": 1178 }, { "epoch": 1.6090491075715063, "grad_norm": 0.19723105488806744, "learning_rate": 1.8656568518242136e-05, "loss": 0.338, "num_tokens": 900428985.0, "step": 1179 }, { "epoch": 1.6104146885301867, "grad_norm": 0.19576464912065347, "learning_rate": 1.8653954642901983e-05, "loss": 0.3504, "num_tokens": 901132358.0, "step": 1180 }, { "epoch": 1.6117802694888672, "grad_norm": 0.21681173104870213, "learning_rate": 1.865133843267173e-05, "loss": 0.3478, "num_tokens": 901834067.0, "step": 1181 }, { "epoch": 1.6131458504475478, "grad_norm": 0.19698628618101263, "learning_rate": 1.8648719888349475e-05, "loss": 0.3442, "num_tokens": 902657804.0, "step": 1182 }, { "epoch": 1.6145114314062283, "grad_norm": 0.19499666438737037, "learning_rate": 1.8646099010734016e-05, "loss": 0.3415, "num_tokens": 903405433.0, "step": 1183 }, { "epoch": 1.6158770123649089, "grad_norm": 0.18206617724791047, "learning_rate": 1.8643475800624873e-05, "loss": 0.3631, "num_tokens": 904184629.0, "step": 1184 }, { "epoch": 1.6172425933235894, "grad_norm": 0.21362017683058335, "learning_rate": 1.8640850258822277e-05, "loss": 0.3344, "num_tokens": 904955451.0, "step": 1185 }, { "epoch": 1.61860817428227, "grad_norm": 0.20277690628614012, "learning_rate": 1.8638222386127163e-05, "loss": 0.3597, "num_tokens": 905696793.0, "step": 1186 }, { "epoch": 1.6199737552409503, "grad_norm": 0.19441641525766237, "learning_rate": 1.8635592183341182e-05, "loss": 0.3296, "num_tokens": 906438038.0, "step": 1187 }, { "epoch": 1.6213393361996309, "grad_norm": 0.19556927058959284, "learning_rate": 1.8632959651266696e-05, "loss": 0.3518, "num_tokens": 907270607.0, "step": 1188 }, { "epoch": 1.6227049171583112, "grad_norm": 0.2003546239873265, "learning_rate": 1.8630324790706777e-05, "loss": 0.3504, "num_tokens": 908090745.0, "step": 1189 }, { "epoch": 1.6240704981169918, "grad_norm": 0.18581905697668577, "learning_rate": 1.86276876024652e-05, "loss": 0.3448, "num_tokens": 908865997.0, "step": 1190 }, { "epoch": 1.6254360790756723, "grad_norm": 0.20054550306610225, "learning_rate": 1.8625048087346468e-05, "loss": 0.3525, "num_tokens": 909640165.0, "step": 1191 }, { "epoch": 1.626801660034353, "grad_norm": 0.2173186201759512, "learning_rate": 1.8622406246155777e-05, "loss": 0.3379, "num_tokens": 910419507.0, "step": 1192 }, { "epoch": 1.6281672409930334, "grad_norm": 0.1904839672245517, "learning_rate": 1.8619762079699038e-05, "loss": 0.3401, "num_tokens": 911224684.0, "step": 1193 }, { "epoch": 1.629532821951714, "grad_norm": 0.19432253490254348, "learning_rate": 1.861711558878287e-05, "loss": 0.3333, "num_tokens": 911996270.0, "step": 1194 }, { "epoch": 1.6308984029103946, "grad_norm": 0.19323334654918106, "learning_rate": 1.8614466774214605e-05, "loss": 0.3511, "num_tokens": 912769798.0, "step": 1195 }, { "epoch": 1.632263983869075, "grad_norm": 0.19598720178477153, "learning_rate": 1.8611815636802285e-05, "loss": 0.3501, "num_tokens": 913646571.0, "step": 1196 }, { "epoch": 1.6336295648277555, "grad_norm": 0.20935599741357566, "learning_rate": 1.8609162177354653e-05, "loss": 0.3516, "num_tokens": 914384512.0, "step": 1197 }, { "epoch": 1.6349951457864358, "grad_norm": 0.1988318709329742, "learning_rate": 1.8606506396681164e-05, "loss": 0.3346, "num_tokens": 915134562.0, "step": 1198 }, { "epoch": 1.6363607267451163, "grad_norm": 0.2106000854464148, "learning_rate": 1.8603848295591985e-05, "loss": 0.3553, "num_tokens": 915937181.0, "step": 1199 }, { "epoch": 1.637726307703797, "grad_norm": 0.19687573918019438, "learning_rate": 1.8601187874897985e-05, "loss": 0.3342, "num_tokens": 916696280.0, "step": 1200 }, { "epoch": 1.6390918886624775, "grad_norm": 0.21474057369610397, "learning_rate": 1.8598525135410742e-05, "loss": 0.33, "num_tokens": 917412503.0, "step": 1201 }, { "epoch": 1.640457469621158, "grad_norm": 0.20016869110189686, "learning_rate": 1.8595860077942545e-05, "loss": 0.3382, "num_tokens": 918117121.0, "step": 1202 }, { "epoch": 1.6418230505798386, "grad_norm": 0.19995265187043457, "learning_rate": 1.859319270330639e-05, "loss": 0.354, "num_tokens": 918937113.0, "step": 1203 }, { "epoch": 1.6431886315385191, "grad_norm": 0.21276174984042134, "learning_rate": 1.859052301231597e-05, "loss": 0.3425, "num_tokens": 919612274.0, "step": 1204 }, { "epoch": 1.6445542124971995, "grad_norm": 0.18200229864099504, "learning_rate": 1.85878510057857e-05, "loss": 0.3302, "num_tokens": 920432589.0, "step": 1205 }, { "epoch": 1.64591979345588, "grad_norm": 0.1936318122635255, "learning_rate": 1.858517668453069e-05, "loss": 0.3454, "num_tokens": 921231719.0, "step": 1206 }, { "epoch": 1.6472853744145604, "grad_norm": 0.21888372027519343, "learning_rate": 1.8582500049366758e-05, "loss": 0.3468, "num_tokens": 921997319.0, "step": 1207 }, { "epoch": 1.648650955373241, "grad_norm": 0.20906338645847447, "learning_rate": 1.8579821101110433e-05, "loss": 0.3403, "num_tokens": 922727822.0, "step": 1208 }, { "epoch": 1.6500165363319215, "grad_norm": 0.19551370107277014, "learning_rate": 1.8577139840578945e-05, "loss": 0.3501, "num_tokens": 923479628.0, "step": 1209 }, { "epoch": 1.651382117290602, "grad_norm": 0.20838917784372538, "learning_rate": 1.8574456268590227e-05, "loss": 0.3428, "num_tokens": 924180362.0, "step": 1210 }, { "epoch": 1.6527476982492826, "grad_norm": 0.20059919642802865, "learning_rate": 1.8571770385962924e-05, "loss": 0.3416, "num_tokens": 925028872.0, "step": 1211 }, { "epoch": 1.6541132792079631, "grad_norm": 0.18834584130405413, "learning_rate": 1.856908219351638e-05, "loss": 0.3373, "num_tokens": 925812614.0, "step": 1212 }, { "epoch": 1.6554788601666437, "grad_norm": 0.1846061237520437, "learning_rate": 1.856639169207065e-05, "loss": 0.3509, "num_tokens": 926595468.0, "step": 1213 }, { "epoch": 1.656844441125324, "grad_norm": 0.19717897134061352, "learning_rate": 1.8563698882446484e-05, "loss": 0.339, "num_tokens": 927392118.0, "step": 1214 }, { "epoch": 1.6582100220840046, "grad_norm": 0.19561888630466406, "learning_rate": 1.856100376546535e-05, "loss": 0.3412, "num_tokens": 928157989.0, "step": 1215 }, { "epoch": 1.659575603042685, "grad_norm": 0.21227557027489777, "learning_rate": 1.8558306341949404e-05, "loss": 0.3216, "num_tokens": 928846657.0, "step": 1216 }, { "epoch": 1.6609411840013655, "grad_norm": 0.17923808369042452, "learning_rate": 1.855560661272151e-05, "loss": 0.3411, "num_tokens": 929638962.0, "step": 1217 }, { "epoch": 1.662306764960046, "grad_norm": 0.20858444491270087, "learning_rate": 1.8552904578605256e-05, "loss": 0.3526, "num_tokens": 930366074.0, "step": 1218 }, { "epoch": 1.6636723459187266, "grad_norm": 0.20233809861716978, "learning_rate": 1.8550200240424895e-05, "loss": 0.336, "num_tokens": 931085956.0, "step": 1219 }, { "epoch": 1.6650379268774071, "grad_norm": 0.19060408051362424, "learning_rate": 1.8547493599005416e-05, "loss": 0.3306, "num_tokens": 931840275.0, "step": 1220 }, { "epoch": 1.6664035078360877, "grad_norm": 0.19741307256773502, "learning_rate": 1.854478465517249e-05, "loss": 0.3429, "num_tokens": 932575611.0, "step": 1221 }, { "epoch": 1.6677690887947683, "grad_norm": 0.19940823552079082, "learning_rate": 1.8542073409752506e-05, "loss": 0.3524, "num_tokens": 933317306.0, "step": 1222 }, { "epoch": 1.6691346697534486, "grad_norm": 0.1929246208801896, "learning_rate": 1.8539359863572545e-05, "loss": 0.34, "num_tokens": 934017808.0, "step": 1223 }, { "epoch": 1.6705002507121292, "grad_norm": 0.19122182552525324, "learning_rate": 1.8536644017460387e-05, "loss": 0.3408, "num_tokens": 934747849.0, "step": 1224 }, { "epoch": 1.6718658316708095, "grad_norm": 0.19270003898465832, "learning_rate": 1.8533925872244524e-05, "loss": 0.3219, "num_tokens": 935398512.0, "step": 1225 }, { "epoch": 1.67323141262949, "grad_norm": 0.20010170059158036, "learning_rate": 1.8531205428754146e-05, "loss": 0.3459, "num_tokens": 936201755.0, "step": 1226 }, { "epoch": 1.6745969935881706, "grad_norm": 0.17470186634204232, "learning_rate": 1.852848268781914e-05, "loss": 0.3401, "num_tokens": 937026108.0, "step": 1227 }, { "epoch": 1.6759625745468512, "grad_norm": 0.1987255591481813, "learning_rate": 1.85257576502701e-05, "loss": 0.3459, "num_tokens": 937830227.0, "step": 1228 }, { "epoch": 1.6773281555055317, "grad_norm": 0.1966232952097899, "learning_rate": 1.852303031693831e-05, "loss": 0.3348, "num_tokens": 938602809.0, "step": 1229 }, { "epoch": 1.6786937364642123, "grad_norm": 0.18811364351747842, "learning_rate": 1.8520300688655763e-05, "loss": 0.334, "num_tokens": 939307550.0, "step": 1230 }, { "epoch": 1.6800593174228928, "grad_norm": 0.19337673039104525, "learning_rate": 1.851756876625516e-05, "loss": 0.3377, "num_tokens": 940033170.0, "step": 1231 }, { "epoch": 1.6814248983815732, "grad_norm": 0.19144029715453328, "learning_rate": 1.8514834550569878e-05, "loss": 0.3499, "num_tokens": 940818366.0, "step": 1232 }, { "epoch": 1.6827904793402537, "grad_norm": 0.21169326485977855, "learning_rate": 1.8512098042434016e-05, "loss": 0.3531, "num_tokens": 941562733.0, "step": 1233 }, { "epoch": 1.684156060298934, "grad_norm": 0.20753901874145128, "learning_rate": 1.8509359242682363e-05, "loss": 0.3315, "num_tokens": 942309901.0, "step": 1234 }, { "epoch": 1.6855216412576146, "grad_norm": 0.19462920101168749, "learning_rate": 1.850661815215041e-05, "loss": 0.3619, "num_tokens": 943082992.0, "step": 1235 }, { "epoch": 1.6868872222162952, "grad_norm": 0.1962633877018682, "learning_rate": 1.850387477167434e-05, "loss": 0.3356, "num_tokens": 943902619.0, "step": 1236 }, { "epoch": 1.6882528031749757, "grad_norm": 0.1966731771790236, "learning_rate": 1.8501129102091042e-05, "loss": 0.3442, "num_tokens": 944742807.0, "step": 1237 }, { "epoch": 1.6896183841336563, "grad_norm": 0.19049572605099718, "learning_rate": 1.84983811442381e-05, "loss": 0.3274, "num_tokens": 945491535.0, "step": 1238 }, { "epoch": 1.6909839650923368, "grad_norm": 0.1939506173896781, "learning_rate": 1.8495630898953802e-05, "loss": 0.3414, "num_tokens": 946259346.0, "step": 1239 }, { "epoch": 1.6923495460510174, "grad_norm": 0.1796586678448362, "learning_rate": 1.8492878367077124e-05, "loss": 0.3552, "num_tokens": 947042346.0, "step": 1240 }, { "epoch": 1.6937151270096977, "grad_norm": 0.2100435951056057, "learning_rate": 1.8490123549447742e-05, "loss": 0.325, "num_tokens": 947742050.0, "step": 1241 }, { "epoch": 1.6950807079683783, "grad_norm": 0.18245678796797613, "learning_rate": 1.8487366446906037e-05, "loss": 0.3454, "num_tokens": 948460727.0, "step": 1242 }, { "epoch": 1.6964462889270586, "grad_norm": 0.19530374892053062, "learning_rate": 1.8484607060293084e-05, "loss": 0.3379, "num_tokens": 949212962.0, "step": 1243 }, { "epoch": 1.6978118698857392, "grad_norm": 0.20051938910068431, "learning_rate": 1.848184539045064e-05, "loss": 0.3483, "num_tokens": 950015170.0, "step": 1244 }, { "epoch": 1.6991774508444197, "grad_norm": 0.19544817532994013, "learning_rate": 1.847908143822118e-05, "loss": 0.341, "num_tokens": 950789340.0, "step": 1245 }, { "epoch": 1.7005430318031003, "grad_norm": 0.18588948205135084, "learning_rate": 1.8476315204447867e-05, "loss": 0.3502, "num_tokens": 951538216.0, "step": 1246 }, { "epoch": 1.7019086127617808, "grad_norm": 0.19265518491528197, "learning_rate": 1.8473546689974558e-05, "loss": 0.3482, "num_tokens": 952324531.0, "step": 1247 }, { "epoch": 1.7032741937204614, "grad_norm": 0.18357383373215874, "learning_rate": 1.8470775895645802e-05, "loss": 0.3503, "num_tokens": 953075087.0, "step": 1248 }, { "epoch": 1.704639774679142, "grad_norm": 0.19348833324227807, "learning_rate": 1.8468002822306855e-05, "loss": 0.3334, "num_tokens": 953891104.0, "step": 1249 }, { "epoch": 1.7060053556378223, "grad_norm": 0.1946291181658379, "learning_rate": 1.8465227470803658e-05, "loss": 0.3453, "num_tokens": 954650868.0, "step": 1250 }, { "epoch": 1.7073709365965029, "grad_norm": 0.21991654041949513, "learning_rate": 1.846244984198285e-05, "loss": 0.3366, "num_tokens": 955350949.0, "step": 1251 }, { "epoch": 1.7087365175551832, "grad_norm": 0.18888806066759609, "learning_rate": 1.8459669936691768e-05, "loss": 0.3553, "num_tokens": 956108107.0, "step": 1252 }, { "epoch": 1.7101020985138637, "grad_norm": 0.20173391709949873, "learning_rate": 1.8456887755778436e-05, "loss": 0.3248, "num_tokens": 956824755.0, "step": 1253 }, { "epoch": 1.7114676794725443, "grad_norm": 0.2177548548134337, "learning_rate": 1.8454103300091584e-05, "loss": 0.3414, "num_tokens": 957591891.0, "step": 1254 }, { "epoch": 1.7128332604312249, "grad_norm": 0.19958437720386601, "learning_rate": 1.8451316570480625e-05, "loss": 0.3422, "num_tokens": 958328498.0, "step": 1255 }, { "epoch": 1.7141988413899054, "grad_norm": 0.2626473427553063, "learning_rate": 1.8448527567795667e-05, "loss": 0.3177, "num_tokens": 959035921.0, "step": 1256 }, { "epoch": 1.715564422348586, "grad_norm": 0.19790715994422373, "learning_rate": 1.8445736292887517e-05, "loss": 0.3475, "num_tokens": 959763911.0, "step": 1257 }, { "epoch": 1.7169300033072665, "grad_norm": 0.2096717432104978, "learning_rate": 1.844294274660767e-05, "loss": 0.35, "num_tokens": 960581633.0, "step": 1258 }, { "epoch": 1.7182955842659469, "grad_norm": 0.20603013298951786, "learning_rate": 1.8440146929808323e-05, "loss": 0.3329, "num_tokens": 961311201.0, "step": 1259 }, { "epoch": 1.7196611652246274, "grad_norm": 0.2140267008877135, "learning_rate": 1.8437348843342345e-05, "loss": 0.3421, "num_tokens": 962135110.0, "step": 1260 }, { "epoch": 1.7210267461833078, "grad_norm": 0.2206616602340336, "learning_rate": 1.8434548488063324e-05, "loss": 0.3322, "num_tokens": 962877982.0, "step": 1261 }, { "epoch": 1.7223923271419883, "grad_norm": 0.18472855804602945, "learning_rate": 1.8431745864825523e-05, "loss": 0.3513, "num_tokens": 963664778.0, "step": 1262 }, { "epoch": 1.7237579081006689, "grad_norm": 0.22151923854615338, "learning_rate": 1.8428940974483896e-05, "loss": 0.3406, "num_tokens": 964383183.0, "step": 1263 }, { "epoch": 1.7251234890593494, "grad_norm": 0.20398157824017815, "learning_rate": 1.8426133817894102e-05, "loss": 0.3356, "num_tokens": 965127212.0, "step": 1264 }, { "epoch": 1.72648907001803, "grad_norm": 0.19879527163078042, "learning_rate": 1.842332439591248e-05, "loss": 0.3498, "num_tokens": 965943491.0, "step": 1265 }, { "epoch": 1.7278546509767105, "grad_norm": 0.22992745736348894, "learning_rate": 1.8420512709396057e-05, "loss": 0.3363, "num_tokens": 966704554.0, "step": 1266 }, { "epoch": 1.729220231935391, "grad_norm": 0.23017025600342914, "learning_rate": 1.8417698759202562e-05, "loss": 0.3247, "num_tokens": 967443941.0, "step": 1267 }, { "epoch": 1.7305858128940714, "grad_norm": 0.20516326444145594, "learning_rate": 1.841488254619041e-05, "loss": 0.3235, "num_tokens": 968170723.0, "step": 1268 }, { "epoch": 1.731951393852752, "grad_norm": 0.18490606870064655, "learning_rate": 1.8412064071218704e-05, "loss": 0.334, "num_tokens": 968917602.0, "step": 1269 }, { "epoch": 1.7333169748114323, "grad_norm": 0.22526472218814042, "learning_rate": 1.8409243335147238e-05, "loss": 0.335, "num_tokens": 969663786.0, "step": 1270 }, { "epoch": 1.7346825557701129, "grad_norm": 0.19693186744229033, "learning_rate": 1.8406420338836498e-05, "loss": 0.3425, "num_tokens": 970455867.0, "step": 1271 }, { "epoch": 1.7360481367287934, "grad_norm": 0.20032020526254093, "learning_rate": 1.8403595083147662e-05, "loss": 0.3644, "num_tokens": 971248699.0, "step": 1272 }, { "epoch": 1.737413717687474, "grad_norm": 0.219464872986859, "learning_rate": 1.8400767568942586e-05, "loss": 0.3449, "num_tokens": 972016614.0, "step": 1273 }, { "epoch": 1.7387792986461545, "grad_norm": 0.23308035470839664, "learning_rate": 1.839793779708382e-05, "loss": 0.3468, "num_tokens": 972771512.0, "step": 1274 }, { "epoch": 1.740144879604835, "grad_norm": 0.190929961619599, "learning_rate": 1.8395105768434615e-05, "loss": 0.3593, "num_tokens": 973502735.0, "step": 1275 }, { "epoch": 1.7415104605635157, "grad_norm": 0.21583161687324673, "learning_rate": 1.839227148385889e-05, "loss": 0.3552, "num_tokens": 974196212.0, "step": 1276 }, { "epoch": 1.742876041522196, "grad_norm": 0.19011482578637956, "learning_rate": 1.8389434944221274e-05, "loss": 0.3399, "num_tokens": 974944799.0, "step": 1277 }, { "epoch": 1.7442416224808766, "grad_norm": 0.19671668021681205, "learning_rate": 1.838659615038706e-05, "loss": 0.3359, "num_tokens": 975748225.0, "step": 1278 }, { "epoch": 1.7456072034395569, "grad_norm": 0.19649941611618246, "learning_rate": 1.838375510322225e-05, "loss": 0.3509, "num_tokens": 976552779.0, "step": 1279 }, { "epoch": 1.7469727843982374, "grad_norm": 0.20769773590433582, "learning_rate": 1.838091180359352e-05, "loss": 0.3454, "num_tokens": 977323948.0, "step": 1280 }, { "epoch": 1.748338365356918, "grad_norm": 0.19572746951340345, "learning_rate": 1.837806625236824e-05, "loss": 0.3338, "num_tokens": 978154226.0, "step": 1281 }, { "epoch": 1.7497039463155986, "grad_norm": 0.19465774262198837, "learning_rate": 1.837521845041446e-05, "loss": 0.339, "num_tokens": 978900281.0, "step": 1282 }, { "epoch": 1.7510695272742791, "grad_norm": 0.20263752204903554, "learning_rate": 1.8372368398600928e-05, "loss": 0.3469, "num_tokens": 979717687.0, "step": 1283 }, { "epoch": 1.7524351082329597, "grad_norm": 0.1874261607761662, "learning_rate": 1.8369516097797067e-05, "loss": 0.3441, "num_tokens": 980482708.0, "step": 1284 }, { "epoch": 1.7538006891916402, "grad_norm": 0.19583140909256944, "learning_rate": 1.836666154887299e-05, "loss": 0.3426, "num_tokens": 981324398.0, "step": 1285 }, { "epoch": 1.7551662701503206, "grad_norm": 0.20071873178609426, "learning_rate": 1.8363804752699497e-05, "loss": 0.3538, "num_tokens": 982189022.0, "step": 1286 }, { "epoch": 1.7565318511090011, "grad_norm": 0.1897794560492021, "learning_rate": 1.836094571014807e-05, "loss": 0.3445, "num_tokens": 982907827.0, "step": 1287 }, { "epoch": 1.7578974320676815, "grad_norm": 0.19465828086602127, "learning_rate": 1.835808442209089e-05, "loss": 0.3511, "num_tokens": 983702106.0, "step": 1288 }, { "epoch": 1.759263013026362, "grad_norm": 0.20100937394293544, "learning_rate": 1.83552208894008e-05, "loss": 0.3408, "num_tokens": 984378865.0, "step": 1289 }, { "epoch": 1.7606285939850426, "grad_norm": 0.20058606169969928, "learning_rate": 1.8352355112951346e-05, "loss": 0.3619, "num_tokens": 985168260.0, "step": 1290 }, { "epoch": 1.7619941749437231, "grad_norm": 0.20132072052026914, "learning_rate": 1.834948709361675e-05, "loss": 0.3407, "num_tokens": 985833549.0, "step": 1291 }, { "epoch": 1.7633597559024037, "grad_norm": 0.2087711431944956, "learning_rate": 1.8346616832271923e-05, "loss": 0.3531, "num_tokens": 986548571.0, "step": 1292 }, { "epoch": 1.7647253368610842, "grad_norm": 0.18843587840261322, "learning_rate": 1.834374432979245e-05, "loss": 0.3435, "num_tokens": 987277391.0, "step": 1293 }, { "epoch": 1.7660909178197648, "grad_norm": 0.1899610564719992, "learning_rate": 1.8340869587054617e-05, "loss": 0.3321, "num_tokens": 988038211.0, "step": 1294 }, { "epoch": 1.7674564987784451, "grad_norm": 0.21043918629186983, "learning_rate": 1.8337992604935374e-05, "loss": 0.3537, "num_tokens": 988843265.0, "step": 1295 }, { "epoch": 1.7688220797371257, "grad_norm": 0.21348032466562758, "learning_rate": 1.8335113384312372e-05, "loss": 0.3485, "num_tokens": 989739636.0, "step": 1296 }, { "epoch": 1.770187660695806, "grad_norm": 0.18107832366735577, "learning_rate": 1.8332231926063935e-05, "loss": 0.3537, "num_tokens": 990541518.0, "step": 1297 }, { "epoch": 1.7715532416544866, "grad_norm": 0.21420034140432206, "learning_rate": 1.832934823106907e-05, "loss": 0.3336, "num_tokens": 991288069.0, "step": 1298 }, { "epoch": 1.7729188226131671, "grad_norm": 0.19758975862234557, "learning_rate": 1.832646230020746e-05, "loss": 0.3308, "num_tokens": 991987588.0, "step": 1299 }, { "epoch": 1.7742844035718477, "grad_norm": 0.4864055265374092, "learning_rate": 1.832357413435949e-05, "loss": 0.3276, "num_tokens": 992744334.0, "step": 1300 }, { "epoch": 1.7756499845305282, "grad_norm": 0.2371564291653087, "learning_rate": 1.8320683734406208e-05, "loss": 0.3679, "num_tokens": 993497890.0, "step": 1301 }, { "epoch": 1.7770155654892088, "grad_norm": 0.17379743788282073, "learning_rate": 1.831779110122935e-05, "loss": 0.3412, "num_tokens": 994238417.0, "step": 1302 }, { "epoch": 1.7783811464478894, "grad_norm": 0.23271419634641805, "learning_rate": 1.831489623571133e-05, "loss": 0.3325, "num_tokens": 994963096.0, "step": 1303 }, { "epoch": 1.7797467274065697, "grad_norm": 0.21479665618764432, "learning_rate": 1.8311999138735256e-05, "loss": 0.3362, "num_tokens": 995640603.0, "step": 1304 }, { "epoch": 1.7811123083652503, "grad_norm": 0.2082157870286439, "learning_rate": 1.83090998111849e-05, "loss": 0.3356, "num_tokens": 996328265.0, "step": 1305 }, { "epoch": 1.7824778893239306, "grad_norm": 0.21703633294899552, "learning_rate": 1.830619825394472e-05, "loss": 0.3453, "num_tokens": 997089711.0, "step": 1306 }, { "epoch": 1.7838434702826111, "grad_norm": 0.20984693627624557, "learning_rate": 1.8303294467899853e-05, "loss": 0.3483, "num_tokens": 997803333.0, "step": 1307 }, { "epoch": 1.7852090512412917, "grad_norm": 0.2023558569650378, "learning_rate": 1.830038845393613e-05, "loss": 0.3303, "num_tokens": 998555301.0, "step": 1308 }, { "epoch": 1.7865746321999723, "grad_norm": 0.189416726118961, "learning_rate": 1.829748021294004e-05, "loss": 0.3382, "num_tokens": 999278977.0, "step": 1309 }, { "epoch": 1.7879402131586528, "grad_norm": 0.20543509911004632, "learning_rate": 1.829456974579876e-05, "loss": 0.3334, "num_tokens": 1000014672.0, "step": 1310 }, { "epoch": 1.7893057941173334, "grad_norm": 0.18601645776267262, "learning_rate": 1.8291657053400154e-05, "loss": 0.3469, "num_tokens": 1000741773.0, "step": 1311 }, { "epoch": 1.790671375076014, "grad_norm": 0.21006896733825822, "learning_rate": 1.8288742136632755e-05, "loss": 0.3425, "num_tokens": 1001494940.0, "step": 1312 }, { "epoch": 1.7920369560346943, "grad_norm": 0.19269957329582907, "learning_rate": 1.828582499638578e-05, "loss": 0.3707, "num_tokens": 1002252893.0, "step": 1313 }, { "epoch": 1.7934025369933748, "grad_norm": 0.2062901253720772, "learning_rate": 1.8282905633549122e-05, "loss": 0.3333, "num_tokens": 1002977777.0, "step": 1314 }, { "epoch": 1.7947681179520552, "grad_norm": 0.2042451698660808, "learning_rate": 1.8279984049013352e-05, "loss": 0.3446, "num_tokens": 1003740831.0, "step": 1315 }, { "epoch": 1.7961336989107357, "grad_norm": 0.18134213958973186, "learning_rate": 1.8277060243669714e-05, "loss": 0.3451, "num_tokens": 1004414611.0, "step": 1316 }, { "epoch": 1.7974992798694163, "grad_norm": 0.19994069606619363, "learning_rate": 1.827413421841014e-05, "loss": 0.3505, "num_tokens": 1005144139.0, "step": 1317 }, { "epoch": 1.7988648608280968, "grad_norm": 0.19966448198429756, "learning_rate": 1.827120597412723e-05, "loss": 0.3372, "num_tokens": 1005913575.0, "step": 1318 }, { "epoch": 1.8002304417867774, "grad_norm": 0.19633939480787413, "learning_rate": 1.826827551171427e-05, "loss": 0.3519, "num_tokens": 1006592989.0, "step": 1319 }, { "epoch": 1.801596022745458, "grad_norm": 0.19740868950887808, "learning_rate": 1.8265342832065215e-05, "loss": 0.3342, "num_tokens": 1007310808.0, "step": 1320 }, { "epoch": 1.8029616037041385, "grad_norm": 0.1918995422147318, "learning_rate": 1.8262407936074696e-05, "loss": 0.3421, "num_tokens": 1007965480.0, "step": 1321 }, { "epoch": 1.8043271846628188, "grad_norm": 0.21099660611905727, "learning_rate": 1.8259470824638018e-05, "loss": 0.3342, "num_tokens": 1008622295.0, "step": 1322 }, { "epoch": 1.8056927656214994, "grad_norm": 0.18777179813749642, "learning_rate": 1.8256531498651178e-05, "loss": 0.3531, "num_tokens": 1009364283.0, "step": 1323 }, { "epoch": 1.8070583465801797, "grad_norm": 0.18459150264304755, "learning_rate": 1.8253589959010832e-05, "loss": 0.343, "num_tokens": 1010141126.0, "step": 1324 }, { "epoch": 1.8084239275388603, "grad_norm": 0.19433979208056637, "learning_rate": 1.8250646206614314e-05, "loss": 0.364, "num_tokens": 1010884857.0, "step": 1325 }, { "epoch": 1.8097895084975408, "grad_norm": 0.20302257404012433, "learning_rate": 1.8247700242359635e-05, "loss": 0.3375, "num_tokens": 1011570362.0, "step": 1326 }, { "epoch": 1.8111550894562214, "grad_norm": 0.18484656566466565, "learning_rate": 1.8244752067145485e-05, "loss": 0.3383, "num_tokens": 1012365637.0, "step": 1327 }, { "epoch": 1.812520670414902, "grad_norm": 0.17529474724343838, "learning_rate": 1.8241801681871224e-05, "loss": 0.3416, "num_tokens": 1013157884.0, "step": 1328 }, { "epoch": 1.8138862513735825, "grad_norm": 0.1804607033897169, "learning_rate": 1.8238849087436887e-05, "loss": 0.3358, "num_tokens": 1013881776.0, "step": 1329 }, { "epoch": 1.815251832332263, "grad_norm": 0.19716895443010413, "learning_rate": 1.8235894284743176e-05, "loss": 0.3474, "num_tokens": 1014637454.0, "step": 1330 }, { "epoch": 1.8166174132909434, "grad_norm": 0.19476566376415835, "learning_rate": 1.823293727469148e-05, "loss": 0.3329, "num_tokens": 1015365856.0, "step": 1331 }, { "epoch": 1.817982994249624, "grad_norm": 0.17409451945346133, "learning_rate": 1.822997805818386e-05, "loss": 0.3352, "num_tokens": 1016121239.0, "step": 1332 }, { "epoch": 1.8193485752083043, "grad_norm": 0.19787808590456965, "learning_rate": 1.822701663612303e-05, "loss": 0.351, "num_tokens": 1016913224.0, "step": 1333 }, { "epoch": 1.8207141561669848, "grad_norm": 0.17533363730688015, "learning_rate": 1.8224053009412405e-05, "loss": 0.3369, "num_tokens": 1017707092.0, "step": 1334 }, { "epoch": 1.8220797371256654, "grad_norm": 0.18328965178318862, "learning_rate": 1.822108717895605e-05, "loss": 0.3517, "num_tokens": 1018451466.0, "step": 1335 }, { "epoch": 1.823445318084346, "grad_norm": 0.19823621688171397, "learning_rate": 1.821811914565872e-05, "loss": 0.3329, "num_tokens": 1019208986.0, "step": 1336 }, { "epoch": 1.8248108990430265, "grad_norm": 0.17404287520777875, "learning_rate": 1.8215148910425824e-05, "loss": 0.3345, "num_tokens": 1019981657.0, "step": 1337 }, { "epoch": 1.826176480001707, "grad_norm": 0.17525925835324338, "learning_rate": 1.821217647416346e-05, "loss": 0.3358, "num_tokens": 1020766462.0, "step": 1338 }, { "epoch": 1.8275420609603876, "grad_norm": 0.19301643166854465, "learning_rate": 1.8209201837778385e-05, "loss": 0.3378, "num_tokens": 1021524094.0, "step": 1339 }, { "epoch": 1.828907641919068, "grad_norm": 0.1897757885147259, "learning_rate": 1.8206225002178037e-05, "loss": 0.3367, "num_tokens": 1022304667.0, "step": 1340 }, { "epoch": 1.8302732228777485, "grad_norm": 0.16908449685320562, "learning_rate": 1.820324596827051e-05, "loss": 0.3492, "num_tokens": 1023037930.0, "step": 1341 }, { "epoch": 1.8316388038364289, "grad_norm": 0.18655023969990409, "learning_rate": 1.8200264736964588e-05, "loss": 0.3545, "num_tokens": 1023873838.0, "step": 1342 }, { "epoch": 1.8330043847951094, "grad_norm": 0.17937181315784795, "learning_rate": 1.819728130916971e-05, "loss": 0.3418, "num_tokens": 1024651306.0, "step": 1343 }, { "epoch": 1.83436996575379, "grad_norm": 0.1850719424441285, "learning_rate": 1.8194295685795997e-05, "loss": 0.3619, "num_tokens": 1025393758.0, "step": 1344 }, { "epoch": 1.8357355467124705, "grad_norm": 0.1915486303578099, "learning_rate": 1.819130786775422e-05, "loss": 0.3356, "num_tokens": 1026091695.0, "step": 1345 }, { "epoch": 1.837101127671151, "grad_norm": 0.2051535179580251, "learning_rate": 1.818831785595585e-05, "loss": 0.3354, "num_tokens": 1026878436.0, "step": 1346 }, { "epoch": 1.8384667086298316, "grad_norm": 0.1766179344651269, "learning_rate": 1.8185325651312997e-05, "loss": 0.3519, "num_tokens": 1027662285.0, "step": 1347 }, { "epoch": 1.8398322895885122, "grad_norm": 0.18603370818283574, "learning_rate": 1.818233125473846e-05, "loss": 0.334, "num_tokens": 1028417573.0, "step": 1348 }, { "epoch": 1.8411978705471925, "grad_norm": 0.20698208737015042, "learning_rate": 1.8179334667145698e-05, "loss": 0.3609, "num_tokens": 1029154389.0, "step": 1349 }, { "epoch": 1.842563451505873, "grad_norm": 0.22286434119183543, "learning_rate": 1.8176335889448833e-05, "loss": 0.3437, "num_tokens": 1029957505.0, "step": 1350 }, { "epoch": 1.8439290324645534, "grad_norm": 0.19158961491637777, "learning_rate": 1.817333492256268e-05, "loss": 0.3447, "num_tokens": 1030745318.0, "step": 1351 }, { "epoch": 1.845294613423234, "grad_norm": 0.18335545141159662, "learning_rate": 1.817033176740268e-05, "loss": 0.3514, "num_tokens": 1031487740.0, "step": 1352 }, { "epoch": 1.8466601943819145, "grad_norm": 0.18280195300100643, "learning_rate": 1.816732642488499e-05, "loss": 0.3419, "num_tokens": 1032266420.0, "step": 1353 }, { "epoch": 1.848025775340595, "grad_norm": 0.2030145162230529, "learning_rate": 1.8164318895926394e-05, "loss": 0.3523, "num_tokens": 1033022760.0, "step": 1354 }, { "epoch": 1.8493913562992756, "grad_norm": 0.19180827359230376, "learning_rate": 1.816130918144436e-05, "loss": 0.3372, "num_tokens": 1033757383.0, "step": 1355 }, { "epoch": 1.8507569372579562, "grad_norm": 0.17436404994867266, "learning_rate": 1.8158297282357027e-05, "loss": 0.3591, "num_tokens": 1034547784.0, "step": 1356 }, { "epoch": 1.8521225182166368, "grad_norm": 0.21384976445213802, "learning_rate": 1.8155283199583194e-05, "loss": 0.3487, "num_tokens": 1035322866.0, "step": 1357 }, { "epoch": 1.853488099175317, "grad_norm": 0.1770464325798218, "learning_rate": 1.8152266934042326e-05, "loss": 0.3485, "num_tokens": 1036103208.0, "step": 1358 }, { "epoch": 1.8548536801339977, "grad_norm": 0.20411953272383584, "learning_rate": 1.8149248486654554e-05, "loss": 0.3436, "num_tokens": 1036808074.0, "step": 1359 }, { "epoch": 1.856219261092678, "grad_norm": 0.1840135784819275, "learning_rate": 1.8146227858340674e-05, "loss": 0.3556, "num_tokens": 1037655441.0, "step": 1360 }, { "epoch": 1.8575848420513585, "grad_norm": 0.1893308807019159, "learning_rate": 1.8143205050022154e-05, "loss": 0.3404, "num_tokens": 1038422815.0, "step": 1361 }, { "epoch": 1.858950423010039, "grad_norm": 0.18346126676299288, "learning_rate": 1.8140180062621117e-05, "loss": 0.3469, "num_tokens": 1039246967.0, "step": 1362 }, { "epoch": 1.8603160039687197, "grad_norm": 0.18867242184477462, "learning_rate": 1.8137152897060358e-05, "loss": 0.3371, "num_tokens": 1039984720.0, "step": 1363 }, { "epoch": 1.8616815849274002, "grad_norm": 0.1874502479784267, "learning_rate": 1.8134123554263337e-05, "loss": 0.35, "num_tokens": 1040678355.0, "step": 1364 }, { "epoch": 1.8630471658860808, "grad_norm": 0.19925499420288498, "learning_rate": 1.813109203515417e-05, "loss": 0.3405, "num_tokens": 1041440149.0, "step": 1365 }, { "epoch": 1.8644127468447613, "grad_norm": 0.1985077086730961, "learning_rate": 1.8128058340657643e-05, "loss": 0.3463, "num_tokens": 1042158039.0, "step": 1366 }, { "epoch": 1.8657783278034417, "grad_norm": 0.20502006617873098, "learning_rate": 1.8125022471699208e-05, "loss": 0.3472, "num_tokens": 1042927338.0, "step": 1367 }, { "epoch": 1.8671439087621222, "grad_norm": 0.1748512898524473, "learning_rate": 1.8121984429204977e-05, "loss": 0.3513, "num_tokens": 1043710526.0, "step": 1368 }, { "epoch": 1.8685094897208026, "grad_norm": 0.20156828741505178, "learning_rate": 1.811894421410172e-05, "loss": 0.3544, "num_tokens": 1044452119.0, "step": 1369 }, { "epoch": 1.869875070679483, "grad_norm": 0.1800226382191922, "learning_rate": 1.8115901827316883e-05, "loss": 0.3344, "num_tokens": 1045251019.0, "step": 1370 }, { "epoch": 1.8712406516381637, "grad_norm": 0.18593556528140423, "learning_rate": 1.811285726977856e-05, "loss": 0.3393, "num_tokens": 1046008119.0, "step": 1371 }, { "epoch": 1.8726062325968442, "grad_norm": 0.1928769049094261, "learning_rate": 1.8109810542415513e-05, "loss": 0.3407, "num_tokens": 1046755215.0, "step": 1372 }, { "epoch": 1.8739718135555248, "grad_norm": 0.19303744783676646, "learning_rate": 1.810676164615717e-05, "loss": 0.3482, "num_tokens": 1047551893.0, "step": 1373 }, { "epoch": 1.8753373945142053, "grad_norm": 0.1809117622696174, "learning_rate": 1.810371058193362e-05, "loss": 0.3456, "num_tokens": 1048327530.0, "step": 1374 }, { "epoch": 1.876702975472886, "grad_norm": 0.17724932920680705, "learning_rate": 1.8100657350675604e-05, "loss": 0.3417, "num_tokens": 1049156457.0, "step": 1375 }, { "epoch": 1.8780685564315662, "grad_norm": 0.19624854630084038, "learning_rate": 1.8097601953314535e-05, "loss": 0.3414, "num_tokens": 1049866305.0, "step": 1376 }, { "epoch": 1.8794341373902468, "grad_norm": 0.19646324670650459, "learning_rate": 1.809454439078248e-05, "loss": 0.352, "num_tokens": 1050527086.0, "step": 1377 }, { "epoch": 1.8807997183489271, "grad_norm": 0.19012771679197407, "learning_rate": 1.8091484664012167e-05, "loss": 0.3287, "num_tokens": 1051300919.0, "step": 1378 }, { "epoch": 1.8821652993076077, "grad_norm": 0.2020061458130394, "learning_rate": 1.8088422773936997e-05, "loss": 0.3303, "num_tokens": 1052052554.0, "step": 1379 }, { "epoch": 1.8835308802662882, "grad_norm": 0.18387821178084687, "learning_rate": 1.8085358721491006e-05, "loss": 0.3427, "num_tokens": 1052805026.0, "step": 1380 }, { "epoch": 1.8848964612249688, "grad_norm": 0.18378571661837326, "learning_rate": 1.808229250760891e-05, "loss": 0.359, "num_tokens": 1053635092.0, "step": 1381 }, { "epoch": 1.8862620421836493, "grad_norm": 0.19440042091365756, "learning_rate": 1.8079224133226082e-05, "loss": 0.33, "num_tokens": 1054399214.0, "step": 1382 }, { "epoch": 1.88762762314233, "grad_norm": 0.18666890565975022, "learning_rate": 1.8076153599278544e-05, "loss": 0.3413, "num_tokens": 1055161105.0, "step": 1383 }, { "epoch": 1.8889932041010105, "grad_norm": 0.18115374497396763, "learning_rate": 1.8073080906702983e-05, "loss": 0.3274, "num_tokens": 1055888181.0, "step": 1384 }, { "epoch": 1.8903587850596908, "grad_norm": 0.2517316823057435, "learning_rate": 1.807000605643675e-05, "loss": 0.3323, "num_tokens": 1056594511.0, "step": 1385 }, { "epoch": 1.8917243660183714, "grad_norm": 0.20535764923706187, "learning_rate": 1.8066929049417844e-05, "loss": 0.335, "num_tokens": 1057373681.0, "step": 1386 }, { "epoch": 1.8930899469770517, "grad_norm": 0.20568523614243436, "learning_rate": 1.8063849886584922e-05, "loss": 0.3372, "num_tokens": 1058148174.0, "step": 1387 }, { "epoch": 1.8944555279357322, "grad_norm": 0.1824517469086104, "learning_rate": 1.8060768568877313e-05, "loss": 0.3408, "num_tokens": 1058931524.0, "step": 1388 }, { "epoch": 1.8958211088944128, "grad_norm": 0.19623630817547652, "learning_rate": 1.8057685097234995e-05, "loss": 0.3405, "num_tokens": 1059745866.0, "step": 1389 }, { "epoch": 1.8971866898530934, "grad_norm": 0.18249090828949116, "learning_rate": 1.805459947259859e-05, "loss": 0.3248, "num_tokens": 1060462436.0, "step": 1390 }, { "epoch": 1.898552270811774, "grad_norm": 0.18266165607601342, "learning_rate": 1.8051511695909396e-05, "loss": 0.3393, "num_tokens": 1061121671.0, "step": 1391 }, { "epoch": 1.8999178517704545, "grad_norm": 0.18501811951464603, "learning_rate": 1.8048421768109357e-05, "loss": 0.3517, "num_tokens": 1061955157.0, "step": 1392 }, { "epoch": 1.901283432729135, "grad_norm": 0.18574217741211582, "learning_rate": 1.804532969014108e-05, "loss": 0.3546, "num_tokens": 1062734127.0, "step": 1393 }, { "epoch": 1.9026490136878154, "grad_norm": 0.18971508137208495, "learning_rate": 1.8042235462947823e-05, "loss": 0.3454, "num_tokens": 1063546649.0, "step": 1394 }, { "epoch": 1.904014594646496, "grad_norm": 0.20066473397012283, "learning_rate": 1.8039139087473494e-05, "loss": 0.3441, "num_tokens": 1064301713.0, "step": 1395 }, { "epoch": 1.9053801756051763, "grad_norm": 0.19623389172055286, "learning_rate": 1.8036040564662674e-05, "loss": 0.3451, "num_tokens": 1065069050.0, "step": 1396 }, { "epoch": 1.9067457565638568, "grad_norm": 0.18494830571207507, "learning_rate": 1.803293989546058e-05, "loss": 0.3417, "num_tokens": 1065867837.0, "step": 1397 }, { "epoch": 1.9081113375225374, "grad_norm": 0.20116005447955038, "learning_rate": 1.8029837080813095e-05, "loss": 0.3438, "num_tokens": 1066635576.0, "step": 1398 }, { "epoch": 1.909476918481218, "grad_norm": 0.18322351206848775, "learning_rate": 1.8026732121666758e-05, "loss": 0.3299, "num_tokens": 1067400109.0, "step": 1399 }, { "epoch": 1.9108424994398985, "grad_norm": 0.19603943252294853, "learning_rate": 1.802362501896875e-05, "loss": 0.3457, "num_tokens": 1068169071.0, "step": 1400 }, { "epoch": 1.912208080398579, "grad_norm": 0.18664887516882694, "learning_rate": 1.802051577366691e-05, "loss": 0.3416, "num_tokens": 1068973662.0, "step": 1401 }, { "epoch": 1.9135736613572596, "grad_norm": 0.19749219202331883, "learning_rate": 1.8017404386709747e-05, "loss": 0.3249, "num_tokens": 1069687808.0, "step": 1402 }, { "epoch": 1.91493924231594, "grad_norm": 0.17389219368672768, "learning_rate": 1.80142908590464e-05, "loss": 0.3398, "num_tokens": 1070490203.0, "step": 1403 }, { "epoch": 1.9163048232746205, "grad_norm": 0.2000966956102326, "learning_rate": 1.8011175191626676e-05, "loss": 0.3516, "num_tokens": 1071259380.0, "step": 1404 }, { "epoch": 1.9176704042333008, "grad_norm": 0.1825884978583894, "learning_rate": 1.8008057385401028e-05, "loss": 0.3367, "num_tokens": 1071981465.0, "step": 1405 }, { "epoch": 1.9190359851919814, "grad_norm": 0.20332807817071039, "learning_rate": 1.800493744132057e-05, "loss": 0.3562, "num_tokens": 1072766456.0, "step": 1406 }, { "epoch": 1.920401566150662, "grad_norm": 0.1815362987061042, "learning_rate": 1.800181536033705e-05, "loss": 0.3606, "num_tokens": 1073561309.0, "step": 1407 }, { "epoch": 1.9217671471093425, "grad_norm": 0.1952574265074895, "learning_rate": 1.7998691143402888e-05, "loss": 0.3375, "num_tokens": 1074288618.0, "step": 1408 }, { "epoch": 1.923132728068023, "grad_norm": 0.18999880229380975, "learning_rate": 1.7995564791471146e-05, "loss": 0.3381, "num_tokens": 1075024005.0, "step": 1409 }, { "epoch": 1.9244983090267036, "grad_norm": 0.18569266367046677, "learning_rate": 1.799243630549554e-05, "loss": 0.3388, "num_tokens": 1075820395.0, "step": 1410 }, { "epoch": 1.9258638899853842, "grad_norm": 0.18453835381493514, "learning_rate": 1.7989305686430433e-05, "loss": 0.3395, "num_tokens": 1076589499.0, "step": 1411 }, { "epoch": 1.9272294709440645, "grad_norm": 0.19825938852606126, "learning_rate": 1.7986172935230843e-05, "loss": 0.3377, "num_tokens": 1077334267.0, "step": 1412 }, { "epoch": 1.928595051902745, "grad_norm": 0.16833730569022778, "learning_rate": 1.7983038052852434e-05, "loss": 0.3356, "num_tokens": 1078129944.0, "step": 1413 }, { "epoch": 1.9299606328614254, "grad_norm": 0.19846511955057783, "learning_rate": 1.797990104025153e-05, "loss": 0.3289, "num_tokens": 1078836245.0, "step": 1414 }, { "epoch": 1.931326213820106, "grad_norm": 0.178309376252815, "learning_rate": 1.7976761898385086e-05, "loss": 0.3527, "num_tokens": 1079587809.0, "step": 1415 }, { "epoch": 1.9326917947787865, "grad_norm": 0.1933292302721321, "learning_rate": 1.7973620628210725e-05, "loss": 0.3511, "num_tokens": 1080361621.0, "step": 1416 }, { "epoch": 1.934057375737467, "grad_norm": 0.19036434205641373, "learning_rate": 1.7970477230686717e-05, "loss": 0.3517, "num_tokens": 1081039509.0, "step": 1417 }, { "epoch": 1.9354229566961476, "grad_norm": 0.19141049758501175, "learning_rate": 1.796733170677197e-05, "loss": 0.3487, "num_tokens": 1081839421.0, "step": 1418 }, { "epoch": 1.9367885376548282, "grad_norm": 0.17934238191391758, "learning_rate": 1.7964184057426045e-05, "loss": 0.338, "num_tokens": 1082569390.0, "step": 1419 }, { "epoch": 1.9381541186135087, "grad_norm": 0.19807521633435893, "learning_rate": 1.796103428360916e-05, "loss": 0.3497, "num_tokens": 1083292577.0, "step": 1420 }, { "epoch": 1.939519699572189, "grad_norm": 0.18305012040318092, "learning_rate": 1.7957882386282173e-05, "loss": 0.3464, "num_tokens": 1084064757.0, "step": 1421 }, { "epoch": 1.9408852805308696, "grad_norm": 0.18450581957006604, "learning_rate": 1.7954728366406588e-05, "loss": 0.3261, "num_tokens": 1084831561.0, "step": 1422 }, { "epoch": 1.94225086148955, "grad_norm": 0.18184013588411369, "learning_rate": 1.7951572224944564e-05, "loss": 0.3498, "num_tokens": 1085676633.0, "step": 1423 }, { "epoch": 1.9436164424482305, "grad_norm": 0.19873453653967915, "learning_rate": 1.79484139628589e-05, "loss": 0.3502, "num_tokens": 1086491645.0, "step": 1424 }, { "epoch": 1.944982023406911, "grad_norm": 0.1969652974872697, "learning_rate": 1.7945253581113046e-05, "loss": 0.3438, "num_tokens": 1087234735.0, "step": 1425 }, { "epoch": 1.9463476043655916, "grad_norm": 0.18346487266648323, "learning_rate": 1.7942091080671097e-05, "loss": 0.3396, "num_tokens": 1087992936.0, "step": 1426 }, { "epoch": 1.9477131853242722, "grad_norm": 0.18034067155746153, "learning_rate": 1.7938926462497797e-05, "loss": 0.3447, "num_tokens": 1088704460.0, "step": 1427 }, { "epoch": 1.9490787662829527, "grad_norm": 0.19684308966889566, "learning_rate": 1.7935759727558536e-05, "loss": 0.3405, "num_tokens": 1089486597.0, "step": 1428 }, { "epoch": 1.9504443472416333, "grad_norm": 0.17872811583953227, "learning_rate": 1.793259087681934e-05, "loss": 0.3329, "num_tokens": 1090271761.0, "step": 1429 }, { "epoch": 1.9518099282003136, "grad_norm": 0.1702694455587737, "learning_rate": 1.792941991124689e-05, "loss": 0.3524, "num_tokens": 1091088504.0, "step": 1430 }, { "epoch": 1.9531755091589942, "grad_norm": 0.19731605642692482, "learning_rate": 1.7926246831808516e-05, "loss": 0.345, "num_tokens": 1091844749.0, "step": 1431 }, { "epoch": 1.9545410901176745, "grad_norm": 0.20055886160422845, "learning_rate": 1.7923071639472188e-05, "loss": 0.3651, "num_tokens": 1092633853.0, "step": 1432 }, { "epoch": 1.955906671076355, "grad_norm": 0.17410827724062555, "learning_rate": 1.791989433520651e-05, "loss": 0.3522, "num_tokens": 1093469460.0, "step": 1433 }, { "epoch": 1.9572722520350356, "grad_norm": 0.20563115832867138, "learning_rate": 1.7916714919980745e-05, "loss": 0.3461, "num_tokens": 1094146535.0, "step": 1434 }, { "epoch": 1.9586378329937162, "grad_norm": 0.17590890805029433, "learning_rate": 1.7913533394764798e-05, "loss": 0.3383, "num_tokens": 1094903981.0, "step": 1435 }, { "epoch": 1.9600034139523967, "grad_norm": 0.20378783515659474, "learning_rate": 1.7910349760529207e-05, "loss": 0.326, "num_tokens": 1095626160.0, "step": 1436 }, { "epoch": 1.9613689949110773, "grad_norm": 0.1899386729268108, "learning_rate": 1.7907164018245173e-05, "loss": 0.3428, "num_tokens": 1096460621.0, "step": 1437 }, { "epoch": 1.9627345758697579, "grad_norm": 0.19309482311331763, "learning_rate": 1.7903976168884517e-05, "loss": 0.3396, "num_tokens": 1097258051.0, "step": 1438 }, { "epoch": 1.9641001568284382, "grad_norm": 0.19078208782549586, "learning_rate": 1.790078621341972e-05, "loss": 0.3433, "num_tokens": 1098056109.0, "step": 1439 }, { "epoch": 1.9654657377871187, "grad_norm": 0.2002654020758272, "learning_rate": 1.7897594152823893e-05, "loss": 0.3435, "num_tokens": 1098865627.0, "step": 1440 }, { "epoch": 1.966831318745799, "grad_norm": 0.20237718741666783, "learning_rate": 1.7894399988070804e-05, "loss": 0.3437, "num_tokens": 1099577923.0, "step": 1441 }, { "epoch": 1.9681968997044796, "grad_norm": 0.19323889990176754, "learning_rate": 1.7891203720134846e-05, "loss": 0.3343, "num_tokens": 1100345883.0, "step": 1442 }, { "epoch": 1.9695624806631602, "grad_norm": 0.17799084938808096, "learning_rate": 1.788800534999107e-05, "loss": 0.3367, "num_tokens": 1101023823.0, "step": 1443 }, { "epoch": 1.9709280616218408, "grad_norm": 0.22052593881454585, "learning_rate": 1.7884804878615153e-05, "loss": 0.3518, "num_tokens": 1101822081.0, "step": 1444 }, { "epoch": 1.9722936425805213, "grad_norm": 0.16561302725627677, "learning_rate": 1.7881602306983423e-05, "loss": 0.3294, "num_tokens": 1102662227.0, "step": 1445 }, { "epoch": 1.9736592235392019, "grad_norm": 0.19423322388077277, "learning_rate": 1.7878397636072846e-05, "loss": 0.3456, "num_tokens": 1103413966.0, "step": 1446 }, { "epoch": 1.9750248044978824, "grad_norm": 0.19888086294560858, "learning_rate": 1.7875190866861027e-05, "loss": 0.3289, "num_tokens": 1104141719.0, "step": 1447 }, { "epoch": 1.9763903854565628, "grad_norm": 0.1942868085005814, "learning_rate": 1.7871982000326214e-05, "loss": 0.3388, "num_tokens": 1104907981.0, "step": 1448 }, { "epoch": 1.9777559664152433, "grad_norm": 0.20019352253834014, "learning_rate": 1.7868771037447298e-05, "loss": 0.3479, "num_tokens": 1105658789.0, "step": 1449 }, { "epoch": 1.9791215473739237, "grad_norm": 0.18434046637128346, "learning_rate": 1.7865557979203795e-05, "loss": 0.3244, "num_tokens": 1106411334.0, "step": 1450 }, { "epoch": 1.9804871283326042, "grad_norm": 0.20166965159632833, "learning_rate": 1.7862342826575877e-05, "loss": 0.3504, "num_tokens": 1107196407.0, "step": 1451 }, { "epoch": 1.9818527092912848, "grad_norm": 0.17556391549314052, "learning_rate": 1.785912558054434e-05, "loss": 0.3487, "num_tokens": 1107970681.0, "step": 1452 }, { "epoch": 1.9832182902499653, "grad_norm": 0.22181043820844654, "learning_rate": 1.785590624209064e-05, "loss": 0.3418, "num_tokens": 1108752574.0, "step": 1453 }, { "epoch": 1.9845838712086459, "grad_norm": 0.18071141709683847, "learning_rate": 1.7852684812196843e-05, "loss": 0.3407, "num_tokens": 1109515949.0, "step": 1454 }, { "epoch": 1.9859494521673264, "grad_norm": 0.22858368979083815, "learning_rate": 1.7849461291845675e-05, "loss": 0.3491, "num_tokens": 1110352057.0, "step": 1455 }, { "epoch": 1.987315033126007, "grad_norm": 0.1904827435339122, "learning_rate": 1.7846235682020495e-05, "loss": 0.3487, "num_tokens": 1111038880.0, "step": 1456 }, { "epoch": 1.9886806140846873, "grad_norm": 0.21938854691078485, "learning_rate": 1.7843007983705292e-05, "loss": 0.3461, "num_tokens": 1111785922.0, "step": 1457 }, { "epoch": 1.9900461950433679, "grad_norm": 0.1953156546514855, "learning_rate": 1.7839778197884696e-05, "loss": 0.3412, "num_tokens": 1112590900.0, "step": 1458 }, { "epoch": 1.9914117760020482, "grad_norm": 0.20727646574345288, "learning_rate": 1.783654632554398e-05, "loss": 0.3424, "num_tokens": 1113357660.0, "step": 1459 }, { "epoch": 1.9927773569607288, "grad_norm": 0.18901603292721011, "learning_rate": 1.7833312367669047e-05, "loss": 0.3393, "num_tokens": 1114126554.0, "step": 1460 }, { "epoch": 1.9941429379194093, "grad_norm": 0.1734216241668799, "learning_rate": 1.7830076325246437e-05, "loss": 0.3335, "num_tokens": 1114877291.0, "step": 1461 }, { "epoch": 1.9955085188780899, "grad_norm": 0.17588500008733202, "learning_rate": 1.7826838199263323e-05, "loss": 0.327, "num_tokens": 1115672422.0, "step": 1462 }, { "epoch": 1.9968740998367704, "grad_norm": 0.18510264401799617, "learning_rate": 1.782359799070752e-05, "loss": 0.3425, "num_tokens": 1116443311.0, "step": 1463 }, { "epoch": 1.998239680795451, "grad_norm": 0.1778572692496335, "learning_rate": 1.7820355700567476e-05, "loss": 0.3466, "num_tokens": 1117179618.0, "step": 1464 }, { "epoch": 1.9996052617541316, "grad_norm": 0.20439157694766044, "learning_rate": 1.781711132983227e-05, "loss": 0.3366, "num_tokens": 1117875132.0, "step": 1465 }, { "epoch": 2.0, "grad_norm": 0.20439157694766044, "learning_rate": 1.7813864879491623e-05, "loss": 0.3518, "num_tokens": 1118084272.0, "step": 1466 }, { "epoch": 2.0013655809586806, "grad_norm": 0.378984760095693, "learning_rate": 1.7810616350535883e-05, "loss": 0.3097, "num_tokens": 1118868163.0, "step": 1467 }, { "epoch": 2.002731161917361, "grad_norm": 0.2640621758362693, "learning_rate": 1.780736574395604e-05, "loss": 0.2975, "num_tokens": 1119643637.0, "step": 1468 }, { "epoch": 2.0040967428760417, "grad_norm": 0.22581722400585014, "learning_rate": 1.780411306074371e-05, "loss": 0.3011, "num_tokens": 1120381599.0, "step": 1469 }, { "epoch": 2.0054623238347222, "grad_norm": 0.23257444807994682, "learning_rate": 1.7800858301891148e-05, "loss": 0.3033, "num_tokens": 1121151170.0, "step": 1470 }, { "epoch": 2.0068279047934023, "grad_norm": 0.29324911922030766, "learning_rate": 1.7797601468391236e-05, "loss": 0.3032, "num_tokens": 1121915108.0, "step": 1471 }, { "epoch": 2.008193485752083, "grad_norm": 0.24582390505301388, "learning_rate": 1.779434256123749e-05, "loss": 0.2916, "num_tokens": 1122662888.0, "step": 1472 }, { "epoch": 2.0095590667107635, "grad_norm": 0.20767418342239186, "learning_rate": 1.7791081581424074e-05, "loss": 0.299, "num_tokens": 1123450898.0, "step": 1473 }, { "epoch": 2.010924647669444, "grad_norm": 0.24061084540292718, "learning_rate": 1.778781852994576e-05, "loss": 0.3151, "num_tokens": 1124287336.0, "step": 1474 }, { "epoch": 2.0122902286281246, "grad_norm": 0.19464234188575347, "learning_rate": 1.778455340779797e-05, "loss": 0.2924, "num_tokens": 1125036585.0, "step": 1475 }, { "epoch": 2.013655809586805, "grad_norm": 0.1970460782928974, "learning_rate": 1.7781286215976746e-05, "loss": 0.3152, "num_tokens": 1125847140.0, "step": 1476 }, { "epoch": 2.0150213905454857, "grad_norm": 0.19148437894811834, "learning_rate": 1.7778016955478774e-05, "loss": 0.3083, "num_tokens": 1126599912.0, "step": 1477 }, { "epoch": 2.0163869715041662, "grad_norm": 0.23189445798120906, "learning_rate": 1.777474562730136e-05, "loss": 0.3037, "num_tokens": 1127431146.0, "step": 1478 }, { "epoch": 2.017752552462847, "grad_norm": 0.20899589593357878, "learning_rate": 1.777147223244244e-05, "loss": 0.2978, "num_tokens": 1128139535.0, "step": 1479 }, { "epoch": 2.019118133421527, "grad_norm": 0.20722180758078915, "learning_rate": 1.7768196771900596e-05, "loss": 0.2998, "num_tokens": 1128817824.0, "step": 1480 }, { "epoch": 2.0204837143802075, "grad_norm": 0.21459711140754276, "learning_rate": 1.7764919246675017e-05, "loss": 0.2936, "num_tokens": 1129519989.0, "step": 1481 }, { "epoch": 2.021849295338888, "grad_norm": 0.20640922068068807, "learning_rate": 1.7761639657765542e-05, "loss": 0.3071, "num_tokens": 1130352398.0, "step": 1482 }, { "epoch": 2.0232148762975686, "grad_norm": 0.2050668006547895, "learning_rate": 1.7758358006172628e-05, "loss": 0.313, "num_tokens": 1131122937.0, "step": 1483 }, { "epoch": 2.024580457256249, "grad_norm": 0.21239057980054127, "learning_rate": 1.7755074292897367e-05, "loss": 0.2939, "num_tokens": 1131797648.0, "step": 1484 }, { "epoch": 2.0259460382149297, "grad_norm": 0.2094454594661445, "learning_rate": 1.7751788518941476e-05, "loss": 0.2955, "num_tokens": 1132611499.0, "step": 1485 }, { "epoch": 2.0273116191736102, "grad_norm": 0.1971088257589147, "learning_rate": 1.7748500685307305e-05, "loss": 0.3051, "num_tokens": 1133394765.0, "step": 1486 }, { "epoch": 2.028677200132291, "grad_norm": 0.22310991391117552, "learning_rate": 1.7745210792997824e-05, "loss": 0.3149, "num_tokens": 1134180473.0, "step": 1487 }, { "epoch": 2.0300427810909714, "grad_norm": 0.18757419047144772, "learning_rate": 1.7741918843016642e-05, "loss": 0.3167, "num_tokens": 1134952514.0, "step": 1488 }, { "epoch": 2.0314083620496515, "grad_norm": 0.20468953171221313, "learning_rate": 1.7738624836367988e-05, "loss": 0.317, "num_tokens": 1135780919.0, "step": 1489 }, { "epoch": 2.032773943008332, "grad_norm": 0.20079980582375928, "learning_rate": 1.7735328774056725e-05, "loss": 0.2962, "num_tokens": 1136597576.0, "step": 1490 }, { "epoch": 2.0341395239670126, "grad_norm": 0.198013350097171, "learning_rate": 1.773203065708833e-05, "loss": 0.3019, "num_tokens": 1137372645.0, "step": 1491 }, { "epoch": 2.035505104925693, "grad_norm": 0.19362761099290093, "learning_rate": 1.772873048646893e-05, "loss": 0.3008, "num_tokens": 1138161263.0, "step": 1492 }, { "epoch": 2.0368706858843737, "grad_norm": 0.23347834819424357, "learning_rate": 1.7725428263205244e-05, "loss": 0.3192, "num_tokens": 1138946272.0, "step": 1493 }, { "epoch": 2.0382362668430543, "grad_norm": 0.22842501503438037, "learning_rate": 1.7722123988304654e-05, "loss": 0.3006, "num_tokens": 1139664557.0, "step": 1494 }, { "epoch": 2.039601847801735, "grad_norm": 0.17409942401504142, "learning_rate": 1.7718817662775147e-05, "loss": 0.2996, "num_tokens": 1140406072.0, "step": 1495 }, { "epoch": 2.0409674287604154, "grad_norm": 0.188789890262592, "learning_rate": 1.7715509287625333e-05, "loss": 0.2941, "num_tokens": 1141156031.0, "step": 1496 }, { "epoch": 2.042333009719096, "grad_norm": 0.1823414480332087, "learning_rate": 1.7712198863864465e-05, "loss": 0.2946, "num_tokens": 1141864061.0, "step": 1497 }, { "epoch": 2.043698590677776, "grad_norm": 0.21232879446319053, "learning_rate": 1.77088863925024e-05, "loss": 0.3041, "num_tokens": 1142600217.0, "step": 1498 }, { "epoch": 2.0450641716364566, "grad_norm": 0.1882634178488866, "learning_rate": 1.7705571874549635e-05, "loss": 0.3032, "num_tokens": 1143355489.0, "step": 1499 }, { "epoch": 2.046429752595137, "grad_norm": 0.19855595816415186, "learning_rate": 1.7702255311017286e-05, "loss": 0.3018, "num_tokens": 1144092999.0, "step": 1500 }, { "epoch": 2.0477953335538177, "grad_norm": 0.20322239670781705, "learning_rate": 1.7698936702917087e-05, "loss": 0.309, "num_tokens": 1144978396.0, "step": 1501 }, { "epoch": 2.0491609145124983, "grad_norm": 0.17967185800937913, "learning_rate": 1.769561605126141e-05, "loss": 0.3099, "num_tokens": 1145779524.0, "step": 1502 }, { "epoch": 2.050526495471179, "grad_norm": 0.200849557148035, "learning_rate": 1.7692293357063243e-05, "loss": 0.3195, "num_tokens": 1146581057.0, "step": 1503 }, { "epoch": 2.0518920764298594, "grad_norm": 0.18978824196135377, "learning_rate": 1.7688968621336186e-05, "loss": 0.3007, "num_tokens": 1147314748.0, "step": 1504 }, { "epoch": 2.05325765738854, "grad_norm": 0.19017984236375973, "learning_rate": 1.7685641845094476e-05, "loss": 0.3024, "num_tokens": 1148057925.0, "step": 1505 }, { "epoch": 2.0546232383472205, "grad_norm": 0.20010666869913468, "learning_rate": 1.7682313029352972e-05, "loss": 0.293, "num_tokens": 1148809488.0, "step": 1506 }, { "epoch": 2.0559888193059006, "grad_norm": 0.17567069348537093, "learning_rate": 1.767898217512715e-05, "loss": 0.2951, "num_tokens": 1149529398.0, "step": 1507 }, { "epoch": 2.057354400264581, "grad_norm": 0.2183021900390405, "learning_rate": 1.7675649283433106e-05, "loss": 0.308, "num_tokens": 1150279434.0, "step": 1508 }, { "epoch": 2.0587199812232617, "grad_norm": 0.1957513529284227, "learning_rate": 1.767231435528757e-05, "loss": 0.3062, "num_tokens": 1151052985.0, "step": 1509 }, { "epoch": 2.0600855621819423, "grad_norm": 0.1909157754813959, "learning_rate": 1.766897739170787e-05, "loss": 0.3023, "num_tokens": 1151792037.0, "step": 1510 }, { "epoch": 2.061451143140623, "grad_norm": 0.2004641695412717, "learning_rate": 1.7665638393711976e-05, "loss": 0.3173, "num_tokens": 1152561644.0, "step": 1511 }, { "epoch": 2.0628167240993034, "grad_norm": 0.20399169084360003, "learning_rate": 1.7662297362318476e-05, "loss": 0.313, "num_tokens": 1153487062.0, "step": 1512 }, { "epoch": 2.064182305057984, "grad_norm": 0.19565431384966142, "learning_rate": 1.7658954298546566e-05, "loss": 0.2973, "num_tokens": 1154270702.0, "step": 1513 }, { "epoch": 2.0655478860166645, "grad_norm": 0.21373902807733913, "learning_rate": 1.765560920341608e-05, "loss": 0.2994, "num_tokens": 1155034975.0, "step": 1514 }, { "epoch": 2.066913466975345, "grad_norm": 0.1893228737894599, "learning_rate": 1.7652262077947455e-05, "loss": 0.2991, "num_tokens": 1155797178.0, "step": 1515 }, { "epoch": 2.068279047934025, "grad_norm": 0.19730966089781782, "learning_rate": 1.764891292316175e-05, "loss": 0.3086, "num_tokens": 1156566344.0, "step": 1516 }, { "epoch": 2.0696446288927057, "grad_norm": 0.19229642037229025, "learning_rate": 1.7645561740080658e-05, "loss": 0.3023, "num_tokens": 1157328685.0, "step": 1517 }, { "epoch": 2.0710102098513863, "grad_norm": 0.18879758039314692, "learning_rate": 1.764220852972647e-05, "loss": 0.3108, "num_tokens": 1158098621.0, "step": 1518 }, { "epoch": 2.072375790810067, "grad_norm": 0.21871952260531508, "learning_rate": 1.763885329312211e-05, "loss": 0.2953, "num_tokens": 1158761210.0, "step": 1519 }, { "epoch": 2.0737413717687474, "grad_norm": 0.19695721818712614, "learning_rate": 1.7635496031291118e-05, "loss": 0.3117, "num_tokens": 1159550811.0, "step": 1520 }, { "epoch": 2.075106952727428, "grad_norm": 0.21311383262612893, "learning_rate": 1.763213674525764e-05, "loss": 0.3077, "num_tokens": 1160326563.0, "step": 1521 }, { "epoch": 2.0764725336861085, "grad_norm": 0.20409204890170182, "learning_rate": 1.762877543604646e-05, "loss": 0.2967, "num_tokens": 1161088340.0, "step": 1522 }, { "epoch": 2.077838114644789, "grad_norm": 0.19262208810561374, "learning_rate": 1.762541210468296e-05, "loss": 0.3218, "num_tokens": 1161847490.0, "step": 1523 }, { "epoch": 2.0792036956034696, "grad_norm": 0.22016797439713584, "learning_rate": 1.762204675219315e-05, "loss": 0.2948, "num_tokens": 1162546306.0, "step": 1524 }, { "epoch": 2.0805692765621497, "grad_norm": 0.5837846262899884, "learning_rate": 1.761867937960365e-05, "loss": 0.3115, "num_tokens": 1163336518.0, "step": 1525 }, { "epoch": 2.0819348575208303, "grad_norm": 0.22224571839733556, "learning_rate": 1.7615309987941705e-05, "loss": 0.2952, "num_tokens": 1164100514.0, "step": 1526 }, { "epoch": 2.083300438479511, "grad_norm": 0.19730989018900974, "learning_rate": 1.7611938578235167e-05, "loss": 0.302, "num_tokens": 1164823377.0, "step": 1527 }, { "epoch": 2.0846660194381914, "grad_norm": 0.200725181027616, "learning_rate": 1.7608565151512504e-05, "loss": 0.2993, "num_tokens": 1165628898.0, "step": 1528 }, { "epoch": 2.086031600396872, "grad_norm": 0.2235255616948425, "learning_rate": 1.760518970880281e-05, "loss": 0.3126, "num_tokens": 1166381108.0, "step": 1529 }, { "epoch": 2.0873971813555525, "grad_norm": 0.20152611880984028, "learning_rate": 1.7601812251135776e-05, "loss": 0.2993, "num_tokens": 1167159948.0, "step": 1530 }, { "epoch": 2.088762762314233, "grad_norm": 0.20238639208630643, "learning_rate": 1.7598432779541733e-05, "loss": 0.3031, "num_tokens": 1167897464.0, "step": 1531 }, { "epoch": 2.0901283432729136, "grad_norm": 0.18577616050319412, "learning_rate": 1.7595051295051594e-05, "loss": 0.3009, "num_tokens": 1168608133.0, "step": 1532 }, { "epoch": 2.091493924231594, "grad_norm": 0.1989350837375857, "learning_rate": 1.759166779869692e-05, "loss": 0.3062, "num_tokens": 1169426566.0, "step": 1533 }, { "epoch": 2.0928595051902743, "grad_norm": 0.22239839062230013, "learning_rate": 1.7588282291509858e-05, "loss": 0.3008, "num_tokens": 1170136846.0, "step": 1534 }, { "epoch": 2.094225086148955, "grad_norm": 0.1958811077056453, "learning_rate": 1.7584894774523185e-05, "loss": 0.3132, "num_tokens": 1170893379.0, "step": 1535 }, { "epoch": 2.0955906671076354, "grad_norm": 0.21759500280643995, "learning_rate": 1.7581505248770278e-05, "loss": 0.3077, "num_tokens": 1171636982.0, "step": 1536 }, { "epoch": 2.096956248066316, "grad_norm": 0.224790581265609, "learning_rate": 1.7578113715285147e-05, "loss": 0.3259, "num_tokens": 1172436991.0, "step": 1537 }, { "epoch": 2.0983218290249965, "grad_norm": 0.20085084270047168, "learning_rate": 1.7574720175102392e-05, "loss": 0.2975, "num_tokens": 1173197965.0, "step": 1538 }, { "epoch": 2.099687409983677, "grad_norm": 0.20297960791562641, "learning_rate": 1.757132462925724e-05, "loss": 0.3093, "num_tokens": 1174004921.0, "step": 1539 }, { "epoch": 2.1010529909423576, "grad_norm": 0.2199653782129971, "learning_rate": 1.756792707878552e-05, "loss": 0.3095, "num_tokens": 1174717386.0, "step": 1540 }, { "epoch": 2.102418571901038, "grad_norm": 0.21154837187725112, "learning_rate": 1.7564527524723685e-05, "loss": 0.3028, "num_tokens": 1175500615.0, "step": 1541 }, { "epoch": 2.1037841528597188, "grad_norm": 0.19899016156145555, "learning_rate": 1.7561125968108787e-05, "loss": 0.299, "num_tokens": 1176259304.0, "step": 1542 }, { "epoch": 2.105149733818399, "grad_norm": 0.20454042291989116, "learning_rate": 1.7557722409978495e-05, "loss": 0.3069, "num_tokens": 1177018979.0, "step": 1543 }, { "epoch": 2.1065153147770794, "grad_norm": 0.19512931370978842, "learning_rate": 1.7554316851371083e-05, "loss": 0.3011, "num_tokens": 1177743745.0, "step": 1544 }, { "epoch": 2.10788089573576, "grad_norm": 0.2084311804297861, "learning_rate": 1.755090929332545e-05, "loss": 0.3011, "num_tokens": 1178499475.0, "step": 1545 }, { "epoch": 2.1092464766944405, "grad_norm": 0.1908326879518676, "learning_rate": 1.754749973688108e-05, "loss": 0.3126, "num_tokens": 1179308225.0, "step": 1546 }, { "epoch": 2.110612057653121, "grad_norm": 0.21271843018130182, "learning_rate": 1.754408818307809e-05, "loss": 0.2967, "num_tokens": 1180078786.0, "step": 1547 }, { "epoch": 2.1119776386118017, "grad_norm": 0.2101363786054184, "learning_rate": 1.75406746329572e-05, "loss": 0.3186, "num_tokens": 1180893609.0, "step": 1548 }, { "epoch": 2.113343219570482, "grad_norm": 0.18976509993678678, "learning_rate": 1.7537259087559727e-05, "loss": 0.3132, "num_tokens": 1181647251.0, "step": 1549 }, { "epoch": 2.1147088005291628, "grad_norm": 0.21991550872802687, "learning_rate": 1.7533841547927616e-05, "loss": 0.2997, "num_tokens": 1182372341.0, "step": 1550 }, { "epoch": 2.1160743814878433, "grad_norm": 0.18699414863598035, "learning_rate": 1.75304220151034e-05, "loss": 0.3053, "num_tokens": 1183165313.0, "step": 1551 }, { "epoch": 2.1174399624465234, "grad_norm": 0.19978010577754518, "learning_rate": 1.7527000490130238e-05, "loss": 0.3158, "num_tokens": 1183867519.0, "step": 1552 }, { "epoch": 2.118805543405204, "grad_norm": 0.21520606221239402, "learning_rate": 1.7523576974051887e-05, "loss": 0.3211, "num_tokens": 1184646914.0, "step": 1553 }, { "epoch": 2.1201711243638846, "grad_norm": 0.20855571815746415, "learning_rate": 1.7520151467912714e-05, "loss": 0.3081, "num_tokens": 1185371483.0, "step": 1554 }, { "epoch": 2.121536705322565, "grad_norm": 0.19249136333229466, "learning_rate": 1.751672397275769e-05, "loss": 0.3179, "num_tokens": 1186092583.0, "step": 1555 }, { "epoch": 2.1229022862812457, "grad_norm": 0.2066034868930569, "learning_rate": 1.75132944896324e-05, "loss": 0.3159, "num_tokens": 1186919447.0, "step": 1556 }, { "epoch": 2.124267867239926, "grad_norm": 0.19367397483400037, "learning_rate": 1.7509863019583028e-05, "loss": 0.3005, "num_tokens": 1187619757.0, "step": 1557 }, { "epoch": 2.125633448198607, "grad_norm": 0.2125593134694305, "learning_rate": 1.7506429563656365e-05, "loss": 0.2932, "num_tokens": 1188367092.0, "step": 1558 }, { "epoch": 2.1269990291572873, "grad_norm": 0.18227705087469964, "learning_rate": 1.750299412289981e-05, "loss": 0.2842, "num_tokens": 1189046025.0, "step": 1559 }, { "epoch": 2.128364610115968, "grad_norm": 0.19563572842176388, "learning_rate": 1.749955669836137e-05, "loss": 0.3019, "num_tokens": 1189811291.0, "step": 1560 }, { "epoch": 2.129730191074648, "grad_norm": 0.2042754187933906, "learning_rate": 1.7496117291089655e-05, "loss": 0.3266, "num_tokens": 1190641692.0, "step": 1561 }, { "epoch": 2.1310957720333286, "grad_norm": 0.18900395377253343, "learning_rate": 1.7492675902133875e-05, "loss": 0.2998, "num_tokens": 1191389977.0, "step": 1562 }, { "epoch": 2.132461352992009, "grad_norm": 0.18050829606039712, "learning_rate": 1.7489232532543854e-05, "loss": 0.3024, "num_tokens": 1192157715.0, "step": 1563 }, { "epoch": 2.1338269339506897, "grad_norm": 0.21298724928791526, "learning_rate": 1.7485787183370008e-05, "loss": 0.2911, "num_tokens": 1192861461.0, "step": 1564 }, { "epoch": 2.1351925149093702, "grad_norm": 0.18663028336315485, "learning_rate": 1.7482339855663373e-05, "loss": 0.3052, "num_tokens": 1193585101.0, "step": 1565 }, { "epoch": 2.136558095868051, "grad_norm": 0.18638920384754878, "learning_rate": 1.747889055047557e-05, "loss": 0.2981, "num_tokens": 1194398715.0, "step": 1566 }, { "epoch": 2.1379236768267313, "grad_norm": 0.18299159205711685, "learning_rate": 1.747543926885884e-05, "loss": 0.2997, "num_tokens": 1195098929.0, "step": 1567 }, { "epoch": 2.139289257785412, "grad_norm": 0.19668850772092272, "learning_rate": 1.7471986011866017e-05, "loss": 0.3049, "num_tokens": 1195892171.0, "step": 1568 }, { "epoch": 2.1406548387440925, "grad_norm": 0.19406109983595884, "learning_rate": 1.7468530780550537e-05, "loss": 0.2986, "num_tokens": 1196738073.0, "step": 1569 }, { "epoch": 2.1420204197027726, "grad_norm": 0.2013969570263123, "learning_rate": 1.7465073575966445e-05, "loss": 0.3001, "num_tokens": 1197411356.0, "step": 1570 }, { "epoch": 2.143386000661453, "grad_norm": 0.19124986157896903, "learning_rate": 1.7461614399168383e-05, "loss": 0.3143, "num_tokens": 1198278619.0, "step": 1571 }, { "epoch": 2.1447515816201337, "grad_norm": 0.1970164319044843, "learning_rate": 1.7458153251211595e-05, "loss": 0.3047, "num_tokens": 1199040956.0, "step": 1572 }, { "epoch": 2.1461171625788142, "grad_norm": 0.1926111711825931, "learning_rate": 1.745469013315193e-05, "loss": 0.2959, "num_tokens": 1199756148.0, "step": 1573 }, { "epoch": 2.147482743537495, "grad_norm": 0.20020157635562233, "learning_rate": 1.7451225046045838e-05, "loss": 0.3163, "num_tokens": 1200538061.0, "step": 1574 }, { "epoch": 2.1488483244961754, "grad_norm": 0.19638328177129943, "learning_rate": 1.744775799095036e-05, "loss": 0.3015, "num_tokens": 1201316915.0, "step": 1575 }, { "epoch": 2.150213905454856, "grad_norm": 0.18862844610352159, "learning_rate": 1.744428896892315e-05, "loss": 0.2992, "num_tokens": 1202098701.0, "step": 1576 }, { "epoch": 2.1515794864135365, "grad_norm": 0.20415456495265924, "learning_rate": 1.7440817981022447e-05, "loss": 0.3151, "num_tokens": 1202893733.0, "step": 1577 }, { "epoch": 2.152945067372217, "grad_norm": 0.1972198785915502, "learning_rate": 1.7437345028307112e-05, "loss": 0.3095, "num_tokens": 1203657787.0, "step": 1578 }, { "epoch": 2.154310648330897, "grad_norm": 0.21494962523549674, "learning_rate": 1.743387011183659e-05, "loss": 0.3123, "num_tokens": 1204430801.0, "step": 1579 }, { "epoch": 2.1556762292895777, "grad_norm": 0.19837030966970584, "learning_rate": 1.7430393232670917e-05, "loss": 0.3083, "num_tokens": 1205279288.0, "step": 1580 }, { "epoch": 2.1570418102482583, "grad_norm": 0.20019114996411247, "learning_rate": 1.742691439187075e-05, "loss": 0.2912, "num_tokens": 1206006243.0, "step": 1581 }, { "epoch": 2.158407391206939, "grad_norm": 0.20122898432263508, "learning_rate": 1.7423433590497324e-05, "loss": 0.2999, "num_tokens": 1206698774.0, "step": 1582 }, { "epoch": 2.1597729721656194, "grad_norm": 0.18935202120348568, "learning_rate": 1.7419950829612487e-05, "loss": 0.3137, "num_tokens": 1207492312.0, "step": 1583 }, { "epoch": 2.1611385531243, "grad_norm": 0.20394416353624736, "learning_rate": 1.7416466110278677e-05, "loss": 0.3107, "num_tokens": 1208340905.0, "step": 1584 }, { "epoch": 2.1625041340829805, "grad_norm": 0.20193750752191297, "learning_rate": 1.741297943355893e-05, "loss": 0.3004, "num_tokens": 1209060695.0, "step": 1585 }, { "epoch": 2.163869715041661, "grad_norm": 0.19997651054914212, "learning_rate": 1.7409490800516882e-05, "loss": 0.2969, "num_tokens": 1209811137.0, "step": 1586 }, { "epoch": 2.1652352960003416, "grad_norm": 0.19984011162803272, "learning_rate": 1.740600021221676e-05, "loss": 0.3131, "num_tokens": 1210503848.0, "step": 1587 }, { "epoch": 2.1666008769590217, "grad_norm": 0.1865080080772543, "learning_rate": 1.7402507669723392e-05, "loss": 0.3055, "num_tokens": 1211333338.0, "step": 1588 }, { "epoch": 2.1679664579177023, "grad_norm": 0.21849652876266337, "learning_rate": 1.7399013174102208e-05, "loss": 0.3079, "num_tokens": 1212059086.0, "step": 1589 }, { "epoch": 2.169332038876383, "grad_norm": 0.19010318637060003, "learning_rate": 1.7395516726419217e-05, "loss": 0.308, "num_tokens": 1212804267.0, "step": 1590 }, { "epoch": 2.1706976198350634, "grad_norm": 0.21516131046962358, "learning_rate": 1.739201832774104e-05, "loss": 0.3293, "num_tokens": 1213615914.0, "step": 1591 }, { "epoch": 2.172063200793744, "grad_norm": 0.17783115271461442, "learning_rate": 1.738851797913489e-05, "loss": 0.3098, "num_tokens": 1214404658.0, "step": 1592 }, { "epoch": 2.1734287817524245, "grad_norm": 0.1983888448186577, "learning_rate": 1.7385015681668563e-05, "loss": 0.3195, "num_tokens": 1215201452.0, "step": 1593 }, { "epoch": 2.174794362711105, "grad_norm": 0.20797598730458372, "learning_rate": 1.7381511436410463e-05, "loss": 0.3002, "num_tokens": 1215897206.0, "step": 1594 }, { "epoch": 2.1761599436697856, "grad_norm": 0.1919074603999815, "learning_rate": 1.7378005244429587e-05, "loss": 0.2969, "num_tokens": 1216639890.0, "step": 1595 }, { "epoch": 2.177525524628466, "grad_norm": 0.1942675409140386, "learning_rate": 1.7374497106795516e-05, "loss": 0.3147, "num_tokens": 1217393107.0, "step": 1596 }, { "epoch": 2.1788911055871463, "grad_norm": 0.2050117570320177, "learning_rate": 1.7370987024578438e-05, "loss": 0.3154, "num_tokens": 1218117690.0, "step": 1597 }, { "epoch": 2.180256686545827, "grad_norm": 0.18749249912544938, "learning_rate": 1.736747499884912e-05, "loss": 0.3241, "num_tokens": 1218954238.0, "step": 1598 }, { "epoch": 2.1816222675045074, "grad_norm": 0.18688078960573537, "learning_rate": 1.736396103067893e-05, "loss": 0.2895, "num_tokens": 1219750936.0, "step": 1599 }, { "epoch": 2.182987848463188, "grad_norm": 0.19025568548952487, "learning_rate": 1.736044512113983e-05, "loss": 0.3072, "num_tokens": 1220509174.0, "step": 1600 }, { "epoch": 2.1843534294218685, "grad_norm": 0.18586090830660376, "learning_rate": 1.7356927271304372e-05, "loss": 0.3045, "num_tokens": 1221262257.0, "step": 1601 }, { "epoch": 2.185719010380549, "grad_norm": 0.1896106330865507, "learning_rate": 1.7353407482245698e-05, "loss": 0.2897, "num_tokens": 1221994116.0, "step": 1602 }, { "epoch": 2.1870845913392296, "grad_norm": 0.1787010912940222, "learning_rate": 1.7349885755037546e-05, "loss": 0.2924, "num_tokens": 1222697836.0, "step": 1603 }, { "epoch": 2.18845017229791, "grad_norm": 0.2033940100402742, "learning_rate": 1.7346362090754237e-05, "loss": 0.2945, "num_tokens": 1223480523.0, "step": 1604 }, { "epoch": 2.1898157532565907, "grad_norm": 0.20025247514148492, "learning_rate": 1.7342836490470692e-05, "loss": 0.3027, "num_tokens": 1224216506.0, "step": 1605 }, { "epoch": 2.191181334215271, "grad_norm": 0.20670234173746446, "learning_rate": 1.733930895526242e-05, "loss": 0.2986, "num_tokens": 1224956603.0, "step": 1606 }, { "epoch": 2.1925469151739514, "grad_norm": 0.1822251451955554, "learning_rate": 1.733577948620552e-05, "loss": 0.3115, "num_tokens": 1225770045.0, "step": 1607 }, { "epoch": 2.193912496132632, "grad_norm": 0.18229291883434315, "learning_rate": 1.7332248084376677e-05, "loss": 0.3036, "num_tokens": 1226504346.0, "step": 1608 }, { "epoch": 2.1952780770913125, "grad_norm": 0.18697885462128633, "learning_rate": 1.7328714750853166e-05, "loss": 0.3116, "num_tokens": 1227269905.0, "step": 1609 }, { "epoch": 2.196643658049993, "grad_norm": 0.19786720943895045, "learning_rate": 1.732517948671286e-05, "loss": 0.2941, "num_tokens": 1228014620.0, "step": 1610 }, { "epoch": 2.1980092390086736, "grad_norm": 0.1906529325901666, "learning_rate": 1.7321642293034215e-05, "loss": 0.3183, "num_tokens": 1228827411.0, "step": 1611 }, { "epoch": 2.199374819967354, "grad_norm": 0.19223377948122866, "learning_rate": 1.731810317089627e-05, "loss": 0.2996, "num_tokens": 1229550333.0, "step": 1612 }, { "epoch": 2.2007404009260347, "grad_norm": 0.19067789112880787, "learning_rate": 1.7314562121378664e-05, "loss": 0.3065, "num_tokens": 1230246583.0, "step": 1613 }, { "epoch": 2.2021059818847153, "grad_norm": 0.19948609741612505, "learning_rate": 1.7311019145561615e-05, "loss": 0.306, "num_tokens": 1231011477.0, "step": 1614 }, { "epoch": 2.2034715628433954, "grad_norm": 0.2154389142213592, "learning_rate": 1.730747424452593e-05, "loss": 0.3183, "num_tokens": 1231789421.0, "step": 1615 }, { "epoch": 2.204837143802076, "grad_norm": 0.2144109103560242, "learning_rate": 1.7303927419353008e-05, "loss": 0.3096, "num_tokens": 1232553357.0, "step": 1616 }, { "epoch": 2.2062027247607565, "grad_norm": 0.18616152611126027, "learning_rate": 1.730037867112483e-05, "loss": 0.3048, "num_tokens": 1233326954.0, "step": 1617 }, { "epoch": 2.207568305719437, "grad_norm": 0.21542358680608134, "learning_rate": 1.7296828000923967e-05, "loss": 0.3088, "num_tokens": 1234112353.0, "step": 1618 }, { "epoch": 2.2089338866781176, "grad_norm": 0.18655179628888083, "learning_rate": 1.7293275409833572e-05, "loss": 0.3135, "num_tokens": 1234852535.0, "step": 1619 }, { "epoch": 2.210299467636798, "grad_norm": 0.2126899973965063, "learning_rate": 1.7289720898937384e-05, "loss": 0.3003, "num_tokens": 1235656199.0, "step": 1620 }, { "epoch": 2.2116650485954787, "grad_norm": 0.19741803874379485, "learning_rate": 1.728616446931974e-05, "loss": 0.2989, "num_tokens": 1236356066.0, "step": 1621 }, { "epoch": 2.2130306295541593, "grad_norm": 0.19424926932504472, "learning_rate": 1.7282606122065545e-05, "loss": 0.3196, "num_tokens": 1237138317.0, "step": 1622 }, { "epoch": 2.21439621051284, "grad_norm": 0.20728473533469957, "learning_rate": 1.72790458582603e-05, "loss": 0.3178, "num_tokens": 1237961751.0, "step": 1623 }, { "epoch": 2.21576179147152, "grad_norm": 0.20385084512263607, "learning_rate": 1.7275483678990085e-05, "loss": 0.3078, "num_tokens": 1238689402.0, "step": 1624 }, { "epoch": 2.2171273724302005, "grad_norm": 0.21621085657162664, "learning_rate": 1.7271919585341568e-05, "loss": 0.2948, "num_tokens": 1239455746.0, "step": 1625 }, { "epoch": 2.218492953388881, "grad_norm": 0.2020174550806951, "learning_rate": 1.7268353578401998e-05, "loss": 0.3059, "num_tokens": 1240189374.0, "step": 1626 }, { "epoch": 2.2198585343475616, "grad_norm": 0.2068754833803503, "learning_rate": 1.726478565925921e-05, "loss": 0.3138, "num_tokens": 1240951640.0, "step": 1627 }, { "epoch": 2.221224115306242, "grad_norm": 0.19063116738003477, "learning_rate": 1.7261215829001626e-05, "loss": 0.3007, "num_tokens": 1241720430.0, "step": 1628 }, { "epoch": 2.2225896962649228, "grad_norm": 0.2198151864235673, "learning_rate": 1.7257644088718237e-05, "loss": 0.313, "num_tokens": 1242548477.0, "step": 1629 }, { "epoch": 2.2239552772236033, "grad_norm": 0.1944092581464973, "learning_rate": 1.725407043949864e-05, "loss": 0.3023, "num_tokens": 1243335787.0, "step": 1630 }, { "epoch": 2.225320858182284, "grad_norm": 0.19795640152887986, "learning_rate": 1.7250494882432986e-05, "loss": 0.3163, "num_tokens": 1244055332.0, "step": 1631 }, { "epoch": 2.2266864391409644, "grad_norm": 0.1952205144872384, "learning_rate": 1.7246917418612033e-05, "loss": 0.3204, "num_tokens": 1244831964.0, "step": 1632 }, { "epoch": 2.2280520200996445, "grad_norm": 0.20040040043327595, "learning_rate": 1.7243338049127103e-05, "loss": 0.3131, "num_tokens": 1245681481.0, "step": 1633 }, { "epoch": 2.229417601058325, "grad_norm": 0.20349222040699316, "learning_rate": 1.7239756775070112e-05, "loss": 0.3098, "num_tokens": 1246501306.0, "step": 1634 }, { "epoch": 2.2307831820170057, "grad_norm": 0.18083847955470475, "learning_rate": 1.723617359753355e-05, "loss": 0.3001, "num_tokens": 1247262390.0, "step": 1635 }, { "epoch": 2.232148762975686, "grad_norm": 0.196535102643425, "learning_rate": 1.723258851761049e-05, "loss": 0.3035, "num_tokens": 1247979818.0, "step": 1636 }, { "epoch": 2.2335143439343668, "grad_norm": 0.22422620668025509, "learning_rate": 1.722900153639458e-05, "loss": 0.3104, "num_tokens": 1248698103.0, "step": 1637 }, { "epoch": 2.2348799248930473, "grad_norm": 0.19365652452317503, "learning_rate": 1.722541265498006e-05, "loss": 0.2936, "num_tokens": 1249361491.0, "step": 1638 }, { "epoch": 2.236245505851728, "grad_norm": 0.20718937873001325, "learning_rate": 1.7221821874461737e-05, "loss": 0.309, "num_tokens": 1250131505.0, "step": 1639 }, { "epoch": 2.2376110868104084, "grad_norm": 0.21751870221977251, "learning_rate": 1.7218229195935004e-05, "loss": 0.3091, "num_tokens": 1250833679.0, "step": 1640 }, { "epoch": 2.238976667769089, "grad_norm": 0.20252562127977577, "learning_rate": 1.7214634620495835e-05, "loss": 0.3088, "num_tokens": 1251607518.0, "step": 1641 }, { "epoch": 2.240342248727769, "grad_norm": 0.19491066411379782, "learning_rate": 1.7211038149240774e-05, "loss": 0.3091, "num_tokens": 1252444863.0, "step": 1642 }, { "epoch": 2.2417078296864497, "grad_norm": 0.1849233160627862, "learning_rate": 1.7207439783266952e-05, "loss": 0.3065, "num_tokens": 1253205956.0, "step": 1643 }, { "epoch": 2.24307341064513, "grad_norm": 0.21182690241234958, "learning_rate": 1.720383952367207e-05, "loss": 0.3217, "num_tokens": 1253956104.0, "step": 1644 }, { "epoch": 2.2444389916038108, "grad_norm": 0.18727600746545803, "learning_rate": 1.720023737155442e-05, "loss": 0.3064, "num_tokens": 1254688382.0, "step": 1645 }, { "epoch": 2.2458045725624913, "grad_norm": 0.21150420571988576, "learning_rate": 1.7196633328012856e-05, "loss": 0.2931, "num_tokens": 1255526705.0, "step": 1646 }, { "epoch": 2.247170153521172, "grad_norm": 0.18794664641355024, "learning_rate": 1.7193027394146812e-05, "loss": 0.3156, "num_tokens": 1256340114.0, "step": 1647 }, { "epoch": 2.2485357344798524, "grad_norm": 0.1816237566229095, "learning_rate": 1.7189419571056315e-05, "loss": 0.3119, "num_tokens": 1257082983.0, "step": 1648 }, { "epoch": 2.249901315438533, "grad_norm": 0.2066866483463397, "learning_rate": 1.718580985984194e-05, "loss": 0.311, "num_tokens": 1257865306.0, "step": 1649 }, { "epoch": 2.2512668963972136, "grad_norm": 0.19960621184870297, "learning_rate": 1.7182198261604866e-05, "loss": 0.3176, "num_tokens": 1258645258.0, "step": 1650 }, { "epoch": 2.2526324773558937, "grad_norm": 0.18158738690363585, "learning_rate": 1.7178584777446834e-05, "loss": 0.3004, "num_tokens": 1259425592.0, "step": 1651 }, { "epoch": 2.2539980583145742, "grad_norm": 0.19713885713576884, "learning_rate": 1.717496940847015e-05, "loss": 0.3002, "num_tokens": 1260204885.0, "step": 1652 }, { "epoch": 2.255363639273255, "grad_norm": 0.1947673036544335, "learning_rate": 1.717135215577772e-05, "loss": 0.3062, "num_tokens": 1260918033.0, "step": 1653 }, { "epoch": 2.2567292202319353, "grad_norm": 0.21048709810491506, "learning_rate": 1.7167733020473002e-05, "loss": 0.3134, "num_tokens": 1261661298.0, "step": 1654 }, { "epoch": 2.258094801190616, "grad_norm": 0.20406374159999827, "learning_rate": 1.7164112003660046e-05, "loss": 0.3162, "num_tokens": 1262460817.0, "step": 1655 }, { "epoch": 2.2594603821492965, "grad_norm": 0.19574851695131437, "learning_rate": 1.716048910644346e-05, "loss": 0.3195, "num_tokens": 1263196208.0, "step": 1656 }, { "epoch": 2.260825963107977, "grad_norm": 0.20838718341554965, "learning_rate": 1.7156864329928437e-05, "loss": 0.293, "num_tokens": 1263958686.0, "step": 1657 }, { "epoch": 2.2621915440666576, "grad_norm": 0.18974749214960016, "learning_rate": 1.715323767522074e-05, "loss": 0.3031, "num_tokens": 1264716705.0, "step": 1658 }, { "epoch": 2.263557125025338, "grad_norm": 0.19636076715119866, "learning_rate": 1.7149609143426697e-05, "loss": 0.3065, "num_tokens": 1265448868.0, "step": 1659 }, { "epoch": 2.2649227059840182, "grad_norm": 0.2201382594976657, "learning_rate": 1.7145978735653225e-05, "loss": 0.3029, "num_tokens": 1266217065.0, "step": 1660 }, { "epoch": 2.266288286942699, "grad_norm": 0.20426313353815884, "learning_rate": 1.7142346453007795e-05, "loss": 0.3021, "num_tokens": 1267028442.0, "step": 1661 }, { "epoch": 2.2676538679013793, "grad_norm": 0.20462336590489053, "learning_rate": 1.713871229659847e-05, "loss": 0.3008, "num_tokens": 1267765424.0, "step": 1662 }, { "epoch": 2.26901944886006, "grad_norm": 0.2117771807775812, "learning_rate": 1.713507626753387e-05, "loss": 0.2936, "num_tokens": 1268514972.0, "step": 1663 }, { "epoch": 2.2703850298187405, "grad_norm": 0.19396276361020615, "learning_rate": 1.7131438366923184e-05, "loss": 0.2952, "num_tokens": 1269174574.0, "step": 1664 }, { "epoch": 2.271750610777421, "grad_norm": 0.21589202532287316, "learning_rate": 1.7127798595876183e-05, "loss": 0.3061, "num_tokens": 1269979292.0, "step": 1665 }, { "epoch": 2.2731161917361016, "grad_norm": 0.19061958731902487, "learning_rate": 1.7124156955503208e-05, "loss": 0.3057, "num_tokens": 1270739608.0, "step": 1666 }, { "epoch": 2.274481772694782, "grad_norm": 0.20423152615671686, "learning_rate": 1.7120513446915154e-05, "loss": 0.2958, "num_tokens": 1271543309.0, "step": 1667 }, { "epoch": 2.2758473536534627, "grad_norm": 0.19432124546171314, "learning_rate": 1.711686807122351e-05, "loss": 0.2992, "num_tokens": 1272295795.0, "step": 1668 }, { "epoch": 2.277212934612143, "grad_norm": 0.1879319707203541, "learning_rate": 1.7113220829540313e-05, "loss": 0.3125, "num_tokens": 1273172358.0, "step": 1669 }, { "epoch": 2.2785785155708234, "grad_norm": 0.19337251202305897, "learning_rate": 1.710957172297818e-05, "loss": 0.3083, "num_tokens": 1273891520.0, "step": 1670 }, { "epoch": 2.279944096529504, "grad_norm": 0.18215176873970268, "learning_rate": 1.71059207526503e-05, "loss": 0.295, "num_tokens": 1274649585.0, "step": 1671 }, { "epoch": 2.2813096774881845, "grad_norm": 0.20089091204377357, "learning_rate": 1.710226791967042e-05, "loss": 0.3073, "num_tokens": 1275330787.0, "step": 1672 }, { "epoch": 2.282675258446865, "grad_norm": 0.18483141087785823, "learning_rate": 1.709861322515287e-05, "loss": 0.3197, "num_tokens": 1276185055.0, "step": 1673 }, { "epoch": 2.2840408394055456, "grad_norm": 0.20418536068109885, "learning_rate": 1.7094956670212527e-05, "loss": 0.3136, "num_tokens": 1276967152.0, "step": 1674 }, { "epoch": 2.285406420364226, "grad_norm": 0.18547875604168182, "learning_rate": 1.709129825596486e-05, "loss": 0.3085, "num_tokens": 1277682487.0, "step": 1675 }, { "epoch": 2.2867720013229067, "grad_norm": 0.21142835394399176, "learning_rate": 1.7087637983525883e-05, "loss": 0.2947, "num_tokens": 1278525929.0, "step": 1676 }, { "epoch": 2.2881375822815873, "grad_norm": 0.1770282166532555, "learning_rate": 1.7083975854012193e-05, "loss": 0.3118, "num_tokens": 1279252968.0, "step": 1677 }, { "epoch": 2.2895031632402674, "grad_norm": 0.19414659637115766, "learning_rate": 1.7080311868540943e-05, "loss": 0.3057, "num_tokens": 1280017395.0, "step": 1678 }, { "epoch": 2.290868744198948, "grad_norm": 0.18580560648037378, "learning_rate": 1.7076646028229857e-05, "loss": 0.3071, "num_tokens": 1280757734.0, "step": 1679 }, { "epoch": 2.2922343251576285, "grad_norm": 0.1966637819659975, "learning_rate": 1.707297833419723e-05, "loss": 0.3106, "num_tokens": 1281551240.0, "step": 1680 }, { "epoch": 2.293599906116309, "grad_norm": 0.20607549541357992, "learning_rate": 1.7069308787561905e-05, "loss": 0.3025, "num_tokens": 1282370339.0, "step": 1681 }, { "epoch": 2.2949654870749896, "grad_norm": 0.1713230446632435, "learning_rate": 1.7065637389443312e-05, "loss": 0.3155, "num_tokens": 1283142666.0, "step": 1682 }, { "epoch": 2.29633106803367, "grad_norm": 0.19393003440203835, "learning_rate": 1.7061964140961433e-05, "loss": 0.3016, "num_tokens": 1283883040.0, "step": 1683 }, { "epoch": 2.2976966489923507, "grad_norm": 0.20712994876831672, "learning_rate": 1.7058289043236813e-05, "loss": 0.3055, "num_tokens": 1284677952.0, "step": 1684 }, { "epoch": 2.2990622299510313, "grad_norm": 0.16819672682883605, "learning_rate": 1.7054612097390568e-05, "loss": 0.2974, "num_tokens": 1285506489.0, "step": 1685 }, { "epoch": 2.300427810909712, "grad_norm": 0.20247322645888335, "learning_rate": 1.7050933304544377e-05, "loss": 0.3081, "num_tokens": 1286221804.0, "step": 1686 }, { "epoch": 2.301793391868392, "grad_norm": 0.19824404538190513, "learning_rate": 1.7047252665820478e-05, "loss": 0.3032, "num_tokens": 1286988171.0, "step": 1687 }, { "epoch": 2.3031589728270725, "grad_norm": 0.20330454770350592, "learning_rate": 1.7043570182341672e-05, "loss": 0.3045, "num_tokens": 1287741158.0, "step": 1688 }, { "epoch": 2.304524553785753, "grad_norm": 0.20054945726315188, "learning_rate": 1.7039885855231333e-05, "loss": 0.3154, "num_tokens": 1288566326.0, "step": 1689 }, { "epoch": 2.3058901347444336, "grad_norm": 0.19127437395324873, "learning_rate": 1.7036199685613382e-05, "loss": 0.3151, "num_tokens": 1289324608.0, "step": 1690 }, { "epoch": 2.307255715703114, "grad_norm": 0.20730860967030984, "learning_rate": 1.7032511674612313e-05, "loss": 0.3129, "num_tokens": 1290115930.0, "step": 1691 }, { "epoch": 2.3086212966617947, "grad_norm": 0.198772882149762, "learning_rate": 1.7028821823353175e-05, "loss": 0.3026, "num_tokens": 1290858071.0, "step": 1692 }, { "epoch": 2.3099868776204753, "grad_norm": 0.21079257433506593, "learning_rate": 1.702513013296159e-05, "loss": 0.3015, "num_tokens": 1291671597.0, "step": 1693 }, { "epoch": 2.311352458579156, "grad_norm": 0.18442749390224808, "learning_rate": 1.7021436604563723e-05, "loss": 0.2976, "num_tokens": 1292397134.0, "step": 1694 }, { "epoch": 2.3127180395378364, "grad_norm": 0.19816470087034735, "learning_rate": 1.701774123928632e-05, "loss": 0.3099, "num_tokens": 1293140104.0, "step": 1695 }, { "epoch": 2.3140836204965165, "grad_norm": 0.20234045606228304, "learning_rate": 1.7014044038256664e-05, "loss": 0.3095, "num_tokens": 1293863467.0, "step": 1696 }, { "epoch": 2.315449201455197, "grad_norm": 0.19484575719690514, "learning_rate": 1.7010345002602622e-05, "loss": 0.2923, "num_tokens": 1294677183.0, "step": 1697 }, { "epoch": 2.3168147824138776, "grad_norm": 0.193949845702339, "learning_rate": 1.7006644133452607e-05, "loss": 0.3011, "num_tokens": 1295434715.0, "step": 1698 }, { "epoch": 2.318180363372558, "grad_norm": 0.19449065850452904, "learning_rate": 1.700294143193559e-05, "loss": 0.2929, "num_tokens": 1296182884.0, "step": 1699 }, { "epoch": 2.3195459443312387, "grad_norm": 0.19649819909504884, "learning_rate": 1.699923689918111e-05, "loss": 0.3062, "num_tokens": 1296858812.0, "step": 1700 }, { "epoch": 2.3209115252899193, "grad_norm": 0.21319153109177408, "learning_rate": 1.6995530536319255e-05, "loss": 0.3084, "num_tokens": 1297653607.0, "step": 1701 }, { "epoch": 2.3222771062486, "grad_norm": 0.19579320681527534, "learning_rate": 1.6991822344480675e-05, "loss": 0.31, "num_tokens": 1298372030.0, "step": 1702 }, { "epoch": 2.3236426872072804, "grad_norm": 0.19163136938177616, "learning_rate": 1.6988112324796586e-05, "loss": 0.316, "num_tokens": 1299189694.0, "step": 1703 }, { "epoch": 2.325008268165961, "grad_norm": 0.21259968765607098, "learning_rate": 1.6984400478398746e-05, "loss": 0.3149, "num_tokens": 1299875638.0, "step": 1704 }, { "epoch": 2.326373849124641, "grad_norm": 0.20177014424860873, "learning_rate": 1.698068680641949e-05, "loss": 0.3059, "num_tokens": 1300720094.0, "step": 1705 }, { "epoch": 2.3277394300833216, "grad_norm": 0.19593054888954822, "learning_rate": 1.697697130999168e-05, "loss": 0.3231, "num_tokens": 1301507197.0, "step": 1706 }, { "epoch": 2.329105011042002, "grad_norm": 0.21356463601493972, "learning_rate": 1.6973253990248774e-05, "loss": 0.3056, "num_tokens": 1302222817.0, "step": 1707 }, { "epoch": 2.3304705920006827, "grad_norm": 0.195159164538878, "learning_rate": 1.6969534848324747e-05, "loss": 0.3037, "num_tokens": 1302923451.0, "step": 1708 }, { "epoch": 2.3318361729593633, "grad_norm": 0.206143828763009, "learning_rate": 1.6965813885354163e-05, "loss": 0.3041, "num_tokens": 1303644790.0, "step": 1709 }, { "epoch": 2.333201753918044, "grad_norm": 0.20273519820856614, "learning_rate": 1.6962091102472115e-05, "loss": 0.316, "num_tokens": 1304429905.0, "step": 1710 }, { "epoch": 2.3345673348767244, "grad_norm": 0.1880606890411883, "learning_rate": 1.6958366500814268e-05, "loss": 0.3021, "num_tokens": 1305203401.0, "step": 1711 }, { "epoch": 2.335932915835405, "grad_norm": 0.20266948444705415, "learning_rate": 1.695464008151684e-05, "loss": 0.296, "num_tokens": 1305941214.0, "step": 1712 }, { "epoch": 2.3372984967940855, "grad_norm": 0.18955383428814435, "learning_rate": 1.6950911845716594e-05, "loss": 0.2993, "num_tokens": 1306690777.0, "step": 1713 }, { "epoch": 2.3386640777527656, "grad_norm": 0.19933509720989648, "learning_rate": 1.6947181794550858e-05, "loss": 0.317, "num_tokens": 1307446705.0, "step": 1714 }, { "epoch": 2.340029658711446, "grad_norm": 0.19156201042856566, "learning_rate": 1.6943449929157506e-05, "loss": 0.3023, "num_tokens": 1308219842.0, "step": 1715 }, { "epoch": 2.3413952396701267, "grad_norm": 0.20520868457329722, "learning_rate": 1.6939716250674973e-05, "loss": 0.3117, "num_tokens": 1308912931.0, "step": 1716 }, { "epoch": 2.3427608206288073, "grad_norm": 0.1992790805644641, "learning_rate": 1.6935980760242235e-05, "loss": 0.3133, "num_tokens": 1309677959.0, "step": 1717 }, { "epoch": 2.344126401587488, "grad_norm": 0.19540549884978065, "learning_rate": 1.693224345899883e-05, "loss": 0.3048, "num_tokens": 1310405001.0, "step": 1718 }, { "epoch": 2.3454919825461684, "grad_norm": 0.21243786664041073, "learning_rate": 1.6928504348084852e-05, "loss": 0.302, "num_tokens": 1311159861.0, "step": 1719 }, { "epoch": 2.346857563504849, "grad_norm": 0.20219654612213925, "learning_rate": 1.692476342864094e-05, "loss": 0.2964, "num_tokens": 1311827091.0, "step": 1720 }, { "epoch": 2.3482231444635295, "grad_norm": 0.19156045407198316, "learning_rate": 1.6921020701808285e-05, "loss": 0.3126, "num_tokens": 1312588648.0, "step": 1721 }, { "epoch": 2.34958872542221, "grad_norm": 0.21103019430205142, "learning_rate": 1.6917276168728634e-05, "loss": 0.3001, "num_tokens": 1313300228.0, "step": 1722 }, { "epoch": 2.35095430638089, "grad_norm": 0.19983071401172664, "learning_rate": 1.6913529830544275e-05, "loss": 0.2935, "num_tokens": 1313981114.0, "step": 1723 }, { "epoch": 2.3523198873395708, "grad_norm": 0.254239789527125, "learning_rate": 1.6909781688398055e-05, "loss": 0.3094, "num_tokens": 1314805436.0, "step": 1724 }, { "epoch": 2.3536854682982513, "grad_norm": 0.1781315176919803, "learning_rate": 1.6906031743433383e-05, "loss": 0.2986, "num_tokens": 1315615689.0, "step": 1725 }, { "epoch": 2.355051049256932, "grad_norm": 0.19337540572717118, "learning_rate": 1.6902279996794188e-05, "loss": 0.3061, "num_tokens": 1316340770.0, "step": 1726 }, { "epoch": 2.3564166302156124, "grad_norm": 0.20267254402251278, "learning_rate": 1.6898526449624972e-05, "loss": 0.3055, "num_tokens": 1317031406.0, "step": 1727 }, { "epoch": 2.357782211174293, "grad_norm": 0.17950527292320267, "learning_rate": 1.689477110307078e-05, "loss": 0.2986, "num_tokens": 1317733282.0, "step": 1728 }, { "epoch": 2.3591477921329735, "grad_norm": 0.19269285476691655, "learning_rate": 1.6891013958277207e-05, "loss": 0.2971, "num_tokens": 1318545253.0, "step": 1729 }, { "epoch": 2.360513373091654, "grad_norm": 0.19679638846635478, "learning_rate": 1.6887255016390396e-05, "loss": 0.3083, "num_tokens": 1319269525.0, "step": 1730 }, { "epoch": 2.3618789540503347, "grad_norm": 0.17335539450662207, "learning_rate": 1.688349427855703e-05, "loss": 0.2954, "num_tokens": 1320073829.0, "step": 1731 }, { "epoch": 2.3632445350090148, "grad_norm": 0.19406575782307683, "learning_rate": 1.6879731745924354e-05, "loss": 0.2931, "num_tokens": 1320819598.0, "step": 1732 }, { "epoch": 2.3646101159676953, "grad_norm": 0.18189585689578874, "learning_rate": 1.6875967419640148e-05, "loss": 0.3119, "num_tokens": 1321513625.0, "step": 1733 }, { "epoch": 2.365975696926376, "grad_norm": 0.18590180422552516, "learning_rate": 1.6872201300852754e-05, "loss": 0.3172, "num_tokens": 1322301532.0, "step": 1734 }, { "epoch": 2.3673412778850564, "grad_norm": 0.18158927744954478, "learning_rate": 1.6868433390711046e-05, "loss": 0.3144, "num_tokens": 1323133432.0, "step": 1735 }, { "epoch": 2.368706858843737, "grad_norm": 0.2110643341548387, "learning_rate": 1.6864663690364453e-05, "loss": 0.2924, "num_tokens": 1323861399.0, "step": 1736 }, { "epoch": 2.3700724398024176, "grad_norm": 0.18475461931198495, "learning_rate": 1.686089220096294e-05, "loss": 0.3035, "num_tokens": 1324562252.0, "step": 1737 }, { "epoch": 2.371438020761098, "grad_norm": 0.19228307601426683, "learning_rate": 1.685711892365703e-05, "loss": 0.3159, "num_tokens": 1325352214.0, "step": 1738 }, { "epoch": 2.3728036017197787, "grad_norm": 0.1964553078434741, "learning_rate": 1.6853343859597788e-05, "loss": 0.3015, "num_tokens": 1326106083.0, "step": 1739 }, { "epoch": 2.3741691826784592, "grad_norm": 0.1824549810438216, "learning_rate": 1.684956700993682e-05, "loss": 0.2959, "num_tokens": 1326884407.0, "step": 1740 }, { "epoch": 2.3755347636371393, "grad_norm": 0.1910958164305454, "learning_rate": 1.684578837582628e-05, "loss": 0.3087, "num_tokens": 1327620400.0, "step": 1741 }, { "epoch": 2.37690034459582, "grad_norm": 0.2069122557169906, "learning_rate": 1.6842007958418872e-05, "loss": 0.3102, "num_tokens": 1328369028.0, "step": 1742 }, { "epoch": 2.3782659255545004, "grad_norm": 0.19050795765191345, "learning_rate": 1.6838225758867825e-05, "loss": 0.2933, "num_tokens": 1329110107.0, "step": 1743 }, { "epoch": 2.379631506513181, "grad_norm": 0.1853091838143724, "learning_rate": 1.6834441778326935e-05, "loss": 0.2927, "num_tokens": 1329864602.0, "step": 1744 }, { "epoch": 2.3809970874718616, "grad_norm": 0.18559572262201543, "learning_rate": 1.683065601795053e-05, "loss": 0.3059, "num_tokens": 1330707441.0, "step": 1745 }, { "epoch": 2.382362668430542, "grad_norm": 0.18644185176733416, "learning_rate": 1.682686847889347e-05, "loss": 0.2984, "num_tokens": 1331401316.0, "step": 1746 }, { "epoch": 2.3837282493892227, "grad_norm": 0.19261806192168815, "learning_rate": 1.682307916231118e-05, "loss": 0.3082, "num_tokens": 1332189804.0, "step": 1747 }, { "epoch": 2.3850938303479032, "grad_norm": 0.20889617181108358, "learning_rate": 1.6819288069359615e-05, "loss": 0.2989, "num_tokens": 1332847679.0, "step": 1748 }, { "epoch": 2.386459411306584, "grad_norm": 0.19758380858618335, "learning_rate": 1.6815495201195273e-05, "loss": 0.3174, "num_tokens": 1333603702.0, "step": 1749 }, { "epoch": 2.387824992265264, "grad_norm": 0.1919446630613184, "learning_rate": 1.681170055897519e-05, "loss": 0.3043, "num_tokens": 1334412987.0, "step": 1750 }, { "epoch": 2.3891905732239445, "grad_norm": 0.19740319769283052, "learning_rate": 1.680790414385695e-05, "loss": 0.315, "num_tokens": 1335178156.0, "step": 1751 }, { "epoch": 2.390556154182625, "grad_norm": 0.20044992737745712, "learning_rate": 1.680410595699868e-05, "loss": 0.3083, "num_tokens": 1335961601.0, "step": 1752 }, { "epoch": 2.3919217351413056, "grad_norm": 0.21296519076299583, "learning_rate": 1.6800305999559033e-05, "loss": 0.2972, "num_tokens": 1336703067.0, "step": 1753 }, { "epoch": 2.393287316099986, "grad_norm": 0.19977033490623092, "learning_rate": 1.6796504272697214e-05, "loss": 0.3062, "num_tokens": 1337459233.0, "step": 1754 }, { "epoch": 2.3946528970586667, "grad_norm": 0.19139425118068465, "learning_rate": 1.679270077757297e-05, "loss": 0.3108, "num_tokens": 1338191424.0, "step": 1755 }, { "epoch": 2.3960184780173472, "grad_norm": 0.19191867991771133, "learning_rate": 1.6788895515346576e-05, "loss": 0.3151, "num_tokens": 1338961879.0, "step": 1756 }, { "epoch": 2.397384058976028, "grad_norm": 0.18356033301755798, "learning_rate": 1.6785088487178857e-05, "loss": 0.3204, "num_tokens": 1339773054.0, "step": 1757 }, { "epoch": 2.3987496399347084, "grad_norm": 0.21797916226012926, "learning_rate": 1.678127969423117e-05, "loss": 0.3089, "num_tokens": 1340539734.0, "step": 1758 }, { "epoch": 2.4001152208933885, "grad_norm": 0.18355068197129334, "learning_rate": 1.6777469137665414e-05, "loss": 0.3223, "num_tokens": 1341291107.0, "step": 1759 }, { "epoch": 2.401480801852069, "grad_norm": 0.3246926804687585, "learning_rate": 1.677365681864403e-05, "loss": 0.3139, "num_tokens": 1342082237.0, "step": 1760 }, { "epoch": 2.4028463828107496, "grad_norm": 0.19947208310684872, "learning_rate": 1.6769842738329982e-05, "loss": 0.3044, "num_tokens": 1342771301.0, "step": 1761 }, { "epoch": 2.40421196376943, "grad_norm": 0.19274663663254638, "learning_rate": 1.6766026897886786e-05, "loss": 0.3092, "num_tokens": 1343585949.0, "step": 1762 }, { "epoch": 2.4055775447281107, "grad_norm": 0.1938610460831104, "learning_rate": 1.676220929847849e-05, "loss": 0.3205, "num_tokens": 1344453263.0, "step": 1763 }, { "epoch": 2.4069431256867913, "grad_norm": 0.2016961687001354, "learning_rate": 1.6758389941269678e-05, "loss": 0.3177, "num_tokens": 1345189836.0, "step": 1764 }, { "epoch": 2.408308706645472, "grad_norm": 0.2112939565435269, "learning_rate": 1.675456882742547e-05, "loss": 0.3061, "num_tokens": 1345935629.0, "step": 1765 }, { "epoch": 2.4096742876041524, "grad_norm": 0.20541956899395342, "learning_rate": 1.6750745958111518e-05, "loss": 0.323, "num_tokens": 1346704479.0, "step": 1766 }, { "epoch": 2.411039868562833, "grad_norm": 0.21735762835936648, "learning_rate": 1.674692133449402e-05, "loss": 0.3149, "num_tokens": 1347589680.0, "step": 1767 }, { "epoch": 2.412405449521513, "grad_norm": 0.2037819349926451, "learning_rate": 1.6743094957739702e-05, "loss": 0.2901, "num_tokens": 1348338987.0, "step": 1768 }, { "epoch": 2.4137710304801936, "grad_norm": 0.19199746126601053, "learning_rate": 1.6739266829015822e-05, "loss": 0.3054, "num_tokens": 1349106948.0, "step": 1769 }, { "epoch": 2.415136611438874, "grad_norm": 0.20104827265082742, "learning_rate": 1.673543694949018e-05, "loss": 0.3025, "num_tokens": 1349835026.0, "step": 1770 }, { "epoch": 2.4165021923975547, "grad_norm": 0.20358652064853344, "learning_rate": 1.6731605320331104e-05, "loss": 0.3078, "num_tokens": 1350601070.0, "step": 1771 }, { "epoch": 2.4178677733562353, "grad_norm": 0.18925895869657822, "learning_rate": 1.6727771942707463e-05, "loss": 0.3084, "num_tokens": 1351296728.0, "step": 1772 }, { "epoch": 2.419233354314916, "grad_norm": 0.20301310859088148, "learning_rate": 1.672393681778865e-05, "loss": 0.3112, "num_tokens": 1352072637.0, "step": 1773 }, { "epoch": 2.4205989352735964, "grad_norm": 0.18532830854899202, "learning_rate": 1.6720099946744595e-05, "loss": 0.3184, "num_tokens": 1352887029.0, "step": 1774 }, { "epoch": 2.421964516232277, "grad_norm": 0.19253204620778372, "learning_rate": 1.6716261330745764e-05, "loss": 0.3052, "num_tokens": 1353583699.0, "step": 1775 }, { "epoch": 2.4233300971909575, "grad_norm": 0.1948269589186321, "learning_rate": 1.6712420970963152e-05, "loss": 0.3107, "num_tokens": 1354351110.0, "step": 1776 }, { "epoch": 2.4246956781496376, "grad_norm": 0.197498572970463, "learning_rate": 1.6708578868568287e-05, "loss": 0.3164, "num_tokens": 1355114870.0, "step": 1777 }, { "epoch": 2.426061259108318, "grad_norm": 0.19103693468603314, "learning_rate": 1.6704735024733222e-05, "loss": 0.3036, "num_tokens": 1355839648.0, "step": 1778 }, { "epoch": 2.4274268400669987, "grad_norm": 0.1971678846172706, "learning_rate": 1.6700889440630552e-05, "loss": 0.299, "num_tokens": 1356605855.0, "step": 1779 }, { "epoch": 2.4287924210256793, "grad_norm": 0.1879840678003008, "learning_rate": 1.66970421174334e-05, "loss": 0.3189, "num_tokens": 1357427989.0, "step": 1780 }, { "epoch": 2.43015800198436, "grad_norm": 0.1827175412572702, "learning_rate": 1.6693193056315418e-05, "loss": 0.3064, "num_tokens": 1358245928.0, "step": 1781 }, { "epoch": 2.4315235829430404, "grad_norm": 0.18628506083304214, "learning_rate": 1.6689342258450784e-05, "loss": 0.3171, "num_tokens": 1359027679.0, "step": 1782 }, { "epoch": 2.432889163901721, "grad_norm": 0.20039822186728254, "learning_rate": 1.6685489725014214e-05, "loss": 0.2999, "num_tokens": 1359814448.0, "step": 1783 }, { "epoch": 2.4342547448604015, "grad_norm": 0.17300985386462137, "learning_rate": 1.668163545718094e-05, "loss": 0.292, "num_tokens": 1360540315.0, "step": 1784 }, { "epoch": 2.435620325819082, "grad_norm": 0.1874318926728766, "learning_rate": 1.6677779456126745e-05, "loss": 0.3096, "num_tokens": 1361311581.0, "step": 1785 }, { "epoch": 2.436985906777762, "grad_norm": 0.19525452372091123, "learning_rate": 1.667392172302792e-05, "loss": 0.3081, "num_tokens": 1362128968.0, "step": 1786 }, { "epoch": 2.4383514877364427, "grad_norm": 0.18579020509499333, "learning_rate": 1.667006225906129e-05, "loss": 0.2988, "num_tokens": 1362844500.0, "step": 1787 }, { "epoch": 2.4397170686951233, "grad_norm": 0.18689288857046346, "learning_rate": 1.6666201065404217e-05, "loss": 0.3155, "num_tokens": 1363665788.0, "step": 1788 }, { "epoch": 2.441082649653804, "grad_norm": 0.1829712948817836, "learning_rate": 1.6662338143234585e-05, "loss": 0.3144, "num_tokens": 1364433671.0, "step": 1789 }, { "epoch": 2.4424482306124844, "grad_norm": 0.1863865382545085, "learning_rate": 1.6658473493730795e-05, "loss": 0.3143, "num_tokens": 1365282190.0, "step": 1790 }, { "epoch": 2.443813811571165, "grad_norm": 0.1847382793163872, "learning_rate": 1.6654607118071792e-05, "loss": 0.3095, "num_tokens": 1366045094.0, "step": 1791 }, { "epoch": 2.4451793925298455, "grad_norm": 0.195986415804053, "learning_rate": 1.665073901743704e-05, "loss": 0.3059, "num_tokens": 1366763997.0, "step": 1792 }, { "epoch": 2.446544973488526, "grad_norm": 0.16838809805175659, "learning_rate": 1.6646869193006523e-05, "loss": 0.3125, "num_tokens": 1367565978.0, "step": 1793 }, { "epoch": 2.4479105544472066, "grad_norm": 0.1904158903869617, "learning_rate": 1.6642997645960758e-05, "loss": 0.3083, "num_tokens": 1368338571.0, "step": 1794 }, { "epoch": 2.4492761354058867, "grad_norm": 0.1698568891877568, "learning_rate": 1.6639124377480796e-05, "loss": 0.3017, "num_tokens": 1369081718.0, "step": 1795 }, { "epoch": 2.4506417163645673, "grad_norm": 0.19670303272643044, "learning_rate": 1.663524938874819e-05, "loss": 0.3215, "num_tokens": 1369841051.0, "step": 1796 }, { "epoch": 2.452007297323248, "grad_norm": 0.18042457437149495, "learning_rate": 1.663137268094504e-05, "loss": 0.2881, "num_tokens": 1370607944.0, "step": 1797 }, { "epoch": 2.4533728782819284, "grad_norm": 0.18056674729188132, "learning_rate": 1.662749425525396e-05, "loss": 0.3125, "num_tokens": 1371402221.0, "step": 1798 }, { "epoch": 2.454738459240609, "grad_norm": 0.1962706256278912, "learning_rate": 1.6623614112858087e-05, "loss": 0.3146, "num_tokens": 1372189550.0, "step": 1799 }, { "epoch": 2.4561040401992895, "grad_norm": 0.17048026269053626, "learning_rate": 1.6619732254941085e-05, "loss": 0.3074, "num_tokens": 1372857974.0, "step": 1800 }, { "epoch": 2.45746962115797, "grad_norm": 0.20183197013013823, "learning_rate": 1.6615848682687145e-05, "loss": 0.3134, "num_tokens": 1373613075.0, "step": 1801 }, { "epoch": 2.4588352021166506, "grad_norm": 0.18835061137348444, "learning_rate": 1.661196339728097e-05, "loss": 0.3024, "num_tokens": 1374334754.0, "step": 1802 }, { "epoch": 2.460200783075331, "grad_norm": 0.2055162165725593, "learning_rate": 1.66080763999078e-05, "loss": 0.2975, "num_tokens": 1375035870.0, "step": 1803 }, { "epoch": 2.4615663640340113, "grad_norm": 0.19394692473860006, "learning_rate": 1.6604187691753384e-05, "loss": 0.3042, "num_tokens": 1375764284.0, "step": 1804 }, { "epoch": 2.462931944992692, "grad_norm": 0.19879226689360469, "learning_rate": 1.6600297274003997e-05, "loss": 0.302, "num_tokens": 1376508016.0, "step": 1805 }, { "epoch": 2.4642975259513724, "grad_norm": 0.19463303761962258, "learning_rate": 1.6596405147846443e-05, "loss": 0.3061, "num_tokens": 1377276417.0, "step": 1806 }, { "epoch": 2.465663106910053, "grad_norm": 0.2045789489462508, "learning_rate": 1.659251131446804e-05, "loss": 0.3041, "num_tokens": 1378034170.0, "step": 1807 }, { "epoch": 2.4670286878687335, "grad_norm": 0.18055824558524616, "learning_rate": 1.6588615775056624e-05, "loss": 0.2892, "num_tokens": 1378794263.0, "step": 1808 }, { "epoch": 2.468394268827414, "grad_norm": 0.18571029888485197, "learning_rate": 1.6584718530800555e-05, "loss": 0.3125, "num_tokens": 1379578390.0, "step": 1809 }, { "epoch": 2.4697598497860946, "grad_norm": 0.1816227023493871, "learning_rate": 1.6580819582888722e-05, "loss": 0.3326, "num_tokens": 1380352357.0, "step": 1810 }, { "epoch": 2.471125430744775, "grad_norm": 0.19984743169792377, "learning_rate": 1.6576918932510515e-05, "loss": 0.2952, "num_tokens": 1381028630.0, "step": 1811 }, { "epoch": 2.4724910117034558, "grad_norm": 0.17787967566070575, "learning_rate": 1.6573016580855855e-05, "loss": 0.3109, "num_tokens": 1381810142.0, "step": 1812 }, { "epoch": 2.473856592662136, "grad_norm": 0.1985651401099988, "learning_rate": 1.6569112529115183e-05, "loss": 0.3044, "num_tokens": 1382589992.0, "step": 1813 }, { "epoch": 2.4752221736208164, "grad_norm": 0.17589406487444043, "learning_rate": 1.6565206778479462e-05, "loss": 0.2933, "num_tokens": 1383367392.0, "step": 1814 }, { "epoch": 2.476587754579497, "grad_norm": 0.17131589659778726, "learning_rate": 1.656129933014016e-05, "loss": 0.3062, "num_tokens": 1384131046.0, "step": 1815 }, { "epoch": 2.4779533355381775, "grad_norm": 0.2324413531802864, "learning_rate": 1.6557390185289268e-05, "loss": 0.3122, "num_tokens": 1384845362.0, "step": 1816 }, { "epoch": 2.479318916496858, "grad_norm": 0.19204721213490225, "learning_rate": 1.6553479345119305e-05, "loss": 0.3189, "num_tokens": 1385634732.0, "step": 1817 }, { "epoch": 2.4806844974555387, "grad_norm": 0.20179048559254273, "learning_rate": 1.6549566810823287e-05, "loss": 0.2934, "num_tokens": 1386412843.0, "step": 1818 }, { "epoch": 2.482050078414219, "grad_norm": 0.1871331448871095, "learning_rate": 1.6545652583594772e-05, "loss": 0.3052, "num_tokens": 1387173246.0, "step": 1819 }, { "epoch": 2.4834156593728998, "grad_norm": 0.20534237035076985, "learning_rate": 1.6541736664627817e-05, "loss": 0.3043, "num_tokens": 1387923077.0, "step": 1820 }, { "epoch": 2.4847812403315803, "grad_norm": 0.20554996223630848, "learning_rate": 1.6537819055116994e-05, "loss": 0.3297, "num_tokens": 1388725455.0, "step": 1821 }, { "epoch": 2.4861468212902604, "grad_norm": 0.18282115665065474, "learning_rate": 1.6533899756257404e-05, "loss": 0.2997, "num_tokens": 1389420512.0, "step": 1822 }, { "epoch": 2.487512402248941, "grad_norm": 0.20202467237289098, "learning_rate": 1.652997876924465e-05, "loss": 0.2954, "num_tokens": 1390162276.0, "step": 1823 }, { "epoch": 2.4888779832076215, "grad_norm": 0.1981734857114372, "learning_rate": 1.6526056095274853e-05, "loss": 0.3171, "num_tokens": 1390922240.0, "step": 1824 }, { "epoch": 2.490243564166302, "grad_norm": 0.17068367108001678, "learning_rate": 1.652213173554466e-05, "loss": 0.2966, "num_tokens": 1391659533.0, "step": 1825 }, { "epoch": 2.4916091451249827, "grad_norm": 0.18652342701217167, "learning_rate": 1.6518205691251212e-05, "loss": 0.2997, "num_tokens": 1392344814.0, "step": 1826 }, { "epoch": 2.492974726083663, "grad_norm": 0.20401469276490514, "learning_rate": 1.651427796359218e-05, "loss": 0.3049, "num_tokens": 1393149909.0, "step": 1827 }, { "epoch": 2.4943403070423438, "grad_norm": 0.18292508568943253, "learning_rate": 1.6510348553765748e-05, "loss": 0.301, "num_tokens": 1393925870.0, "step": 1828 }, { "epoch": 2.4957058880010243, "grad_norm": 0.20881322230180588, "learning_rate": 1.650641746297061e-05, "loss": 0.318, "num_tokens": 1394673408.0, "step": 1829 }, { "epoch": 2.497071468959705, "grad_norm": 0.20872864550418962, "learning_rate": 1.6502484692405957e-05, "loss": 0.2997, "num_tokens": 1395469951.0, "step": 1830 }, { "epoch": 2.498437049918385, "grad_norm": 0.18536995811359025, "learning_rate": 1.6498550243271523e-05, "loss": 0.2916, "num_tokens": 1396201746.0, "step": 1831 }, { "epoch": 2.4998026308770656, "grad_norm": 0.17566702509453128, "learning_rate": 1.6494614116767528e-05, "loss": 0.3197, "num_tokens": 1397073615.0, "step": 1832 }, { "epoch": 2.501168211835746, "grad_norm": 0.19893092607256022, "learning_rate": 1.649067631409472e-05, "loss": 0.3061, "num_tokens": 1397791284.0, "step": 1833 }, { "epoch": 2.5025337927944267, "grad_norm": 0.20588124501966967, "learning_rate": 1.648673683645435e-05, "loss": 0.2998, "num_tokens": 1398530726.0, "step": 1834 }, { "epoch": 2.5038993737531072, "grad_norm": 0.19157104109156048, "learning_rate": 1.648279568504818e-05, "loss": 0.3155, "num_tokens": 1399211721.0, "step": 1835 }, { "epoch": 2.505264954711788, "grad_norm": 0.18344758376238146, "learning_rate": 1.6478852861078486e-05, "loss": 0.3089, "num_tokens": 1400006125.0, "step": 1836 }, { "epoch": 2.5066305356704683, "grad_norm": 0.19432139542098834, "learning_rate": 1.6474908365748053e-05, "loss": 0.3037, "num_tokens": 1400804425.0, "step": 1837 }, { "epoch": 2.507996116629149, "grad_norm": 0.17058818606556356, "learning_rate": 1.647096220026018e-05, "loss": 0.3058, "num_tokens": 1401534459.0, "step": 1838 }, { "epoch": 2.5093616975878295, "grad_norm": 0.20373951502928148, "learning_rate": 1.646701436581866e-05, "loss": 0.3183, "num_tokens": 1402304702.0, "step": 1839 }, { "epoch": 2.5107272785465096, "grad_norm": 0.18631203801907367, "learning_rate": 1.646306486362782e-05, "loss": 0.3133, "num_tokens": 1403089389.0, "step": 1840 }, { "epoch": 2.51209285950519, "grad_norm": 0.19512317900676007, "learning_rate": 1.645911369489247e-05, "loss": 0.3114, "num_tokens": 1403876738.0, "step": 1841 }, { "epoch": 2.5134584404638707, "grad_norm": 0.19641619117064488, "learning_rate": 1.645516086081795e-05, "loss": 0.3177, "num_tokens": 1404604983.0, "step": 1842 }, { "epoch": 2.5148240214225512, "grad_norm": 0.18974211346526246, "learning_rate": 1.645120636261009e-05, "loss": 0.2915, "num_tokens": 1405329463.0, "step": 1843 }, { "epoch": 2.516189602381232, "grad_norm": 0.1832146511251332, "learning_rate": 1.644725020147524e-05, "loss": 0.2966, "num_tokens": 1405990295.0, "step": 1844 }, { "epoch": 2.5175551833399124, "grad_norm": 0.170318686947924, "learning_rate": 1.644329237862026e-05, "loss": 0.3065, "num_tokens": 1406792919.0, "step": 1845 }, { "epoch": 2.518920764298593, "grad_norm": 0.17598531674618775, "learning_rate": 1.64393328952525e-05, "loss": 0.3072, "num_tokens": 1407608999.0, "step": 1846 }, { "epoch": 2.5202863452572735, "grad_norm": 0.29972620089882906, "learning_rate": 1.643537175257983e-05, "loss": 0.3084, "num_tokens": 1408395533.0, "step": 1847 }, { "epoch": 2.521651926215954, "grad_norm": 0.1874873311247297, "learning_rate": 1.6431408951810627e-05, "loss": 0.3179, "num_tokens": 1409107723.0, "step": 1848 }, { "epoch": 2.523017507174634, "grad_norm": 0.18163896068235807, "learning_rate": 1.6427444494153768e-05, "loss": 0.3085, "num_tokens": 1410008464.0, "step": 1849 }, { "epoch": 2.5243830881333147, "grad_norm": 0.16357119503302645, "learning_rate": 1.6423478380818633e-05, "loss": 0.3196, "num_tokens": 1410732552.0, "step": 1850 }, { "epoch": 2.5257486690919952, "grad_norm": 0.2219804416061581, "learning_rate": 1.6419510613015117e-05, "loss": 0.3235, "num_tokens": 1411446834.0, "step": 1851 }, { "epoch": 2.527114250050676, "grad_norm": 0.1838395842260659, "learning_rate": 1.641554119195361e-05, "loss": 0.3078, "num_tokens": 1412188644.0, "step": 1852 }, { "epoch": 2.5284798310093564, "grad_norm": 0.1849320925698564, "learning_rate": 1.6411570118845016e-05, "loss": 0.3196, "num_tokens": 1412963986.0, "step": 1853 }, { "epoch": 2.529845411968037, "grad_norm": 0.1807209677052282, "learning_rate": 1.6407597394900733e-05, "loss": 0.2822, "num_tokens": 1413668828.0, "step": 1854 }, { "epoch": 2.5312109929267175, "grad_norm": 0.17741259345953658, "learning_rate": 1.6403623021332667e-05, "loss": 0.3237, "num_tokens": 1414462053.0, "step": 1855 }, { "epoch": 2.532576573885398, "grad_norm": 0.20456645635323153, "learning_rate": 1.6399646999353226e-05, "loss": 0.3008, "num_tokens": 1415171077.0, "step": 1856 }, { "epoch": 2.5339421548440786, "grad_norm": 0.19067755078085916, "learning_rate": 1.6395669330175325e-05, "loss": 0.2982, "num_tokens": 1415939956.0, "step": 1857 }, { "epoch": 2.5353077358027587, "grad_norm": 0.1937914314415248, "learning_rate": 1.6391690015012382e-05, "loss": 0.3023, "num_tokens": 1416698042.0, "step": 1858 }, { "epoch": 2.5366733167614393, "grad_norm": 0.17573899284033595, "learning_rate": 1.6387709055078304e-05, "loss": 0.3122, "num_tokens": 1417580763.0, "step": 1859 }, { "epoch": 2.53803889772012, "grad_norm": 0.1797478693160339, "learning_rate": 1.638372645158752e-05, "loss": 0.3079, "num_tokens": 1418389376.0, "step": 1860 }, { "epoch": 2.5394044786788004, "grad_norm": 0.2029951259386221, "learning_rate": 1.6379742205754945e-05, "loss": 0.3202, "num_tokens": 1419112005.0, "step": 1861 }, { "epoch": 2.540770059637481, "grad_norm": 0.19947710835425853, "learning_rate": 1.6375756318795998e-05, "loss": 0.2998, "num_tokens": 1419933833.0, "step": 1862 }, { "epoch": 2.5421356405961615, "grad_norm": 0.1880204093701435, "learning_rate": 1.63717687919266e-05, "loss": 0.316, "num_tokens": 1420746843.0, "step": 1863 }, { "epoch": 2.543501221554842, "grad_norm": 0.1950252142701813, "learning_rate": 1.6367779626363177e-05, "loss": 0.3133, "num_tokens": 1421480601.0, "step": 1864 }, { "epoch": 2.5448668025135226, "grad_norm": 0.19507473666637723, "learning_rate": 1.636378882332265e-05, "loss": 0.3068, "num_tokens": 1422248596.0, "step": 1865 }, { "epoch": 2.546232383472203, "grad_norm": 0.17775236401347858, "learning_rate": 1.635979638402244e-05, "loss": 0.3021, "num_tokens": 1423045325.0, "step": 1866 }, { "epoch": 2.5475979644308833, "grad_norm": 0.18357144788339586, "learning_rate": 1.6355802309680466e-05, "loss": 0.2986, "num_tokens": 1423951072.0, "step": 1867 }, { "epoch": 2.548963545389564, "grad_norm": 0.19744844581714022, "learning_rate": 1.6351806601515143e-05, "loss": 0.3005, "num_tokens": 1424676843.0, "step": 1868 }, { "epoch": 2.5503291263482444, "grad_norm": 0.17359389248451276, "learning_rate": 1.6347809260745395e-05, "loss": 0.3085, "num_tokens": 1425512818.0, "step": 1869 }, { "epoch": 2.551694707306925, "grad_norm": 0.19436015723823413, "learning_rate": 1.634381028859064e-05, "loss": 0.3152, "num_tokens": 1426314016.0, "step": 1870 }, { "epoch": 2.5530602882656055, "grad_norm": 0.17852789329580818, "learning_rate": 1.633980968627078e-05, "loss": 0.2978, "num_tokens": 1427062453.0, "step": 1871 }, { "epoch": 2.554425869224286, "grad_norm": 0.19432470082901612, "learning_rate": 1.6335807455006238e-05, "loss": 0.3133, "num_tokens": 1427781104.0, "step": 1872 }, { "epoch": 2.5557914501829666, "grad_norm": 0.18826108663964913, "learning_rate": 1.633180359601791e-05, "loss": 0.3026, "num_tokens": 1428564427.0, "step": 1873 }, { "epoch": 2.557157031141647, "grad_norm": 0.17584268908346987, "learning_rate": 1.632779811052721e-05, "loss": 0.298, "num_tokens": 1429339515.0, "step": 1874 }, { "epoch": 2.5585226121003277, "grad_norm": 0.18562188462648505, "learning_rate": 1.632379099975603e-05, "loss": 0.3138, "num_tokens": 1430127707.0, "step": 1875 }, { "epoch": 2.559888193059008, "grad_norm": 0.19128223659297722, "learning_rate": 1.6319782264926773e-05, "loss": 0.3073, "num_tokens": 1430970867.0, "step": 1876 }, { "epoch": 2.5612537740176884, "grad_norm": 0.20927619837329367, "learning_rate": 1.6315771907262324e-05, "loss": 0.3072, "num_tokens": 1431685533.0, "step": 1877 }, { "epoch": 2.562619354976369, "grad_norm": 0.20395475623053075, "learning_rate": 1.6311759927986078e-05, "loss": 0.3105, "num_tokens": 1432420498.0, "step": 1878 }, { "epoch": 2.5639849359350495, "grad_norm": 0.19273412904207954, "learning_rate": 1.6307746328321906e-05, "loss": 0.3041, "num_tokens": 1433236123.0, "step": 1879 }, { "epoch": 2.56535051689373, "grad_norm": 0.18296336822006742, "learning_rate": 1.6303731109494193e-05, "loss": 0.3137, "num_tokens": 1434006479.0, "step": 1880 }, { "epoch": 2.5667160978524106, "grad_norm": 0.19662674547777298, "learning_rate": 1.6299714272727795e-05, "loss": 0.3116, "num_tokens": 1434769628.0, "step": 1881 }, { "epoch": 2.568081678811091, "grad_norm": 0.1837085566266636, "learning_rate": 1.6295695819248088e-05, "loss": 0.3105, "num_tokens": 1435552685.0, "step": 1882 }, { "epoch": 2.5694472597697717, "grad_norm": 0.20523535664034614, "learning_rate": 1.6291675750280928e-05, "loss": 0.3056, "num_tokens": 1436327722.0, "step": 1883 }, { "epoch": 2.5708128407284523, "grad_norm": 0.18281419755374187, "learning_rate": 1.6287654067052657e-05, "loss": 0.3206, "num_tokens": 1437057891.0, "step": 1884 }, { "epoch": 2.5721784216871324, "grad_norm": 0.2091946783319462, "learning_rate": 1.628363077079012e-05, "loss": 0.3069, "num_tokens": 1437803906.0, "step": 1885 }, { "epoch": 2.573544002645813, "grad_norm": 0.18686103802582169, "learning_rate": 1.6279605862720644e-05, "loss": 0.3078, "num_tokens": 1438553462.0, "step": 1886 }, { "epoch": 2.5749095836044935, "grad_norm": 0.3100589973500473, "learning_rate": 1.6275579344072064e-05, "loss": 0.3122, "num_tokens": 1439302157.0, "step": 1887 }, { "epoch": 2.576275164563174, "grad_norm": 0.19386902737651387, "learning_rate": 1.627155121607269e-05, "loss": 0.3194, "num_tokens": 1440092772.0, "step": 1888 }, { "epoch": 2.5776407455218546, "grad_norm": 0.18458489216829368, "learning_rate": 1.6267521479951332e-05, "loss": 0.3039, "num_tokens": 1440812008.0, "step": 1889 }, { "epoch": 2.579006326480535, "grad_norm": 0.19543146636823708, "learning_rate": 1.626349013693729e-05, "loss": 0.2999, "num_tokens": 1441576985.0, "step": 1890 }, { "epoch": 2.5803719074392157, "grad_norm": 0.1792935100828067, "learning_rate": 1.6259457188260345e-05, "loss": 0.3066, "num_tokens": 1442375915.0, "step": 1891 }, { "epoch": 2.5817374883978963, "grad_norm": 0.1999498118318816, "learning_rate": 1.6255422635150783e-05, "loss": 0.2964, "num_tokens": 1443115738.0, "step": 1892 }, { "epoch": 2.583103069356577, "grad_norm": 0.1886253689580606, "learning_rate": 1.6251386478839367e-05, "loss": 0.3184, "num_tokens": 1443886074.0, "step": 1893 }, { "epoch": 2.584468650315257, "grad_norm": 0.1986148467145687, "learning_rate": 1.6247348720557353e-05, "loss": 0.3053, "num_tokens": 1444634059.0, "step": 1894 }, { "epoch": 2.5858342312739375, "grad_norm": 0.21183333487230394, "learning_rate": 1.624330936153649e-05, "loss": 0.3231, "num_tokens": 1445454319.0, "step": 1895 }, { "epoch": 2.587199812232618, "grad_norm": 0.20845393069253954, "learning_rate": 1.623926840300901e-05, "loss": 0.3189, "num_tokens": 1446236930.0, "step": 1896 }, { "epoch": 2.5885653931912986, "grad_norm": 0.196179598653693, "learning_rate": 1.623522584620763e-05, "loss": 0.3103, "num_tokens": 1446990959.0, "step": 1897 }, { "epoch": 2.589930974149979, "grad_norm": 0.18662345439417685, "learning_rate": 1.6231181692365567e-05, "loss": 0.3184, "num_tokens": 1447721442.0, "step": 1898 }, { "epoch": 2.5912965551086597, "grad_norm": 0.20716555157166938, "learning_rate": 1.622713594271651e-05, "loss": 0.314, "num_tokens": 1448455544.0, "step": 1899 }, { "epoch": 2.5926621360673403, "grad_norm": 0.21809703453251683, "learning_rate": 1.6223088598494647e-05, "loss": 0.3148, "num_tokens": 1449272081.0, "step": 1900 }, { "epoch": 2.594027717026021, "grad_norm": 0.1840729877335866, "learning_rate": 1.6219039660934644e-05, "loss": 0.3004, "num_tokens": 1450028131.0, "step": 1901 }, { "epoch": 2.5953932979847014, "grad_norm": 0.19623738461388052, "learning_rate": 1.6214989131271658e-05, "loss": 0.3134, "num_tokens": 1450771886.0, "step": 1902 }, { "epoch": 2.5967588789433815, "grad_norm": 0.1879133926088854, "learning_rate": 1.621093701074133e-05, "loss": 0.3125, "num_tokens": 1451523813.0, "step": 1903 }, { "epoch": 2.598124459902062, "grad_norm": 0.19222472056042555, "learning_rate": 1.6206883300579785e-05, "loss": 0.3068, "num_tokens": 1452231344.0, "step": 1904 }, { "epoch": 2.5994900408607426, "grad_norm": 0.19853856488387872, "learning_rate": 1.6202828002023636e-05, "loss": 0.3288, "num_tokens": 1452958199.0, "step": 1905 }, { "epoch": 2.600855621819423, "grad_norm": 0.1867461065792624, "learning_rate": 1.6198771116309982e-05, "loss": 0.3171, "num_tokens": 1453778010.0, "step": 1906 }, { "epoch": 2.6022212027781038, "grad_norm": 0.2060554783728174, "learning_rate": 1.61947126446764e-05, "loss": 0.2992, "num_tokens": 1454491324.0, "step": 1907 }, { "epoch": 2.6035867837367843, "grad_norm": 0.19019171691173425, "learning_rate": 1.619065258836095e-05, "loss": 0.3132, "num_tokens": 1455341194.0, "step": 1908 }, { "epoch": 2.604952364695465, "grad_norm": 0.18897337960743313, "learning_rate": 1.6186590948602187e-05, "loss": 0.3144, "num_tokens": 1456106730.0, "step": 1909 }, { "epoch": 2.6063179456541454, "grad_norm": 0.19118505121416798, "learning_rate": 1.6182527726639133e-05, "loss": 0.3066, "num_tokens": 1456857613.0, "step": 1910 }, { "epoch": 2.607683526612826, "grad_norm": 0.19816234019608836, "learning_rate": 1.617846292371131e-05, "loss": 0.301, "num_tokens": 1457591432.0, "step": 1911 }, { "epoch": 2.609049107571506, "grad_norm": 0.17305868073983566, "learning_rate": 1.617439654105871e-05, "loss": 0.3054, "num_tokens": 1458368779.0, "step": 1912 }, { "epoch": 2.6104146885301867, "grad_norm": 0.1892454202067193, "learning_rate": 1.6170328579921805e-05, "loss": 0.2979, "num_tokens": 1459066112.0, "step": 1913 }, { "epoch": 2.611780269488867, "grad_norm": 0.19960050602208107, "learning_rate": 1.616625904154156e-05, "loss": 0.3043, "num_tokens": 1459791952.0, "step": 1914 }, { "epoch": 2.6131458504475478, "grad_norm": 0.17256542768792485, "learning_rate": 1.6162187927159415e-05, "loss": 0.3047, "num_tokens": 1460541770.0, "step": 1915 }, { "epoch": 2.6145114314062283, "grad_norm": 0.2016512719915629, "learning_rate": 1.615811523801729e-05, "loss": 0.2989, "num_tokens": 1461356324.0, "step": 1916 }, { "epoch": 2.615877012364909, "grad_norm": 0.18149632154924267, "learning_rate": 1.6154040975357583e-05, "loss": 0.3206, "num_tokens": 1462143492.0, "step": 1917 }, { "epoch": 2.6172425933235894, "grad_norm": 0.2043413613363849, "learning_rate": 1.6149965140423172e-05, "loss": 0.307, "num_tokens": 1462857213.0, "step": 1918 }, { "epoch": 2.61860817428227, "grad_norm": 0.19133462665870038, "learning_rate": 1.6145887734457432e-05, "loss": 0.2925, "num_tokens": 1463488127.0, "step": 1919 }, { "epoch": 2.6199737552409506, "grad_norm": 0.19064953902465892, "learning_rate": 1.6141808758704186e-05, "loss": 0.3051, "num_tokens": 1464297199.0, "step": 1920 }, { "epoch": 2.6213393361996307, "grad_norm": 0.18652768438623393, "learning_rate": 1.6137728214407766e-05, "loss": 0.3094, "num_tokens": 1465095846.0, "step": 1921 }, { "epoch": 2.622704917158311, "grad_norm": 0.17920156391457287, "learning_rate": 1.6133646102812957e-05, "loss": 0.2996, "num_tokens": 1465840495.0, "step": 1922 }, { "epoch": 2.624070498116992, "grad_norm": 0.1861393213900466, "learning_rate": 1.612956242516505e-05, "loss": 0.3165, "num_tokens": 1466624399.0, "step": 1923 }, { "epoch": 2.6254360790756723, "grad_norm": 0.17824811323546286, "learning_rate": 1.6125477182709784e-05, "loss": 0.2999, "num_tokens": 1467392149.0, "step": 1924 }, { "epoch": 2.626801660034353, "grad_norm": 0.1870470948269081, "learning_rate": 1.6121390376693394e-05, "loss": 0.3094, "num_tokens": 1468194714.0, "step": 1925 }, { "epoch": 2.6281672409930334, "grad_norm": 0.18157683507707956, "learning_rate": 1.611730200836259e-05, "loss": 0.3009, "num_tokens": 1468921710.0, "step": 1926 }, { "epoch": 2.629532821951714, "grad_norm": 0.18630761475974564, "learning_rate": 1.6113212078964558e-05, "loss": 0.3011, "num_tokens": 1469749002.0, "step": 1927 }, { "epoch": 2.6308984029103946, "grad_norm": 0.17952364575357013, "learning_rate": 1.6109120589746953e-05, "loss": 0.307, "num_tokens": 1470519522.0, "step": 1928 }, { "epoch": 2.632263983869075, "grad_norm": 0.18688291640483587, "learning_rate": 1.6105027541957914e-05, "loss": 0.3059, "num_tokens": 1471248108.0, "step": 1929 }, { "epoch": 2.6336295648277552, "grad_norm": 0.18929248456484843, "learning_rate": 1.610093293684605e-05, "loss": 0.306, "num_tokens": 1471937767.0, "step": 1930 }, { "epoch": 2.634995145786436, "grad_norm": 0.19219341504176976, "learning_rate": 1.609683677566045e-05, "loss": 0.302, "num_tokens": 1472682276.0, "step": 1931 }, { "epoch": 2.6363607267451163, "grad_norm": 0.1857107976934036, "learning_rate": 1.609273905965068e-05, "loss": 0.3209, "num_tokens": 1473508408.0, "step": 1932 }, { "epoch": 2.637726307703797, "grad_norm": 0.17809504353981542, "learning_rate": 1.6088639790066763e-05, "loss": 0.2955, "num_tokens": 1474299012.0, "step": 1933 }, { "epoch": 2.6390918886624775, "grad_norm": 0.1829549186326802, "learning_rate": 1.6084538968159217e-05, "loss": 0.3062, "num_tokens": 1475060852.0, "step": 1934 }, { "epoch": 2.640457469621158, "grad_norm": 0.19444955920007664, "learning_rate": 1.6080436595179028e-05, "loss": 0.3096, "num_tokens": 1475791789.0, "step": 1935 }, { "epoch": 2.6418230505798386, "grad_norm": 0.21423100625359065, "learning_rate": 1.607633267237765e-05, "loss": 0.3314, "num_tokens": 1476557808.0, "step": 1936 }, { "epoch": 2.643188631538519, "grad_norm": 0.19808431640029442, "learning_rate": 1.6072227201007006e-05, "loss": 0.3118, "num_tokens": 1477365856.0, "step": 1937 }, { "epoch": 2.6445542124971997, "grad_norm": 0.2080328732708811, "learning_rate": 1.6068120182319506e-05, "loss": 0.3135, "num_tokens": 1478097410.0, "step": 1938 }, { "epoch": 2.64591979345588, "grad_norm": 0.20736411986193498, "learning_rate": 1.606401161756802e-05, "loss": 0.3111, "num_tokens": 1478863190.0, "step": 1939 }, { "epoch": 2.6472853744145604, "grad_norm": 0.18344376864227943, "learning_rate": 1.6059901508005894e-05, "loss": 0.3071, "num_tokens": 1479639316.0, "step": 1940 }, { "epoch": 2.648650955373241, "grad_norm": 0.19662723114659078, "learning_rate": 1.605578985488694e-05, "loss": 0.309, "num_tokens": 1480382633.0, "step": 1941 }, { "epoch": 2.6500165363319215, "grad_norm": 0.19881150561054786, "learning_rate": 1.6051676659465448e-05, "loss": 0.3182, "num_tokens": 1481048833.0, "step": 1942 }, { "epoch": 2.651382117290602, "grad_norm": 0.2068832721569244, "learning_rate": 1.6047561922996182e-05, "loss": 0.3031, "num_tokens": 1481764793.0, "step": 1943 }, { "epoch": 2.6527476982492826, "grad_norm": 0.18243429150212043, "learning_rate": 1.604344564673436e-05, "loss": 0.3094, "num_tokens": 1482587905.0, "step": 1944 }, { "epoch": 2.654113279207963, "grad_norm": 0.20391940777603845, "learning_rate": 1.603932783193569e-05, "loss": 0.316, "num_tokens": 1483388092.0, "step": 1945 }, { "epoch": 2.6554788601666437, "grad_norm": 0.17099419166209298, "learning_rate": 1.603520847985633e-05, "loss": 0.2873, "num_tokens": 1484154604.0, "step": 1946 }, { "epoch": 2.6568444411253243, "grad_norm": 0.20127554485215263, "learning_rate": 1.603108759175292e-05, "loss": 0.3091, "num_tokens": 1484914329.0, "step": 1947 }, { "epoch": 2.6582100220840044, "grad_norm": 0.19569596646733337, "learning_rate": 1.602696516888256e-05, "loss": 0.3139, "num_tokens": 1485659032.0, "step": 1948 }, { "epoch": 2.659575603042685, "grad_norm": 0.19617826876756203, "learning_rate": 1.6022841212502827e-05, "loss": 0.2918, "num_tokens": 1486381380.0, "step": 1949 }, { "epoch": 2.6609411840013655, "grad_norm": 0.18432775172164842, "learning_rate": 1.6018715723871762e-05, "loss": 0.3047, "num_tokens": 1487124409.0, "step": 1950 }, { "epoch": 2.662306764960046, "grad_norm": 0.19271933152536327, "learning_rate": 1.6014588704247873e-05, "loss": 0.2979, "num_tokens": 1487912413.0, "step": 1951 }, { "epoch": 2.6636723459187266, "grad_norm": 0.18750291905434413, "learning_rate": 1.601046015489013e-05, "loss": 0.3243, "num_tokens": 1488734754.0, "step": 1952 }, { "epoch": 2.665037926877407, "grad_norm": 0.18374213491461577, "learning_rate": 1.6006330077057987e-05, "loss": 0.3209, "num_tokens": 1489469100.0, "step": 1953 }, { "epoch": 2.6664035078360877, "grad_norm": 0.2111620229182135, "learning_rate": 1.6002198472011334e-05, "loss": 0.2935, "num_tokens": 1490173238.0, "step": 1954 }, { "epoch": 2.6677690887947683, "grad_norm": 0.18248682414773168, "learning_rate": 1.599806534101056e-05, "loss": 0.3142, "num_tokens": 1490952911.0, "step": 1955 }, { "epoch": 2.669134669753449, "grad_norm": 0.20642846112482732, "learning_rate": 1.5993930685316494e-05, "loss": 0.2945, "num_tokens": 1491672850.0, "step": 1956 }, { "epoch": 2.670500250712129, "grad_norm": 0.17578282459340613, "learning_rate": 1.598979450619045e-05, "loss": 0.3003, "num_tokens": 1492470706.0, "step": 1957 }, { "epoch": 2.6718658316708095, "grad_norm": 0.18370256512509872, "learning_rate": 1.5985656804894184e-05, "loss": 0.3152, "num_tokens": 1493229132.0, "step": 1958 }, { "epoch": 2.67323141262949, "grad_norm": 0.19876504186671745, "learning_rate": 1.5981517582689942e-05, "loss": 0.2947, "num_tokens": 1493955181.0, "step": 1959 }, { "epoch": 2.6745969935881706, "grad_norm": 0.17876139155361528, "learning_rate": 1.5977376840840416e-05, "loss": 0.3057, "num_tokens": 1494681409.0, "step": 1960 }, { "epoch": 2.675962574546851, "grad_norm": 0.1835883610232555, "learning_rate": 1.5973234580608767e-05, "loss": 0.2993, "num_tokens": 1495394894.0, "step": 1961 }, { "epoch": 2.6773281555055317, "grad_norm": 0.19065572206936024, "learning_rate": 1.5969090803258622e-05, "loss": 0.3069, "num_tokens": 1496151766.0, "step": 1962 }, { "epoch": 2.6786937364642123, "grad_norm": 0.18315305643961519, "learning_rate": 1.5964945510054066e-05, "loss": 0.3261, "num_tokens": 1496898845.0, "step": 1963 }, { "epoch": 2.680059317422893, "grad_norm": 0.19384936994517646, "learning_rate": 1.5960798702259647e-05, "loss": 0.3108, "num_tokens": 1497706631.0, "step": 1964 }, { "epoch": 2.6814248983815734, "grad_norm": 0.17357645797926635, "learning_rate": 1.595665038114038e-05, "loss": 0.3106, "num_tokens": 1498541856.0, "step": 1965 }, { "epoch": 2.6827904793402535, "grad_norm": 0.19670567504020592, "learning_rate": 1.595250054796173e-05, "loss": 0.3085, "num_tokens": 1499322830.0, "step": 1966 }, { "epoch": 2.684156060298934, "grad_norm": 0.17672932944213016, "learning_rate": 1.5948349203989642e-05, "loss": 0.3032, "num_tokens": 1499995157.0, "step": 1967 }, { "epoch": 2.6855216412576146, "grad_norm": 0.20611559699087803, "learning_rate": 1.5944196350490505e-05, "loss": 0.318, "num_tokens": 1500816621.0, "step": 1968 }, { "epoch": 2.686887222216295, "grad_norm": 0.17473627599306446, "learning_rate": 1.594004198873118e-05, "loss": 0.3128, "num_tokens": 1501661704.0, "step": 1969 }, { "epoch": 2.6882528031749757, "grad_norm": 0.1749271467227588, "learning_rate": 1.5935886119978977e-05, "loss": 0.306, "num_tokens": 1502361364.0, "step": 1970 }, { "epoch": 2.6896183841336563, "grad_norm": 0.18415806208739374, "learning_rate": 1.5931728745501675e-05, "loss": 0.3025, "num_tokens": 1503010898.0, "step": 1971 }, { "epoch": 2.690983965092337, "grad_norm": 0.2008670509574121, "learning_rate": 1.5927569866567504e-05, "loss": 0.3155, "num_tokens": 1503749531.0, "step": 1972 }, { "epoch": 2.6923495460510174, "grad_norm": 0.17078202044409108, "learning_rate": 1.5923409484445168e-05, "loss": 0.289, "num_tokens": 1504394244.0, "step": 1973 }, { "epoch": 2.693715127009698, "grad_norm": 0.20885626768834292, "learning_rate": 1.5919247600403808e-05, "loss": 0.3138, "num_tokens": 1505163958.0, "step": 1974 }, { "epoch": 2.695080707968378, "grad_norm": 0.19013767250701316, "learning_rate": 1.5915084215713036e-05, "loss": 0.3016, "num_tokens": 1505977645.0, "step": 1975 }, { "epoch": 2.6964462889270586, "grad_norm": 0.2157650224900246, "learning_rate": 1.5910919331642932e-05, "loss": 0.2905, "num_tokens": 1506674391.0, "step": 1976 }, { "epoch": 2.697811869885739, "grad_norm": 0.18711202319051498, "learning_rate": 1.5906752949464008e-05, "loss": 0.3015, "num_tokens": 1507423484.0, "step": 1977 }, { "epoch": 2.6991774508444197, "grad_norm": 0.1778861821652975, "learning_rate": 1.5902585070447257e-05, "loss": 0.318, "num_tokens": 1508212358.0, "step": 1978 }, { "epoch": 2.7005430318031003, "grad_norm": 0.20251745426710593, "learning_rate": 1.5898415695864113e-05, "loss": 0.3016, "num_tokens": 1508937222.0, "step": 1979 }, { "epoch": 2.701908612761781, "grad_norm": 0.16643135541228601, "learning_rate": 1.589424482698647e-05, "loss": 0.322, "num_tokens": 1509691104.0, "step": 1980 }, { "epoch": 2.7032741937204614, "grad_norm": 0.20377538112996607, "learning_rate": 1.5890072465086685e-05, "loss": 0.3012, "num_tokens": 1510510392.0, "step": 1981 }, { "epoch": 2.704639774679142, "grad_norm": 0.19916106795507363, "learning_rate": 1.5885898611437558e-05, "loss": 0.2898, "num_tokens": 1511252490.0, "step": 1982 }, { "epoch": 2.7060053556378225, "grad_norm": 0.1736690189823564, "learning_rate": 1.588172326731236e-05, "loss": 0.3193, "num_tokens": 1512143817.0, "step": 1983 }, { "epoch": 2.7073709365965026, "grad_norm": 0.1962914970834948, "learning_rate": 1.58775464339848e-05, "loss": 0.3147, "num_tokens": 1512873263.0, "step": 1984 }, { "epoch": 2.708736517555183, "grad_norm": 0.19621335265369574, "learning_rate": 1.5873368112729052e-05, "loss": 0.3187, "num_tokens": 1513651686.0, "step": 1985 }, { "epoch": 2.7101020985138637, "grad_norm": 0.19113622152082774, "learning_rate": 1.586918830481974e-05, "loss": 0.2903, "num_tokens": 1514385434.0, "step": 1986 }, { "epoch": 2.7114676794725443, "grad_norm": 0.17762349135599528, "learning_rate": 1.586500701153194e-05, "loss": 0.3094, "num_tokens": 1515164356.0, "step": 1987 }, { "epoch": 2.712833260431225, "grad_norm": 0.1918050094962549, "learning_rate": 1.586082423414119e-05, "loss": 0.2999, "num_tokens": 1515922639.0, "step": 1988 }, { "epoch": 2.7141988413899054, "grad_norm": 0.1830446737747264, "learning_rate": 1.585663997392347e-05, "loss": 0.3105, "num_tokens": 1516724373.0, "step": 1989 }, { "epoch": 2.715564422348586, "grad_norm": 0.18818035744910167, "learning_rate": 1.5852454232155216e-05, "loss": 0.3078, "num_tokens": 1517540615.0, "step": 1990 }, { "epoch": 2.7169300033072665, "grad_norm": 0.16958844603642678, "learning_rate": 1.5848267010113318e-05, "loss": 0.3056, "num_tokens": 1518259705.0, "step": 1991 }, { "epoch": 2.718295584265947, "grad_norm": 0.1940516565257873, "learning_rate": 1.584407830907512e-05, "loss": 0.3082, "num_tokens": 1519056670.0, "step": 1992 }, { "epoch": 2.719661165224627, "grad_norm": 0.18102341432963825, "learning_rate": 1.58398881303184e-05, "loss": 0.3071, "num_tokens": 1519831618.0, "step": 1993 }, { "epoch": 2.7210267461833078, "grad_norm": 0.1902441731473542, "learning_rate": 1.5835696475121418e-05, "loss": 0.3139, "num_tokens": 1520558945.0, "step": 1994 }, { "epoch": 2.7223923271419883, "grad_norm": 0.18505147682469275, "learning_rate": 1.5831503344762854e-05, "loss": 0.3088, "num_tokens": 1521319408.0, "step": 1995 }, { "epoch": 2.723757908100669, "grad_norm": 0.17786564973003985, "learning_rate": 1.5827308740521853e-05, "loss": 0.3065, "num_tokens": 1522131685.0, "step": 1996 }, { "epoch": 2.7251234890593494, "grad_norm": 0.2111569063192298, "learning_rate": 1.582311266367801e-05, "loss": 0.2968, "num_tokens": 1522796011.0, "step": 1997 }, { "epoch": 2.72648907001803, "grad_norm": 0.30047040104391864, "learning_rate": 1.5818915115511363e-05, "loss": 0.3014, "num_tokens": 1523514448.0, "step": 1998 }, { "epoch": 2.7278546509767105, "grad_norm": 0.2085879115477984, "learning_rate": 1.58147160973024e-05, "loss": 0.3172, "num_tokens": 1524244415.0, "step": 1999 }, { "epoch": 2.729220231935391, "grad_norm": 0.17196797697629762, "learning_rate": 1.5810515610332072e-05, "loss": 0.3162, "num_tokens": 1525073445.0, "step": 2000 }, { "epoch": 2.7305858128940717, "grad_norm": 0.17956950253335185, "learning_rate": 1.5806313655881755e-05, "loss": 0.3038, "num_tokens": 1525836411.0, "step": 2001 }, { "epoch": 2.7319513938527518, "grad_norm": 0.18863467971230788, "learning_rate": 1.5802110235233284e-05, "loss": 0.3157, "num_tokens": 1526555017.0, "step": 2002 }, { "epoch": 2.7333169748114323, "grad_norm": 0.1924796762970696, "learning_rate": 1.579790534966894e-05, "loss": 0.3038, "num_tokens": 1527290561.0, "step": 2003 }, { "epoch": 2.734682555770113, "grad_norm": 0.17473338872740446, "learning_rate": 1.5793699000471453e-05, "loss": 0.3194, "num_tokens": 1528144211.0, "step": 2004 }, { "epoch": 2.7360481367287934, "grad_norm": 0.17859681502213878, "learning_rate": 1.5789491188924e-05, "loss": 0.3176, "num_tokens": 1528940233.0, "step": 2005 }, { "epoch": 2.737413717687474, "grad_norm": 0.1780843190279203, "learning_rate": 1.57852819163102e-05, "loss": 0.3132, "num_tokens": 1529739089.0, "step": 2006 }, { "epoch": 2.7387792986461545, "grad_norm": 0.17241840122098126, "learning_rate": 1.5781071183914124e-05, "loss": 0.3148, "num_tokens": 1530535202.0, "step": 2007 }, { "epoch": 2.740144879604835, "grad_norm": 0.16736850481255391, "learning_rate": 1.5776858993020275e-05, "loss": 0.3231, "num_tokens": 1531366180.0, "step": 2008 }, { "epoch": 2.7415104605635157, "grad_norm": 0.18064793122751724, "learning_rate": 1.577264534491362e-05, "loss": 0.2963, "num_tokens": 1532130781.0, "step": 2009 }, { "epoch": 2.742876041522196, "grad_norm": 0.17049947476077676, "learning_rate": 1.5768430240879553e-05, "loss": 0.2989, "num_tokens": 1532831553.0, "step": 2010 }, { "epoch": 2.7442416224808763, "grad_norm": 0.1804073326101257, "learning_rate": 1.5764213682203927e-05, "loss": 0.31, "num_tokens": 1533554243.0, "step": 2011 }, { "epoch": 2.745607203439557, "grad_norm": 0.1830577992918019, "learning_rate": 1.575999567017302e-05, "loss": 0.3049, "num_tokens": 1534274206.0, "step": 2012 }, { "epoch": 2.7469727843982374, "grad_norm": 0.18012470517482207, "learning_rate": 1.5755776206073575e-05, "loss": 0.2921, "num_tokens": 1535003594.0, "step": 2013 }, { "epoch": 2.748338365356918, "grad_norm": 0.16839631479627096, "learning_rate": 1.5751555291192767e-05, "loss": 0.2966, "num_tokens": 1535742456.0, "step": 2014 }, { "epoch": 2.7497039463155986, "grad_norm": 0.17717540871324788, "learning_rate": 1.574733292681821e-05, "loss": 0.294, "num_tokens": 1536504692.0, "step": 2015 }, { "epoch": 2.751069527274279, "grad_norm": 0.16826412657979858, "learning_rate": 1.5743109114237968e-05, "loss": 0.3048, "num_tokens": 1537413578.0, "step": 2016 }, { "epoch": 2.7524351082329597, "grad_norm": 0.16936450271806502, "learning_rate": 1.573888385474054e-05, "loss": 0.3175, "num_tokens": 1538242925.0, "step": 2017 }, { "epoch": 2.7538006891916402, "grad_norm": 0.17457369347799437, "learning_rate": 1.573465714961487e-05, "loss": 0.3184, "num_tokens": 1539119585.0, "step": 2018 }, { "epoch": 2.755166270150321, "grad_norm": 0.17287512750451553, "learning_rate": 1.573042900015035e-05, "loss": 0.309, "num_tokens": 1539886400.0, "step": 2019 }, { "epoch": 2.756531851109001, "grad_norm": 0.17748656146744635, "learning_rate": 1.5726199407636794e-05, "loss": 0.3206, "num_tokens": 1540662660.0, "step": 2020 }, { "epoch": 2.7578974320676815, "grad_norm": 0.17986026394182325, "learning_rate": 1.5721968373364475e-05, "loss": 0.3009, "num_tokens": 1541425254.0, "step": 2021 }, { "epoch": 2.759263013026362, "grad_norm": 0.20611931559420996, "learning_rate": 1.5717735898624094e-05, "loss": 0.3087, "num_tokens": 1542138955.0, "step": 2022 }, { "epoch": 2.7606285939850426, "grad_norm": 0.17915543676970772, "learning_rate": 1.5713501984706803e-05, "loss": 0.3176, "num_tokens": 1542946291.0, "step": 2023 }, { "epoch": 2.761994174943723, "grad_norm": 0.1849957986779023, "learning_rate": 1.5709266632904176e-05, "loss": 0.3147, "num_tokens": 1543757717.0, "step": 2024 }, { "epoch": 2.7633597559024037, "grad_norm": 0.17867712404877176, "learning_rate": 1.5705029844508242e-05, "loss": 0.2987, "num_tokens": 1544601415.0, "step": 2025 }, { "epoch": 2.7647253368610842, "grad_norm": 0.18016826128694488, "learning_rate": 1.5700791620811463e-05, "loss": 0.3046, "num_tokens": 1545311476.0, "step": 2026 }, { "epoch": 2.766090917819765, "grad_norm": 0.18206916032855192, "learning_rate": 1.569655196310673e-05, "loss": 0.3038, "num_tokens": 1546033137.0, "step": 2027 }, { "epoch": 2.7674564987784454, "grad_norm": 0.18779454141894894, "learning_rate": 1.5692310872687386e-05, "loss": 0.3078, "num_tokens": 1546851226.0, "step": 2028 }, { "epoch": 2.7688220797371255, "grad_norm": 0.1850291238511179, "learning_rate": 1.5688068350847202e-05, "loss": 0.2995, "num_tokens": 1547692709.0, "step": 2029 }, { "epoch": 2.770187660695806, "grad_norm": 0.17234622168266184, "learning_rate": 1.5683824398880388e-05, "loss": 0.319, "num_tokens": 1548501157.0, "step": 2030 }, { "epoch": 2.7715532416544866, "grad_norm": 0.18765325178759032, "learning_rate": 1.5679579018081583e-05, "loss": 0.3121, "num_tokens": 1549293356.0, "step": 2031 }, { "epoch": 2.772918822613167, "grad_norm": 0.2002498508813386, "learning_rate": 1.5675332209745883e-05, "loss": 0.3138, "num_tokens": 1550104515.0, "step": 2032 }, { "epoch": 2.7742844035718477, "grad_norm": 0.16714120275881097, "learning_rate": 1.567108397516879e-05, "loss": 0.3221, "num_tokens": 1550962342.0, "step": 2033 }, { "epoch": 2.7756499845305282, "grad_norm": 0.19469077286333075, "learning_rate": 1.5666834315646266e-05, "loss": 0.3138, "num_tokens": 1551761563.0, "step": 2034 }, { "epoch": 2.777015565489209, "grad_norm": 0.2066678404556175, "learning_rate": 1.56625832324747e-05, "loss": 0.2952, "num_tokens": 1552442608.0, "step": 2035 }, { "epoch": 2.7783811464478894, "grad_norm": 0.18144005215829043, "learning_rate": 1.5658330726950903e-05, "loss": 0.3099, "num_tokens": 1553169029.0, "step": 2036 }, { "epoch": 2.77974672740657, "grad_norm": 0.1817823115530332, "learning_rate": 1.565407680037214e-05, "loss": 0.2967, "num_tokens": 1553935121.0, "step": 2037 }, { "epoch": 2.78111230836525, "grad_norm": 0.16833731571563537, "learning_rate": 1.5649821454036095e-05, "loss": 0.3037, "num_tokens": 1554728804.0, "step": 2038 }, { "epoch": 2.7824778893239306, "grad_norm": 0.1898468306081889, "learning_rate": 1.564556468924089e-05, "loss": 0.3064, "num_tokens": 1555505128.0, "step": 2039 }, { "epoch": 2.783843470282611, "grad_norm": 0.18958211080636667, "learning_rate": 1.5641306507285083e-05, "loss": 0.2921, "num_tokens": 1556268399.0, "step": 2040 }, { "epoch": 2.7852090512412917, "grad_norm": 0.17221289746435242, "learning_rate": 1.5637046909467656e-05, "loss": 0.3078, "num_tokens": 1557008983.0, "step": 2041 }, { "epoch": 2.7865746321999723, "grad_norm": 0.18247352701471983, "learning_rate": 1.5632785897088033e-05, "loss": 0.3156, "num_tokens": 1557768226.0, "step": 2042 }, { "epoch": 2.787940213158653, "grad_norm": 0.1950310392843476, "learning_rate": 1.562852347144606e-05, "loss": 0.3083, "num_tokens": 1558523087.0, "step": 2043 }, { "epoch": 2.7893057941173334, "grad_norm": 0.19371996089378746, "learning_rate": 1.5624259633842022e-05, "loss": 0.3134, "num_tokens": 1559323848.0, "step": 2044 }, { "epoch": 2.790671375076014, "grad_norm": 0.18054486951423213, "learning_rate": 1.5619994385576628e-05, "loss": 0.3047, "num_tokens": 1560120139.0, "step": 2045 }, { "epoch": 2.7920369560346945, "grad_norm": 0.1924141706886034, "learning_rate": 1.561572772795102e-05, "loss": 0.3128, "num_tokens": 1560893595.0, "step": 2046 }, { "epoch": 2.7934025369933746, "grad_norm": 0.24394453996484644, "learning_rate": 1.5611459662266776e-05, "loss": 0.3161, "num_tokens": 1561691170.0, "step": 2047 }, { "epoch": 2.794768117952055, "grad_norm": 0.18964739816910425, "learning_rate": 1.5607190189825892e-05, "loss": 0.3102, "num_tokens": 1562456890.0, "step": 2048 }, { "epoch": 2.7961336989107357, "grad_norm": 0.19406170209338747, "learning_rate": 1.56029193119308e-05, "loss": 0.313, "num_tokens": 1563192703.0, "step": 2049 }, { "epoch": 2.7974992798694163, "grad_norm": 0.19325592834061386, "learning_rate": 1.5598647029884365e-05, "loss": 0.315, "num_tokens": 1563969623.0, "step": 2050 }, { "epoch": 2.798864860828097, "grad_norm": 0.19646622920678042, "learning_rate": 1.559437334498987e-05, "loss": 0.3121, "num_tokens": 1564667276.0, "step": 2051 }, { "epoch": 2.8002304417867774, "grad_norm": 0.18917839608836245, "learning_rate": 1.5590098258551033e-05, "loss": 0.3097, "num_tokens": 1565406796.0, "step": 2052 }, { "epoch": 2.801596022745458, "grad_norm": 0.2026016081274513, "learning_rate": 1.5585821771871997e-05, "loss": 0.3293, "num_tokens": 1566213673.0, "step": 2053 }, { "epoch": 2.8029616037041385, "grad_norm": 0.18783210680270168, "learning_rate": 1.5581543886257333e-05, "loss": 0.3283, "num_tokens": 1566965418.0, "step": 2054 }, { "epoch": 2.804327184662819, "grad_norm": 0.18936274551563168, "learning_rate": 1.5577264603012038e-05, "loss": 0.303, "num_tokens": 1567757793.0, "step": 2055 }, { "epoch": 2.805692765621499, "grad_norm": 0.18546471277022178, "learning_rate": 1.557298392344154e-05, "loss": 0.3027, "num_tokens": 1568524984.0, "step": 2056 }, { "epoch": 2.8070583465801797, "grad_norm": 0.16717594657101006, "learning_rate": 1.5568701848851688e-05, "loss": 0.2963, "num_tokens": 1569374994.0, "step": 2057 }, { "epoch": 2.8084239275388603, "grad_norm": 0.18397270230676047, "learning_rate": 1.5564418380548754e-05, "loss": 0.3065, "num_tokens": 1570128408.0, "step": 2058 }, { "epoch": 2.809789508497541, "grad_norm": 0.18190322735558026, "learning_rate": 1.5560133519839438e-05, "loss": 0.3133, "num_tokens": 1570914021.0, "step": 2059 }, { "epoch": 2.8111550894562214, "grad_norm": 0.1756713193899078, "learning_rate": 1.5555847268030867e-05, "loss": 0.3031, "num_tokens": 1571679817.0, "step": 2060 }, { "epoch": 2.812520670414902, "grad_norm": 0.1786951073297999, "learning_rate": 1.5551559626430595e-05, "loss": 0.3006, "num_tokens": 1572445465.0, "step": 2061 }, { "epoch": 2.8138862513735825, "grad_norm": 0.18366943597661836, "learning_rate": 1.5547270596346588e-05, "loss": 0.3094, "num_tokens": 1573239961.0, "step": 2062 }, { "epoch": 2.815251832332263, "grad_norm": 0.1655356872478009, "learning_rate": 1.5542980179087253e-05, "loss": 0.3125, "num_tokens": 1574079768.0, "step": 2063 }, { "epoch": 2.8166174132909436, "grad_norm": 0.19804365650071404, "learning_rate": 1.5538688375961403e-05, "loss": 0.3048, "num_tokens": 1574845718.0, "step": 2064 }, { "epoch": 2.8179829942496237, "grad_norm": 0.17618962572489064, "learning_rate": 1.553439518827828e-05, "loss": 0.3177, "num_tokens": 1575600068.0, "step": 2065 }, { "epoch": 2.8193485752083043, "grad_norm": 0.18919250065100415, "learning_rate": 1.5530100617347555e-05, "loss": 0.3029, "num_tokens": 1576348638.0, "step": 2066 }, { "epoch": 2.820714156166985, "grad_norm": 0.1874101527724353, "learning_rate": 1.552580466447932e-05, "loss": 0.3087, "num_tokens": 1577105931.0, "step": 2067 }, { "epoch": 2.8220797371256654, "grad_norm": 0.19431234424305938, "learning_rate": 1.5521507330984066e-05, "loss": 0.3037, "num_tokens": 1577823053.0, "step": 2068 }, { "epoch": 2.823445318084346, "grad_norm": 0.17819570589730788, "learning_rate": 1.5517208618172742e-05, "loss": 0.3205, "num_tokens": 1578623716.0, "step": 2069 }, { "epoch": 2.8248108990430265, "grad_norm": 0.19518746684846902, "learning_rate": 1.5512908527356693e-05, "loss": 0.3195, "num_tokens": 1579394406.0, "step": 2070 }, { "epoch": 2.826176480001707, "grad_norm": 0.18595276032186542, "learning_rate": 1.550860705984769e-05, "loss": 0.3057, "num_tokens": 1580055876.0, "step": 2071 }, { "epoch": 2.8275420609603876, "grad_norm": 0.2161784532935942, "learning_rate": 1.5504304216957922e-05, "loss": 0.3069, "num_tokens": 1580755449.0, "step": 2072 }, { "epoch": 2.828907641919068, "grad_norm": 0.1824185715570481, "learning_rate": 1.55e-05, "loss": 0.3005, "num_tokens": 1581506354.0, "step": 2073 }, { "epoch": 2.8302732228777483, "grad_norm": 0.18893054996977351, "learning_rate": 1.5495694410286962e-05, "loss": 0.3019, "num_tokens": 1582268051.0, "step": 2074 }, { "epoch": 2.831638803836429, "grad_norm": 0.18217246747003787, "learning_rate": 1.5491387449132247e-05, "loss": 0.2994, "num_tokens": 1583024561.0, "step": 2075 }, { "epoch": 2.8330043847951094, "grad_norm": 0.18440679043069302, "learning_rate": 1.548707911784973e-05, "loss": 0.2872, "num_tokens": 1583785679.0, "step": 2076 }, { "epoch": 2.83436996575379, "grad_norm": 0.17573684818584168, "learning_rate": 1.548276941775369e-05, "loss": 0.3179, "num_tokens": 1584475065.0, "step": 2077 }, { "epoch": 2.8357355467124705, "grad_norm": 0.19096204224773403, "learning_rate": 1.5478458350158832e-05, "loss": 0.3036, "num_tokens": 1585276381.0, "step": 2078 }, { "epoch": 2.837101127671151, "grad_norm": 0.1749934542593658, "learning_rate": 1.547414591638028e-05, "loss": 0.3054, "num_tokens": 1586060990.0, "step": 2079 }, { "epoch": 2.8384667086298316, "grad_norm": 0.16720660377609387, "learning_rate": 1.5469832117733568e-05, "loss": 0.2907, "num_tokens": 1586766488.0, "step": 2080 }, { "epoch": 2.839832289588512, "grad_norm": 0.21342029092249312, "learning_rate": 1.5465516955534646e-05, "loss": 0.3075, "num_tokens": 1587427568.0, "step": 2081 }, { "epoch": 2.8411978705471927, "grad_norm": 0.18596297208968338, "learning_rate": 1.546120043109989e-05, "loss": 0.3087, "num_tokens": 1588132602.0, "step": 2082 }, { "epoch": 2.842563451505873, "grad_norm": 0.19182021182483255, "learning_rate": 1.5456882545746078e-05, "loss": 0.3118, "num_tokens": 1588926680.0, "step": 2083 }, { "epoch": 2.8439290324645534, "grad_norm": 0.1870324665270737, "learning_rate": 1.545256330079041e-05, "loss": 0.3197, "num_tokens": 1589785120.0, "step": 2084 }, { "epoch": 2.845294613423234, "grad_norm": 0.1848265526921873, "learning_rate": 1.5448242697550508e-05, "loss": 0.3045, "num_tokens": 1590536924.0, "step": 2085 }, { "epoch": 2.8466601943819145, "grad_norm": 0.18066425398432745, "learning_rate": 1.544392073734439e-05, "loss": 0.3091, "num_tokens": 1591267859.0, "step": 2086 }, { "epoch": 2.848025775340595, "grad_norm": 0.20429050030958537, "learning_rate": 1.5439597421490512e-05, "loss": 0.3121, "num_tokens": 1591984438.0, "step": 2087 }, { "epoch": 2.8493913562992756, "grad_norm": 0.18085291524163258, "learning_rate": 1.543527275130772e-05, "loss": 0.3005, "num_tokens": 1592693094.0, "step": 2088 }, { "epoch": 2.850756937257956, "grad_norm": 0.16726142382856288, "learning_rate": 1.543094672811529e-05, "loss": 0.3053, "num_tokens": 1593488758.0, "step": 2089 }, { "epoch": 2.8521225182166368, "grad_norm": 0.18203249630644283, "learning_rate": 1.5426619353232896e-05, "loss": 0.3047, "num_tokens": 1594261078.0, "step": 2090 }, { "epoch": 2.8534880991753173, "grad_norm": 0.1892804476039286, "learning_rate": 1.5422290627980638e-05, "loss": 0.314, "num_tokens": 1594984957.0, "step": 2091 }, { "epoch": 2.8548536801339974, "grad_norm": 0.18089035190536298, "learning_rate": 1.5417960553679025e-05, "loss": 0.3031, "num_tokens": 1595755119.0, "step": 2092 }, { "epoch": 2.856219261092678, "grad_norm": 0.1866565524238595, "learning_rate": 1.541362913164897e-05, "loss": 0.3215, "num_tokens": 1596549344.0, "step": 2093 }, { "epoch": 2.8575848420513585, "grad_norm": 0.20351930556756695, "learning_rate": 1.5409296363211808e-05, "loss": 0.3078, "num_tokens": 1597383512.0, "step": 2094 }, { "epoch": 2.858950423010039, "grad_norm": 0.18762002041813716, "learning_rate": 1.5404962249689275e-05, "loss": 0.2984, "num_tokens": 1598205610.0, "step": 2095 }, { "epoch": 2.8603160039687197, "grad_norm": 0.16919060871526442, "learning_rate": 1.5400626792403524e-05, "loss": 0.3099, "num_tokens": 1599016746.0, "step": 2096 }, { "epoch": 2.8616815849274, "grad_norm": 0.16466552776452642, "learning_rate": 1.5396289992677115e-05, "loss": 0.3232, "num_tokens": 1599903289.0, "step": 2097 }, { "epoch": 2.8630471658860808, "grad_norm": 0.19560373521242896, "learning_rate": 1.539195185183301e-05, "loss": 0.3076, "num_tokens": 1600666887.0, "step": 2098 }, { "epoch": 2.8644127468447613, "grad_norm": 0.17274749781715565, "learning_rate": 1.53876123711946e-05, "loss": 0.3148, "num_tokens": 1601490092.0, "step": 2099 }, { "epoch": 2.865778327803442, "grad_norm": 0.18172910192156871, "learning_rate": 1.5383271552085663e-05, "loss": 0.3095, "num_tokens": 1602203796.0, "step": 2100 }, { "epoch": 2.867143908762122, "grad_norm": 0.17310525957497444, "learning_rate": 1.53789293958304e-05, "loss": 0.3144, "num_tokens": 1603040348.0, "step": 2101 }, { "epoch": 2.8685094897208026, "grad_norm": 0.17437523702201463, "learning_rate": 1.5374585903753413e-05, "loss": 0.3136, "num_tokens": 1603834290.0, "step": 2102 }, { "epoch": 2.869875070679483, "grad_norm": 0.17798869939092224, "learning_rate": 1.5370241077179717e-05, "loss": 0.3105, "num_tokens": 1604588774.0, "step": 2103 }, { "epoch": 2.8712406516381637, "grad_norm": 0.18170458080760385, "learning_rate": 1.5365894917434726e-05, "loss": 0.3238, "num_tokens": 1605430958.0, "step": 2104 }, { "epoch": 2.8726062325968442, "grad_norm": 0.20264812324393244, "learning_rate": 1.5361547425844266e-05, "loss": 0.3083, "num_tokens": 1606190754.0, "step": 2105 }, { "epoch": 2.873971813555525, "grad_norm": 0.1902867326013823, "learning_rate": 1.5357198603734565e-05, "loss": 0.3013, "num_tokens": 1606919157.0, "step": 2106 }, { "epoch": 2.8753373945142053, "grad_norm": 0.18229370340010695, "learning_rate": 1.535284845243227e-05, "loss": 0.3138, "num_tokens": 1607734009.0, "step": 2107 }, { "epoch": 2.876702975472886, "grad_norm": 0.18395722609050766, "learning_rate": 1.5348496973264414e-05, "loss": 0.3067, "num_tokens": 1608513231.0, "step": 2108 }, { "epoch": 2.8780685564315664, "grad_norm": 0.17507617546912901, "learning_rate": 1.5344144167558453e-05, "loss": 0.3172, "num_tokens": 1609290562.0, "step": 2109 }, { "epoch": 2.8794341373902466, "grad_norm": 0.20659375898966983, "learning_rate": 1.5339790036642233e-05, "loss": 0.3116, "num_tokens": 1610051268.0, "step": 2110 }, { "epoch": 2.880799718348927, "grad_norm": 0.1975041848410723, "learning_rate": 1.5335434581844016e-05, "loss": 0.3073, "num_tokens": 1610792903.0, "step": 2111 }, { "epoch": 2.8821652993076077, "grad_norm": 0.1703320019567883, "learning_rate": 1.5331077804492456e-05, "loss": 0.3054, "num_tokens": 1611558246.0, "step": 2112 }, { "epoch": 2.8835308802662882, "grad_norm": 0.2002152589199548, "learning_rate": 1.532671970591662e-05, "loss": 0.3027, "num_tokens": 1612286445.0, "step": 2113 }, { "epoch": 2.884896461224969, "grad_norm": 0.20061619269187725, "learning_rate": 1.532236028744598e-05, "loss": 0.304, "num_tokens": 1613051462.0, "step": 2114 }, { "epoch": 2.8862620421836493, "grad_norm": 0.1941099060287833, "learning_rate": 1.53179995504104e-05, "loss": 0.3096, "num_tokens": 1613820234.0, "step": 2115 }, { "epoch": 2.88762762314233, "grad_norm": 0.18705057134418998, "learning_rate": 1.5313637496140153e-05, "loss": 0.3029, "num_tokens": 1614584273.0, "step": 2116 }, { "epoch": 2.8889932041010105, "grad_norm": 0.2368040723156937, "learning_rate": 1.5309274125965913e-05, "loss": 0.3064, "num_tokens": 1615355565.0, "step": 2117 }, { "epoch": 2.890358785059691, "grad_norm": 0.1758984417740813, "learning_rate": 1.530490944121876e-05, "loss": 0.3052, "num_tokens": 1616110340.0, "step": 2118 }, { "epoch": 2.891724366018371, "grad_norm": 0.18290762972473337, "learning_rate": 1.5300543443230164e-05, "loss": 0.2994, "num_tokens": 1616832491.0, "step": 2119 }, { "epoch": 2.8930899469770517, "grad_norm": 0.20703595229796132, "learning_rate": 1.5296176133332004e-05, "loss": 0.3082, "num_tokens": 1617561953.0, "step": 2120 }, { "epoch": 2.8944555279357322, "grad_norm": 0.20253356335629422, "learning_rate": 1.529180751285656e-05, "loss": 0.3141, "num_tokens": 1618327028.0, "step": 2121 }, { "epoch": 2.895821108894413, "grad_norm": 0.1845378774891888, "learning_rate": 1.52874375831365e-05, "loss": 0.3131, "num_tokens": 1619058169.0, "step": 2122 }, { "epoch": 2.8971866898530934, "grad_norm": 0.2175473032452687, "learning_rate": 1.528306634550491e-05, "loss": 0.326, "num_tokens": 1619756907.0, "step": 2123 }, { "epoch": 2.898552270811774, "grad_norm": 0.19070636539816416, "learning_rate": 1.5278693801295263e-05, "loss": 0.3099, "num_tokens": 1620564265.0, "step": 2124 }, { "epoch": 2.8999178517704545, "grad_norm": 0.19225487616592124, "learning_rate": 1.527431995184143e-05, "loss": 0.3114, "num_tokens": 1621323055.0, "step": 2125 }, { "epoch": 2.901283432729135, "grad_norm": 0.19770345864034772, "learning_rate": 1.5269944798477683e-05, "loss": 0.3189, "num_tokens": 1622043973.0, "step": 2126 }, { "epoch": 2.9026490136878156, "grad_norm": 0.18285251218820472, "learning_rate": 1.5265568342538698e-05, "loss": 0.3045, "num_tokens": 1622805818.0, "step": 2127 }, { "epoch": 2.9040145946464957, "grad_norm": 0.17723271278829994, "learning_rate": 1.5261190585359535e-05, "loss": 0.3041, "num_tokens": 1623593197.0, "step": 2128 }, { "epoch": 2.9053801756051763, "grad_norm": 0.1877148446131528, "learning_rate": 1.5256811528275661e-05, "loss": 0.326, "num_tokens": 1624397773.0, "step": 2129 }, { "epoch": 2.906745756563857, "grad_norm": 0.18004737962838546, "learning_rate": 1.5252431172622938e-05, "loss": 0.3013, "num_tokens": 1625200763.0, "step": 2130 }, { "epoch": 2.9081113375225374, "grad_norm": 0.19137899856987836, "learning_rate": 1.5248049519737618e-05, "loss": 0.3197, "num_tokens": 1625924357.0, "step": 2131 }, { "epoch": 2.909476918481218, "grad_norm": 0.20375145438236403, "learning_rate": 1.524366657095636e-05, "loss": 0.3061, "num_tokens": 1626661744.0, "step": 2132 }, { "epoch": 2.9108424994398985, "grad_norm": 0.18835986689714515, "learning_rate": 1.5239282327616207e-05, "loss": 0.3002, "num_tokens": 1627388872.0, "step": 2133 }, { "epoch": 2.912208080398579, "grad_norm": 0.20009699897484454, "learning_rate": 1.5234896791054603e-05, "loss": 0.302, "num_tokens": 1628169555.0, "step": 2134 }, { "epoch": 2.9135736613572596, "grad_norm": 0.1882383207583699, "learning_rate": 1.523050996260939e-05, "loss": 0.3113, "num_tokens": 1628909147.0, "step": 2135 }, { "epoch": 2.91493924231594, "grad_norm": 0.20665657439419013, "learning_rate": 1.522612184361879e-05, "loss": 0.3195, "num_tokens": 1629736045.0, "step": 2136 }, { "epoch": 2.9163048232746203, "grad_norm": 0.22556934474825374, "learning_rate": 1.5221732435421436e-05, "loss": 0.3083, "num_tokens": 1630540779.0, "step": 2137 }, { "epoch": 2.917670404233301, "grad_norm": 0.1984581438575971, "learning_rate": 1.5217341739356343e-05, "loss": 0.3155, "num_tokens": 1631334054.0, "step": 2138 }, { "epoch": 2.9190359851919814, "grad_norm": 0.2129716694813734, "learning_rate": 1.5212949756762925e-05, "loss": 0.3183, "num_tokens": 1632106899.0, "step": 2139 }, { "epoch": 2.920401566150662, "grad_norm": 0.19883198534267346, "learning_rate": 1.5208556488980984e-05, "loss": 0.2858, "num_tokens": 1632859433.0, "step": 2140 }, { "epoch": 2.9217671471093425, "grad_norm": 0.21648750861286428, "learning_rate": 1.5204161937350713e-05, "loss": 0.3188, "num_tokens": 1633574311.0, "step": 2141 }, { "epoch": 2.923132728068023, "grad_norm": 0.17798198891781183, "learning_rate": 1.5199766103212701e-05, "loss": 0.2942, "num_tokens": 1634431326.0, "step": 2142 }, { "epoch": 2.9244983090267036, "grad_norm": 0.2003664857466294, "learning_rate": 1.5195368987907931e-05, "loss": 0.3023, "num_tokens": 1635163965.0, "step": 2143 }, { "epoch": 2.925863889985384, "grad_norm": 0.19817568897495416, "learning_rate": 1.5190970592777763e-05, "loss": 0.3013, "num_tokens": 1635900716.0, "step": 2144 }, { "epoch": 2.9272294709440647, "grad_norm": 0.1984341540971505, "learning_rate": 1.5186570919163965e-05, "loss": 0.3057, "num_tokens": 1636621730.0, "step": 2145 }, { "epoch": 2.928595051902745, "grad_norm": 0.20146093928000117, "learning_rate": 1.5182169968408686e-05, "loss": 0.3133, "num_tokens": 1637412984.0, "step": 2146 }, { "epoch": 2.9299606328614254, "grad_norm": 0.17766358689088885, "learning_rate": 1.5177767741854461e-05, "loss": 0.2939, "num_tokens": 1638112779.0, "step": 2147 }, { "epoch": 2.931326213820106, "grad_norm": 0.19692550994476404, "learning_rate": 1.5173364240844226e-05, "loss": 0.3243, "num_tokens": 1638911250.0, "step": 2148 }, { "epoch": 2.9326917947787865, "grad_norm": 0.20655734511780335, "learning_rate": 1.5168959466721291e-05, "loss": 0.3037, "num_tokens": 1639667027.0, "step": 2149 }, { "epoch": 2.934057375737467, "grad_norm": 0.2292820618951723, "learning_rate": 1.5164553420829367e-05, "loss": 0.318, "num_tokens": 1640437047.0, "step": 2150 }, { "epoch": 2.9354229566961476, "grad_norm": 0.19969420773824326, "learning_rate": 1.5160146104512546e-05, "loss": 0.3073, "num_tokens": 1641213566.0, "step": 2151 }, { "epoch": 2.936788537654828, "grad_norm": 0.19694906216974098, "learning_rate": 1.5155737519115308e-05, "loss": 0.3038, "num_tokens": 1641946172.0, "step": 2152 }, { "epoch": 2.9381541186135087, "grad_norm": 0.20249739089745913, "learning_rate": 1.5151327665982522e-05, "loss": 0.2993, "num_tokens": 1642609631.0, "step": 2153 }, { "epoch": 2.9395196995721893, "grad_norm": 0.1957259808699525, "learning_rate": 1.5146916546459443e-05, "loss": 0.303, "num_tokens": 1643321452.0, "step": 2154 }, { "epoch": 2.9408852805308694, "grad_norm": 0.19675826511115072, "learning_rate": 1.5142504161891715e-05, "loss": 0.3093, "num_tokens": 1644123007.0, "step": 2155 }, { "epoch": 2.94225086148955, "grad_norm": 0.17509380023595492, "learning_rate": 1.5138090513625362e-05, "loss": 0.3045, "num_tokens": 1644928081.0, "step": 2156 }, { "epoch": 2.9436164424482305, "grad_norm": 0.19334999669989342, "learning_rate": 1.5133675603006801e-05, "loss": 0.2984, "num_tokens": 1645636129.0, "step": 2157 }, { "epoch": 2.944982023406911, "grad_norm": 0.19482735678217422, "learning_rate": 1.512925943138283e-05, "loss": 0.3059, "num_tokens": 1646411454.0, "step": 2158 }, { "epoch": 2.9463476043655916, "grad_norm": 0.17760842062601973, "learning_rate": 1.5124842000100625e-05, "loss": 0.3109, "num_tokens": 1647244853.0, "step": 2159 }, { "epoch": 2.947713185324272, "grad_norm": 0.20471910085021677, "learning_rate": 1.5120423310507762e-05, "loss": 0.3112, "num_tokens": 1648000197.0, "step": 2160 }, { "epoch": 2.9490787662829527, "grad_norm": 0.19125507683006296, "learning_rate": 1.5116003363952184e-05, "loss": 0.3155, "num_tokens": 1648780753.0, "step": 2161 }, { "epoch": 2.9504443472416333, "grad_norm": 0.17952067623025666, "learning_rate": 1.5111582161782228e-05, "loss": 0.3059, "num_tokens": 1649501775.0, "step": 2162 }, { "epoch": 2.951809928200314, "grad_norm": 0.18988718754676528, "learning_rate": 1.5107159705346612e-05, "loss": 0.3079, "num_tokens": 1650238419.0, "step": 2163 }, { "epoch": 2.953175509158994, "grad_norm": 0.17932709142277242, "learning_rate": 1.5102735995994438e-05, "loss": 0.3063, "num_tokens": 1650995298.0, "step": 2164 }, { "epoch": 2.9545410901176745, "grad_norm": 0.1995439969083902, "learning_rate": 1.5098311035075185e-05, "loss": 0.3166, "num_tokens": 1651729942.0, "step": 2165 }, { "epoch": 2.955906671076355, "grad_norm": 0.17500159920829444, "learning_rate": 1.5093884823938715e-05, "loss": 0.3068, "num_tokens": 1652519149.0, "step": 2166 }, { "epoch": 2.9572722520350356, "grad_norm": 0.1972690426327307, "learning_rate": 1.508945736393528e-05, "loss": 0.3198, "num_tokens": 1653227951.0, "step": 2167 }, { "epoch": 2.958637832993716, "grad_norm": 0.18162593161012908, "learning_rate": 1.5085028656415503e-05, "loss": 0.3094, "num_tokens": 1654014993.0, "step": 2168 }, { "epoch": 2.9600034139523967, "grad_norm": 0.18362743145140545, "learning_rate": 1.5080598702730387e-05, "loss": 0.2924, "num_tokens": 1654756241.0, "step": 2169 }, { "epoch": 2.9613689949110773, "grad_norm": 0.17654844109249448, "learning_rate": 1.5076167504231327e-05, "loss": 0.2934, "num_tokens": 1655450021.0, "step": 2170 }, { "epoch": 2.962734575869758, "grad_norm": 0.1771805601557758, "learning_rate": 1.5071735062270084e-05, "loss": 0.3142, "num_tokens": 1656181156.0, "step": 2171 }, { "epoch": 2.9641001568284384, "grad_norm": 0.19159106797596323, "learning_rate": 1.5067301378198801e-05, "loss": 0.3037, "num_tokens": 1656924363.0, "step": 2172 }, { "epoch": 2.9654657377871185, "grad_norm": 0.19595295247745959, "learning_rate": 1.5062866453370012e-05, "loss": 0.3131, "num_tokens": 1657669419.0, "step": 2173 }, { "epoch": 2.966831318745799, "grad_norm": 0.19002962163436748, "learning_rate": 1.5058430289136616e-05, "loss": 0.2921, "num_tokens": 1658374370.0, "step": 2174 }, { "epoch": 2.9681968997044796, "grad_norm": 0.17831954932973135, "learning_rate": 1.5053992886851892e-05, "loss": 0.322, "num_tokens": 1659198922.0, "step": 2175 }, { "epoch": 2.96956248066316, "grad_norm": 0.18637839599764777, "learning_rate": 1.5049554247869503e-05, "loss": 0.3041, "num_tokens": 1659993027.0, "step": 2176 }, { "epoch": 2.9709280616218408, "grad_norm": 0.17905577515401397, "learning_rate": 1.5045114373543484e-05, "loss": 0.3021, "num_tokens": 1660723056.0, "step": 2177 }, { "epoch": 2.9722936425805213, "grad_norm": 0.18161086183452435, "learning_rate": 1.5040673265228248e-05, "loss": 0.3166, "num_tokens": 1661458478.0, "step": 2178 }, { "epoch": 2.973659223539202, "grad_norm": 0.1729246987548811, "learning_rate": 1.5036230924278585e-05, "loss": 0.3102, "num_tokens": 1662256130.0, "step": 2179 }, { "epoch": 2.9750248044978824, "grad_norm": 0.17081408096723785, "learning_rate": 1.5031787352049665e-05, "loss": 0.2925, "num_tokens": 1663002835.0, "step": 2180 }, { "epoch": 2.976390385456563, "grad_norm": 0.1800011919152803, "learning_rate": 1.502734254989702e-05, "loss": 0.3088, "num_tokens": 1663769802.0, "step": 2181 }, { "epoch": 2.977755966415243, "grad_norm": 0.1667537178179067, "learning_rate": 1.5022896519176577e-05, "loss": 0.3161, "num_tokens": 1664645047.0, "step": 2182 }, { "epoch": 2.9791215473739237, "grad_norm": 0.17073663296944946, "learning_rate": 1.5018449261244623e-05, "loss": 0.3021, "num_tokens": 1665403249.0, "step": 2183 }, { "epoch": 2.980487128332604, "grad_norm": 0.1819218911819258, "learning_rate": 1.501400077745782e-05, "loss": 0.3111, "num_tokens": 1666182797.0, "step": 2184 }, { "epoch": 2.9818527092912848, "grad_norm": 0.18629202355330532, "learning_rate": 1.5009551069173214e-05, "loss": 0.3079, "num_tokens": 1666911464.0, "step": 2185 }, { "epoch": 2.9832182902499653, "grad_norm": 0.176975324111544, "learning_rate": 1.5005100137748214e-05, "loss": 0.3081, "num_tokens": 1667753519.0, "step": 2186 }, { "epoch": 2.984583871208646, "grad_norm": 0.1682447333768825, "learning_rate": 1.500064798454061e-05, "loss": 0.3144, "num_tokens": 1668592779.0, "step": 2187 }, { "epoch": 2.9859494521673264, "grad_norm": 0.18156437103065917, "learning_rate": 1.4996194610908557e-05, "loss": 0.3078, "num_tokens": 1669353988.0, "step": 2188 }, { "epoch": 2.987315033126007, "grad_norm": 0.17340718832087504, "learning_rate": 1.4991740018210594e-05, "loss": 0.2991, "num_tokens": 1670090523.0, "step": 2189 }, { "epoch": 2.9886806140846875, "grad_norm": 0.17694651077371376, "learning_rate": 1.4987284207805614e-05, "loss": 0.312, "num_tokens": 1670884394.0, "step": 2190 }, { "epoch": 2.9900461950433677, "grad_norm": 0.1883532757941866, "learning_rate": 1.49828271810529e-05, "loss": 0.3183, "num_tokens": 1671655906.0, "step": 2191 }, { "epoch": 2.991411776002048, "grad_norm": 0.17640232933837383, "learning_rate": 1.497836893931209e-05, "loss": 0.3135, "num_tokens": 1672458813.0, "step": 2192 }, { "epoch": 2.9927773569607288, "grad_norm": 0.17405100580136232, "learning_rate": 1.4973909483943208e-05, "loss": 0.3, "num_tokens": 1673210726.0, "step": 2193 }, { "epoch": 2.9941429379194093, "grad_norm": 0.1912232751370459, "learning_rate": 1.4969448816306638e-05, "loss": 0.3095, "num_tokens": 1673992648.0, "step": 2194 }, { "epoch": 2.99550851887809, "grad_norm": 0.17870899580522215, "learning_rate": 1.4964986937763135e-05, "loss": 0.3005, "num_tokens": 1674753102.0, "step": 2195 }, { "epoch": 2.9968740998367704, "grad_norm": 0.17104428158078847, "learning_rate": 1.4960523849673826e-05, "loss": 0.3015, "num_tokens": 1675506607.0, "step": 2196 }, { "epoch": 2.998239680795451, "grad_norm": 0.18408415612995824, "learning_rate": 1.4956059553400211e-05, "loss": 0.3096, "num_tokens": 1676226838.0, "step": 2197 }, { "epoch": 2.9996052617541316, "grad_norm": 0.18334579443981053, "learning_rate": 1.4951594050304146e-05, "loss": 0.3038, "num_tokens": 1676898951.0, "step": 2198 }, { "epoch": 3.0, "grad_norm": 0.18334579443981053, "learning_rate": 1.4947127341747862e-05, "loss": 0.2979, "num_tokens": 1677126408.0, "step": 2199 }, { "epoch": 3.0013655809586806, "grad_norm": 0.3444156282804254, "learning_rate": 1.4942659429093962e-05, "loss": 0.2717, "num_tokens": 1677916942.0, "step": 2200 }, { "epoch": 3.002731161917361, "grad_norm": 0.29108608662801516, "learning_rate": 1.4938190313705416e-05, "loss": 0.2619, "num_tokens": 1678717166.0, "step": 2201 }, { "epoch": 3.0040967428760417, "grad_norm": 0.2562279535482217, "learning_rate": 1.4933719996945552e-05, "loss": 0.2665, "num_tokens": 1679458334.0, "step": 2202 }, { "epoch": 3.0054623238347222, "grad_norm": 0.2041129811610529, "learning_rate": 1.492924848017807e-05, "loss": 0.2718, "num_tokens": 1680236898.0, "step": 2203 }, { "epoch": 3.0068279047934023, "grad_norm": 0.32243815358448286, "learning_rate": 1.492477576476704e-05, "loss": 0.285, "num_tokens": 1681041707.0, "step": 2204 }, { "epoch": 3.008193485752083, "grad_norm": 0.29003048941352066, "learning_rate": 1.4920301852076892e-05, "loss": 0.276, "num_tokens": 1681775995.0, "step": 2205 }, { "epoch": 3.0095590667107635, "grad_norm": 0.2403321672799092, "learning_rate": 1.4915826743472424e-05, "loss": 0.2752, "num_tokens": 1682594543.0, "step": 2206 }, { "epoch": 3.010924647669444, "grad_norm": 0.21995235829265286, "learning_rate": 1.4911350440318793e-05, "loss": 0.2675, "num_tokens": 1683357198.0, "step": 2207 }, { "epoch": 3.0122902286281246, "grad_norm": 0.23887160898304638, "learning_rate": 1.4906872943981533e-05, "loss": 0.2778, "num_tokens": 1684121225.0, "step": 2208 }, { "epoch": 3.013655809586805, "grad_norm": 0.2433321087547303, "learning_rate": 1.4902394255826527e-05, "loss": 0.2817, "num_tokens": 1684923547.0, "step": 2209 }, { "epoch": 3.0150213905454857, "grad_norm": 0.20764288209821682, "learning_rate": 1.4897914377220033e-05, "loss": 0.2588, "num_tokens": 1685699250.0, "step": 2210 }, { "epoch": 3.0163869715041662, "grad_norm": 0.23732561401948254, "learning_rate": 1.4893433309528664e-05, "loss": 0.2672, "num_tokens": 1686434028.0, "step": 2211 }, { "epoch": 3.017752552462847, "grad_norm": 0.23328926515678808, "learning_rate": 1.4888951054119405e-05, "loss": 0.2666, "num_tokens": 1687148227.0, "step": 2212 }, { "epoch": 3.019118133421527, "grad_norm": 0.22279346932404173, "learning_rate": 1.4884467612359597e-05, "loss": 0.2568, "num_tokens": 1687929275.0, "step": 2213 }, { "epoch": 3.0204837143802075, "grad_norm": 0.19510000105424902, "learning_rate": 1.487998298561694e-05, "loss": 0.2785, "num_tokens": 1688651922.0, "step": 2214 }, { "epoch": 3.021849295338888, "grad_norm": 0.22198811521891376, "learning_rate": 1.4875497175259503e-05, "loss": 0.2727, "num_tokens": 1689459178.0, "step": 2215 }, { "epoch": 3.0232148762975686, "grad_norm": 0.20222064711836382, "learning_rate": 1.4871010182655712e-05, "loss": 0.2722, "num_tokens": 1690235706.0, "step": 2216 }, { "epoch": 3.024580457256249, "grad_norm": 0.20101925668929865, "learning_rate": 1.4866522009174354e-05, "loss": 0.2689, "num_tokens": 1690937730.0, "step": 2217 }, { "epoch": 3.0259460382149297, "grad_norm": 0.19204799521617283, "learning_rate": 1.4862032656184573e-05, "loss": 0.2827, "num_tokens": 1691672712.0, "step": 2218 }, { "epoch": 3.0273116191736102, "grad_norm": 0.2176374302004761, "learning_rate": 1.485754212505588e-05, "loss": 0.2604, "num_tokens": 1692360559.0, "step": 2219 }, { "epoch": 3.028677200132291, "grad_norm": 0.224149606737412, "learning_rate": 1.4853050417158141e-05, "loss": 0.2609, "num_tokens": 1693097004.0, "step": 2220 }, { "epoch": 3.0300427810909714, "grad_norm": 0.1966986586497178, "learning_rate": 1.4848557533861583e-05, "loss": 0.2737, "num_tokens": 1693863860.0, "step": 2221 }, { "epoch": 3.0314083620496515, "grad_norm": 0.21408858027582164, "learning_rate": 1.4844063476536785e-05, "loss": 0.2732, "num_tokens": 1694616497.0, "step": 2222 }, { "epoch": 3.032773943008332, "grad_norm": 0.19806226474534677, "learning_rate": 1.4839568246554696e-05, "loss": 0.2698, "num_tokens": 1695335422.0, "step": 2223 }, { "epoch": 3.0341395239670126, "grad_norm": 0.2085523460068365, "learning_rate": 1.4835071845286608e-05, "loss": 0.2591, "num_tokens": 1696106399.0, "step": 2224 }, { "epoch": 3.035505104925693, "grad_norm": 0.19466263058273708, "learning_rate": 1.4830574274104185e-05, "loss": 0.2789, "num_tokens": 1696901217.0, "step": 2225 }, { "epoch": 3.0368706858843737, "grad_norm": 0.20992490609055456, "learning_rate": 1.4826075534379441e-05, "loss": 0.2754, "num_tokens": 1697703322.0, "step": 2226 }, { "epoch": 3.0382362668430543, "grad_norm": 0.21760042672567967, "learning_rate": 1.4821575627484744e-05, "loss": 0.2656, "num_tokens": 1698451933.0, "step": 2227 }, { "epoch": 3.039601847801735, "grad_norm": 0.20209876152292114, "learning_rate": 1.4817074554792821e-05, "loss": 0.2711, "num_tokens": 1699266887.0, "step": 2228 }, { "epoch": 3.0409674287604154, "grad_norm": 0.2065950437730651, "learning_rate": 1.4812572317676757e-05, "loss": 0.2652, "num_tokens": 1700005537.0, "step": 2229 }, { "epoch": 3.042333009719096, "grad_norm": 0.21266386571001472, "learning_rate": 1.4808068917509984e-05, "loss": 0.2608, "num_tokens": 1700755292.0, "step": 2230 }, { "epoch": 3.043698590677776, "grad_norm": 0.2033819324565466, "learning_rate": 1.4803564355666295e-05, "loss": 0.2694, "num_tokens": 1701426341.0, "step": 2231 }, { "epoch": 3.0450641716364566, "grad_norm": 0.21856298426476906, "learning_rate": 1.4799058633519845e-05, "loss": 0.2683, "num_tokens": 1702180580.0, "step": 2232 }, { "epoch": 3.046429752595137, "grad_norm": 0.2030676099008066, "learning_rate": 1.4794551752445127e-05, "loss": 0.273, "num_tokens": 1702977186.0, "step": 2233 }, { "epoch": 3.0477953335538177, "grad_norm": 0.19178490525907393, "learning_rate": 1.4790043713816997e-05, "loss": 0.2667, "num_tokens": 1703725763.0, "step": 2234 }, { "epoch": 3.0491609145124983, "grad_norm": 0.21793329251451785, "learning_rate": 1.4785534519010662e-05, "loss": 0.2711, "num_tokens": 1704490265.0, "step": 2235 }, { "epoch": 3.050526495471179, "grad_norm": 0.18599691837743756, "learning_rate": 1.4781024169401687e-05, "loss": 0.2671, "num_tokens": 1705216386.0, "step": 2236 }, { "epoch": 3.0518920764298594, "grad_norm": 0.20586285380129404, "learning_rate": 1.4776512666365976e-05, "loss": 0.2722, "num_tokens": 1705989014.0, "step": 2237 }, { "epoch": 3.05325765738854, "grad_norm": 0.19360615454332133, "learning_rate": 1.4772000011279799e-05, "loss": 0.2754, "num_tokens": 1706711821.0, "step": 2238 }, { "epoch": 3.0546232383472205, "grad_norm": 0.20145638778911903, "learning_rate": 1.4767486205519767e-05, "loss": 0.2708, "num_tokens": 1707528930.0, "step": 2239 }, { "epoch": 3.0559888193059006, "grad_norm": 0.1895104940576205, "learning_rate": 1.4762971250462855e-05, "loss": 0.279, "num_tokens": 1708288649.0, "step": 2240 }, { "epoch": 3.057354400264581, "grad_norm": 0.19613206463228147, "learning_rate": 1.4758455147486373e-05, "loss": 0.2602, "num_tokens": 1708976971.0, "step": 2241 }, { "epoch": 3.0587199812232617, "grad_norm": 0.2197896441430753, "learning_rate": 1.4753937897967987e-05, "loss": 0.2548, "num_tokens": 1709658194.0, "step": 2242 }, { "epoch": 3.0600855621819423, "grad_norm": 0.1948262871334908, "learning_rate": 1.4749419503285719e-05, "loss": 0.2841, "num_tokens": 1710497344.0, "step": 2243 }, { "epoch": 3.061451143140623, "grad_norm": 0.19825310629761053, "learning_rate": 1.4744899964817938e-05, "loss": 0.2641, "num_tokens": 1711246475.0, "step": 2244 }, { "epoch": 3.0628167240993034, "grad_norm": 0.20244652951535042, "learning_rate": 1.4740379283943353e-05, "loss": 0.2972, "num_tokens": 1712035162.0, "step": 2245 }, { "epoch": 3.064182305057984, "grad_norm": 0.20181102642957294, "learning_rate": 1.4735857462041028e-05, "loss": 0.2759, "num_tokens": 1712805969.0, "step": 2246 }, { "epoch": 3.0655478860166645, "grad_norm": 0.22250525983278033, "learning_rate": 1.4731334500490381e-05, "loss": 0.2766, "num_tokens": 1713567437.0, "step": 2247 }, { "epoch": 3.066913466975345, "grad_norm": 0.19996608881583855, "learning_rate": 1.4726810400671168e-05, "loss": 0.2865, "num_tokens": 1714432834.0, "step": 2248 }, { "epoch": 3.068279047934025, "grad_norm": 0.2100388748331839, "learning_rate": 1.4722285163963492e-05, "loss": 0.2632, "num_tokens": 1715198984.0, "step": 2249 }, { "epoch": 3.0696446288927057, "grad_norm": 0.19491158630472177, "learning_rate": 1.4717758791747817e-05, "loss": 0.2744, "num_tokens": 1716030529.0, "step": 2250 }, { "epoch": 3.0710102098513863, "grad_norm": 0.1950865257467894, "learning_rate": 1.4713231285404934e-05, "loss": 0.2543, "num_tokens": 1716773004.0, "step": 2251 }, { "epoch": 3.072375790810067, "grad_norm": 0.1858118096923295, "learning_rate": 1.470870264631599e-05, "loss": 0.2648, "num_tokens": 1717459740.0, "step": 2252 }, { "epoch": 3.0737413717687474, "grad_norm": 0.2144494873951648, "learning_rate": 1.4704172875862483e-05, "loss": 0.2607, "num_tokens": 1718233093.0, "step": 2253 }, { "epoch": 3.075106952727428, "grad_norm": 0.20119930563263919, "learning_rate": 1.4699641975426245e-05, "loss": 0.2834, "num_tokens": 1718984808.0, "step": 2254 }, { "epoch": 3.0764725336861085, "grad_norm": 0.2031480891942438, "learning_rate": 1.4695109946389457e-05, "loss": 0.2796, "num_tokens": 1719749428.0, "step": 2255 }, { "epoch": 3.077838114644789, "grad_norm": 0.20016253818939397, "learning_rate": 1.4690576790134645e-05, "loss": 0.2653, "num_tokens": 1720501981.0, "step": 2256 }, { "epoch": 3.0792036956034696, "grad_norm": 0.19431408912614256, "learning_rate": 1.468604250804468e-05, "loss": 0.2724, "num_tokens": 1721285224.0, "step": 2257 }, { "epoch": 3.0805692765621497, "grad_norm": 0.19346712776447386, "learning_rate": 1.4681507101502776e-05, "loss": 0.2802, "num_tokens": 1722057796.0, "step": 2258 }, { "epoch": 3.0819348575208303, "grad_norm": 0.21536613128401977, "learning_rate": 1.4676970571892488e-05, "loss": 0.279, "num_tokens": 1722793551.0, "step": 2259 }, { "epoch": 3.083300438479511, "grad_norm": 0.22407404542449041, "learning_rate": 1.4672432920597714e-05, "loss": 0.2784, "num_tokens": 1723508822.0, "step": 2260 }, { "epoch": 3.0846660194381914, "grad_norm": 0.20391085926257044, "learning_rate": 1.4667894149002695e-05, "loss": 0.2562, "num_tokens": 1724308513.0, "step": 2261 }, { "epoch": 3.086031600396872, "grad_norm": 0.22101200362401402, "learning_rate": 1.4663354258492016e-05, "loss": 0.2618, "num_tokens": 1725062416.0, "step": 2262 }, { "epoch": 3.0873971813555525, "grad_norm": 0.2338530629617751, "learning_rate": 1.4658813250450597e-05, "loss": 0.2815, "num_tokens": 1725880813.0, "step": 2263 }, { "epoch": 3.088762762314233, "grad_norm": 0.19856320981260248, "learning_rate": 1.4654271126263703e-05, "loss": 0.2761, "num_tokens": 1726610715.0, "step": 2264 }, { "epoch": 3.0901283432729136, "grad_norm": 0.21023859548137452, "learning_rate": 1.4649727887316947e-05, "loss": 0.27, "num_tokens": 1727451931.0, "step": 2265 }, { "epoch": 3.091493924231594, "grad_norm": 0.20164994326479355, "learning_rate": 1.4645183534996264e-05, "loss": 0.2727, "num_tokens": 1728339002.0, "step": 2266 }, { "epoch": 3.0928595051902743, "grad_norm": 0.18537715192793544, "learning_rate": 1.4640638070687948e-05, "loss": 0.27, "num_tokens": 1729082821.0, "step": 2267 }, { "epoch": 3.094225086148955, "grad_norm": 0.19129862178750195, "learning_rate": 1.4636091495778615e-05, "loss": 0.2683, "num_tokens": 1729888792.0, "step": 2268 }, { "epoch": 3.0955906671076354, "grad_norm": 0.20608486159850312, "learning_rate": 1.4631543811655235e-05, "loss": 0.2826, "num_tokens": 1730641414.0, "step": 2269 }, { "epoch": 3.096956248066316, "grad_norm": 0.20037329802373388, "learning_rate": 1.4626995019705106e-05, "loss": 0.2636, "num_tokens": 1731357190.0, "step": 2270 }, { "epoch": 3.0983218290249965, "grad_norm": 0.2052007138857161, "learning_rate": 1.462244512131587e-05, "loss": 0.2711, "num_tokens": 1732135733.0, "step": 2271 }, { "epoch": 3.099687409983677, "grad_norm": 0.20759986867845528, "learning_rate": 1.4617894117875502e-05, "loss": 0.2689, "num_tokens": 1732925347.0, "step": 2272 }, { "epoch": 3.1010529909423576, "grad_norm": 0.20173762938712766, "learning_rate": 1.4613342010772314e-05, "loss": 0.2625, "num_tokens": 1733653877.0, "step": 2273 }, { "epoch": 3.102418571901038, "grad_norm": 0.21620064090798946, "learning_rate": 1.460878880139496e-05, "loss": 0.266, "num_tokens": 1734409740.0, "step": 2274 }, { "epoch": 3.1037841528597188, "grad_norm": 0.1899976534473343, "learning_rate": 1.4604234491132428e-05, "loss": 0.2782, "num_tokens": 1735177761.0, "step": 2275 }, { "epoch": 3.105149733818399, "grad_norm": 0.20802884763149862, "learning_rate": 1.4599679081374036e-05, "loss": 0.2629, "num_tokens": 1735921648.0, "step": 2276 }, { "epoch": 3.1065153147770794, "grad_norm": 0.19791023976606456, "learning_rate": 1.4595122573509446e-05, "loss": 0.2558, "num_tokens": 1736695076.0, "step": 2277 }, { "epoch": 3.10788089573576, "grad_norm": 0.19058118757596232, "learning_rate": 1.4590564968928651e-05, "loss": 0.2614, "num_tokens": 1737542493.0, "step": 2278 }, { "epoch": 3.1092464766944405, "grad_norm": 0.1937897011912724, "learning_rate": 1.4586006269021977e-05, "loss": 0.2752, "num_tokens": 1738300508.0, "step": 2279 }, { "epoch": 3.110612057653121, "grad_norm": 0.20998352941266432, "learning_rate": 1.4581446475180088e-05, "loss": 0.2666, "num_tokens": 1739104154.0, "step": 2280 }, { "epoch": 3.1119776386118017, "grad_norm": 0.20212651580658242, "learning_rate": 1.457688558879398e-05, "loss": 0.2728, "num_tokens": 1739836600.0, "step": 2281 }, { "epoch": 3.113343219570482, "grad_norm": 0.2073165129399981, "learning_rate": 1.4572323611254978e-05, "loss": 0.2656, "num_tokens": 1740594005.0, "step": 2282 }, { "epoch": 3.1147088005291628, "grad_norm": 0.19481001058483588, "learning_rate": 1.4567760543954747e-05, "loss": 0.2687, "num_tokens": 1741404529.0, "step": 2283 }, { "epoch": 3.1160743814878433, "grad_norm": 0.18718907762552472, "learning_rate": 1.456319638828528e-05, "loss": 0.2737, "num_tokens": 1742280480.0, "step": 2284 }, { "epoch": 3.1174399624465234, "grad_norm": 0.1924321447505693, "learning_rate": 1.4558631145638906e-05, "loss": 0.2501, "num_tokens": 1743055740.0, "step": 2285 }, { "epoch": 3.118805543405204, "grad_norm": 0.19046795790715754, "learning_rate": 1.455406481740828e-05, "loss": 0.264, "num_tokens": 1743842841.0, "step": 2286 }, { "epoch": 3.1201711243638846, "grad_norm": 0.18117077662199915, "learning_rate": 1.4549497404986392e-05, "loss": 0.2667, "num_tokens": 1744607456.0, "step": 2287 }, { "epoch": 3.121536705322565, "grad_norm": 0.21438378314956888, "learning_rate": 1.454492890976656e-05, "loss": 0.2662, "num_tokens": 1745325118.0, "step": 2288 }, { "epoch": 3.1229022862812457, "grad_norm": 0.18141864442326086, "learning_rate": 1.4540359333142436e-05, "loss": 0.2745, "num_tokens": 1746063993.0, "step": 2289 }, { "epoch": 3.124267867239926, "grad_norm": 0.20276168547371853, "learning_rate": 1.4535788676508001e-05, "loss": 0.2726, "num_tokens": 1746854453.0, "step": 2290 }, { "epoch": 3.125633448198607, "grad_norm": 0.18599996441046254, "learning_rate": 1.4531216941257563e-05, "loss": 0.2826, "num_tokens": 1747702955.0, "step": 2291 }, { "epoch": 3.1269990291572873, "grad_norm": 0.2014531676595862, "learning_rate": 1.4526644128785757e-05, "loss": 0.2658, "num_tokens": 1748497979.0, "step": 2292 }, { "epoch": 3.128364610115968, "grad_norm": 0.1861971793732089, "learning_rate": 1.4522070240487554e-05, "loss": 0.2667, "num_tokens": 1749194255.0, "step": 2293 }, { "epoch": 3.129730191074648, "grad_norm": 0.20896970759347083, "learning_rate": 1.4517495277758249e-05, "loss": 0.2671, "num_tokens": 1749893265.0, "step": 2294 }, { "epoch": 3.1310957720333286, "grad_norm": 0.19647917896789985, "learning_rate": 1.4512919241993464e-05, "loss": 0.2765, "num_tokens": 1750646794.0, "step": 2295 }, { "epoch": 3.132461352992009, "grad_norm": 0.21683591979593728, "learning_rate": 1.450834213458915e-05, "loss": 0.2573, "num_tokens": 1751373646.0, "step": 2296 }, { "epoch": 3.1338269339506897, "grad_norm": 0.18484300579181753, "learning_rate": 1.4503763956941588e-05, "loss": 0.2693, "num_tokens": 1752135850.0, "step": 2297 }, { "epoch": 3.1351925149093702, "grad_norm": 0.19595746680646864, "learning_rate": 1.4499184710447373e-05, "loss": 0.2759, "num_tokens": 1752925287.0, "step": 2298 }, { "epoch": 3.136558095868051, "grad_norm": 0.18327516046627232, "learning_rate": 1.4494604396503442e-05, "loss": 0.2713, "num_tokens": 1753659189.0, "step": 2299 }, { "epoch": 3.1379236768267313, "grad_norm": 0.2111461277787238, "learning_rate": 1.4490023016507047e-05, "loss": 0.2878, "num_tokens": 1754553281.0, "step": 2300 }, { "epoch": 3.139289257785412, "grad_norm": 0.1797702702193455, "learning_rate": 1.4485440571855771e-05, "loss": 0.2526, "num_tokens": 1755263921.0, "step": 2301 }, { "epoch": 3.1406548387440925, "grad_norm": 0.18523503943669864, "learning_rate": 1.4480857063947516e-05, "loss": 0.2724, "num_tokens": 1755995142.0, "step": 2302 }, { "epoch": 3.1420204197027726, "grad_norm": 0.20206616344745992, "learning_rate": 1.4476272494180516e-05, "loss": 0.2787, "num_tokens": 1756821472.0, "step": 2303 }, { "epoch": 3.143386000661453, "grad_norm": 0.17951393219051484, "learning_rate": 1.447168686395332e-05, "loss": 0.2648, "num_tokens": 1757594663.0, "step": 2304 }, { "epoch": 3.1447515816201337, "grad_norm": 0.19361205421147917, "learning_rate": 1.4467100174664812e-05, "loss": 0.265, "num_tokens": 1758386255.0, "step": 2305 }, { "epoch": 3.1461171625788142, "grad_norm": 0.19344878730399623, "learning_rate": 1.4462512427714183e-05, "loss": 0.2594, "num_tokens": 1759156714.0, "step": 2306 }, { "epoch": 3.147482743537495, "grad_norm": 0.19904136982215143, "learning_rate": 1.4457923624500967e-05, "loss": 0.2649, "num_tokens": 1759930511.0, "step": 2307 }, { "epoch": 3.1488483244961754, "grad_norm": 0.18525687102058397, "learning_rate": 1.4453333766424999e-05, "loss": 0.2803, "num_tokens": 1760701781.0, "step": 2308 }, { "epoch": 3.150213905454856, "grad_norm": 0.19989293676980155, "learning_rate": 1.4448742854886453e-05, "loss": 0.2711, "num_tokens": 1761461085.0, "step": 2309 }, { "epoch": 3.1515794864135365, "grad_norm": 0.1989199359620941, "learning_rate": 1.4444150891285809e-05, "loss": 0.2778, "num_tokens": 1762219826.0, "step": 2310 }, { "epoch": 3.152945067372217, "grad_norm": 0.20635260619112514, "learning_rate": 1.4439557877023884e-05, "loss": 0.2627, "num_tokens": 1762969198.0, "step": 2311 }, { "epoch": 3.154310648330897, "grad_norm": 0.20101007515774208, "learning_rate": 1.4434963813501805e-05, "loss": 0.2795, "num_tokens": 1763798499.0, "step": 2312 }, { "epoch": 3.1556762292895777, "grad_norm": 0.20200903864651423, "learning_rate": 1.4430368702121025e-05, "loss": 0.2729, "num_tokens": 1764512846.0, "step": 2313 }, { "epoch": 3.1570418102482583, "grad_norm": 0.21121387588087298, "learning_rate": 1.442577254428331e-05, "loss": 0.2761, "num_tokens": 1765280950.0, "step": 2314 }, { "epoch": 3.158407391206939, "grad_norm": 0.22211445826910664, "learning_rate": 1.4421175341390748e-05, "loss": 0.2687, "num_tokens": 1766006894.0, "step": 2315 }, { "epoch": 3.1597729721656194, "grad_norm": 0.20714235361611177, "learning_rate": 1.4416577094845747e-05, "loss": 0.284, "num_tokens": 1766756459.0, "step": 2316 }, { "epoch": 3.1611385531243, "grad_norm": 0.20840775425867972, "learning_rate": 1.4411977806051034e-05, "loss": 0.2733, "num_tokens": 1767519194.0, "step": 2317 }, { "epoch": 3.1625041340829805, "grad_norm": 0.19170847634261362, "learning_rate": 1.4407377476409653e-05, "loss": 0.2793, "num_tokens": 1768285423.0, "step": 2318 }, { "epoch": 3.163869715041661, "grad_norm": 0.19536993777396655, "learning_rate": 1.4402776107324964e-05, "loss": 0.2706, "num_tokens": 1769003459.0, "step": 2319 }, { "epoch": 3.1652352960003416, "grad_norm": 0.19983814951734666, "learning_rate": 1.4398173700200646e-05, "loss": 0.2659, "num_tokens": 1769763774.0, "step": 2320 }, { "epoch": 3.1666008769590217, "grad_norm": 0.18557608694347913, "learning_rate": 1.4393570256440699e-05, "loss": 0.2838, "num_tokens": 1770502224.0, "step": 2321 }, { "epoch": 3.1679664579177023, "grad_norm": 0.20565048084962154, "learning_rate": 1.4388965777449428e-05, "loss": 0.2671, "num_tokens": 1771285274.0, "step": 2322 }, { "epoch": 3.169332038876383, "grad_norm": 0.28458540471991706, "learning_rate": 1.4384360264631458e-05, "loss": 0.2883, "num_tokens": 1772115136.0, "step": 2323 }, { "epoch": 3.1706976198350634, "grad_norm": 0.2079189114188602, "learning_rate": 1.437975371939174e-05, "loss": 0.2604, "num_tokens": 1772873775.0, "step": 2324 }, { "epoch": 3.172063200793744, "grad_norm": 0.20119644810775764, "learning_rate": 1.4375146143135524e-05, "loss": 0.2749, "num_tokens": 1773591314.0, "step": 2325 }, { "epoch": 3.1734287817524245, "grad_norm": 0.20406512052702003, "learning_rate": 1.4370537537268386e-05, "loss": 0.2735, "num_tokens": 1774349412.0, "step": 2326 }, { "epoch": 3.174794362711105, "grad_norm": 0.20175414011455178, "learning_rate": 1.436592790319621e-05, "loss": 0.2772, "num_tokens": 1775162092.0, "step": 2327 }, { "epoch": 3.1761599436697856, "grad_norm": 0.1900146731182938, "learning_rate": 1.4361317242325199e-05, "loss": 0.275, "num_tokens": 1775965500.0, "step": 2328 }, { "epoch": 3.177525524628466, "grad_norm": 0.209487503110832, "learning_rate": 1.4356705556061861e-05, "loss": 0.2794, "num_tokens": 1776761987.0, "step": 2329 }, { "epoch": 3.1788911055871463, "grad_norm": 0.20249349503618314, "learning_rate": 1.4352092845813026e-05, "loss": 0.2796, "num_tokens": 1777572641.0, "step": 2330 }, { "epoch": 3.180256686545827, "grad_norm": 0.2097367881031723, "learning_rate": 1.4347479112985832e-05, "loss": 0.2696, "num_tokens": 1778306091.0, "step": 2331 }, { "epoch": 3.1816222675045074, "grad_norm": 0.21131397648090708, "learning_rate": 1.4342864358987724e-05, "loss": 0.2801, "num_tokens": 1779088127.0, "step": 2332 }, { "epoch": 3.182987848463188, "grad_norm": 0.19704727665835065, "learning_rate": 1.4338248585226468e-05, "loss": 0.2783, "num_tokens": 1779844476.0, "step": 2333 }, { "epoch": 3.1843534294218685, "grad_norm": 0.19629770635218005, "learning_rate": 1.433363179311014e-05, "loss": 0.2683, "num_tokens": 1780636469.0, "step": 2334 }, { "epoch": 3.185719010380549, "grad_norm": 0.19324929266346322, "learning_rate": 1.4329013984047113e-05, "loss": 0.2726, "num_tokens": 1781442926.0, "step": 2335 }, { "epoch": 3.1870845913392296, "grad_norm": 0.28401242765190365, "learning_rate": 1.4324395159446091e-05, "loss": 0.2712, "num_tokens": 1782217502.0, "step": 2336 }, { "epoch": 3.18845017229791, "grad_norm": 0.21936401161232488, "learning_rate": 1.4319775320716072e-05, "loss": 0.2729, "num_tokens": 1782994920.0, "step": 2337 }, { "epoch": 3.1898157532565907, "grad_norm": 0.21051432451405766, "learning_rate": 1.431515446926637e-05, "loss": 0.2798, "num_tokens": 1783820902.0, "step": 2338 }, { "epoch": 3.191181334215271, "grad_norm": 0.20491197340481876, "learning_rate": 1.4310532606506606e-05, "loss": 0.2678, "num_tokens": 1784466390.0, "step": 2339 }, { "epoch": 3.1925469151739514, "grad_norm": 0.20206995950075815, "learning_rate": 1.4305909733846712e-05, "loss": 0.2719, "num_tokens": 1785193712.0, "step": 2340 }, { "epoch": 3.193912496132632, "grad_norm": 0.19334300379917063, "learning_rate": 1.4301285852696925e-05, "loss": 0.2854, "num_tokens": 1785983454.0, "step": 2341 }, { "epoch": 3.1952780770913125, "grad_norm": 0.21481896418852425, "learning_rate": 1.429666096446779e-05, "loss": 0.2911, "num_tokens": 1786769124.0, "step": 2342 }, { "epoch": 3.196643658049993, "grad_norm": 0.20075091837290981, "learning_rate": 1.429203507057016e-05, "loss": 0.2791, "num_tokens": 1787578782.0, "step": 2343 }, { "epoch": 3.1980092390086736, "grad_norm": 0.1748558500887479, "learning_rate": 1.4287408172415196e-05, "loss": 0.2625, "num_tokens": 1788348886.0, "step": 2344 }, { "epoch": 3.199374819967354, "grad_norm": 0.18902015027805866, "learning_rate": 1.4282780271414364e-05, "loss": 0.2706, "num_tokens": 1789173291.0, "step": 2345 }, { "epoch": 3.2007404009260347, "grad_norm": 0.1878271559729813, "learning_rate": 1.427815136897944e-05, "loss": 0.2746, "num_tokens": 1789902962.0, "step": 2346 }, { "epoch": 3.2021059818847153, "grad_norm": 0.19244106197322086, "learning_rate": 1.4273521466522495e-05, "loss": 0.2632, "num_tokens": 1790634271.0, "step": 2347 }, { "epoch": 3.2034715628433954, "grad_norm": 0.1856597327097042, "learning_rate": 1.4268890565455915e-05, "loss": 0.268, "num_tokens": 1791383433.0, "step": 2348 }, { "epoch": 3.204837143802076, "grad_norm": 0.20744931370732766, "learning_rate": 1.4264258667192387e-05, "loss": 0.2791, "num_tokens": 1792093988.0, "step": 2349 }, { "epoch": 3.2062027247607565, "grad_norm": 0.20645614812173774, "learning_rate": 1.4259625773144903e-05, "loss": 0.262, "num_tokens": 1792863344.0, "step": 2350 }, { "epoch": 3.207568305719437, "grad_norm": 0.18987659061215575, "learning_rate": 1.4254991884726757e-05, "loss": 0.2841, "num_tokens": 1793618958.0, "step": 2351 }, { "epoch": 3.2089338866781176, "grad_norm": 0.19647829624502972, "learning_rate": 1.425035700335155e-05, "loss": 0.262, "num_tokens": 1794413563.0, "step": 2352 }, { "epoch": 3.210299467636798, "grad_norm": 0.2054099217499659, "learning_rate": 1.4245721130433186e-05, "loss": 0.2839, "num_tokens": 1795183676.0, "step": 2353 }, { "epoch": 3.2116650485954787, "grad_norm": 0.1894342294148697, "learning_rate": 1.4241084267385858e-05, "loss": 0.2574, "num_tokens": 1795919763.0, "step": 2354 }, { "epoch": 3.2130306295541593, "grad_norm": 0.1945660092639355, "learning_rate": 1.4236446415624086e-05, "loss": 0.2566, "num_tokens": 1796757219.0, "step": 2355 }, { "epoch": 3.21439621051284, "grad_norm": 0.18536416046550494, "learning_rate": 1.4231807576562666e-05, "loss": 0.2674, "num_tokens": 1797511233.0, "step": 2356 }, { "epoch": 3.21576179147152, "grad_norm": 0.1894574178871485, "learning_rate": 1.4227167751616713e-05, "loss": 0.2634, "num_tokens": 1798315183.0, "step": 2357 }, { "epoch": 3.2171273724302005, "grad_norm": 0.1990392713232286, "learning_rate": 1.4222526942201638e-05, "loss": 0.2617, "num_tokens": 1799033399.0, "step": 2358 }, { "epoch": 3.218492953388881, "grad_norm": 0.19377607820070814, "learning_rate": 1.4217885149733144e-05, "loss": 0.2817, "num_tokens": 1799850743.0, "step": 2359 }, { "epoch": 3.2198585343475616, "grad_norm": 0.22330869409881712, "learning_rate": 1.421324237562725e-05, "loss": 0.2747, "num_tokens": 1800662498.0, "step": 2360 }, { "epoch": 3.221224115306242, "grad_norm": 0.1856030499956643, "learning_rate": 1.420859862130026e-05, "loss": 0.2717, "num_tokens": 1801470029.0, "step": 2361 }, { "epoch": 3.2225896962649228, "grad_norm": 0.17760080327852162, "learning_rate": 1.4203953888168785e-05, "loss": 0.2634, "num_tokens": 1802182497.0, "step": 2362 }, { "epoch": 3.2239552772236033, "grad_norm": 0.20090915754407884, "learning_rate": 1.4199308177649726e-05, "loss": 0.2726, "num_tokens": 1802946743.0, "step": 2363 }, { "epoch": 3.225320858182284, "grad_norm": 0.20099890582760246, "learning_rate": 1.4194661491160296e-05, "loss": 0.2677, "num_tokens": 1803698167.0, "step": 2364 }, { "epoch": 3.2266864391409644, "grad_norm": 0.18938295677206265, "learning_rate": 1.4190013830117991e-05, "loss": 0.2705, "num_tokens": 1804442908.0, "step": 2365 }, { "epoch": 3.2280520200996445, "grad_norm": 0.2127502761146136, "learning_rate": 1.4185365195940615e-05, "loss": 0.2648, "num_tokens": 1805209741.0, "step": 2366 }, { "epoch": 3.229417601058325, "grad_norm": 0.1756509205093806, "learning_rate": 1.4180715590046267e-05, "loss": 0.2578, "num_tokens": 1805997003.0, "step": 2367 }, { "epoch": 3.2307831820170057, "grad_norm": 0.18894554059244187, "learning_rate": 1.4176065013853333e-05, "loss": 0.2567, "num_tokens": 1806710104.0, "step": 2368 }, { "epoch": 3.232148762975686, "grad_norm": 0.1945951109195739, "learning_rate": 1.417141346878051e-05, "loss": 0.2735, "num_tokens": 1807514842.0, "step": 2369 }, { "epoch": 3.2335143439343668, "grad_norm": 0.18533993257871448, "learning_rate": 1.4166760956246779e-05, "loss": 0.2733, "num_tokens": 1808263080.0, "step": 2370 }, { "epoch": 3.2348799248930473, "grad_norm": 0.19325603496391036, "learning_rate": 1.4162107477671422e-05, "loss": 0.2715, "num_tokens": 1808983353.0, "step": 2371 }, { "epoch": 3.236245505851728, "grad_norm": 0.21337066444507233, "learning_rate": 1.4157453034474013e-05, "loss": 0.2692, "num_tokens": 1809756018.0, "step": 2372 }, { "epoch": 3.2376110868104084, "grad_norm": 0.1902265953661659, "learning_rate": 1.415279762807442e-05, "loss": 0.2734, "num_tokens": 1810533372.0, "step": 2373 }, { "epoch": 3.238976667769089, "grad_norm": 0.19632325918192975, "learning_rate": 1.4148141259892807e-05, "loss": 0.262, "num_tokens": 1811294632.0, "step": 2374 }, { "epoch": 3.240342248727769, "grad_norm": 0.1929337668074922, "learning_rate": 1.4143483931349629e-05, "loss": 0.2755, "num_tokens": 1812064769.0, "step": 2375 }, { "epoch": 3.2417078296864497, "grad_norm": 0.18730339925908246, "learning_rate": 1.4138825643865636e-05, "loss": 0.2642, "num_tokens": 1812762179.0, "step": 2376 }, { "epoch": 3.24307341064513, "grad_norm": 0.20695056031931852, "learning_rate": 1.4134166398861875e-05, "loss": 0.2683, "num_tokens": 1813498263.0, "step": 2377 }, { "epoch": 3.2444389916038108, "grad_norm": 0.21467923502621347, "learning_rate": 1.4129506197759668e-05, "loss": 0.2726, "num_tokens": 1814303267.0, "step": 2378 }, { "epoch": 3.2458045725624913, "grad_norm": 0.1910362099474822, "learning_rate": 1.412484504198065e-05, "loss": 0.2672, "num_tokens": 1815100521.0, "step": 2379 }, { "epoch": 3.247170153521172, "grad_norm": 0.20428481339616017, "learning_rate": 1.4120182932946735e-05, "loss": 0.277, "num_tokens": 1815859424.0, "step": 2380 }, { "epoch": 3.2485357344798524, "grad_norm": 0.2030097769393471, "learning_rate": 1.4115519872080133e-05, "loss": 0.28, "num_tokens": 1816711193.0, "step": 2381 }, { "epoch": 3.249901315438533, "grad_norm": 0.186672110681332, "learning_rate": 1.4110855860803336e-05, "loss": 0.2715, "num_tokens": 1817520515.0, "step": 2382 }, { "epoch": 3.2512668963972136, "grad_norm": 0.2063978254589925, "learning_rate": 1.4106190900539141e-05, "loss": 0.272, "num_tokens": 1818251730.0, "step": 2383 }, { "epoch": 3.2526324773558937, "grad_norm": 0.19739164519681418, "learning_rate": 1.4101524992710616e-05, "loss": 0.2661, "num_tokens": 1819001705.0, "step": 2384 }, { "epoch": 3.2539980583145742, "grad_norm": 0.18905795610669557, "learning_rate": 1.409685813874113e-05, "loss": 0.2824, "num_tokens": 1819754800.0, "step": 2385 }, { "epoch": 3.255363639273255, "grad_norm": 0.20196025312584504, "learning_rate": 1.4092190340054344e-05, "loss": 0.2717, "num_tokens": 1820516845.0, "step": 2386 }, { "epoch": 3.2567292202319353, "grad_norm": 0.2079661419465257, "learning_rate": 1.4087521598074195e-05, "loss": 0.2808, "num_tokens": 1821280716.0, "step": 2387 }, { "epoch": 3.258094801190616, "grad_norm": 0.20596046428315215, "learning_rate": 1.4082851914224913e-05, "loss": 0.28, "num_tokens": 1822024802.0, "step": 2388 }, { "epoch": 3.2594603821492965, "grad_norm": 0.20696138524936541, "learning_rate": 1.4078181289931021e-05, "loss": 0.2797, "num_tokens": 1822815644.0, "step": 2389 }, { "epoch": 3.260825963107977, "grad_norm": 0.1972246795085495, "learning_rate": 1.407350972661732e-05, "loss": 0.2588, "num_tokens": 1823567588.0, "step": 2390 }, { "epoch": 3.2621915440666576, "grad_norm": 0.20894970870859864, "learning_rate": 1.4068837225708904e-05, "loss": 0.2726, "num_tokens": 1824278408.0, "step": 2391 }, { "epoch": 3.263557125025338, "grad_norm": 0.20751431948978596, "learning_rate": 1.4064163788631153e-05, "loss": 0.2747, "num_tokens": 1825024934.0, "step": 2392 }, { "epoch": 3.2649227059840182, "grad_norm": 0.19778436940820787, "learning_rate": 1.4059489416809729e-05, "loss": 0.2866, "num_tokens": 1825789539.0, "step": 2393 }, { "epoch": 3.266288286942699, "grad_norm": 0.21734713939537242, "learning_rate": 1.4054814111670575e-05, "loss": 0.2669, "num_tokens": 1826550378.0, "step": 2394 }, { "epoch": 3.2676538679013793, "grad_norm": 0.19293344811952878, "learning_rate": 1.405013787463993e-05, "loss": 0.2521, "num_tokens": 1827299610.0, "step": 2395 }, { "epoch": 3.26901944886006, "grad_norm": 0.19622870374747675, "learning_rate": 1.4045460707144311e-05, "loss": 0.2663, "num_tokens": 1828107717.0, "step": 2396 }, { "epoch": 3.2703850298187405, "grad_norm": 0.1902307529714498, "learning_rate": 1.4040782610610516e-05, "loss": 0.2777, "num_tokens": 1828902636.0, "step": 2397 }, { "epoch": 3.271750610777421, "grad_norm": 0.2088105453250827, "learning_rate": 1.4036103586465632e-05, "loss": 0.2755, "num_tokens": 1829627314.0, "step": 2398 }, { "epoch": 3.2731161917361016, "grad_norm": 0.18934224697762408, "learning_rate": 1.4031423636137028e-05, "loss": 0.2701, "num_tokens": 1830274481.0, "step": 2399 }, { "epoch": 3.274481772694782, "grad_norm": 0.2320238836704446, "learning_rate": 1.4026742761052353e-05, "loss": 0.2794, "num_tokens": 1831045697.0, "step": 2400 }, { "epoch": 3.2758473536534627, "grad_norm": 0.21981480376341683, "learning_rate": 1.4022060962639534e-05, "loss": 0.2627, "num_tokens": 1831762032.0, "step": 2401 }, { "epoch": 3.277212934612143, "grad_norm": 0.18903806305561144, "learning_rate": 1.4017378242326794e-05, "loss": 0.2687, "num_tokens": 1832574196.0, "step": 2402 }, { "epoch": 3.2785785155708234, "grad_norm": 0.1856954897004164, "learning_rate": 1.401269460154262e-05, "loss": 0.2737, "num_tokens": 1833308461.0, "step": 2403 }, { "epoch": 3.279944096529504, "grad_norm": 0.20026380157428866, "learning_rate": 1.4008010041715796e-05, "loss": 0.2621, "num_tokens": 1834044395.0, "step": 2404 }, { "epoch": 3.2813096774881845, "grad_norm": 0.1967556879550702, "learning_rate": 1.4003324564275371e-05, "loss": 0.271, "num_tokens": 1834811463.0, "step": 2405 }, { "epoch": 3.282675258446865, "grad_norm": 0.1894988566107046, "learning_rate": 1.3998638170650685e-05, "loss": 0.2811, "num_tokens": 1835535399.0, "step": 2406 }, { "epoch": 3.2840408394055456, "grad_norm": 0.20507101297566238, "learning_rate": 1.3993950862271355e-05, "loss": 0.2835, "num_tokens": 1836314807.0, "step": 2407 }, { "epoch": 3.285406420364226, "grad_norm": 0.19607694201006956, "learning_rate": 1.3989262640567273e-05, "loss": 0.2904, "num_tokens": 1837137603.0, "step": 2408 }, { "epoch": 3.2867720013229067, "grad_norm": 0.217607442685979, "learning_rate": 1.3984573506968614e-05, "loss": 0.2703, "num_tokens": 1837951755.0, "step": 2409 }, { "epoch": 3.2881375822815873, "grad_norm": 0.18462937823399556, "learning_rate": 1.3979883462905825e-05, "loss": 0.2648, "num_tokens": 1838738364.0, "step": 2410 }, { "epoch": 3.2895031632402674, "grad_norm": 0.19751045746999715, "learning_rate": 1.3975192509809644e-05, "loss": 0.2814, "num_tokens": 1839543067.0, "step": 2411 }, { "epoch": 3.290868744198948, "grad_norm": 0.18789992618267778, "learning_rate": 1.397050064911107e-05, "loss": 0.2764, "num_tokens": 1840324419.0, "step": 2412 }, { "epoch": 3.2922343251576285, "grad_norm": 0.19088428721500467, "learning_rate": 1.396580788224139e-05, "loss": 0.2688, "num_tokens": 1841045468.0, "step": 2413 }, { "epoch": 3.293599906116309, "grad_norm": 0.20203068783355285, "learning_rate": 1.3961114210632163e-05, "loss": 0.2706, "num_tokens": 1841771222.0, "step": 2414 }, { "epoch": 3.2949654870749896, "grad_norm": 0.1974406144672401, "learning_rate": 1.3956419635715225e-05, "loss": 0.266, "num_tokens": 1842507358.0, "step": 2415 }, { "epoch": 3.29633106803367, "grad_norm": 0.20496574393735745, "learning_rate": 1.3951724158922691e-05, "loss": 0.2731, "num_tokens": 1843281092.0, "step": 2416 }, { "epoch": 3.2976966489923507, "grad_norm": 0.19216996053962682, "learning_rate": 1.3947027781686942e-05, "loss": 0.2713, "num_tokens": 1844043217.0, "step": 2417 }, { "epoch": 3.2990622299510313, "grad_norm": 0.19858536354206732, "learning_rate": 1.3942330505440642e-05, "loss": 0.2774, "num_tokens": 1844747286.0, "step": 2418 }, { "epoch": 3.300427810909712, "grad_norm": 0.19381621347966296, "learning_rate": 1.3937632331616725e-05, "loss": 0.2757, "num_tokens": 1845555121.0, "step": 2419 }, { "epoch": 3.301793391868392, "grad_norm": 0.20715845541118993, "learning_rate": 1.3932933261648403e-05, "loss": 0.2787, "num_tokens": 1846294143.0, "step": 2420 }, { "epoch": 3.3031589728270725, "grad_norm": 0.20096629358001156, "learning_rate": 1.3928233296969155e-05, "loss": 0.2629, "num_tokens": 1847059883.0, "step": 2421 }, { "epoch": 3.304524553785753, "grad_norm": 0.17963840500325345, "learning_rate": 1.3923532439012742e-05, "loss": 0.26, "num_tokens": 1847836713.0, "step": 2422 }, { "epoch": 3.3058901347444336, "grad_norm": 0.1900830619021606, "learning_rate": 1.391883068921319e-05, "loss": 0.2775, "num_tokens": 1848613558.0, "step": 2423 }, { "epoch": 3.307255715703114, "grad_norm": 0.17813354642806067, "learning_rate": 1.3914128049004797e-05, "loss": 0.2825, "num_tokens": 1849411909.0, "step": 2424 }, { "epoch": 3.3086212966617947, "grad_norm": 0.1954031959867918, "learning_rate": 1.3909424519822136e-05, "loss": 0.2647, "num_tokens": 1850210919.0, "step": 2425 }, { "epoch": 3.3099868776204753, "grad_norm": 0.19952271418198922, "learning_rate": 1.3904720103100051e-05, "loss": 0.2576, "num_tokens": 1850908224.0, "step": 2426 }, { "epoch": 3.311352458579156, "grad_norm": 0.1896737061869976, "learning_rate": 1.3900014800273656e-05, "loss": 0.2574, "num_tokens": 1851682845.0, "step": 2427 }, { "epoch": 3.3127180395378364, "grad_norm": 0.1938494565211211, "learning_rate": 1.3895308612778333e-05, "loss": 0.2777, "num_tokens": 1852463298.0, "step": 2428 }, { "epoch": 3.3140836204965165, "grad_norm": 0.21082332608871743, "learning_rate": 1.3890601542049737e-05, "loss": 0.2712, "num_tokens": 1853227973.0, "step": 2429 }, { "epoch": 3.315449201455197, "grad_norm": 0.18966939316155573, "learning_rate": 1.3885893589523794e-05, "loss": 0.2734, "num_tokens": 1853999112.0, "step": 2430 }, { "epoch": 3.3168147824138776, "grad_norm": 0.19628588546367426, "learning_rate": 1.3881184756636691e-05, "loss": 0.2539, "num_tokens": 1854747751.0, "step": 2431 }, { "epoch": 3.318180363372558, "grad_norm": 0.1790324785243348, "learning_rate": 1.3876475044824897e-05, "loss": 0.2781, "num_tokens": 1855490157.0, "step": 2432 }, { "epoch": 3.3195459443312387, "grad_norm": 0.19526522859432022, "learning_rate": 1.3871764455525134e-05, "loss": 0.2661, "num_tokens": 1856274547.0, "step": 2433 }, { "epoch": 3.3209115252899193, "grad_norm": 0.19259644863012654, "learning_rate": 1.3867052990174399e-05, "loss": 0.2714, "num_tokens": 1856990190.0, "step": 2434 }, { "epoch": 3.3222771062486, "grad_norm": 0.21815807432967999, "learning_rate": 1.3862340650209962e-05, "loss": 0.2667, "num_tokens": 1857781952.0, "step": 2435 }, { "epoch": 3.3236426872072804, "grad_norm": 0.18692719282055642, "learning_rate": 1.3857627437069348e-05, "loss": 0.2916, "num_tokens": 1858632728.0, "step": 2436 }, { "epoch": 3.325008268165961, "grad_norm": 0.19516290912115158, "learning_rate": 1.3852913352190356e-05, "loss": 0.2744, "num_tokens": 1859407217.0, "step": 2437 }, { "epoch": 3.326373849124641, "grad_norm": 0.20362220225490232, "learning_rate": 1.384819839701105e-05, "loss": 0.2696, "num_tokens": 1860132185.0, "step": 2438 }, { "epoch": 3.3277394300833216, "grad_norm": 0.1943262438276806, "learning_rate": 1.384348257296976e-05, "loss": 0.2838, "num_tokens": 1860871687.0, "step": 2439 }, { "epoch": 3.329105011042002, "grad_norm": 0.2296024867260925, "learning_rate": 1.3838765881505074e-05, "loss": 0.2804, "num_tokens": 1861721303.0, "step": 2440 }, { "epoch": 3.3304705920006827, "grad_norm": 0.2012551643836199, "learning_rate": 1.3834048324055857e-05, "loss": 0.2794, "num_tokens": 1862521097.0, "step": 2441 }, { "epoch": 3.3318361729593633, "grad_norm": 0.20123997884403427, "learning_rate": 1.3829329902061226e-05, "loss": 0.27, "num_tokens": 1863338232.0, "step": 2442 }, { "epoch": 3.333201753918044, "grad_norm": 0.1942542322580485, "learning_rate": 1.3824610616960573e-05, "loss": 0.2597, "num_tokens": 1864055409.0, "step": 2443 }, { "epoch": 3.3345673348767244, "grad_norm": 0.21029191526714253, "learning_rate": 1.3819890470193543e-05, "loss": 0.2662, "num_tokens": 1864806238.0, "step": 2444 }, { "epoch": 3.335932915835405, "grad_norm": 0.19476125011994028, "learning_rate": 1.3815169463200046e-05, "loss": 0.2701, "num_tokens": 1865484777.0, "step": 2445 }, { "epoch": 3.3372984967940855, "grad_norm": 0.2047422994765326, "learning_rate": 1.3810447597420262e-05, "loss": 0.2677, "num_tokens": 1866297124.0, "step": 2446 }, { "epoch": 3.3386640777527656, "grad_norm": 0.20260875584191437, "learning_rate": 1.3805724874294628e-05, "loss": 0.2682, "num_tokens": 1867014965.0, "step": 2447 }, { "epoch": 3.340029658711446, "grad_norm": 0.19322247646533522, "learning_rate": 1.3801001295263842e-05, "loss": 0.275, "num_tokens": 1867800592.0, "step": 2448 }, { "epoch": 3.3413952396701267, "grad_norm": 0.19475628367282763, "learning_rate": 1.3796276861768859e-05, "loss": 0.2695, "num_tokens": 1868551776.0, "step": 2449 }, { "epoch": 3.3427608206288073, "grad_norm": 0.20169251895697923, "learning_rate": 1.3791551575250901e-05, "loss": 0.2721, "num_tokens": 1869333305.0, "step": 2450 }, { "epoch": 3.344126401587488, "grad_norm": 0.1854383161796759, "learning_rate": 1.3786825437151453e-05, "loss": 0.2761, "num_tokens": 1870111426.0, "step": 2451 }, { "epoch": 3.3454919825461684, "grad_norm": 0.1902383091227898, "learning_rate": 1.3782098448912247e-05, "loss": 0.28, "num_tokens": 1870907351.0, "step": 2452 }, { "epoch": 3.346857563504849, "grad_norm": 0.20149107053487655, "learning_rate": 1.3777370611975291e-05, "loss": 0.2706, "num_tokens": 1871697360.0, "step": 2453 }, { "epoch": 3.3482231444635295, "grad_norm": 0.18943868323894744, "learning_rate": 1.3772641927782836e-05, "loss": 0.2699, "num_tokens": 1872447283.0, "step": 2454 }, { "epoch": 3.34958872542221, "grad_norm": 0.20753744857512585, "learning_rate": 1.3767912397777403e-05, "loss": 0.2707, "num_tokens": 1873155344.0, "step": 2455 }, { "epoch": 3.35095430638089, "grad_norm": 0.2111458906802961, "learning_rate": 1.3763182023401765e-05, "loss": 0.2772, "num_tokens": 1873997082.0, "step": 2456 }, { "epoch": 3.3523198873395708, "grad_norm": 0.20664780080299744, "learning_rate": 1.3758450806098956e-05, "loss": 0.2805, "num_tokens": 1874759460.0, "step": 2457 }, { "epoch": 3.3536854682982513, "grad_norm": 0.20519094498415677, "learning_rate": 1.3753718747312261e-05, "loss": 0.2816, "num_tokens": 1875534961.0, "step": 2458 }, { "epoch": 3.355051049256932, "grad_norm": 0.1930402388354139, "learning_rate": 1.3748985848485232e-05, "loss": 0.2758, "num_tokens": 1876285425.0, "step": 2459 }, { "epoch": 3.3564166302156124, "grad_norm": 0.19042022777565823, "learning_rate": 1.3744252111061671e-05, "loss": 0.2676, "num_tokens": 1876996711.0, "step": 2460 }, { "epoch": 3.357782211174293, "grad_norm": 0.20789373696126046, "learning_rate": 1.373951753648563e-05, "loss": 0.2747, "num_tokens": 1877840940.0, "step": 2461 }, { "epoch": 3.3591477921329735, "grad_norm": 0.18976215866525817, "learning_rate": 1.3734782126201431e-05, "loss": 0.2739, "num_tokens": 1878639306.0, "step": 2462 }, { "epoch": 3.360513373091654, "grad_norm": 0.2053641138825127, "learning_rate": 1.3730045881653637e-05, "loss": 0.2724, "num_tokens": 1879304301.0, "step": 2463 }, { "epoch": 3.3618789540503347, "grad_norm": 0.1938436681002965, "learning_rate": 1.3725308804287075e-05, "loss": 0.2765, "num_tokens": 1880167411.0, "step": 2464 }, { "epoch": 3.3632445350090148, "grad_norm": 0.19896691433652267, "learning_rate": 1.3720570895546814e-05, "loss": 0.2678, "num_tokens": 1880951109.0, "step": 2465 }, { "epoch": 3.3646101159676953, "grad_norm": 0.20641809136092917, "learning_rate": 1.3715832156878197e-05, "loss": 0.2742, "num_tokens": 1881693312.0, "step": 2466 }, { "epoch": 3.365975696926376, "grad_norm": 0.1868806566599478, "learning_rate": 1.37110925897268e-05, "loss": 0.2681, "num_tokens": 1882440139.0, "step": 2467 }, { "epoch": 3.3673412778850564, "grad_norm": 0.19087987163746697, "learning_rate": 1.3706352195538457e-05, "loss": 0.2788, "num_tokens": 1883271173.0, "step": 2468 }, { "epoch": 3.368706858843737, "grad_norm": 0.19728181390038432, "learning_rate": 1.3701610975759267e-05, "loss": 0.2695, "num_tokens": 1884033818.0, "step": 2469 }, { "epoch": 3.3700724398024176, "grad_norm": 0.19744136807443977, "learning_rate": 1.3696868931835563e-05, "loss": 0.2749, "num_tokens": 1884807066.0, "step": 2470 }, { "epoch": 3.371438020761098, "grad_norm": 0.20298352797963695, "learning_rate": 1.3692126065213939e-05, "loss": 0.2714, "num_tokens": 1885592805.0, "step": 2471 }, { "epoch": 3.3728036017197787, "grad_norm": 0.19179462600346447, "learning_rate": 1.3687382377341241e-05, "loss": 0.284, "num_tokens": 1886496945.0, "step": 2472 }, { "epoch": 3.3741691826784592, "grad_norm": 0.1917417755239768, "learning_rate": 1.3682637869664556e-05, "loss": 0.275, "num_tokens": 1887261885.0, "step": 2473 }, { "epoch": 3.3755347636371393, "grad_norm": 0.20338183972265308, "learning_rate": 1.3677892543631234e-05, "loss": 0.2664, "num_tokens": 1887965663.0, "step": 2474 }, { "epoch": 3.37690034459582, "grad_norm": 0.1939924387253658, "learning_rate": 1.3673146400688868e-05, "loss": 0.2695, "num_tokens": 1888744626.0, "step": 2475 }, { "epoch": 3.3782659255545004, "grad_norm": 0.18963691754621137, "learning_rate": 1.3668399442285299e-05, "loss": 0.2702, "num_tokens": 1889487824.0, "step": 2476 }, { "epoch": 3.379631506513181, "grad_norm": 0.19401112564812104, "learning_rate": 1.3663651669868614e-05, "loss": 0.266, "num_tokens": 1890221171.0, "step": 2477 }, { "epoch": 3.3809970874718616, "grad_norm": 0.17970366470464838, "learning_rate": 1.3658903084887162e-05, "loss": 0.2841, "num_tokens": 1890986458.0, "step": 2478 }, { "epoch": 3.382362668430542, "grad_norm": 0.209819324660752, "learning_rate": 1.3654153688789529e-05, "loss": 0.2678, "num_tokens": 1891726386.0, "step": 2479 }, { "epoch": 3.3837282493892227, "grad_norm": 0.19468479445014905, "learning_rate": 1.3649403483024541e-05, "loss": 0.2775, "num_tokens": 1892403062.0, "step": 2480 }, { "epoch": 3.3850938303479032, "grad_norm": 0.19372877877570702, "learning_rate": 1.3644652469041291e-05, "loss": 0.2818, "num_tokens": 1893140884.0, "step": 2481 }, { "epoch": 3.386459411306584, "grad_norm": 0.21671291637107462, "learning_rate": 1.36399006482891e-05, "loss": 0.2748, "num_tokens": 1893883172.0, "step": 2482 }, { "epoch": 3.387824992265264, "grad_norm": 0.1962409281487731, "learning_rate": 1.3635148022217544e-05, "loss": 0.2699, "num_tokens": 1894656447.0, "step": 2483 }, { "epoch": 3.3891905732239445, "grad_norm": 0.18812391806010237, "learning_rate": 1.3630394592276451e-05, "loss": 0.2675, "num_tokens": 1895475016.0, "step": 2484 }, { "epoch": 3.390556154182625, "grad_norm": 0.19683642602932855, "learning_rate": 1.3625640359915873e-05, "loss": 0.2727, "num_tokens": 1896285167.0, "step": 2485 }, { "epoch": 3.3919217351413056, "grad_norm": 0.179649493662047, "learning_rate": 1.3620885326586136e-05, "loss": 0.2651, "num_tokens": 1897060389.0, "step": 2486 }, { "epoch": 3.393287316099986, "grad_norm": 0.19712674329729446, "learning_rate": 1.361612949373778e-05, "loss": 0.2844, "num_tokens": 1897907424.0, "step": 2487 }, { "epoch": 3.3946528970586667, "grad_norm": 0.19348003716670728, "learning_rate": 1.3611372862821614e-05, "loss": 0.2753, "num_tokens": 1898709519.0, "step": 2488 }, { "epoch": 3.3960184780173472, "grad_norm": 0.1874047937541052, "learning_rate": 1.3606615435288675e-05, "loss": 0.2763, "num_tokens": 1899368170.0, "step": 2489 }, { "epoch": 3.397384058976028, "grad_norm": 0.20103156215225343, "learning_rate": 1.3601857212590249e-05, "loss": 0.2707, "num_tokens": 1900133953.0, "step": 2490 }, { "epoch": 3.3987496399347084, "grad_norm": 0.2007663818279447, "learning_rate": 1.3597098196177866e-05, "loss": 0.2686, "num_tokens": 1900916871.0, "step": 2491 }, { "epoch": 3.4001152208933885, "grad_norm": 0.1913154928112898, "learning_rate": 1.3592338387503295e-05, "loss": 0.2838, "num_tokens": 1901743286.0, "step": 2492 }, { "epoch": 3.401480801852069, "grad_norm": 0.18371347394543572, "learning_rate": 1.3587577788018545e-05, "loss": 0.2755, "num_tokens": 1902545155.0, "step": 2493 }, { "epoch": 3.4028463828107496, "grad_norm": 0.20021289993918298, "learning_rate": 1.358281639917587e-05, "loss": 0.2613, "num_tokens": 1903277404.0, "step": 2494 }, { "epoch": 3.40421196376943, "grad_norm": 0.19384550242090315, "learning_rate": 1.3578054222427764e-05, "loss": 0.2528, "num_tokens": 1904004212.0, "step": 2495 }, { "epoch": 3.4055775447281107, "grad_norm": 0.19035716421672197, "learning_rate": 1.3573291259226959e-05, "loss": 0.265, "num_tokens": 1904686245.0, "step": 2496 }, { "epoch": 3.4069431256867913, "grad_norm": 0.20240596964074745, "learning_rate": 1.3568527511026435e-05, "loss": 0.2596, "num_tokens": 1905402694.0, "step": 2497 }, { "epoch": 3.408308706645472, "grad_norm": 0.19297427680815182, "learning_rate": 1.3563762979279398e-05, "loss": 0.2557, "num_tokens": 1906137761.0, "step": 2498 }, { "epoch": 3.4096742876041524, "grad_norm": 0.172725618734487, "learning_rate": 1.3558997665439302e-05, "loss": 0.2848, "num_tokens": 1906983650.0, "step": 2499 }, { "epoch": 3.411039868562833, "grad_norm": 0.20377964053038924, "learning_rate": 1.3554231570959842e-05, "loss": 0.2704, "num_tokens": 1907718910.0, "step": 2500 }, { "epoch": 3.412405449521513, "grad_norm": 0.20708230060795674, "learning_rate": 1.354946469729494e-05, "loss": 0.2885, "num_tokens": 1908476952.0, "step": 2501 }, { "epoch": 3.4137710304801936, "grad_norm": 0.2029456493004474, "learning_rate": 1.3544697045898773e-05, "loss": 0.2757, "num_tokens": 1909207694.0, "step": 2502 }, { "epoch": 3.415136611438874, "grad_norm": 0.2027907367834815, "learning_rate": 1.3539928618225737e-05, "loss": 0.2722, "num_tokens": 1909966790.0, "step": 2503 }, { "epoch": 3.4165021923975547, "grad_norm": 0.18771884084846815, "learning_rate": 1.3535159415730472e-05, "loss": 0.263, "num_tokens": 1910756122.0, "step": 2504 }, { "epoch": 3.4178677733562353, "grad_norm": 0.1720881833827725, "learning_rate": 1.3530389439867857e-05, "loss": 0.2807, "num_tokens": 1911550831.0, "step": 2505 }, { "epoch": 3.419233354314916, "grad_norm": 0.19301820060729827, "learning_rate": 1.352561869209301e-05, "loss": 0.2675, "num_tokens": 1912321032.0, "step": 2506 }, { "epoch": 3.4205989352735964, "grad_norm": 0.19378443466087455, "learning_rate": 1.352084717386127e-05, "loss": 0.2714, "num_tokens": 1913045546.0, "step": 2507 }, { "epoch": 3.421964516232277, "grad_norm": 0.18576755896524016, "learning_rate": 1.3516074886628227e-05, "loss": 0.2712, "num_tokens": 1913843770.0, "step": 2508 }, { "epoch": 3.4233300971909575, "grad_norm": 0.18615933562394205, "learning_rate": 1.3511301831849702e-05, "loss": 0.2734, "num_tokens": 1914663045.0, "step": 2509 }, { "epoch": 3.4246956781496376, "grad_norm": 0.2010388051258587, "learning_rate": 1.3506528010981742e-05, "loss": 0.2759, "num_tokens": 1915374848.0, "step": 2510 }, { "epoch": 3.426061259108318, "grad_norm": 0.20805550464081904, "learning_rate": 1.3501753425480631e-05, "loss": 0.2751, "num_tokens": 1916150049.0, "step": 2511 }, { "epoch": 3.4274268400669987, "grad_norm": 0.18851166363777921, "learning_rate": 1.3496978076802896e-05, "loss": 0.2671, "num_tokens": 1916878480.0, "step": 2512 }, { "epoch": 3.4287924210256793, "grad_norm": 0.19383299677484198, "learning_rate": 1.349220196640528e-05, "loss": 0.2709, "num_tokens": 1917643468.0, "step": 2513 }, { "epoch": 3.43015800198436, "grad_norm": 0.19555487284818668, "learning_rate": 1.3487425095744775e-05, "loss": 0.2667, "num_tokens": 1918402807.0, "step": 2514 }, { "epoch": 3.4315235829430404, "grad_norm": 0.17720645845372424, "learning_rate": 1.3482647466278595e-05, "loss": 0.2785, "num_tokens": 1919108193.0, "step": 2515 }, { "epoch": 3.432889163901721, "grad_norm": 0.2085794357080601, "learning_rate": 1.3477869079464185e-05, "loss": 0.2799, "num_tokens": 1919920785.0, "step": 2516 }, { "epoch": 3.4342547448604015, "grad_norm": 0.20141684231959586, "learning_rate": 1.3473089936759227e-05, "loss": 0.2726, "num_tokens": 1920675676.0, "step": 2517 }, { "epoch": 3.435620325819082, "grad_norm": 0.20117236436187297, "learning_rate": 1.3468310039621633e-05, "loss": 0.2789, "num_tokens": 1921393551.0, "step": 2518 }, { "epoch": 3.436985906777762, "grad_norm": 0.18359171538845093, "learning_rate": 1.3463529389509537e-05, "loss": 0.2757, "num_tokens": 1922151515.0, "step": 2519 }, { "epoch": 3.4383514877364427, "grad_norm": 0.1977178753223143, "learning_rate": 1.3458747987881312e-05, "loss": 0.2642, "num_tokens": 1922938495.0, "step": 2520 }, { "epoch": 3.4397170686951233, "grad_norm": 0.18538075094600875, "learning_rate": 1.3453965836195553e-05, "loss": 0.2552, "num_tokens": 1923678478.0, "step": 2521 }, { "epoch": 3.441082649653804, "grad_norm": 0.1950919222247887, "learning_rate": 1.3449182935911088e-05, "loss": 0.2599, "num_tokens": 1924441622.0, "step": 2522 }, { "epoch": 3.4424482306124844, "grad_norm": 0.1882237133241943, "learning_rate": 1.3444399288486977e-05, "loss": 0.2686, "num_tokens": 1925285595.0, "step": 2523 }, { "epoch": 3.443813811571165, "grad_norm": 0.19152197216924946, "learning_rate": 1.3439614895382502e-05, "loss": 0.2844, "num_tokens": 1926034125.0, "step": 2524 }, { "epoch": 3.4451793925298455, "grad_norm": 0.20280915368768077, "learning_rate": 1.3434829758057172e-05, "loss": 0.2779, "num_tokens": 1926784034.0, "step": 2525 }, { "epoch": 3.446544973488526, "grad_norm": 0.19642324265407612, "learning_rate": 1.3430043877970727e-05, "loss": 0.2666, "num_tokens": 1927584954.0, "step": 2526 }, { "epoch": 3.4479105544472066, "grad_norm": 0.20032063623894614, "learning_rate": 1.3425257256583128e-05, "loss": 0.2707, "num_tokens": 1928355876.0, "step": 2527 }, { "epoch": 3.4492761354058867, "grad_norm": 0.1975555915710422, "learning_rate": 1.3420469895354572e-05, "loss": 0.2698, "num_tokens": 1929099433.0, "step": 2528 }, { "epoch": 3.4506417163645673, "grad_norm": 0.21009271639145913, "learning_rate": 1.3415681795745472e-05, "loss": 0.2567, "num_tokens": 1929793860.0, "step": 2529 }, { "epoch": 3.452007297323248, "grad_norm": 0.19852528194959224, "learning_rate": 1.3410892959216471e-05, "loss": 0.274, "num_tokens": 1930543005.0, "step": 2530 }, { "epoch": 3.4533728782819284, "grad_norm": 0.2040496950466502, "learning_rate": 1.3406103387228434e-05, "loss": 0.2765, "num_tokens": 1931272603.0, "step": 2531 }, { "epoch": 3.454738459240609, "grad_norm": 0.19307050671548423, "learning_rate": 1.340131308124245e-05, "loss": 0.2623, "num_tokens": 1932043592.0, "step": 2532 }, { "epoch": 3.4561040401992895, "grad_norm": 0.20658651738071773, "learning_rate": 1.3396522042719844e-05, "loss": 0.2858, "num_tokens": 1932771613.0, "step": 2533 }, { "epoch": 3.45746962115797, "grad_norm": 0.2036538194352677, "learning_rate": 1.3391730273122143e-05, "loss": 0.2665, "num_tokens": 1933545792.0, "step": 2534 }, { "epoch": 3.4588352021166506, "grad_norm": 0.1920918659159465, "learning_rate": 1.338693777391111e-05, "loss": 0.2629, "num_tokens": 1934247075.0, "step": 2535 }, { "epoch": 3.460200783075331, "grad_norm": 0.1943059498371593, "learning_rate": 1.3382144546548737e-05, "loss": 0.2596, "num_tokens": 1935038575.0, "step": 2536 }, { "epoch": 3.4615663640340113, "grad_norm": 0.19318730096807168, "learning_rate": 1.3377350592497222e-05, "loss": 0.2706, "num_tokens": 1935828880.0, "step": 2537 }, { "epoch": 3.462931944992692, "grad_norm": 0.197044712851578, "learning_rate": 1.3372555913218994e-05, "loss": 0.2664, "num_tokens": 1936600590.0, "step": 2538 }, { "epoch": 3.4642975259513724, "grad_norm": 0.1852762750949683, "learning_rate": 1.3367760510176706e-05, "loss": 0.2606, "num_tokens": 1937386674.0, "step": 2539 }, { "epoch": 3.465663106910053, "grad_norm": 0.1977998907819235, "learning_rate": 1.3362964384833226e-05, "loss": 0.2696, "num_tokens": 1938199391.0, "step": 2540 }, { "epoch": 3.4670286878687335, "grad_norm": 0.19029359552648914, "learning_rate": 1.3358167538651645e-05, "loss": 0.2794, "num_tokens": 1938919703.0, "step": 2541 }, { "epoch": 3.468394268827414, "grad_norm": 0.1962455699681018, "learning_rate": 1.3353369973095268e-05, "loss": 0.2649, "num_tokens": 1939589122.0, "step": 2542 }, { "epoch": 3.4697598497860946, "grad_norm": 0.20725837787793033, "learning_rate": 1.3348571689627629e-05, "loss": 0.2607, "num_tokens": 1940293135.0, "step": 2543 }, { "epoch": 3.471125430744775, "grad_norm": 0.20254833021537585, "learning_rate": 1.3343772689712477e-05, "loss": 0.2868, "num_tokens": 1941056699.0, "step": 2544 }, { "epoch": 3.4724910117034558, "grad_norm": 0.2145236154593661, "learning_rate": 1.3338972974813773e-05, "loss": 0.2655, "num_tokens": 1941771437.0, "step": 2545 }, { "epoch": 3.473856592662136, "grad_norm": 0.19949528200866834, "learning_rate": 1.3334172546395711e-05, "loss": 0.2728, "num_tokens": 1942524165.0, "step": 2546 }, { "epoch": 3.4752221736208164, "grad_norm": 0.20771278234908777, "learning_rate": 1.3329371405922688e-05, "loss": 0.2632, "num_tokens": 1943256919.0, "step": 2547 }, { "epoch": 3.476587754579497, "grad_norm": 0.41857656568113377, "learning_rate": 1.3324569554859325e-05, "loss": 0.2636, "num_tokens": 1943962410.0, "step": 2548 }, { "epoch": 3.4779533355381775, "grad_norm": 0.2209847204678078, "learning_rate": 1.3319766994670462e-05, "loss": 0.2824, "num_tokens": 1944748848.0, "step": 2549 }, { "epoch": 3.479318916496858, "grad_norm": 0.20713280345759105, "learning_rate": 1.3314963726821146e-05, "loss": 0.2752, "num_tokens": 1945517537.0, "step": 2550 }, { "epoch": 3.4806844974555387, "grad_norm": 0.19283812412856524, "learning_rate": 1.3310159752776652e-05, "loss": 0.2741, "num_tokens": 1946276483.0, "step": 2551 }, { "epoch": 3.482050078414219, "grad_norm": 0.19909290020160622, "learning_rate": 1.3305355074002461e-05, "loss": 0.2708, "num_tokens": 1947018662.0, "step": 2552 }, { "epoch": 3.4834156593728998, "grad_norm": 0.2003826252323021, "learning_rate": 1.3300549691964272e-05, "loss": 0.2605, "num_tokens": 1947804065.0, "step": 2553 }, { "epoch": 3.4847812403315803, "grad_norm": 0.19213728246126396, "learning_rate": 1.3295743608128002e-05, "loss": 0.2665, "num_tokens": 1948582658.0, "step": 2554 }, { "epoch": 3.4861468212902604, "grad_norm": 0.19020390872365578, "learning_rate": 1.3290936823959778e-05, "loss": 0.2649, "num_tokens": 1949378355.0, "step": 2555 }, { "epoch": 3.487512402248941, "grad_norm": 0.1919980306058776, "learning_rate": 1.3286129340925939e-05, "loss": 0.2762, "num_tokens": 1950131762.0, "step": 2556 }, { "epoch": 3.4888779832076215, "grad_norm": 0.2009257651250307, "learning_rate": 1.3281321160493043e-05, "loss": 0.2648, "num_tokens": 1950896103.0, "step": 2557 }, { "epoch": 3.490243564166302, "grad_norm": 0.1995506703565711, "learning_rate": 1.327651228412786e-05, "loss": 0.2756, "num_tokens": 1951643190.0, "step": 2558 }, { "epoch": 3.4916091451249827, "grad_norm": 0.185958526724269, "learning_rate": 1.3271702713297362e-05, "loss": 0.2701, "num_tokens": 1952365984.0, "step": 2559 }, { "epoch": 3.492974726083663, "grad_norm": 0.18734736780097885, "learning_rate": 1.3266892449468745e-05, "loss": 0.2674, "num_tokens": 1953134885.0, "step": 2560 }, { "epoch": 3.4943403070423438, "grad_norm": 0.19251823110019026, "learning_rate": 1.3262081494109415e-05, "loss": 0.2759, "num_tokens": 1953934754.0, "step": 2561 }, { "epoch": 3.4957058880010243, "grad_norm": 0.19199313849764513, "learning_rate": 1.3257269848686982e-05, "loss": 0.2767, "num_tokens": 1954617801.0, "step": 2562 }, { "epoch": 3.497071468959705, "grad_norm": 0.18380246582856521, "learning_rate": 1.3252457514669272e-05, "loss": 0.2686, "num_tokens": 1955332262.0, "step": 2563 }, { "epoch": 3.498437049918385, "grad_norm": 0.19681265557312358, "learning_rate": 1.3247644493524323e-05, "loss": 0.2608, "num_tokens": 1956079591.0, "step": 2564 }, { "epoch": 3.4998026308770656, "grad_norm": 0.17713832483423744, "learning_rate": 1.3242830786720373e-05, "loss": 0.2665, "num_tokens": 1956785519.0, "step": 2565 }, { "epoch": 3.501168211835746, "grad_norm": 0.18800492911250277, "learning_rate": 1.323801639572588e-05, "loss": 0.2757, "num_tokens": 1957605451.0, "step": 2566 }, { "epoch": 3.5025337927944267, "grad_norm": 0.19447126754028624, "learning_rate": 1.3233201322009504e-05, "loss": 0.2688, "num_tokens": 1958488516.0, "step": 2567 }, { "epoch": 3.5038993737531072, "grad_norm": 0.18832072408479703, "learning_rate": 1.3228385567040116e-05, "loss": 0.2781, "num_tokens": 1959308605.0, "step": 2568 }, { "epoch": 3.505264954711788, "grad_norm": 0.17783297275431081, "learning_rate": 1.3223569132286795e-05, "loss": 0.2788, "num_tokens": 1960109856.0, "step": 2569 }, { "epoch": 3.5066305356704683, "grad_norm": 0.20042694905817546, "learning_rate": 1.3218752019218829e-05, "loss": 0.2733, "num_tokens": 1960831380.0, "step": 2570 }, { "epoch": 3.507996116629149, "grad_norm": 0.18949246502137007, "learning_rate": 1.3213934229305702e-05, "loss": 0.2664, "num_tokens": 1961549095.0, "step": 2571 }, { "epoch": 3.5093616975878295, "grad_norm": 0.18964435621637324, "learning_rate": 1.3209115764017125e-05, "loss": 0.2765, "num_tokens": 1962328061.0, "step": 2572 }, { "epoch": 3.5107272785465096, "grad_norm": 0.1928220862944233, "learning_rate": 1.3204296624822994e-05, "loss": 0.28, "num_tokens": 1963089803.0, "step": 2573 }, { "epoch": 3.51209285950519, "grad_norm": 0.18587022024654912, "learning_rate": 1.3199476813193426e-05, "loss": 0.2688, "num_tokens": 1963873791.0, "step": 2574 }, { "epoch": 3.5134584404638707, "grad_norm": 0.17641316441617477, "learning_rate": 1.319465633059873e-05, "loss": 0.2742, "num_tokens": 1964710938.0, "step": 2575 }, { "epoch": 3.5148240214225512, "grad_norm": 0.18636422222444946, "learning_rate": 1.3189835178509427e-05, "loss": 0.2736, "num_tokens": 1965517489.0, "step": 2576 }, { "epoch": 3.516189602381232, "grad_norm": 0.18407179287461142, "learning_rate": 1.3185013358396253e-05, "loss": 0.2826, "num_tokens": 1966268072.0, "step": 2577 }, { "epoch": 3.5175551833399124, "grad_norm": 0.19614632794925257, "learning_rate": 1.3180190871730121e-05, "loss": 0.2802, "num_tokens": 1967054925.0, "step": 2578 }, { "epoch": 3.518920764298593, "grad_norm": 0.20634466786235944, "learning_rate": 1.3175367719982173e-05, "loss": 0.2716, "num_tokens": 1967832618.0, "step": 2579 }, { "epoch": 3.5202863452572735, "grad_norm": 0.20206473937702774, "learning_rate": 1.3170543904623745e-05, "loss": 0.2717, "num_tokens": 1968520499.0, "step": 2580 }, { "epoch": 3.521651926215954, "grad_norm": 0.2329915402251311, "learning_rate": 1.3165719427126364e-05, "loss": 0.283, "num_tokens": 1969224492.0, "step": 2581 }, { "epoch": 3.523017507174634, "grad_norm": 0.2014463586915332, "learning_rate": 1.3160894288961776e-05, "loss": 0.2601, "num_tokens": 1969983578.0, "step": 2582 }, { "epoch": 3.5243830881333147, "grad_norm": 0.18346402291264471, "learning_rate": 1.3156068491601924e-05, "loss": 0.2777, "num_tokens": 1970780398.0, "step": 2583 }, { "epoch": 3.5257486690919952, "grad_norm": 0.18669106487588266, "learning_rate": 1.3151242036518943e-05, "loss": 0.2829, "num_tokens": 1971546284.0, "step": 2584 }, { "epoch": 3.527114250050676, "grad_norm": 0.18970488198479252, "learning_rate": 1.3146414925185177e-05, "loss": 0.2658, "num_tokens": 1972321640.0, "step": 2585 }, { "epoch": 3.5284798310093564, "grad_norm": 0.17836610048866824, "learning_rate": 1.3141587159073176e-05, "loss": 0.2724, "num_tokens": 1973092596.0, "step": 2586 }, { "epoch": 3.529845411968037, "grad_norm": 0.191779004659685, "learning_rate": 1.3136758739655674e-05, "loss": 0.2815, "num_tokens": 1973788094.0, "step": 2587 }, { "epoch": 3.5312109929267175, "grad_norm": 0.209553315428444, "learning_rate": 1.3131929668405618e-05, "loss": 0.2702, "num_tokens": 1974611603.0, "step": 2588 }, { "epoch": 3.532576573885398, "grad_norm": 0.18428076226219425, "learning_rate": 1.3127099946796146e-05, "loss": 0.282, "num_tokens": 1975392803.0, "step": 2589 }, { "epoch": 3.5339421548440786, "grad_norm": 0.18122919137488255, "learning_rate": 1.3122269576300597e-05, "loss": 0.2746, "num_tokens": 1976171281.0, "step": 2590 }, { "epoch": 3.5353077358027587, "grad_norm": 0.1905703479033559, "learning_rate": 1.3117438558392503e-05, "loss": 0.2876, "num_tokens": 1976960959.0, "step": 2591 }, { "epoch": 3.5366733167614393, "grad_norm": 0.19754457966522895, "learning_rate": 1.3112606894545609e-05, "loss": 0.2631, "num_tokens": 1977733352.0, "step": 2592 }, { "epoch": 3.53803889772012, "grad_norm": 0.1912159607643056, "learning_rate": 1.310777458623384e-05, "loss": 0.2759, "num_tokens": 1978468671.0, "step": 2593 }, { "epoch": 3.5394044786788004, "grad_norm": 0.18585790325928842, "learning_rate": 1.3102941634931323e-05, "loss": 0.2819, "num_tokens": 1979266086.0, "step": 2594 }, { "epoch": 3.540770059637481, "grad_norm": 0.2079923511216364, "learning_rate": 1.3098108042112385e-05, "loss": 0.2847, "num_tokens": 1980066232.0, "step": 2595 }, { "epoch": 3.5421356405961615, "grad_norm": 0.19334730459406207, "learning_rate": 1.3093273809251546e-05, "loss": 0.2692, "num_tokens": 1980925828.0, "step": 2596 }, { "epoch": 3.543501221554842, "grad_norm": 0.19617916186719186, "learning_rate": 1.3088438937823518e-05, "loss": 0.2722, "num_tokens": 1981656027.0, "step": 2597 }, { "epoch": 3.5448668025135226, "grad_norm": 0.20125307903020132, "learning_rate": 1.308360342930321e-05, "loss": 0.2686, "num_tokens": 1982459508.0, "step": 2598 }, { "epoch": 3.546232383472203, "grad_norm": 0.1884102600359854, "learning_rate": 1.3078767285165733e-05, "loss": 0.2744, "num_tokens": 1983192509.0, "step": 2599 }, { "epoch": 3.5475979644308833, "grad_norm": 0.2145912888357499, "learning_rate": 1.3073930506886378e-05, "loss": 0.277, "num_tokens": 1983951615.0, "step": 2600 }, { "epoch": 3.548963545389564, "grad_norm": 0.19796607180915476, "learning_rate": 1.3069093095940644e-05, "loss": 0.2779, "num_tokens": 1984726049.0, "step": 2601 }, { "epoch": 3.5503291263482444, "grad_norm": 0.1876446439700682, "learning_rate": 1.3064255053804207e-05, "loss": 0.279, "num_tokens": 1985466794.0, "step": 2602 }, { "epoch": 3.551694707306925, "grad_norm": 0.21125962050736097, "learning_rate": 1.3059416381952953e-05, "loss": 0.2791, "num_tokens": 1986181342.0, "step": 2603 }, { "epoch": 3.5530602882656055, "grad_norm": 0.19142851489082047, "learning_rate": 1.305457708186294e-05, "loss": 0.2657, "num_tokens": 1987024765.0, "step": 2604 }, { "epoch": 3.554425869224286, "grad_norm": 0.19094480285312718, "learning_rate": 1.304973715501044e-05, "loss": 0.2549, "num_tokens": 1987886251.0, "step": 2605 }, { "epoch": 3.5557914501829666, "grad_norm": 0.18115841333125474, "learning_rate": 1.3044896602871899e-05, "loss": 0.2566, "num_tokens": 1988601355.0, "step": 2606 }, { "epoch": 3.557157031141647, "grad_norm": 0.19172622005638135, "learning_rate": 1.3040055426923961e-05, "loss": 0.2782, "num_tokens": 1989338032.0, "step": 2607 }, { "epoch": 3.5585226121003277, "grad_norm": 0.1992859634815598, "learning_rate": 1.3035213628643461e-05, "loss": 0.2766, "num_tokens": 1990166913.0, "step": 2608 }, { "epoch": 3.559888193059008, "grad_norm": 0.1821027190112494, "learning_rate": 1.303037120950742e-05, "loss": 0.2762, "num_tokens": 1990941252.0, "step": 2609 }, { "epoch": 3.5612537740176884, "grad_norm": 0.18329733908695148, "learning_rate": 1.302552817099305e-05, "loss": 0.2719, "num_tokens": 1991721991.0, "step": 2610 }, { "epoch": 3.562619354976369, "grad_norm": 0.1954274203881713, "learning_rate": 1.3020684514577757e-05, "loss": 0.2732, "num_tokens": 1992559224.0, "step": 2611 }, { "epoch": 3.5639849359350495, "grad_norm": 0.1836353833501746, "learning_rate": 1.3015840241739122e-05, "loss": 0.2626, "num_tokens": 1993262365.0, "step": 2612 }, { "epoch": 3.56535051689373, "grad_norm": 0.20483328197908685, "learning_rate": 1.3010995353954929e-05, "loss": 0.2738, "num_tokens": 1994020396.0, "step": 2613 }, { "epoch": 3.5667160978524106, "grad_norm": 0.18888719127840975, "learning_rate": 1.3006149852703145e-05, "loss": 0.275, "num_tokens": 1994809654.0, "step": 2614 }, { "epoch": 3.568081678811091, "grad_norm": 0.17721732697252682, "learning_rate": 1.300130373946192e-05, "loss": 0.2754, "num_tokens": 1995599209.0, "step": 2615 }, { "epoch": 3.5694472597697717, "grad_norm": 0.21184523862972246, "learning_rate": 1.2996457015709587e-05, "loss": 0.2709, "num_tokens": 1996275936.0, "step": 2616 }, { "epoch": 3.5708128407284523, "grad_norm": 0.1894194308110246, "learning_rate": 1.2991609682924683e-05, "loss": 0.2712, "num_tokens": 1997049058.0, "step": 2617 }, { "epoch": 3.5721784216871324, "grad_norm": 0.19324649684130324, "learning_rate": 1.2986761742585913e-05, "loss": 0.279, "num_tokens": 1997825402.0, "step": 2618 }, { "epoch": 3.573544002645813, "grad_norm": 0.18049832777848993, "learning_rate": 1.2981913196172176e-05, "loss": 0.2604, "num_tokens": 1998600108.0, "step": 2619 }, { "epoch": 3.5749095836044935, "grad_norm": 0.1755039368072696, "learning_rate": 1.2977064045162553e-05, "loss": 0.2663, "num_tokens": 1999312863.0, "step": 2620 }, { "epoch": 3.576275164563174, "grad_norm": 0.183983226717234, "learning_rate": 1.2972214291036307e-05, "loss": 0.2737, "num_tokens": 2000076698.0, "step": 2621 }, { "epoch": 3.5776407455218546, "grad_norm": 0.19419771269729033, "learning_rate": 1.296736393527289e-05, "loss": 0.2732, "num_tokens": 2000862634.0, "step": 2622 }, { "epoch": 3.579006326480535, "grad_norm": 0.20014527878600524, "learning_rate": 1.2962512979351936e-05, "loss": 0.2915, "num_tokens": 2001649940.0, "step": 2623 }, { "epoch": 3.5803719074392157, "grad_norm": 0.19179278663010066, "learning_rate": 1.295766142475326e-05, "loss": 0.2702, "num_tokens": 2002480706.0, "step": 2624 }, { "epoch": 3.5817374883978963, "grad_norm": 0.20104056041765797, "learning_rate": 1.2952809272956864e-05, "loss": 0.2727, "num_tokens": 2003238777.0, "step": 2625 }, { "epoch": 3.583103069356577, "grad_norm": 0.20129892773419475, "learning_rate": 1.2947956525442926e-05, "loss": 0.2736, "num_tokens": 2003940199.0, "step": 2626 }, { "epoch": 3.584468650315257, "grad_norm": 0.2009056112660725, "learning_rate": 1.2943103183691815e-05, "loss": 0.2847, "num_tokens": 2004695868.0, "step": 2627 }, { "epoch": 3.5858342312739375, "grad_norm": 0.19656987007815585, "learning_rate": 1.2938249249184067e-05, "loss": 0.2742, "num_tokens": 2005484205.0, "step": 2628 }, { "epoch": 3.587199812232618, "grad_norm": 0.18548445520362633, "learning_rate": 1.2933394723400414e-05, "loss": 0.2741, "num_tokens": 2006295966.0, "step": 2629 }, { "epoch": 3.5885653931912986, "grad_norm": 0.1958557659730161, "learning_rate": 1.2928539607821758e-05, "loss": 0.2729, "num_tokens": 2007121558.0, "step": 2630 }, { "epoch": 3.589930974149979, "grad_norm": 0.18521889638111735, "learning_rate": 1.2923683903929185e-05, "loss": 0.2715, "num_tokens": 2007926470.0, "step": 2631 }, { "epoch": 3.5912965551086597, "grad_norm": 0.1905367404709362, "learning_rate": 1.2918827613203964e-05, "loss": 0.278, "num_tokens": 2008778588.0, "step": 2632 }, { "epoch": 3.5926621360673403, "grad_norm": 0.19606372132649233, "learning_rate": 1.291397073712753e-05, "loss": 0.2878, "num_tokens": 2009540181.0, "step": 2633 }, { "epoch": 3.594027717026021, "grad_norm": 0.19756952102606753, "learning_rate": 1.2909113277181514e-05, "loss": 0.2742, "num_tokens": 2010296763.0, "step": 2634 }, { "epoch": 3.5953932979847014, "grad_norm": 0.19932357284198993, "learning_rate": 1.2904255234847718e-05, "loss": 0.2799, "num_tokens": 2011100490.0, "step": 2635 }, { "epoch": 3.5967588789433815, "grad_norm": 0.19379643068183566, "learning_rate": 1.2899396611608108e-05, "loss": 0.2803, "num_tokens": 2011863073.0, "step": 2636 }, { "epoch": 3.598124459902062, "grad_norm": 0.25561938339021284, "learning_rate": 1.2894537408944852e-05, "loss": 0.2751, "num_tokens": 2012680225.0, "step": 2637 }, { "epoch": 3.5994900408607426, "grad_norm": 0.19456877373999115, "learning_rate": 1.288967762834028e-05, "loss": 0.2851, "num_tokens": 2013466051.0, "step": 2638 }, { "epoch": 3.600855621819423, "grad_norm": 0.20637521910436038, "learning_rate": 1.2884817271276895e-05, "loss": 0.2789, "num_tokens": 2014236841.0, "step": 2639 }, { "epoch": 3.6022212027781038, "grad_norm": 0.761712527429455, "learning_rate": 1.2879956339237385e-05, "loss": 0.2786, "num_tokens": 2014999108.0, "step": 2640 }, { "epoch": 3.6035867837367843, "grad_norm": 0.20592064905848018, "learning_rate": 1.2875094833704611e-05, "loss": 0.2902, "num_tokens": 2015847716.0, "step": 2641 }, { "epoch": 3.604952364695465, "grad_norm": 0.19271108990906194, "learning_rate": 1.2870232756161606e-05, "loss": 0.264, "num_tokens": 2016575341.0, "step": 2642 }, { "epoch": 3.6063179456541454, "grad_norm": 0.22280050896852308, "learning_rate": 1.2865370108091584e-05, "loss": 0.2814, "num_tokens": 2017308005.0, "step": 2643 }, { "epoch": 3.607683526612826, "grad_norm": 0.20959048945477554, "learning_rate": 1.286050689097792e-05, "loss": 0.2852, "num_tokens": 2018126576.0, "step": 2644 }, { "epoch": 3.609049107571506, "grad_norm": 0.20277190340387008, "learning_rate": 1.2855643106304177e-05, "loss": 0.2728, "num_tokens": 2018877119.0, "step": 2645 }, { "epoch": 3.6104146885301867, "grad_norm": 0.23266271148964704, "learning_rate": 1.285077875555408e-05, "loss": 0.2744, "num_tokens": 2019642747.0, "step": 2646 }, { "epoch": 3.611780269488867, "grad_norm": 0.19618290816274078, "learning_rate": 1.2845913840211541e-05, "loss": 0.276, "num_tokens": 2020352591.0, "step": 2647 }, { "epoch": 3.6131458504475478, "grad_norm": 0.2152301427475682, "learning_rate": 1.2841048361760624e-05, "loss": 0.2813, "num_tokens": 2021142134.0, "step": 2648 }, { "epoch": 3.6145114314062283, "grad_norm": 0.1980488193021334, "learning_rate": 1.2836182321685585e-05, "loss": 0.2665, "num_tokens": 2021845019.0, "step": 2649 }, { "epoch": 3.615877012364909, "grad_norm": 0.1854119596073934, "learning_rate": 1.2831315721470834e-05, "loss": 0.2716, "num_tokens": 2022542438.0, "step": 2650 }, { "epoch": 3.6172425933235894, "grad_norm": 0.21120018728703158, "learning_rate": 1.2826448562600969e-05, "loss": 0.2769, "num_tokens": 2023340615.0, "step": 2651 }, { "epoch": 3.61860817428227, "grad_norm": 0.18852834828105855, "learning_rate": 1.2821580846560738e-05, "loss": 0.2693, "num_tokens": 2024042940.0, "step": 2652 }, { "epoch": 3.6199737552409506, "grad_norm": 0.20405466039905662, "learning_rate": 1.281671257483508e-05, "loss": 0.2831, "num_tokens": 2024840946.0, "step": 2653 }, { "epoch": 3.6213393361996307, "grad_norm": 0.18033997197077126, "learning_rate": 1.2811843748909092e-05, "loss": 0.2744, "num_tokens": 2025551275.0, "step": 2654 }, { "epoch": 3.622704917158311, "grad_norm": 0.19822049117702406, "learning_rate": 1.2806974370268035e-05, "loss": 0.2704, "num_tokens": 2026300509.0, "step": 2655 }, { "epoch": 3.624070498116992, "grad_norm": 0.19126032224378545, "learning_rate": 1.2802104440397354e-05, "loss": 0.2698, "num_tokens": 2027123993.0, "step": 2656 }, { "epoch": 3.6254360790756723, "grad_norm": 0.18868169854758937, "learning_rate": 1.2797233960782654e-05, "loss": 0.2714, "num_tokens": 2027882375.0, "step": 2657 }, { "epoch": 3.626801660034353, "grad_norm": 0.19494948350338692, "learning_rate": 1.27923629329097e-05, "loss": 0.2816, "num_tokens": 2028646558.0, "step": 2658 }, { "epoch": 3.6281672409930334, "grad_norm": 0.1997236727921334, "learning_rate": 1.278749135826444e-05, "loss": 0.2758, "num_tokens": 2029375579.0, "step": 2659 }, { "epoch": 3.629532821951714, "grad_norm": 0.196931508194056, "learning_rate": 1.2782619238332975e-05, "loss": 0.2792, "num_tokens": 2030206674.0, "step": 2660 }, { "epoch": 3.6308984029103946, "grad_norm": 0.19094941460790268, "learning_rate": 1.2777746574601575e-05, "loss": 0.2786, "num_tokens": 2030931719.0, "step": 2661 }, { "epoch": 3.632263983869075, "grad_norm": 0.18737084837277906, "learning_rate": 1.2772873368556687e-05, "loss": 0.2814, "num_tokens": 2031646398.0, "step": 2662 }, { "epoch": 3.6336295648277552, "grad_norm": 0.20165298162048342, "learning_rate": 1.276799962168491e-05, "loss": 0.2689, "num_tokens": 2032355625.0, "step": 2663 }, { "epoch": 3.634995145786436, "grad_norm": 0.19417234660385413, "learning_rate": 1.2763125335473013e-05, "loss": 0.2673, "num_tokens": 2033085957.0, "step": 2664 }, { "epoch": 3.6363607267451163, "grad_norm": 0.18236449629585605, "learning_rate": 1.2758250511407929e-05, "loss": 0.2714, "num_tokens": 2033901005.0, "step": 2665 }, { "epoch": 3.637726307703797, "grad_norm": 0.18038632013280667, "learning_rate": 1.275337515097676e-05, "loss": 0.2795, "num_tokens": 2034712876.0, "step": 2666 }, { "epoch": 3.6390918886624775, "grad_norm": 0.18734806689163802, "learning_rate": 1.2748499255666764e-05, "loss": 0.2756, "num_tokens": 2035502788.0, "step": 2667 }, { "epoch": 3.640457469621158, "grad_norm": 0.18340321434801612, "learning_rate": 1.2743622826965368e-05, "loss": 0.2731, "num_tokens": 2036298910.0, "step": 2668 }, { "epoch": 3.6418230505798386, "grad_norm": 0.1825691459463724, "learning_rate": 1.2738745866360154e-05, "loss": 0.2754, "num_tokens": 2037066288.0, "step": 2669 }, { "epoch": 3.643188631538519, "grad_norm": 0.18923821560165238, "learning_rate": 1.2733868375338875e-05, "loss": 0.2636, "num_tokens": 2037799088.0, "step": 2670 }, { "epoch": 3.6445542124971997, "grad_norm": 0.18046538366054035, "learning_rate": 1.2728990355389443e-05, "loss": 0.2637, "num_tokens": 2038649924.0, "step": 2671 }, { "epoch": 3.64591979345588, "grad_norm": 0.18167410958431113, "learning_rate": 1.2724111807999933e-05, "loss": 0.2856, "num_tokens": 2039486132.0, "step": 2672 }, { "epoch": 3.6472853744145604, "grad_norm": 0.1859579098610536, "learning_rate": 1.2719232734658571e-05, "loss": 0.2674, "num_tokens": 2040273686.0, "step": 2673 }, { "epoch": 3.648650955373241, "grad_norm": 0.18296367951891823, "learning_rate": 1.271435313685376e-05, "loss": 0.2545, "num_tokens": 2040919930.0, "step": 2674 }, { "epoch": 3.6500165363319215, "grad_norm": 0.18984239391911453, "learning_rate": 1.2709473016074045e-05, "loss": 0.2672, "num_tokens": 2041672532.0, "step": 2675 }, { "epoch": 3.651382117290602, "grad_norm": 0.17753564751444464, "learning_rate": 1.2704592373808142e-05, "loss": 0.2664, "num_tokens": 2042391154.0, "step": 2676 }, { "epoch": 3.6527476982492826, "grad_norm": 0.19559072663247842, "learning_rate": 1.269971121154493e-05, "loss": 0.277, "num_tokens": 2043094001.0, "step": 2677 }, { "epoch": 3.654113279207963, "grad_norm": 0.1958963874317884, "learning_rate": 1.2694829530773434e-05, "loss": 0.2895, "num_tokens": 2043924247.0, "step": 2678 }, { "epoch": 3.6554788601666437, "grad_norm": 0.19344698258123805, "learning_rate": 1.2689947332982843e-05, "loss": 0.2692, "num_tokens": 2044639662.0, "step": 2679 }, { "epoch": 3.6568444411253243, "grad_norm": 0.21124840603401285, "learning_rate": 1.2685064619662509e-05, "loss": 0.2784, "num_tokens": 2045380574.0, "step": 2680 }, { "epoch": 3.6582100220840044, "grad_norm": 0.18657868552412768, "learning_rate": 1.2680181392301933e-05, "loss": 0.2695, "num_tokens": 2046148297.0, "step": 2681 }, { "epoch": 3.659575603042685, "grad_norm": 0.19755029582517397, "learning_rate": 1.2675297652390779e-05, "loss": 0.2751, "num_tokens": 2046869787.0, "step": 2682 }, { "epoch": 3.6609411840013655, "grad_norm": 0.19215252672737523, "learning_rate": 1.2670413401418857e-05, "loss": 0.2651, "num_tokens": 2047625042.0, "step": 2683 }, { "epoch": 3.662306764960046, "grad_norm": 0.18767241124428832, "learning_rate": 1.2665528640876149e-05, "loss": 0.2648, "num_tokens": 2048306904.0, "step": 2684 }, { "epoch": 3.6636723459187266, "grad_norm": 0.21343399165080304, "learning_rate": 1.2660643372252779e-05, "loss": 0.2614, "num_tokens": 2049055911.0, "step": 2685 }, { "epoch": 3.665037926877407, "grad_norm": 0.18964528445779366, "learning_rate": 1.265575759703903e-05, "loss": 0.2676, "num_tokens": 2049874221.0, "step": 2686 }, { "epoch": 3.6664035078360877, "grad_norm": 0.16806798512924506, "learning_rate": 1.265087131672535e-05, "loss": 0.2775, "num_tokens": 2050594653.0, "step": 2687 }, { "epoch": 3.6677690887947683, "grad_norm": 0.1879488780591285, "learning_rate": 1.2645984532802318e-05, "loss": 0.26, "num_tokens": 2051268200.0, "step": 2688 }, { "epoch": 3.669134669753449, "grad_norm": 0.20844188428551907, "learning_rate": 1.264109724676069e-05, "loss": 0.2704, "num_tokens": 2052102523.0, "step": 2689 }, { "epoch": 3.670500250712129, "grad_norm": 0.16878883160675093, "learning_rate": 1.263620946009136e-05, "loss": 0.2752, "num_tokens": 2052888796.0, "step": 2690 }, { "epoch": 3.6718658316708095, "grad_norm": 0.19544866381460788, "learning_rate": 1.2631321174285385e-05, "loss": 0.2683, "num_tokens": 2053609407.0, "step": 2691 }, { "epoch": 3.67323141262949, "grad_norm": 0.18885411564657836, "learning_rate": 1.2626432390833962e-05, "loss": 0.2737, "num_tokens": 2054391530.0, "step": 2692 }, { "epoch": 3.6745969935881706, "grad_norm": 0.18302791371672222, "learning_rate": 1.2621543111228454e-05, "loss": 0.2827, "num_tokens": 2055192638.0, "step": 2693 }, { "epoch": 3.675962574546851, "grad_norm": 0.1938932014844546, "learning_rate": 1.2616653336960368e-05, "loss": 0.2713, "num_tokens": 2055933861.0, "step": 2694 }, { "epoch": 3.6773281555055317, "grad_norm": 0.18332711214206296, "learning_rate": 1.2611763069521355e-05, "loss": 0.2774, "num_tokens": 2056692053.0, "step": 2695 }, { "epoch": 3.6786937364642123, "grad_norm": 0.2031376915225029, "learning_rate": 1.260687231040323e-05, "loss": 0.2728, "num_tokens": 2057436737.0, "step": 2696 }, { "epoch": 3.680059317422893, "grad_norm": 0.19479105811512495, "learning_rate": 1.2601981061097957e-05, "loss": 0.2736, "num_tokens": 2058121427.0, "step": 2697 }, { "epoch": 3.6814248983815734, "grad_norm": 0.20509748801547273, "learning_rate": 1.2597089323097631e-05, "loss": 0.263, "num_tokens": 2058847138.0, "step": 2698 }, { "epoch": 3.6827904793402535, "grad_norm": 0.18469655679963684, "learning_rate": 1.2592197097894518e-05, "loss": 0.2667, "num_tokens": 2059574393.0, "step": 2699 }, { "epoch": 3.684156060298934, "grad_norm": 0.20066495874261378, "learning_rate": 1.2587304386981025e-05, "loss": 0.2587, "num_tokens": 2060297424.0, "step": 2700 }, { "epoch": 3.6855216412576146, "grad_norm": 0.18155228010531013, "learning_rate": 1.2582411191849702e-05, "loss": 0.2769, "num_tokens": 2061085934.0, "step": 2701 }, { "epoch": 3.686887222216295, "grad_norm": 0.18600413435343388, "learning_rate": 1.2577517513993251e-05, "loss": 0.2801, "num_tokens": 2061849310.0, "step": 2702 }, { "epoch": 3.6882528031749757, "grad_norm": 0.19659669083187198, "learning_rate": 1.2572623354904525e-05, "loss": 0.2772, "num_tokens": 2062601441.0, "step": 2703 }, { "epoch": 3.6896183841336563, "grad_norm": 0.18447906898323174, "learning_rate": 1.2567728716076519e-05, "loss": 0.2803, "num_tokens": 2063379133.0, "step": 2704 }, { "epoch": 3.690983965092337, "grad_norm": 0.20722936681569312, "learning_rate": 1.2562833599002376e-05, "loss": 0.2791, "num_tokens": 2064167002.0, "step": 2705 }, { "epoch": 3.6923495460510174, "grad_norm": 0.19300014821417424, "learning_rate": 1.2557938005175381e-05, "loss": 0.2654, "num_tokens": 2064910953.0, "step": 2706 }, { "epoch": 3.693715127009698, "grad_norm": 0.19303988058598234, "learning_rate": 1.2553041936088965e-05, "loss": 0.275, "num_tokens": 2065690666.0, "step": 2707 }, { "epoch": 3.695080707968378, "grad_norm": 0.2047642652989613, "learning_rate": 1.2548145393236715e-05, "loss": 0.2891, "num_tokens": 2066501502.0, "step": 2708 }, { "epoch": 3.6964462889270586, "grad_norm": 0.1999941675856263, "learning_rate": 1.254324837811235e-05, "loss": 0.2691, "num_tokens": 2067216923.0, "step": 2709 }, { "epoch": 3.697811869885739, "grad_norm": 0.18586705101566192, "learning_rate": 1.2538350892209734e-05, "loss": 0.2704, "num_tokens": 2068030916.0, "step": 2710 }, { "epoch": 3.6991774508444197, "grad_norm": 0.20901193848640354, "learning_rate": 1.2533452937022883e-05, "loss": 0.273, "num_tokens": 2068880764.0, "step": 2711 }, { "epoch": 3.7005430318031003, "grad_norm": 0.17975874011744003, "learning_rate": 1.252855451404595e-05, "loss": 0.276, "num_tokens": 2069671861.0, "step": 2712 }, { "epoch": 3.701908612761781, "grad_norm": 0.18706385627403072, "learning_rate": 1.2523655624773232e-05, "loss": 0.2733, "num_tokens": 2070450799.0, "step": 2713 }, { "epoch": 3.7032741937204614, "grad_norm": 0.18975455102639588, "learning_rate": 1.2518756270699164e-05, "loss": 0.2733, "num_tokens": 2071242957.0, "step": 2714 }, { "epoch": 3.704639774679142, "grad_norm": 0.1807031933967458, "learning_rate": 1.2513856453318332e-05, "loss": 0.2838, "num_tokens": 2072099839.0, "step": 2715 }, { "epoch": 3.7060053556378225, "grad_norm": 0.18606169726167773, "learning_rate": 1.2508956174125452e-05, "loss": 0.2808, "num_tokens": 2072865067.0, "step": 2716 }, { "epoch": 3.7073709365965026, "grad_norm": 0.19416276301640129, "learning_rate": 1.2504055434615393e-05, "loss": 0.2763, "num_tokens": 2073667708.0, "step": 2717 }, { "epoch": 3.708736517555183, "grad_norm": 0.18527070927025358, "learning_rate": 1.2499154236283157e-05, "loss": 0.2833, "num_tokens": 2074422608.0, "step": 2718 }, { "epoch": 3.7101020985138637, "grad_norm": 0.18707274044725591, "learning_rate": 1.2494252580623884e-05, "loss": 0.2637, "num_tokens": 2075228491.0, "step": 2719 }, { "epoch": 3.7114676794725443, "grad_norm": 0.19452307798846163, "learning_rate": 1.248935046913286e-05, "loss": 0.2891, "num_tokens": 2075963678.0, "step": 2720 }, { "epoch": 3.712833260431225, "grad_norm": 0.20477156153089343, "learning_rate": 1.2484447903305507e-05, "loss": 0.2718, "num_tokens": 2076709381.0, "step": 2721 }, { "epoch": 3.7141988413899054, "grad_norm": 0.18372817797044128, "learning_rate": 1.2479544884637387e-05, "loss": 0.2621, "num_tokens": 2077410213.0, "step": 2722 }, { "epoch": 3.715564422348586, "grad_norm": 0.1911337726059762, "learning_rate": 1.2474641414624191e-05, "loss": 0.2746, "num_tokens": 2078195231.0, "step": 2723 }, { "epoch": 3.7169300033072665, "grad_norm": 0.18050590815202203, "learning_rate": 1.2469737494761765e-05, "loss": 0.2839, "num_tokens": 2079025859.0, "step": 2724 }, { "epoch": 3.718295584265947, "grad_norm": 0.20577172301925137, "learning_rate": 1.2464833126546073e-05, "loss": 0.2681, "num_tokens": 2079723610.0, "step": 2725 }, { "epoch": 3.719661165224627, "grad_norm": 0.19845720403249834, "learning_rate": 1.2459928311473238e-05, "loss": 0.2594, "num_tokens": 2080443951.0, "step": 2726 }, { "epoch": 3.7210267461833078, "grad_norm": 0.19297395835064893, "learning_rate": 1.2455023051039496e-05, "loss": 0.289, "num_tokens": 2081248047.0, "step": 2727 }, { "epoch": 3.7223923271419883, "grad_norm": 0.19574325622487967, "learning_rate": 1.245011734674123e-05, "loss": 0.2677, "num_tokens": 2081990849.0, "step": 2728 }, { "epoch": 3.723757908100669, "grad_norm": 0.17840723045202134, "learning_rate": 1.2445211200074965e-05, "loss": 0.2687, "num_tokens": 2082732321.0, "step": 2729 }, { "epoch": 3.7251234890593494, "grad_norm": 0.1846245191307462, "learning_rate": 1.2440304612537345e-05, "loss": 0.2635, "num_tokens": 2083419807.0, "step": 2730 }, { "epoch": 3.72648907001803, "grad_norm": 0.1950389053377489, "learning_rate": 1.243539758562517e-05, "loss": 0.2694, "num_tokens": 2084122527.0, "step": 2731 }, { "epoch": 3.7278546509767105, "grad_norm": 0.20307223338996785, "learning_rate": 1.2430490120835343e-05, "loss": 0.2785, "num_tokens": 2084893016.0, "step": 2732 }, { "epoch": 3.729220231935391, "grad_norm": 0.18975066496594, "learning_rate": 1.2425582219664936e-05, "loss": 0.2714, "num_tokens": 2085649748.0, "step": 2733 }, { "epoch": 3.7305858128940717, "grad_norm": 0.19244402354453602, "learning_rate": 1.2420673883611127e-05, "loss": 0.2838, "num_tokens": 2086398509.0, "step": 2734 }, { "epoch": 3.7319513938527518, "grad_norm": 0.19615807187116674, "learning_rate": 1.2415765114171244e-05, "loss": 0.2821, "num_tokens": 2087238268.0, "step": 2735 }, { "epoch": 3.7333169748114323, "grad_norm": 0.1724599414399223, "learning_rate": 1.2410855912842734e-05, "loss": 0.2825, "num_tokens": 2087998936.0, "step": 2736 }, { "epoch": 3.734682555770113, "grad_norm": 0.18935645652590646, "learning_rate": 1.2405946281123186e-05, "loss": 0.2858, "num_tokens": 2088770875.0, "step": 2737 }, { "epoch": 3.7360481367287934, "grad_norm": 0.18294946197861378, "learning_rate": 1.2401036220510312e-05, "loss": 0.2843, "num_tokens": 2089559813.0, "step": 2738 }, { "epoch": 3.737413717687474, "grad_norm": 0.17980966805747078, "learning_rate": 1.239612573250196e-05, "loss": 0.263, "num_tokens": 2090266449.0, "step": 2739 }, { "epoch": 3.7387792986461545, "grad_norm": 0.2060281870093605, "learning_rate": 1.239121481859611e-05, "loss": 0.2736, "num_tokens": 2090959476.0, "step": 2740 }, { "epoch": 3.740144879604835, "grad_norm": 0.18594089023363577, "learning_rate": 1.2386303480290866e-05, "loss": 0.2719, "num_tokens": 2091747617.0, "step": 2741 }, { "epoch": 3.7415104605635157, "grad_norm": 0.1797515906850866, "learning_rate": 1.2381391719084465e-05, "loss": 0.266, "num_tokens": 2092555125.0, "step": 2742 }, { "epoch": 3.742876041522196, "grad_norm": 0.18644546579867172, "learning_rate": 1.2376479536475276e-05, "loss": 0.2571, "num_tokens": 2093277052.0, "step": 2743 }, { "epoch": 3.7442416224808763, "grad_norm": 0.1942348020856113, "learning_rate": 1.2371566933961791e-05, "loss": 0.2778, "num_tokens": 2093993271.0, "step": 2744 }, { "epoch": 3.745607203439557, "grad_norm": 0.19911230108369407, "learning_rate": 1.2366653913042633e-05, "loss": 0.2759, "num_tokens": 2094738564.0, "step": 2745 }, { "epoch": 3.7469727843982374, "grad_norm": 0.19480569966507755, "learning_rate": 1.2361740475216547e-05, "loss": 0.2694, "num_tokens": 2095489229.0, "step": 2746 }, { "epoch": 3.748338365356918, "grad_norm": 0.17135929265441227, "learning_rate": 1.2356826621982417e-05, "loss": 0.2816, "num_tokens": 2096289641.0, "step": 2747 }, { "epoch": 3.7497039463155986, "grad_norm": 0.1937363727022568, "learning_rate": 1.235191235483924e-05, "loss": 0.2746, "num_tokens": 2097060714.0, "step": 2748 }, { "epoch": 3.751069527274279, "grad_norm": 0.17720977198663948, "learning_rate": 1.2346997675286152e-05, "loss": 0.27, "num_tokens": 2097783057.0, "step": 2749 }, { "epoch": 3.7524351082329597, "grad_norm": 0.1952270103136921, "learning_rate": 1.2342082584822405e-05, "loss": 0.2816, "num_tokens": 2098559100.0, "step": 2750 }, { "epoch": 3.7538006891916402, "grad_norm": 0.17959210305848036, "learning_rate": 1.2337167084947385e-05, "loss": 0.2729, "num_tokens": 2099299098.0, "step": 2751 }, { "epoch": 3.755166270150321, "grad_norm": 0.1866105064013948, "learning_rate": 1.2332251177160596e-05, "loss": 0.2676, "num_tokens": 2100012292.0, "step": 2752 }, { "epoch": 3.756531851109001, "grad_norm": 0.19921434939732213, "learning_rate": 1.2327334862961665e-05, "loss": 0.2819, "num_tokens": 2100781243.0, "step": 2753 }, { "epoch": 3.7578974320676815, "grad_norm": 0.19295128626425426, "learning_rate": 1.2322418143850352e-05, "loss": 0.2739, "num_tokens": 2101515045.0, "step": 2754 }, { "epoch": 3.759263013026362, "grad_norm": 0.18955166324852463, "learning_rate": 1.231750102132653e-05, "loss": 0.2674, "num_tokens": 2102278399.0, "step": 2755 }, { "epoch": 3.7606285939850426, "grad_norm": 0.1919399417360505, "learning_rate": 1.2312583496890203e-05, "loss": 0.2782, "num_tokens": 2103039788.0, "step": 2756 }, { "epoch": 3.761994174943723, "grad_norm": 0.19163597591661544, "learning_rate": 1.2307665572041492e-05, "loss": 0.2775, "num_tokens": 2103772826.0, "step": 2757 }, { "epoch": 3.7633597559024037, "grad_norm": 0.20857181673066194, "learning_rate": 1.2302747248280651e-05, "loss": 0.2781, "num_tokens": 2104529421.0, "step": 2758 }, { "epoch": 3.7647253368610842, "grad_norm": 0.1783327092192551, "learning_rate": 1.2297828527108035e-05, "loss": 0.2743, "num_tokens": 2105241715.0, "step": 2759 }, { "epoch": 3.766090917819765, "grad_norm": 0.2003581026522545, "learning_rate": 1.2292909410024145e-05, "loss": 0.2751, "num_tokens": 2106076525.0, "step": 2760 }, { "epoch": 3.7674564987784454, "grad_norm": 0.2032644801572929, "learning_rate": 1.2287989898529582e-05, "loss": 0.2896, "num_tokens": 2106873302.0, "step": 2761 }, { "epoch": 3.7688220797371255, "grad_norm": 0.18686585403459757, "learning_rate": 1.228306999412508e-05, "loss": 0.2829, "num_tokens": 2107671218.0, "step": 2762 }, { "epoch": 3.770187660695806, "grad_norm": 0.19075484788691757, "learning_rate": 1.2278149698311488e-05, "loss": 0.2691, "num_tokens": 2108421521.0, "step": 2763 }, { "epoch": 3.7715532416544866, "grad_norm": 0.19155880134762765, "learning_rate": 1.2273229012589775e-05, "loss": 0.2801, "num_tokens": 2109174248.0, "step": 2764 }, { "epoch": 3.772918822613167, "grad_norm": 0.19576166616485824, "learning_rate": 1.2268307938461028e-05, "loss": 0.2698, "num_tokens": 2109856941.0, "step": 2765 }, { "epoch": 3.7742844035718477, "grad_norm": 0.20556887999602738, "learning_rate": 1.2263386477426455e-05, "loss": 0.2707, "num_tokens": 2110656486.0, "step": 2766 }, { "epoch": 3.7756499845305282, "grad_norm": 0.1824956213016528, "learning_rate": 1.2258464630987381e-05, "loss": 0.2778, "num_tokens": 2111457511.0, "step": 2767 }, { "epoch": 3.777015565489209, "grad_norm": 0.19585954563985702, "learning_rate": 1.2253542400645251e-05, "loss": 0.2745, "num_tokens": 2112183886.0, "step": 2768 }, { "epoch": 3.7783811464478894, "grad_norm": 0.19659560064469153, "learning_rate": 1.2248619787901616e-05, "loss": 0.2747, "num_tokens": 2112963378.0, "step": 2769 }, { "epoch": 3.77974672740657, "grad_norm": 0.19174071238717225, "learning_rate": 1.2243696794258158e-05, "loss": 0.2639, "num_tokens": 2113739620.0, "step": 2770 }, { "epoch": 3.78111230836525, "grad_norm": 0.21871994435625502, "learning_rate": 1.2238773421216669e-05, "loss": 0.2978, "num_tokens": 2114482553.0, "step": 2771 }, { "epoch": 3.7824778893239306, "grad_norm": 0.21344046758266486, "learning_rate": 1.2233849670279054e-05, "loss": 0.2609, "num_tokens": 2115305947.0, "step": 2772 }, { "epoch": 3.783843470282611, "grad_norm": 0.19034071300625777, "learning_rate": 1.2228925542947336e-05, "loss": 0.2818, "num_tokens": 2116055659.0, "step": 2773 }, { "epoch": 3.7852090512412917, "grad_norm": 0.19200904017164308, "learning_rate": 1.2224001040723661e-05, "loss": 0.2831, "num_tokens": 2116843763.0, "step": 2774 }, { "epoch": 3.7865746321999723, "grad_norm": 0.1832623049268719, "learning_rate": 1.2219076165110273e-05, "loss": 0.2769, "num_tokens": 2117637942.0, "step": 2775 }, { "epoch": 3.787940213158653, "grad_norm": 0.18449035620230572, "learning_rate": 1.2214150917609542e-05, "loss": 0.2657, "num_tokens": 2118347747.0, "step": 2776 }, { "epoch": 3.7893057941173334, "grad_norm": 0.18194880416656836, "learning_rate": 1.2209225299723945e-05, "loss": 0.265, "num_tokens": 2119104242.0, "step": 2777 }, { "epoch": 3.790671375076014, "grad_norm": 0.20309455134351426, "learning_rate": 1.2204299312956077e-05, "loss": 0.2857, "num_tokens": 2119900035.0, "step": 2778 }, { "epoch": 3.7920369560346945, "grad_norm": 0.19734914208437798, "learning_rate": 1.219937295880864e-05, "loss": 0.2803, "num_tokens": 2120604547.0, "step": 2779 }, { "epoch": 3.7934025369933746, "grad_norm": 0.19987788932810982, "learning_rate": 1.2194446238784455e-05, "loss": 0.2773, "num_tokens": 2121377425.0, "step": 2780 }, { "epoch": 3.794768117952055, "grad_norm": 0.19005768219147293, "learning_rate": 1.2189519154386448e-05, "loss": 0.2662, "num_tokens": 2122167477.0, "step": 2781 }, { "epoch": 3.7961336989107357, "grad_norm": 0.18833601065097397, "learning_rate": 1.2184591707117664e-05, "loss": 0.2654, "num_tokens": 2122968575.0, "step": 2782 }, { "epoch": 3.7974992798694163, "grad_norm": 0.17927847203948158, "learning_rate": 1.217966389848125e-05, "loss": 0.2728, "num_tokens": 2123699037.0, "step": 2783 }, { "epoch": 3.798864860828097, "grad_norm": 0.2059540543092451, "learning_rate": 1.2174735729980468e-05, "loss": 0.2691, "num_tokens": 2124458783.0, "step": 2784 }, { "epoch": 3.8002304417867774, "grad_norm": 0.190636278591924, "learning_rate": 1.2169807203118684e-05, "loss": 0.2787, "num_tokens": 2125297712.0, "step": 2785 }, { "epoch": 3.801596022745458, "grad_norm": 0.18251056275473332, "learning_rate": 1.2164878319399385e-05, "loss": 0.2708, "num_tokens": 2126064413.0, "step": 2786 }, { "epoch": 3.8029616037041385, "grad_norm": 0.1937658326660195, "learning_rate": 1.2159949080326156e-05, "loss": 0.2804, "num_tokens": 2126828111.0, "step": 2787 }, { "epoch": 3.804327184662819, "grad_norm": 0.18992459991923663, "learning_rate": 1.2155019487402697e-05, "loss": 0.2668, "num_tokens": 2127641641.0, "step": 2788 }, { "epoch": 3.805692765621499, "grad_norm": 0.18240824864502217, "learning_rate": 1.2150089542132815e-05, "loss": 0.2573, "num_tokens": 2128350185.0, "step": 2789 }, { "epoch": 3.8070583465801797, "grad_norm": 0.18653203793404666, "learning_rate": 1.2145159246020418e-05, "loss": 0.2808, "num_tokens": 2129111241.0, "step": 2790 }, { "epoch": 3.8084239275388603, "grad_norm": 0.19422800088850103, "learning_rate": 1.2140228600569525e-05, "loss": 0.2701, "num_tokens": 2129806659.0, "step": 2791 }, { "epoch": 3.809789508497541, "grad_norm": 0.18943705583676687, "learning_rate": 1.2135297607284268e-05, "loss": 0.2736, "num_tokens": 2130571628.0, "step": 2792 }, { "epoch": 3.8111550894562214, "grad_norm": 0.19482674299618707, "learning_rate": 1.2130366267668874e-05, "loss": 0.2835, "num_tokens": 2131389267.0, "step": 2793 }, { "epoch": 3.812520670414902, "grad_norm": 0.21489912199630065, "learning_rate": 1.2125434583227684e-05, "loss": 0.288, "num_tokens": 2132215214.0, "step": 2794 }, { "epoch": 3.8138862513735825, "grad_norm": 0.18065096597718308, "learning_rate": 1.2120502555465144e-05, "loss": 0.2552, "num_tokens": 2132958581.0, "step": 2795 }, { "epoch": 3.815251832332263, "grad_norm": 0.2368835459542438, "learning_rate": 1.2115570185885796e-05, "loss": 0.2828, "num_tokens": 2133691840.0, "step": 2796 }, { "epoch": 3.8166174132909436, "grad_norm": 0.20623990902897232, "learning_rate": 1.2110637475994295e-05, "loss": 0.2669, "num_tokens": 2134449953.0, "step": 2797 }, { "epoch": 3.8179829942496237, "grad_norm": 0.19153767221914167, "learning_rate": 1.2105704427295402e-05, "loss": 0.2723, "num_tokens": 2135283520.0, "step": 2798 }, { "epoch": 3.8193485752083043, "grad_norm": 0.19623636416372042, "learning_rate": 1.2100771041293971e-05, "loss": 0.2721, "num_tokens": 2136064065.0, "step": 2799 }, { "epoch": 3.820714156166985, "grad_norm": 0.1824177792349838, "learning_rate": 1.2095837319494963e-05, "loss": 0.2801, "num_tokens": 2136834562.0, "step": 2800 }, { "epoch": 3.8220797371256654, "grad_norm": 0.18645821438240134, "learning_rate": 1.2090903263403446e-05, "loss": 0.2673, "num_tokens": 2137572153.0, "step": 2801 }, { "epoch": 3.823445318084346, "grad_norm": 0.19188856532441442, "learning_rate": 1.2085968874524587e-05, "loss": 0.2751, "num_tokens": 2138372245.0, "step": 2802 }, { "epoch": 3.8248108990430265, "grad_norm": 0.19120158184617256, "learning_rate": 1.2081034154363653e-05, "loss": 0.2772, "num_tokens": 2139168394.0, "step": 2803 }, { "epoch": 3.826176480001707, "grad_norm": 0.19236220709216992, "learning_rate": 1.2076099104426018e-05, "loss": 0.2729, "num_tokens": 2139866388.0, "step": 2804 }, { "epoch": 3.8275420609603876, "grad_norm": 0.20212239880626054, "learning_rate": 1.2071163726217146e-05, "loss": 0.2739, "num_tokens": 2140640628.0, "step": 2805 }, { "epoch": 3.828907641919068, "grad_norm": 0.19017019631164922, "learning_rate": 1.2066228021242605e-05, "loss": 0.2699, "num_tokens": 2141444463.0, "step": 2806 }, { "epoch": 3.8302732228777483, "grad_norm": 0.2080933767334414, "learning_rate": 1.2061291991008073e-05, "loss": 0.2658, "num_tokens": 2142203551.0, "step": 2807 }, { "epoch": 3.831638803836429, "grad_norm": 0.1950021763887986, "learning_rate": 1.205635563701932e-05, "loss": 0.2628, "num_tokens": 2142951167.0, "step": 2808 }, { "epoch": 3.8330043847951094, "grad_norm": 0.20166263943806925, "learning_rate": 1.20514189607822e-05, "loss": 0.2802, "num_tokens": 2143700954.0, "step": 2809 }, { "epoch": 3.83436996575379, "grad_norm": 0.19513575118875268, "learning_rate": 1.204648196380269e-05, "loss": 0.2785, "num_tokens": 2144482898.0, "step": 2810 }, { "epoch": 3.8357355467124705, "grad_norm": 0.2041560762899071, "learning_rate": 1.2041544647586858e-05, "loss": 0.2813, "num_tokens": 2145292145.0, "step": 2811 }, { "epoch": 3.837101127671151, "grad_norm": 0.19593471493138664, "learning_rate": 1.2036607013640852e-05, "loss": 0.2814, "num_tokens": 2146005386.0, "step": 2812 }, { "epoch": 3.8384667086298316, "grad_norm": 0.18899230303622547, "learning_rate": 1.2031669063470944e-05, "loss": 0.2807, "num_tokens": 2146727011.0, "step": 2813 }, { "epoch": 3.839832289588512, "grad_norm": 0.2077225961454414, "learning_rate": 1.202673079858348e-05, "loss": 0.2764, "num_tokens": 2147449349.0, "step": 2814 }, { "epoch": 3.8411978705471927, "grad_norm": 0.18084673245410918, "learning_rate": 1.2021792220484911e-05, "loss": 0.2621, "num_tokens": 2148238855.0, "step": 2815 }, { "epoch": 3.842563451505873, "grad_norm": 0.19278863364816576, "learning_rate": 1.2016853330681788e-05, "loss": 0.2803, "num_tokens": 2149026579.0, "step": 2816 }, { "epoch": 3.8439290324645534, "grad_norm": 0.19799782079464068, "learning_rate": 1.201191413068075e-05, "loss": 0.2687, "num_tokens": 2149752579.0, "step": 2817 }, { "epoch": 3.845294613423234, "grad_norm": 0.1980322795448722, "learning_rate": 1.2006974621988532e-05, "loss": 0.2768, "num_tokens": 2150432996.0, "step": 2818 }, { "epoch": 3.8466601943819145, "grad_norm": 0.19640457970592315, "learning_rate": 1.2002034806111967e-05, "loss": 0.2634, "num_tokens": 2151121894.0, "step": 2819 }, { "epoch": 3.848025775340595, "grad_norm": 0.19285745425234604, "learning_rate": 1.1997094684557977e-05, "loss": 0.2685, "num_tokens": 2151894627.0, "step": 2820 }, { "epoch": 3.8493913562992756, "grad_norm": 0.18070522458846378, "learning_rate": 1.1992154258833578e-05, "loss": 0.2657, "num_tokens": 2152610488.0, "step": 2821 }, { "epoch": 3.850756937257956, "grad_norm": 0.2008182571667235, "learning_rate": 1.1987213530445882e-05, "loss": 0.2831, "num_tokens": 2153407328.0, "step": 2822 }, { "epoch": 3.8521225182166368, "grad_norm": 0.19121095027398888, "learning_rate": 1.198227250090209e-05, "loss": 0.2805, "num_tokens": 2154180150.0, "step": 2823 }, { "epoch": 3.8534880991753173, "grad_norm": 0.19650126759720318, "learning_rate": 1.1977331171709497e-05, "loss": 0.2713, "num_tokens": 2154897339.0, "step": 2824 }, { "epoch": 3.8548536801339974, "grad_norm": 0.18851202423737617, "learning_rate": 1.1972389544375487e-05, "loss": 0.2772, "num_tokens": 2155695471.0, "step": 2825 }, { "epoch": 3.856219261092678, "grad_norm": 0.1945974071933573, "learning_rate": 1.1967447620407538e-05, "loss": 0.2807, "num_tokens": 2156512906.0, "step": 2826 }, { "epoch": 3.8575848420513585, "grad_norm": 0.18654299157709153, "learning_rate": 1.1962505401313216e-05, "loss": 0.2632, "num_tokens": 2157319399.0, "step": 2827 }, { "epoch": 3.858950423010039, "grad_norm": 0.18124345267382863, "learning_rate": 1.1957562888600178e-05, "loss": 0.2712, "num_tokens": 2158040814.0, "step": 2828 }, { "epoch": 3.8603160039687197, "grad_norm": 0.18622432029295977, "learning_rate": 1.1952620083776174e-05, "loss": 0.2656, "num_tokens": 2158781074.0, "step": 2829 }, { "epoch": 3.8616815849274, "grad_norm": 0.18352174955932296, "learning_rate": 1.1947676988349036e-05, "loss": 0.2863, "num_tokens": 2159516726.0, "step": 2830 }, { "epoch": 3.8630471658860808, "grad_norm": 0.2015465600636992, "learning_rate": 1.1942733603826688e-05, "loss": 0.2828, "num_tokens": 2160199172.0, "step": 2831 }, { "epoch": 3.8644127468447613, "grad_norm": 0.20016052505064758, "learning_rate": 1.1937789931717148e-05, "loss": 0.2833, "num_tokens": 2160921798.0, "step": 2832 }, { "epoch": 3.865778327803442, "grad_norm": 0.21059898374287045, "learning_rate": 1.1932845973528505e-05, "loss": 0.2756, "num_tokens": 2161608658.0, "step": 2833 }, { "epoch": 3.867143908762122, "grad_norm": 0.20264706074826996, "learning_rate": 1.1927901730768958e-05, "loss": 0.2774, "num_tokens": 2162449314.0, "step": 2834 }, { "epoch": 3.8685094897208026, "grad_norm": 0.18907174333805163, "learning_rate": 1.1922957204946777e-05, "loss": 0.2748, "num_tokens": 2163216093.0, "step": 2835 }, { "epoch": 3.869875070679483, "grad_norm": 0.1840669265970139, "learning_rate": 1.191801239757032e-05, "loss": 0.2812, "num_tokens": 2164044896.0, "step": 2836 }, { "epoch": 3.8712406516381637, "grad_norm": 0.1898737548650166, "learning_rate": 1.1913067310148038e-05, "loss": 0.2784, "num_tokens": 2164853966.0, "step": 2837 }, { "epoch": 3.8726062325968442, "grad_norm": 0.18677686946709626, "learning_rate": 1.1908121944188463e-05, "loss": 0.2741, "num_tokens": 2165651610.0, "step": 2838 }, { "epoch": 3.873971813555525, "grad_norm": 0.20300931358661378, "learning_rate": 1.1903176301200207e-05, "loss": 0.2771, "num_tokens": 2166371873.0, "step": 2839 }, { "epoch": 3.8753373945142053, "grad_norm": 0.19275251200213692, "learning_rate": 1.1898230382691977e-05, "loss": 0.2747, "num_tokens": 2167075399.0, "step": 2840 }, { "epoch": 3.876702975472886, "grad_norm": 0.20894970136598703, "learning_rate": 1.1893284190172556e-05, "loss": 0.2649, "num_tokens": 2167773417.0, "step": 2841 }, { "epoch": 3.8780685564315664, "grad_norm": 0.20494836046975884, "learning_rate": 1.1888337725150814e-05, "loss": 0.2752, "num_tokens": 2168528681.0, "step": 2842 }, { "epoch": 3.8794341373902466, "grad_norm": 0.1872388471022263, "learning_rate": 1.1883390989135702e-05, "loss": 0.277, "num_tokens": 2169247210.0, "step": 2843 }, { "epoch": 3.880799718348927, "grad_norm": 0.20396414516300146, "learning_rate": 1.1878443983636257e-05, "loss": 0.2853, "num_tokens": 2170036730.0, "step": 2844 }, { "epoch": 3.8821652993076077, "grad_norm": 0.19272763526273273, "learning_rate": 1.1873496710161591e-05, "loss": 0.2768, "num_tokens": 2170830379.0, "step": 2845 }, { "epoch": 3.8835308802662882, "grad_norm": 0.18694973469301485, "learning_rate": 1.1868549170220909e-05, "loss": 0.2711, "num_tokens": 2171494407.0, "step": 2846 }, { "epoch": 3.884896461224969, "grad_norm": 0.19711953383933029, "learning_rate": 1.1863601365323488e-05, "loss": 0.2935, "num_tokens": 2172275877.0, "step": 2847 }, { "epoch": 3.8862620421836493, "grad_norm": 0.20055484595531897, "learning_rate": 1.1858653296978691e-05, "loss": 0.2706, "num_tokens": 2173008333.0, "step": 2848 }, { "epoch": 3.88762762314233, "grad_norm": 0.19573237679959776, "learning_rate": 1.1853704966695959e-05, "loss": 0.2813, "num_tokens": 2173782623.0, "step": 2849 }, { "epoch": 3.8889932041010105, "grad_norm": 0.20181516341560166, "learning_rate": 1.1848756375984809e-05, "loss": 0.2643, "num_tokens": 2174539187.0, "step": 2850 }, { "epoch": 3.890358785059691, "grad_norm": 0.17178869736908148, "learning_rate": 1.1843807526354846e-05, "loss": 0.2745, "num_tokens": 2175233857.0, "step": 2851 }, { "epoch": 3.891724366018371, "grad_norm": 0.20902971706297804, "learning_rate": 1.183885841931575e-05, "loss": 0.2721, "num_tokens": 2175985301.0, "step": 2852 }, { "epoch": 3.8930899469770517, "grad_norm": 0.1870193762909068, "learning_rate": 1.1833909056377281e-05, "loss": 0.2832, "num_tokens": 2176673538.0, "step": 2853 }, { "epoch": 3.8944555279357322, "grad_norm": 0.21194901856308482, "learning_rate": 1.182895943904927e-05, "loss": 0.2676, "num_tokens": 2177477467.0, "step": 2854 }, { "epoch": 3.895821108894413, "grad_norm": 0.17876395595503408, "learning_rate": 1.1824009568841632e-05, "loss": 0.2696, "num_tokens": 2178287413.0, "step": 2855 }, { "epoch": 3.8971866898530934, "grad_norm": 0.19487015420267695, "learning_rate": 1.181905944726436e-05, "loss": 0.2743, "num_tokens": 2179088898.0, "step": 2856 }, { "epoch": 3.898552270811774, "grad_norm": 0.18129217050232926, "learning_rate": 1.1814109075827522e-05, "loss": 0.2551, "num_tokens": 2179824483.0, "step": 2857 }, { "epoch": 3.8999178517704545, "grad_norm": 0.18490276960640273, "learning_rate": 1.180915845604126e-05, "loss": 0.2926, "num_tokens": 2180643390.0, "step": 2858 }, { "epoch": 3.901283432729135, "grad_norm": 0.1772768893657513, "learning_rate": 1.1804207589415795e-05, "loss": 0.2733, "num_tokens": 2181382910.0, "step": 2859 }, { "epoch": 3.9026490136878156, "grad_norm": 0.18512565574132048, "learning_rate": 1.1799256477461422e-05, "loss": 0.2646, "num_tokens": 2182167083.0, "step": 2860 }, { "epoch": 3.9040145946464957, "grad_norm": 0.18802635469625328, "learning_rate": 1.1794305121688514e-05, "loss": 0.2737, "num_tokens": 2182941310.0, "step": 2861 }, { "epoch": 3.9053801756051763, "grad_norm": 0.17408014720458487, "learning_rate": 1.1789353523607507e-05, "loss": 0.2726, "num_tokens": 2183721088.0, "step": 2862 }, { "epoch": 3.906745756563857, "grad_norm": 0.19765611183490855, "learning_rate": 1.1784401684728925e-05, "loss": 0.2787, "num_tokens": 2184502423.0, "step": 2863 }, { "epoch": 3.9081113375225374, "grad_norm": 0.1902035076905486, "learning_rate": 1.177944960656336e-05, "loss": 0.2561, "num_tokens": 2185174272.0, "step": 2864 }, { "epoch": 3.909476918481218, "grad_norm": 0.19412802819161168, "learning_rate": 1.1774497290621474e-05, "loss": 0.2779, "num_tokens": 2185831387.0, "step": 2865 }, { "epoch": 3.9108424994398985, "grad_norm": 0.21270900142292118, "learning_rate": 1.1769544738414004e-05, "loss": 0.273, "num_tokens": 2186565981.0, "step": 2866 }, { "epoch": 3.912208080398579, "grad_norm": 0.1902908766953016, "learning_rate": 1.1764591951451762e-05, "loss": 0.2677, "num_tokens": 2187260143.0, "step": 2867 }, { "epoch": 3.9135736613572596, "grad_norm": 0.19198980808764732, "learning_rate": 1.1759638931245628e-05, "loss": 0.2756, "num_tokens": 2188040523.0, "step": 2868 }, { "epoch": 3.91493924231594, "grad_norm": 0.18557396022064426, "learning_rate": 1.1754685679306554e-05, "loss": 0.2839, "num_tokens": 2188818887.0, "step": 2869 }, { "epoch": 3.9163048232746203, "grad_norm": 0.20098004087771795, "learning_rate": 1.1749732197145558e-05, "loss": 0.282, "num_tokens": 2189578574.0, "step": 2870 }, { "epoch": 3.917670404233301, "grad_norm": 0.1898873011734509, "learning_rate": 1.174477848627374e-05, "loss": 0.2816, "num_tokens": 2190320830.0, "step": 2871 }, { "epoch": 3.9190359851919814, "grad_norm": 0.193475367842913, "learning_rate": 1.1739824548202259e-05, "loss": 0.2745, "num_tokens": 2191045672.0, "step": 2872 }, { "epoch": 3.920401566150662, "grad_norm": 0.18985540353442987, "learning_rate": 1.1734870384442345e-05, "loss": 0.2836, "num_tokens": 2191844236.0, "step": 2873 }, { "epoch": 3.9217671471093425, "grad_norm": 0.19032991024893606, "learning_rate": 1.1729915996505303e-05, "loss": 0.2617, "num_tokens": 2192534480.0, "step": 2874 }, { "epoch": 3.923132728068023, "grad_norm": 0.19068933655181186, "learning_rate": 1.17249613859025e-05, "loss": 0.2776, "num_tokens": 2193224003.0, "step": 2875 }, { "epoch": 3.9244983090267036, "grad_norm": 0.20036886834900808, "learning_rate": 1.1720006554145374e-05, "loss": 0.277, "num_tokens": 2194045349.0, "step": 2876 }, { "epoch": 3.925863889985384, "grad_norm": 0.18162354002831063, "learning_rate": 1.1715051502745432e-05, "loss": 0.2707, "num_tokens": 2194860407.0, "step": 2877 }, { "epoch": 3.9272294709440647, "grad_norm": 0.19111614772196125, "learning_rate": 1.171009623321424e-05, "loss": 0.2788, "num_tokens": 2195668267.0, "step": 2878 }, { "epoch": 3.928595051902745, "grad_norm": 0.19190639560630313, "learning_rate": 1.1705140747063439e-05, "loss": 0.2668, "num_tokens": 2196456925.0, "step": 2879 }, { "epoch": 3.9299606328614254, "grad_norm": 0.17448997136061678, "learning_rate": 1.1700185045804732e-05, "loss": 0.2766, "num_tokens": 2197266440.0, "step": 2880 }, { "epoch": 3.931326213820106, "grad_norm": 0.16958770313199834, "learning_rate": 1.1695229130949891e-05, "loss": 0.2619, "num_tokens": 2197904976.0, "step": 2881 }, { "epoch": 3.9326917947787865, "grad_norm": 0.2110751657477687, "learning_rate": 1.169027300401075e-05, "loss": 0.2743, "num_tokens": 2198629232.0, "step": 2882 }, { "epoch": 3.934057375737467, "grad_norm": 0.18683787008807903, "learning_rate": 1.1685316666499206e-05, "loss": 0.2849, "num_tokens": 2199372275.0, "step": 2883 }, { "epoch": 3.9354229566961476, "grad_norm": 0.19733794407089136, "learning_rate": 1.1680360119927232e-05, "loss": 0.2698, "num_tokens": 2200148857.0, "step": 2884 }, { "epoch": 3.936788537654828, "grad_norm": 0.18568556341205966, "learning_rate": 1.1675403365806848e-05, "loss": 0.2705, "num_tokens": 2200889604.0, "step": 2885 }, { "epoch": 3.9381541186135087, "grad_norm": 0.17824599028413488, "learning_rate": 1.1670446405650142e-05, "loss": 0.2815, "num_tokens": 2201713357.0, "step": 2886 }, { "epoch": 3.9395196995721893, "grad_norm": 0.18160301330440995, "learning_rate": 1.1665489240969273e-05, "loss": 0.2735, "num_tokens": 2202495196.0, "step": 2887 }, { "epoch": 3.9408852805308694, "grad_norm": 0.19077299926070004, "learning_rate": 1.1660531873276457e-05, "loss": 0.2683, "num_tokens": 2203258383.0, "step": 2888 }, { "epoch": 3.94225086148955, "grad_norm": 0.18495759285160313, "learning_rate": 1.1655574304083972e-05, "loss": 0.2861, "num_tokens": 2204029846.0, "step": 2889 }, { "epoch": 3.9436164424482305, "grad_norm": 0.18633925999410986, "learning_rate": 1.1650616534904158e-05, "loss": 0.2948, "num_tokens": 2204786928.0, "step": 2890 }, { "epoch": 3.944982023406911, "grad_norm": 0.19250033367057048, "learning_rate": 1.1645658567249414e-05, "loss": 0.274, "num_tokens": 2205531132.0, "step": 2891 }, { "epoch": 3.9463476043655916, "grad_norm": 0.19274571411161967, "learning_rate": 1.1640700402632204e-05, "loss": 0.2644, "num_tokens": 2206351932.0, "step": 2892 }, { "epoch": 3.947713185324272, "grad_norm": 0.1676250651852623, "learning_rate": 1.1635742042565046e-05, "loss": 0.2851, "num_tokens": 2207155947.0, "step": 2893 }, { "epoch": 3.9490787662829527, "grad_norm": 0.18340675860214828, "learning_rate": 1.1630783488560524e-05, "loss": 0.272, "num_tokens": 2207996712.0, "step": 2894 }, { "epoch": 3.9504443472416333, "grad_norm": 0.1829074343008109, "learning_rate": 1.1625824742131273e-05, "loss": 0.2788, "num_tokens": 2208785219.0, "step": 2895 }, { "epoch": 3.951809928200314, "grad_norm": 0.18017690325060814, "learning_rate": 1.1620865804789998e-05, "loss": 0.2691, "num_tokens": 2209527451.0, "step": 2896 }, { "epoch": 3.953175509158994, "grad_norm": 0.19004819143239177, "learning_rate": 1.1615906678049456e-05, "loss": 0.2572, "num_tokens": 2210272063.0, "step": 2897 }, { "epoch": 3.9545410901176745, "grad_norm": 0.1799236341485269, "learning_rate": 1.1610947363422455e-05, "loss": 0.2728, "num_tokens": 2211077885.0, "step": 2898 }, { "epoch": 3.955906671076355, "grad_norm": 0.1863873518844361, "learning_rate": 1.160598786242187e-05, "loss": 0.2814, "num_tokens": 2211845544.0, "step": 2899 }, { "epoch": 3.9572722520350356, "grad_norm": 0.18782859082686726, "learning_rate": 1.1601028176560638e-05, "loss": 0.269, "num_tokens": 2212599513.0, "step": 2900 }, { "epoch": 3.958637832993716, "grad_norm": 0.19778566364365016, "learning_rate": 1.159606830735173e-05, "loss": 0.2676, "num_tokens": 2213334775.0, "step": 2901 }, { "epoch": 3.9600034139523967, "grad_norm": 0.17974034418024576, "learning_rate": 1.1591108256308199e-05, "loss": 0.278, "num_tokens": 2214186527.0, "step": 2902 }, { "epoch": 3.9613689949110773, "grad_norm": 0.19966645226381996, "learning_rate": 1.1586148024943139e-05, "loss": 0.2725, "num_tokens": 2214913266.0, "step": 2903 }, { "epoch": 3.962734575869758, "grad_norm": 0.19502215134012568, "learning_rate": 1.1581187614769697e-05, "loss": 0.2788, "num_tokens": 2215657562.0, "step": 2904 }, { "epoch": 3.9641001568284384, "grad_norm": 0.1791941090336952, "learning_rate": 1.1576227027301083e-05, "loss": 0.2687, "num_tokens": 2216420065.0, "step": 2905 }, { "epoch": 3.9654657377871185, "grad_norm": 0.19711852886322961, "learning_rate": 1.157126626405056e-05, "loss": 0.2595, "num_tokens": 2217179107.0, "step": 2906 }, { "epoch": 3.966831318745799, "grad_norm": 0.1838505357100646, "learning_rate": 1.1566305326531437e-05, "loss": 0.2636, "num_tokens": 2217875360.0, "step": 2907 }, { "epoch": 3.9681968997044796, "grad_norm": 0.18495498299911414, "learning_rate": 1.1561344216257087e-05, "loss": 0.2657, "num_tokens": 2218615164.0, "step": 2908 }, { "epoch": 3.96956248066316, "grad_norm": 0.18997089681925353, "learning_rate": 1.1556382934740926e-05, "loss": 0.2696, "num_tokens": 2219426841.0, "step": 2909 }, { "epoch": 3.9709280616218408, "grad_norm": 0.1789481299060695, "learning_rate": 1.1551421483496423e-05, "loss": 0.2787, "num_tokens": 2220217734.0, "step": 2910 }, { "epoch": 3.9722936425805213, "grad_norm": 0.18948043335010747, "learning_rate": 1.1546459864037105e-05, "loss": 0.2626, "num_tokens": 2220952925.0, "step": 2911 }, { "epoch": 3.973659223539202, "grad_norm": 0.18081036130446076, "learning_rate": 1.1541498077876552e-05, "loss": 0.267, "num_tokens": 2221740519.0, "step": 2912 }, { "epoch": 3.9750248044978824, "grad_norm": 0.17924454203021675, "learning_rate": 1.153653612652838e-05, "loss": 0.2663, "num_tokens": 2222435362.0, "step": 2913 }, { "epoch": 3.976390385456563, "grad_norm": 0.1877619924262889, "learning_rate": 1.1531574011506274e-05, "loss": 0.2705, "num_tokens": 2223221232.0, "step": 2914 }, { "epoch": 3.977755966415243, "grad_norm": 0.188899741527969, "learning_rate": 1.1526611734323957e-05, "loss": 0.2846, "num_tokens": 2224014841.0, "step": 2915 }, { "epoch": 3.9791215473739237, "grad_norm": 0.18898338604630513, "learning_rate": 1.152164929649521e-05, "loss": 0.2644, "num_tokens": 2224704990.0, "step": 2916 }, { "epoch": 3.980487128332604, "grad_norm": 0.18259753813004154, "learning_rate": 1.1516686699533848e-05, "loss": 0.2794, "num_tokens": 2225370203.0, "step": 2917 }, { "epoch": 3.9818527092912848, "grad_norm": 0.2051986681127575, "learning_rate": 1.1511723944953752e-05, "loss": 0.2792, "num_tokens": 2226114677.0, "step": 2918 }, { "epoch": 3.9832182902499653, "grad_norm": 0.1819290013582697, "learning_rate": 1.150676103426884e-05, "loss": 0.2638, "num_tokens": 2226873356.0, "step": 2919 }, { "epoch": 3.984583871208646, "grad_norm": 0.16729362640826265, "learning_rate": 1.1501797968993082e-05, "loss": 0.2728, "num_tokens": 2227558989.0, "step": 2920 }, { "epoch": 3.9859494521673264, "grad_norm": 0.1857059618879145, "learning_rate": 1.1496834750640497e-05, "loss": 0.2794, "num_tokens": 2228345563.0, "step": 2921 }, { "epoch": 3.987315033126007, "grad_norm": 0.18494213735430767, "learning_rate": 1.1491871380725143e-05, "loss": 0.2754, "num_tokens": 2229074785.0, "step": 2922 }, { "epoch": 3.9886806140846875, "grad_norm": 0.1879672316171795, "learning_rate": 1.1486907860761136e-05, "loss": 0.2787, "num_tokens": 2229803139.0, "step": 2923 }, { "epoch": 3.9900461950433677, "grad_norm": 0.18580227268952013, "learning_rate": 1.1481944192262625e-05, "loss": 0.267, "num_tokens": 2230574186.0, "step": 2924 }, { "epoch": 3.991411776002048, "grad_norm": 0.1794287409782044, "learning_rate": 1.1476980376743811e-05, "loss": 0.2827, "num_tokens": 2231362494.0, "step": 2925 }, { "epoch": 3.9927773569607288, "grad_norm": 0.18372679374058787, "learning_rate": 1.147201641571894e-05, "loss": 0.269, "num_tokens": 2232149136.0, "step": 2926 }, { "epoch": 3.9941429379194093, "grad_norm": 0.18404995919382425, "learning_rate": 1.1467052310702303e-05, "loss": 0.2663, "num_tokens": 2232837205.0, "step": 2927 }, { "epoch": 3.99550851887809, "grad_norm": 0.18852185320196313, "learning_rate": 1.1462088063208232e-05, "loss": 0.2924, "num_tokens": 2233653939.0, "step": 2928 }, { "epoch": 3.9968740998367704, "grad_norm": 0.19534993573658424, "learning_rate": 1.1457123674751103e-05, "loss": 0.2725, "num_tokens": 2234432943.0, "step": 2929 }, { "epoch": 3.998239680795451, "grad_norm": 0.17807193972253368, "learning_rate": 1.1452159146845341e-05, "loss": 0.2772, "num_tokens": 2235176833.0, "step": 2930 }, { "epoch": 3.9996052617541316, "grad_norm": 0.18177812987879696, "learning_rate": 1.1447194481005399e-05, "loss": 0.276, "num_tokens": 2235940096.0, "step": 2931 }, { "epoch": 4.0, "grad_norm": 0.259530751404826, "learning_rate": 1.1442229678745793e-05, "loss": 0.2413, "num_tokens": 2236168544.0, "step": 2932 }, { "epoch": 4.00136558095868, "grad_norm": 0.3567205198469742, "learning_rate": 1.143726474158106e-05, "loss": 0.2435, "num_tokens": 2236987360.0, "step": 2933 }, { "epoch": 4.002731161917361, "grad_norm": 0.3116754729913395, "learning_rate": 1.1432299671025792e-05, "loss": 0.244, "num_tokens": 2237758713.0, "step": 2934 }, { "epoch": 4.004096742876041, "grad_norm": 0.23221563228636755, "learning_rate": 1.1427334468594613e-05, "loss": 0.2408, "num_tokens": 2238587284.0, "step": 2935 }, { "epoch": 4.005462323834722, "grad_norm": 0.2821871462432716, "learning_rate": 1.1422369135802197e-05, "loss": 0.2394, "num_tokens": 2239322064.0, "step": 2936 }, { "epoch": 4.006827904793402, "grad_norm": 0.34115144000672554, "learning_rate": 1.1417403674163248e-05, "loss": 0.2341, "num_tokens": 2240110907.0, "step": 2937 }, { "epoch": 4.008193485752083, "grad_norm": 0.30077723103714255, "learning_rate": 1.1412438085192512e-05, "loss": 0.2378, "num_tokens": 2240870632.0, "step": 2938 }, { "epoch": 4.0095590667107635, "grad_norm": 0.2382640466385962, "learning_rate": 1.1407472370404783e-05, "loss": 0.2393, "num_tokens": 2241638141.0, "step": 2939 }, { "epoch": 4.0109246476694445, "grad_norm": 0.28695913251003635, "learning_rate": 1.1402506531314875e-05, "loss": 0.236, "num_tokens": 2242379438.0, "step": 2940 }, { "epoch": 4.012290228628125, "grad_norm": 0.2505091018236723, "learning_rate": 1.1397540569437657e-05, "loss": 0.2297, "num_tokens": 2243077339.0, "step": 2941 }, { "epoch": 4.013655809586805, "grad_norm": 0.2416135761754938, "learning_rate": 1.1392574486288026e-05, "loss": 0.2323, "num_tokens": 2243833947.0, "step": 2942 }, { "epoch": 4.015021390545486, "grad_norm": 0.2089812544514556, "learning_rate": 1.1387608283380921e-05, "loss": 0.2258, "num_tokens": 2244580544.0, "step": 2943 }, { "epoch": 4.016386971504166, "grad_norm": 0.2402718057030777, "learning_rate": 1.1382641962231316e-05, "loss": 0.2291, "num_tokens": 2245321912.0, "step": 2944 }, { "epoch": 4.017752552462847, "grad_norm": 0.27322868807481676, "learning_rate": 1.1377675524354214e-05, "loss": 0.2405, "num_tokens": 2246131359.0, "step": 2945 }, { "epoch": 4.019118133421527, "grad_norm": 0.22983203955466802, "learning_rate": 1.1372708971264672e-05, "loss": 0.2436, "num_tokens": 2246933675.0, "step": 2946 }, { "epoch": 4.020483714380208, "grad_norm": 0.2097140513996182, "learning_rate": 1.136774230447776e-05, "loss": 0.2327, "num_tokens": 2247689412.0, "step": 2947 }, { "epoch": 4.021849295338888, "grad_norm": 0.21786118623329076, "learning_rate": 1.1362775525508597e-05, "loss": 0.2289, "num_tokens": 2248483329.0, "step": 2948 }, { "epoch": 4.023214876297569, "grad_norm": 0.22194180757690518, "learning_rate": 1.1357808635872335e-05, "loss": 0.2412, "num_tokens": 2249242772.0, "step": 2949 }, { "epoch": 4.024580457256249, "grad_norm": 0.20848770662541802, "learning_rate": 1.1352841637084149e-05, "loss": 0.2247, "num_tokens": 2250015676.0, "step": 2950 }, { "epoch": 4.025946038214929, "grad_norm": 0.23970581517768458, "learning_rate": 1.134787453065926e-05, "loss": 0.2338, "num_tokens": 2250748202.0, "step": 2951 }, { "epoch": 4.02731161917361, "grad_norm": 0.2122427112895838, "learning_rate": 1.1342907318112919e-05, "loss": 0.2363, "num_tokens": 2251483866.0, "step": 2952 }, { "epoch": 4.02867720013229, "grad_norm": 0.23648340520337005, "learning_rate": 1.1337940000960403e-05, "loss": 0.2293, "num_tokens": 2252274994.0, "step": 2953 }, { "epoch": 4.030042781090971, "grad_norm": 0.20418911812607834, "learning_rate": 1.1332972580717028e-05, "loss": 0.2253, "num_tokens": 2252950740.0, "step": 2954 }, { "epoch": 4.0314083620496515, "grad_norm": 0.21687164625774571, "learning_rate": 1.1328005058898139e-05, "loss": 0.2344, "num_tokens": 2253725540.0, "step": 2955 }, { "epoch": 4.0327739430083325, "grad_norm": 0.20600558735052868, "learning_rate": 1.1323037437019113e-05, "loss": 0.2286, "num_tokens": 2254562951.0, "step": 2956 }, { "epoch": 4.034139523967013, "grad_norm": 0.20001676869141166, "learning_rate": 1.1318069716595353e-05, "loss": 0.2263, "num_tokens": 2255328853.0, "step": 2957 }, { "epoch": 4.035505104925694, "grad_norm": 0.2252733489817729, "learning_rate": 1.13131018991423e-05, "loss": 0.2384, "num_tokens": 2256029792.0, "step": 2958 }, { "epoch": 4.036870685884374, "grad_norm": 0.22880066081324976, "learning_rate": 1.1308133986175412e-05, "loss": 0.2245, "num_tokens": 2256690837.0, "step": 2959 }, { "epoch": 4.038236266843054, "grad_norm": 0.20723609057088757, "learning_rate": 1.1303165979210189e-05, "loss": 0.2265, "num_tokens": 2257404593.0, "step": 2960 }, { "epoch": 4.039601847801735, "grad_norm": 0.25350003585749636, "learning_rate": 1.1298197879762158e-05, "loss": 0.228, "num_tokens": 2258142808.0, "step": 2961 }, { "epoch": 4.040967428760415, "grad_norm": 0.2303284502804076, "learning_rate": 1.1293229689346868e-05, "loss": 0.2403, "num_tokens": 2258926808.0, "step": 2962 }, { "epoch": 4.042333009719096, "grad_norm": 0.19898947145167858, "learning_rate": 1.1288261409479898e-05, "loss": 0.2334, "num_tokens": 2259768969.0, "step": 2963 }, { "epoch": 4.043698590677776, "grad_norm": 0.19214636908592034, "learning_rate": 1.128329304167685e-05, "loss": 0.2316, "num_tokens": 2260539230.0, "step": 2964 }, { "epoch": 4.045064171636457, "grad_norm": 0.2274160130600864, "learning_rate": 1.1278324587453369e-05, "loss": 0.2395, "num_tokens": 2261278397.0, "step": 2965 }, { "epoch": 4.046429752595137, "grad_norm": 0.19315576180610558, "learning_rate": 1.1273356048325107e-05, "loss": 0.2289, "num_tokens": 2262055951.0, "step": 2966 }, { "epoch": 4.047795333553818, "grad_norm": 0.20215573834102485, "learning_rate": 1.1268387425807753e-05, "loss": 0.2334, "num_tokens": 2262797673.0, "step": 2967 }, { "epoch": 4.049160914512498, "grad_norm": 0.42570247561264096, "learning_rate": 1.1263418721417023e-05, "loss": 0.2328, "num_tokens": 2263620425.0, "step": 2968 }, { "epoch": 4.050526495471178, "grad_norm": 0.22980395835194284, "learning_rate": 1.1258449936668643e-05, "loss": 0.2393, "num_tokens": 2264328474.0, "step": 2969 }, { "epoch": 4.051892076429859, "grad_norm": 0.2127558652513868, "learning_rate": 1.1253481073078382e-05, "loss": 0.2241, "num_tokens": 2265066284.0, "step": 2970 }, { "epoch": 4.0532576573885395, "grad_norm": 0.19927313426214288, "learning_rate": 1.1248512132162026e-05, "loss": 0.2292, "num_tokens": 2265837009.0, "step": 2971 }, { "epoch": 4.0546232383472205, "grad_norm": 0.20560902151810134, "learning_rate": 1.1243543115435376e-05, "loss": 0.2237, "num_tokens": 2266590447.0, "step": 2972 }, { "epoch": 4.055988819305901, "grad_norm": 0.23235024654073558, "learning_rate": 1.1238574024414268e-05, "loss": 0.2354, "num_tokens": 2267347558.0, "step": 2973 }, { "epoch": 4.057354400264582, "grad_norm": 0.2113246169791323, "learning_rate": 1.123360486061456e-05, "loss": 0.2316, "num_tokens": 2268090185.0, "step": 2974 }, { "epoch": 4.058719981223262, "grad_norm": 0.21350457315541654, "learning_rate": 1.1228635625552124e-05, "loss": 0.2402, "num_tokens": 2268829092.0, "step": 2975 }, { "epoch": 4.060085562181943, "grad_norm": 0.2123411775104535, "learning_rate": 1.1223666320742858e-05, "loss": 0.2254, "num_tokens": 2269535727.0, "step": 2976 }, { "epoch": 4.061451143140623, "grad_norm": 0.21499485670566587, "learning_rate": 1.1218696947702688e-05, "loss": 0.2346, "num_tokens": 2270270478.0, "step": 2977 }, { "epoch": 4.062816724099303, "grad_norm": 0.20167318044401317, "learning_rate": 1.1213727507947547e-05, "loss": 0.2342, "num_tokens": 2271085462.0, "step": 2978 }, { "epoch": 4.064182305057984, "grad_norm": 0.2189921301777101, "learning_rate": 1.1208758002993403e-05, "loss": 0.2264, "num_tokens": 2271772482.0, "step": 2979 }, { "epoch": 4.065547886016664, "grad_norm": 0.22366448383397522, "learning_rate": 1.120378843435623e-05, "loss": 0.2366, "num_tokens": 2272494755.0, "step": 2980 }, { "epoch": 4.066913466975345, "grad_norm": 0.20274208786727263, "learning_rate": 1.119881880355203e-05, "loss": 0.2427, "num_tokens": 2273323102.0, "step": 2981 }, { "epoch": 4.068279047934025, "grad_norm": 0.19959727507650785, "learning_rate": 1.1193849112096827e-05, "loss": 0.2287, "num_tokens": 2274073193.0, "step": 2982 }, { "epoch": 4.069644628892706, "grad_norm": 0.20115044330998796, "learning_rate": 1.118887936150665e-05, "loss": 0.2345, "num_tokens": 2274874460.0, "step": 2983 }, { "epoch": 4.071010209851386, "grad_norm": 0.2097136680678643, "learning_rate": 1.1183909553297564e-05, "loss": 0.2404, "num_tokens": 2275574966.0, "step": 2984 }, { "epoch": 4.072375790810067, "grad_norm": 0.20026519486414557, "learning_rate": 1.1178939688985633e-05, "loss": 0.2407, "num_tokens": 2276343888.0, "step": 2985 }, { "epoch": 4.073741371768747, "grad_norm": 0.23382542737896203, "learning_rate": 1.1173969770086957e-05, "loss": 0.2216, "num_tokens": 2277085124.0, "step": 2986 }, { "epoch": 4.0751069527274275, "grad_norm": 0.18997779175287827, "learning_rate": 1.1168999798117635e-05, "loss": 0.2382, "num_tokens": 2277906360.0, "step": 2987 }, { "epoch": 4.0764725336861085, "grad_norm": 0.21599336846742037, "learning_rate": 1.1164029774593791e-05, "loss": 0.2358, "num_tokens": 2278676149.0, "step": 2988 }, { "epoch": 4.077838114644789, "grad_norm": 0.19831287778439152, "learning_rate": 1.1159059701031571e-05, "loss": 0.2376, "num_tokens": 2279536253.0, "step": 2989 }, { "epoch": 4.07920369560347, "grad_norm": 0.19533853289510084, "learning_rate": 1.1154089578947123e-05, "loss": 0.2341, "num_tokens": 2280345870.0, "step": 2990 }, { "epoch": 4.08056927656215, "grad_norm": 0.19971431101126547, "learning_rate": 1.1149119409856617e-05, "loss": 0.2282, "num_tokens": 2281166375.0, "step": 2991 }, { "epoch": 4.081934857520831, "grad_norm": 0.20992421928215338, "learning_rate": 1.1144149195276237e-05, "loss": 0.237, "num_tokens": 2281955896.0, "step": 2992 }, { "epoch": 4.083300438479511, "grad_norm": 0.19182293730256667, "learning_rate": 1.113917893672218e-05, "loss": 0.2291, "num_tokens": 2282819016.0, "step": 2993 }, { "epoch": 4.084666019438192, "grad_norm": 0.20195617196831128, "learning_rate": 1.1134208635710657e-05, "loss": 0.2376, "num_tokens": 2283607744.0, "step": 2994 }, { "epoch": 4.086031600396872, "grad_norm": 0.20433530698770025, "learning_rate": 1.1129238293757891e-05, "loss": 0.2307, "num_tokens": 2284358032.0, "step": 2995 }, { "epoch": 4.087397181355552, "grad_norm": 0.21619160180426805, "learning_rate": 1.112426791238012e-05, "loss": 0.2302, "num_tokens": 2285100283.0, "step": 2996 }, { "epoch": 4.088762762314233, "grad_norm": 0.19818609925744374, "learning_rate": 1.1119297493093588e-05, "loss": 0.2297, "num_tokens": 2285846988.0, "step": 2997 }, { "epoch": 4.090128343272913, "grad_norm": 0.20212439243901867, "learning_rate": 1.111432703741456e-05, "loss": 0.2325, "num_tokens": 2286620684.0, "step": 2998 }, { "epoch": 4.091493924231594, "grad_norm": 0.19034934487831467, "learning_rate": 1.11093565468593e-05, "loss": 0.2294, "num_tokens": 2287384151.0, "step": 2999 }, { "epoch": 4.092859505190274, "grad_norm": 0.22016614257432005, "learning_rate": 1.1104386022944096e-05, "loss": 0.2302, "num_tokens": 2288137806.0, "step": 3000 }, { "epoch": 4.094225086148955, "grad_norm": 0.20887131337377915, "learning_rate": 1.1099415467185237e-05, "loss": 0.2451, "num_tokens": 2288919886.0, "step": 3001 }, { "epoch": 4.095590667107635, "grad_norm": 0.26431269182398626, "learning_rate": 1.1094444881099025e-05, "loss": 0.2276, "num_tokens": 2289640952.0, "step": 3002 }, { "epoch": 4.096956248066316, "grad_norm": 0.21373615018941874, "learning_rate": 1.1089474266201769e-05, "loss": 0.2243, "num_tokens": 2290344618.0, "step": 3003 }, { "epoch": 4.0983218290249965, "grad_norm": 0.23399530516651257, "learning_rate": 1.1084503624009787e-05, "loss": 0.2391, "num_tokens": 2291000956.0, "step": 3004 }, { "epoch": 4.099687409983677, "grad_norm": 0.20161223947388524, "learning_rate": 1.1079532956039413e-05, "loss": 0.2289, "num_tokens": 2291746004.0, "step": 3005 }, { "epoch": 4.101052990942358, "grad_norm": 0.21269249421310113, "learning_rate": 1.1074562263806977e-05, "loss": 0.2371, "num_tokens": 2292480613.0, "step": 3006 }, { "epoch": 4.102418571901038, "grad_norm": 0.34864210026694215, "learning_rate": 1.106959154882882e-05, "loss": 0.2482, "num_tokens": 2293296004.0, "step": 3007 }, { "epoch": 4.103784152859719, "grad_norm": 0.20654039946391828, "learning_rate": 1.1064620812621299e-05, "loss": 0.2332, "num_tokens": 2294045155.0, "step": 3008 }, { "epoch": 4.105149733818399, "grad_norm": 0.20936868170671807, "learning_rate": 1.1059650056700763e-05, "loss": 0.2376, "num_tokens": 2294773432.0, "step": 3009 }, { "epoch": 4.10651531477708, "grad_norm": 0.25975734440976667, "learning_rate": 1.1054679282583583e-05, "loss": 0.2371, "num_tokens": 2295464793.0, "step": 3010 }, { "epoch": 4.10788089573576, "grad_norm": 0.24863519191567104, "learning_rate": 1.104970849178612e-05, "loss": 0.2349, "num_tokens": 2296242292.0, "step": 3011 }, { "epoch": 4.109246476694441, "grad_norm": 0.21221002973890402, "learning_rate": 1.1044737685824746e-05, "loss": 0.2307, "num_tokens": 2297010438.0, "step": 3012 }, { "epoch": 4.110612057653121, "grad_norm": 0.2079083851834388, "learning_rate": 1.1039766866215844e-05, "loss": 0.2478, "num_tokens": 2297848831.0, "step": 3013 }, { "epoch": 4.111977638611801, "grad_norm": 0.21974156278439927, "learning_rate": 1.1034796034475795e-05, "loss": 0.2334, "num_tokens": 2298581441.0, "step": 3014 }, { "epoch": 4.113343219570482, "grad_norm": 0.2220385321033225, "learning_rate": 1.1029825192120978e-05, "loss": 0.2413, "num_tokens": 2299342586.0, "step": 3015 }, { "epoch": 4.114708800529162, "grad_norm": 0.2245205056810607, "learning_rate": 1.102485434066779e-05, "loss": 0.2338, "num_tokens": 2300023907.0, "step": 3016 }, { "epoch": 4.116074381487843, "grad_norm": 0.20875638652486952, "learning_rate": 1.1019883481632618e-05, "loss": 0.2515, "num_tokens": 2300749921.0, "step": 3017 }, { "epoch": 4.117439962446523, "grad_norm": 0.21414916099508424, "learning_rate": 1.1014912616531858e-05, "loss": 0.2387, "num_tokens": 2301634068.0, "step": 3018 }, { "epoch": 4.118805543405204, "grad_norm": 0.2021738663416521, "learning_rate": 1.1009941746881903e-05, "loss": 0.2324, "num_tokens": 2302386679.0, "step": 3019 }, { "epoch": 4.1201711243638846, "grad_norm": 0.22044489951305987, "learning_rate": 1.1004970874199152e-05, "loss": 0.2372, "num_tokens": 2303181031.0, "step": 3020 }, { "epoch": 4.1215367053225656, "grad_norm": 0.23148069915324562, "learning_rate": 1.1000000000000001e-05, "loss": 0.2389, "num_tokens": 2303965214.0, "step": 3021 }, { "epoch": 4.122902286281246, "grad_norm": 0.20394877786838123, "learning_rate": 1.0995029125800854e-05, "loss": 0.2385, "num_tokens": 2304706226.0, "step": 3022 }, { "epoch": 4.124267867239926, "grad_norm": 0.21521157264487975, "learning_rate": 1.09900582531181e-05, "loss": 0.2258, "num_tokens": 2305449469.0, "step": 3023 }, { "epoch": 4.125633448198607, "grad_norm": 0.20771484509126392, "learning_rate": 1.098508738346815e-05, "loss": 0.2367, "num_tokens": 2306175759.0, "step": 3024 }, { "epoch": 4.126999029157287, "grad_norm": 0.22028533693698774, "learning_rate": 1.0980116518367383e-05, "loss": 0.2272, "num_tokens": 2306857275.0, "step": 3025 }, { "epoch": 4.128364610115968, "grad_norm": 0.22737495496610513, "learning_rate": 1.0975145659332216e-05, "loss": 0.237, "num_tokens": 2307698780.0, "step": 3026 }, { "epoch": 4.129730191074648, "grad_norm": 0.20107042611775644, "learning_rate": 1.0970174807879023e-05, "loss": 0.2323, "num_tokens": 2308387770.0, "step": 3027 }, { "epoch": 4.131095772033329, "grad_norm": 0.21356337464756264, "learning_rate": 1.096520396552421e-05, "loss": 0.2349, "num_tokens": 2309184442.0, "step": 3028 }, { "epoch": 4.132461352992009, "grad_norm": 0.21152912612828545, "learning_rate": 1.0960233133784159e-05, "loss": 0.2359, "num_tokens": 2309934760.0, "step": 3029 }, { "epoch": 4.13382693395069, "grad_norm": 0.21413171048858157, "learning_rate": 1.0955262314175256e-05, "loss": 0.24, "num_tokens": 2310770840.0, "step": 3030 }, { "epoch": 4.13519251490937, "grad_norm": 0.21260498689931373, "learning_rate": 1.0950291508213886e-05, "loss": 0.2301, "num_tokens": 2311483471.0, "step": 3031 }, { "epoch": 4.13655809586805, "grad_norm": 0.2025962812961191, "learning_rate": 1.0945320717416421e-05, "loss": 0.2406, "num_tokens": 2312280668.0, "step": 3032 }, { "epoch": 4.137923676826731, "grad_norm": 0.20384162225970578, "learning_rate": 1.094034994329924e-05, "loss": 0.2364, "num_tokens": 2313026099.0, "step": 3033 }, { "epoch": 4.1392892577854115, "grad_norm": 0.20681176141062596, "learning_rate": 1.0935379187378704e-05, "loss": 0.2392, "num_tokens": 2313777789.0, "step": 3034 }, { "epoch": 4.1406548387440925, "grad_norm": 0.20065539244099134, "learning_rate": 1.0930408451171184e-05, "loss": 0.2334, "num_tokens": 2314593680.0, "step": 3035 }, { "epoch": 4.142020419702773, "grad_norm": 0.22573382750884557, "learning_rate": 1.0925437736193028e-05, "loss": 0.2435, "num_tokens": 2315338516.0, "step": 3036 }, { "epoch": 4.143386000661454, "grad_norm": 0.20889935856263053, "learning_rate": 1.092046704396059e-05, "loss": 0.2334, "num_tokens": 2316062639.0, "step": 3037 }, { "epoch": 4.144751581620134, "grad_norm": 0.20089951816966278, "learning_rate": 1.0915496375990215e-05, "loss": 0.2297, "num_tokens": 2316855557.0, "step": 3038 }, { "epoch": 4.146117162578815, "grad_norm": 0.20241669095683282, "learning_rate": 1.0910525733798234e-05, "loss": 0.2336, "num_tokens": 2317637883.0, "step": 3039 }, { "epoch": 4.147482743537495, "grad_norm": 0.20179725072021962, "learning_rate": 1.0905555118900979e-05, "loss": 0.2357, "num_tokens": 2318407581.0, "step": 3040 }, { "epoch": 4.148848324496175, "grad_norm": 0.21925344051536316, "learning_rate": 1.0900584532814766e-05, "loss": 0.2428, "num_tokens": 2319176345.0, "step": 3041 }, { "epoch": 4.150213905454856, "grad_norm": 0.19972262253201878, "learning_rate": 1.0895613977055908e-05, "loss": 0.2397, "num_tokens": 2319902039.0, "step": 3042 }, { "epoch": 4.151579486413536, "grad_norm": 0.21847165264060467, "learning_rate": 1.0890643453140702e-05, "loss": 0.2402, "num_tokens": 2320649678.0, "step": 3043 }, { "epoch": 4.152945067372217, "grad_norm": 0.20672435870651132, "learning_rate": 1.0885672962585443e-05, "loss": 0.2352, "num_tokens": 2321393010.0, "step": 3044 }, { "epoch": 4.154310648330897, "grad_norm": 0.18754411335238574, "learning_rate": 1.0880702506906418e-05, "loss": 0.2363, "num_tokens": 2322189304.0, "step": 3045 }, { "epoch": 4.155676229289578, "grad_norm": 0.23884810056471587, "learning_rate": 1.0875732087619884e-05, "loss": 0.241, "num_tokens": 2322994037.0, "step": 3046 }, { "epoch": 4.157041810248258, "grad_norm": 0.22598991255324583, "learning_rate": 1.0870761706242112e-05, "loss": 0.2195, "num_tokens": 2323715960.0, "step": 3047 }, { "epoch": 4.158407391206939, "grad_norm": 0.20060243419477786, "learning_rate": 1.0865791364289347e-05, "loss": 0.2328, "num_tokens": 2324404936.0, "step": 3048 }, { "epoch": 4.159772972165619, "grad_norm": 0.2248131198820371, "learning_rate": 1.0860821063277824e-05, "loss": 0.2371, "num_tokens": 2325143633.0, "step": 3049 }, { "epoch": 4.1611385531242995, "grad_norm": 0.19441354638749594, "learning_rate": 1.0855850804723767e-05, "loss": 0.2299, "num_tokens": 2325939768.0, "step": 3050 }, { "epoch": 4.1625041340829805, "grad_norm": 0.21225564024239452, "learning_rate": 1.0850880590143387e-05, "loss": 0.2364, "num_tokens": 2326707300.0, "step": 3051 }, { "epoch": 4.163869715041661, "grad_norm": 0.19598277423915772, "learning_rate": 1.0845910421052878e-05, "loss": 0.2406, "num_tokens": 2327559022.0, "step": 3052 }, { "epoch": 4.165235296000342, "grad_norm": 0.2061929369454302, "learning_rate": 1.0840940298968432e-05, "loss": 0.2383, "num_tokens": 2328305326.0, "step": 3053 }, { "epoch": 4.166600876959022, "grad_norm": 0.21344630263274617, "learning_rate": 1.0835970225406213e-05, "loss": 0.2407, "num_tokens": 2329065882.0, "step": 3054 }, { "epoch": 4.167966457917703, "grad_norm": 0.2005535177505611, "learning_rate": 1.083100020188237e-05, "loss": 0.2369, "num_tokens": 2329833080.0, "step": 3055 }, { "epoch": 4.169332038876383, "grad_norm": 0.20869065443546297, "learning_rate": 1.082603022991305e-05, "loss": 0.242, "num_tokens": 2330593100.0, "step": 3056 }, { "epoch": 4.170697619835064, "grad_norm": 0.20516283692661014, "learning_rate": 1.0821060311014368e-05, "loss": 0.2492, "num_tokens": 2331345023.0, "step": 3057 }, { "epoch": 4.172063200793744, "grad_norm": 0.200393771532012, "learning_rate": 1.0816090446702443e-05, "loss": 0.2278, "num_tokens": 2332059904.0, "step": 3058 }, { "epoch": 4.173428781752424, "grad_norm": 0.20865356011847716, "learning_rate": 1.0811120638493353e-05, "loss": 0.2214, "num_tokens": 2332774494.0, "step": 3059 }, { "epoch": 4.174794362711105, "grad_norm": 0.20082147874317002, "learning_rate": 1.080615088790318e-05, "loss": 0.2279, "num_tokens": 2333508237.0, "step": 3060 }, { "epoch": 4.176159943669785, "grad_norm": 0.19885375270303932, "learning_rate": 1.0801181196447975e-05, "loss": 0.2367, "num_tokens": 2334343237.0, "step": 3061 }, { "epoch": 4.177525524628466, "grad_norm": 0.21133462373849365, "learning_rate": 1.0796211565643773e-05, "loss": 0.2338, "num_tokens": 2335059906.0, "step": 3062 }, { "epoch": 4.178891105587146, "grad_norm": 0.20702309773000258, "learning_rate": 1.0791241997006603e-05, "loss": 0.247, "num_tokens": 2335900304.0, "step": 3063 }, { "epoch": 4.180256686545827, "grad_norm": 0.21917674490134564, "learning_rate": 1.0786272492052454e-05, "loss": 0.2305, "num_tokens": 2336629569.0, "step": 3064 }, { "epoch": 4.181622267504507, "grad_norm": 0.20935642300242066, "learning_rate": 1.0781303052297316e-05, "loss": 0.2313, "num_tokens": 2337330186.0, "step": 3065 }, { "epoch": 4.182987848463188, "grad_norm": 0.1992655762736646, "learning_rate": 1.0776333679257143e-05, "loss": 0.2344, "num_tokens": 2338140663.0, "step": 3066 }, { "epoch": 4.1843534294218685, "grad_norm": 0.2121090168469975, "learning_rate": 1.0771364374447878e-05, "loss": 0.2348, "num_tokens": 2338861753.0, "step": 3067 }, { "epoch": 4.185719010380549, "grad_norm": 0.21660037780204572, "learning_rate": 1.0766395139385443e-05, "loss": 0.2316, "num_tokens": 2339582314.0, "step": 3068 }, { "epoch": 4.18708459133923, "grad_norm": 0.20805809975757786, "learning_rate": 1.0761425975585735e-05, "loss": 0.23, "num_tokens": 2340273598.0, "step": 3069 }, { "epoch": 4.18845017229791, "grad_norm": 0.2079837716099726, "learning_rate": 1.075645688456463e-05, "loss": 0.2321, "num_tokens": 2341005832.0, "step": 3070 }, { "epoch": 4.189815753256591, "grad_norm": 0.20598743450596463, "learning_rate": 1.075148786783798e-05, "loss": 0.2278, "num_tokens": 2341756683.0, "step": 3071 }, { "epoch": 4.191181334215271, "grad_norm": 0.21214065871272952, "learning_rate": 1.074651892692162e-05, "loss": 0.2408, "num_tokens": 2342522329.0, "step": 3072 }, { "epoch": 4.192546915173952, "grad_norm": 0.20393906945502938, "learning_rate": 1.0741550063331358e-05, "loss": 0.2457, "num_tokens": 2343333443.0, "step": 3073 }, { "epoch": 4.193912496132632, "grad_norm": 0.20964011962623855, "learning_rate": 1.0736581278582983e-05, "loss": 0.2342, "num_tokens": 2344085941.0, "step": 3074 }, { "epoch": 4.195278077091313, "grad_norm": 0.205432335308915, "learning_rate": 1.0731612574192248e-05, "loss": 0.2421, "num_tokens": 2344867882.0, "step": 3075 }, { "epoch": 4.196643658049993, "grad_norm": 0.20483511653651984, "learning_rate": 1.0726643951674895e-05, "loss": 0.2389, "num_tokens": 2345677941.0, "step": 3076 }, { "epoch": 4.198009239008673, "grad_norm": 0.20594712849069932, "learning_rate": 1.0721675412546636e-05, "loss": 0.2389, "num_tokens": 2346378536.0, "step": 3077 }, { "epoch": 4.199374819967354, "grad_norm": 0.20735257238572444, "learning_rate": 1.0716706958323153e-05, "loss": 0.2341, "num_tokens": 2347149882.0, "step": 3078 }, { "epoch": 4.200740400926034, "grad_norm": 0.21645782356358312, "learning_rate": 1.0711738590520109e-05, "loss": 0.2425, "num_tokens": 2347844545.0, "step": 3079 }, { "epoch": 4.202105981884715, "grad_norm": 0.2083967868462942, "learning_rate": 1.0706770310653137e-05, "loss": 0.2334, "num_tokens": 2348620408.0, "step": 3080 }, { "epoch": 4.203471562843395, "grad_norm": 0.2168416999625079, "learning_rate": 1.0701802120237845e-05, "loss": 0.2388, "num_tokens": 2349333715.0, "step": 3081 }, { "epoch": 4.204837143802076, "grad_norm": 0.1946529110591853, "learning_rate": 1.0696834020789812e-05, "loss": 0.2389, "num_tokens": 2350105779.0, "step": 3082 }, { "epoch": 4.2062027247607565, "grad_norm": 0.21922663210241894, "learning_rate": 1.0691866013824593e-05, "loss": 0.2431, "num_tokens": 2350825441.0, "step": 3083 }, { "epoch": 4.2075683057194375, "grad_norm": 0.21040965907356396, "learning_rate": 1.0686898100857708e-05, "loss": 0.2451, "num_tokens": 2351638437.0, "step": 3084 }, { "epoch": 4.208933886678118, "grad_norm": 0.20193490795667685, "learning_rate": 1.068193028340465e-05, "loss": 0.2338, "num_tokens": 2352401818.0, "step": 3085 }, { "epoch": 4.210299467636798, "grad_norm": 0.20976672578259514, "learning_rate": 1.0676962562980893e-05, "loss": 0.2317, "num_tokens": 2353076321.0, "step": 3086 }, { "epoch": 4.211665048595479, "grad_norm": 0.2158200670446393, "learning_rate": 1.0671994941101864e-05, "loss": 0.24, "num_tokens": 2353869317.0, "step": 3087 }, { "epoch": 4.213030629554159, "grad_norm": 0.21227295370896518, "learning_rate": 1.0667027419282976e-05, "loss": 0.2293, "num_tokens": 2354641343.0, "step": 3088 }, { "epoch": 4.21439621051284, "grad_norm": 0.19694899964490908, "learning_rate": 1.0662059999039598e-05, "loss": 0.2482, "num_tokens": 2355491971.0, "step": 3089 }, { "epoch": 4.21576179147152, "grad_norm": 0.2007243077750762, "learning_rate": 1.0657092681887084e-05, "loss": 0.2378, "num_tokens": 2356264724.0, "step": 3090 }, { "epoch": 4.217127372430201, "grad_norm": 0.20466828795644249, "learning_rate": 1.0652125469340742e-05, "loss": 0.2401, "num_tokens": 2357088088.0, "step": 3091 }, { "epoch": 4.218492953388881, "grad_norm": 0.22391692960079204, "learning_rate": 1.0647158362915854e-05, "loss": 0.2523, "num_tokens": 2357854793.0, "step": 3092 }, { "epoch": 4.219858534347562, "grad_norm": 0.20196338794581317, "learning_rate": 1.0642191364127672e-05, "loss": 0.2445, "num_tokens": 2358669950.0, "step": 3093 }, { "epoch": 4.221224115306242, "grad_norm": 0.20899101174786805, "learning_rate": 1.0637224474491405e-05, "loss": 0.2305, "num_tokens": 2359433702.0, "step": 3094 }, { "epoch": 4.222589696264922, "grad_norm": 0.190537831455725, "learning_rate": 1.0632257695522243e-05, "loss": 0.2402, "num_tokens": 2360259523.0, "step": 3095 }, { "epoch": 4.223955277223603, "grad_norm": 0.1980803021041769, "learning_rate": 1.062729102873533e-05, "loss": 0.2293, "num_tokens": 2361010442.0, "step": 3096 }, { "epoch": 4.225320858182283, "grad_norm": 0.1966411703073702, "learning_rate": 1.0622324475645789e-05, "loss": 0.2409, "num_tokens": 2361781135.0, "step": 3097 }, { "epoch": 4.226686439140964, "grad_norm": 0.21637400960642358, "learning_rate": 1.0617358037768689e-05, "loss": 0.2413, "num_tokens": 2362599399.0, "step": 3098 }, { "epoch": 4.2280520200996445, "grad_norm": 0.2061604804264618, "learning_rate": 1.0612391716619081e-05, "loss": 0.2337, "num_tokens": 2363302966.0, "step": 3099 }, { "epoch": 4.2294176010583255, "grad_norm": 0.19550469513994223, "learning_rate": 1.0607425513711977e-05, "loss": 0.234, "num_tokens": 2364123972.0, "step": 3100 }, { "epoch": 4.230783182017006, "grad_norm": 0.19493038231525944, "learning_rate": 1.0602459430562348e-05, "loss": 0.2254, "num_tokens": 2364879335.0, "step": 3101 }, { "epoch": 4.232148762975687, "grad_norm": 0.20462021460951182, "learning_rate": 1.059749346868513e-05, "loss": 0.2315, "num_tokens": 2365698147.0, "step": 3102 }, { "epoch": 4.233514343934367, "grad_norm": 0.21474196614100272, "learning_rate": 1.0592527629595222e-05, "loss": 0.2386, "num_tokens": 2366446852.0, "step": 3103 }, { "epoch": 4.234879924893047, "grad_norm": 0.1969899987509542, "learning_rate": 1.058756191480749e-05, "loss": 0.234, "num_tokens": 2367199993.0, "step": 3104 }, { "epoch": 4.236245505851728, "grad_norm": 0.21428092856227068, "learning_rate": 1.0582596325836754e-05, "loss": 0.2435, "num_tokens": 2367975227.0, "step": 3105 }, { "epoch": 4.237611086810408, "grad_norm": 0.20318132819512028, "learning_rate": 1.0577630864197808e-05, "loss": 0.2319, "num_tokens": 2368717223.0, "step": 3106 }, { "epoch": 4.238976667769089, "grad_norm": 0.22663260883454947, "learning_rate": 1.0572665531405393e-05, "loss": 0.2422, "num_tokens": 2369437957.0, "step": 3107 }, { "epoch": 4.240342248727769, "grad_norm": 0.1910331821684642, "learning_rate": 1.056770032897421e-05, "loss": 0.2391, "num_tokens": 2370315977.0, "step": 3108 }, { "epoch": 4.24170782968645, "grad_norm": 0.20847047091907853, "learning_rate": 1.0562735258418943e-05, "loss": 0.2442, "num_tokens": 2371059578.0, "step": 3109 }, { "epoch": 4.24307341064513, "grad_norm": 0.2164927402827901, "learning_rate": 1.055777032125421e-05, "loss": 0.239, "num_tokens": 2371881213.0, "step": 3110 }, { "epoch": 4.244438991603811, "grad_norm": 0.19065370483505387, "learning_rate": 1.0552805518994604e-05, "loss": 0.2448, "num_tokens": 2372727248.0, "step": 3111 }, { "epoch": 4.245804572562491, "grad_norm": 0.2132099726736033, "learning_rate": 1.0547840853154663e-05, "loss": 0.2368, "num_tokens": 2373487194.0, "step": 3112 }, { "epoch": 4.247170153521171, "grad_norm": 0.22539648099747198, "learning_rate": 1.0542876325248898e-05, "loss": 0.2419, "num_tokens": 2374239350.0, "step": 3113 }, { "epoch": 4.248535734479852, "grad_norm": 0.2061580866744798, "learning_rate": 1.053791193679177e-05, "loss": 0.2432, "num_tokens": 2375006854.0, "step": 3114 }, { "epoch": 4.249901315438533, "grad_norm": 0.20319103738308167, "learning_rate": 1.0532947689297702e-05, "loss": 0.2396, "num_tokens": 2375811675.0, "step": 3115 }, { "epoch": 4.251266896397214, "grad_norm": 0.21203571703425464, "learning_rate": 1.0527983584281065e-05, "loss": 0.2308, "num_tokens": 2376578361.0, "step": 3116 }, { "epoch": 4.252632477355894, "grad_norm": 0.20709345172654664, "learning_rate": 1.0523019623256191e-05, "loss": 0.2293, "num_tokens": 2377266865.0, "step": 3117 }, { "epoch": 4.253998058314575, "grad_norm": 0.22012829244602425, "learning_rate": 1.051805580773738e-05, "loss": 0.2372, "num_tokens": 2378006051.0, "step": 3118 }, { "epoch": 4.255363639273255, "grad_norm": 0.21757228619812713, "learning_rate": 1.0513092139238868e-05, "loss": 0.2348, "num_tokens": 2378773515.0, "step": 3119 }, { "epoch": 4.256729220231936, "grad_norm": 0.20403491850676322, "learning_rate": 1.0508128619274858e-05, "loss": 0.2414, "num_tokens": 2379504325.0, "step": 3120 }, { "epoch": 4.258094801190616, "grad_norm": 0.20634220418258029, "learning_rate": 1.0503165249359504e-05, "loss": 0.2341, "num_tokens": 2380268033.0, "step": 3121 }, { "epoch": 4.259460382149296, "grad_norm": 0.21142154749421194, "learning_rate": 1.049820203100692e-05, "loss": 0.2186, "num_tokens": 2380969674.0, "step": 3122 }, { "epoch": 4.260825963107977, "grad_norm": 0.20557494083720151, "learning_rate": 1.0493238965731164e-05, "loss": 0.2401, "num_tokens": 2381724214.0, "step": 3123 }, { "epoch": 4.262191544066657, "grad_norm": 0.2185561630493158, "learning_rate": 1.048827605504625e-05, "loss": 0.2406, "num_tokens": 2382479549.0, "step": 3124 }, { "epoch": 4.263557125025338, "grad_norm": 0.2136340245665799, "learning_rate": 1.0483313300466157e-05, "loss": 0.2385, "num_tokens": 2383230172.0, "step": 3125 }, { "epoch": 4.264922705984018, "grad_norm": 0.21772329313341057, "learning_rate": 1.0478350703504794e-05, "loss": 0.2336, "num_tokens": 2383940168.0, "step": 3126 }, { "epoch": 4.266288286942699, "grad_norm": 0.2196221576092348, "learning_rate": 1.0473388265676044e-05, "loss": 0.229, "num_tokens": 2384682361.0, "step": 3127 }, { "epoch": 4.267653867901379, "grad_norm": 0.19986364973988935, "learning_rate": 1.0468425988493727e-05, "loss": 0.2295, "num_tokens": 2385467555.0, "step": 3128 }, { "epoch": 4.26901944886006, "grad_norm": 0.2246800183907757, "learning_rate": 1.0463463873471622e-05, "loss": 0.241, "num_tokens": 2386168634.0, "step": 3129 }, { "epoch": 4.2703850298187405, "grad_norm": 0.22071674913394, "learning_rate": 1.0458501922123454e-05, "loss": 0.2303, "num_tokens": 2386923642.0, "step": 3130 }, { "epoch": 4.271750610777421, "grad_norm": 0.206462589627499, "learning_rate": 1.0453540135962898e-05, "loss": 0.2405, "num_tokens": 2387756070.0, "step": 3131 }, { "epoch": 4.273116191736102, "grad_norm": 0.19701788031258882, "learning_rate": 1.0448578516503583e-05, "loss": 0.2228, "num_tokens": 2388485437.0, "step": 3132 }, { "epoch": 4.274481772694782, "grad_norm": 0.2231102024361708, "learning_rate": 1.044361706525908e-05, "loss": 0.2337, "num_tokens": 2389268384.0, "step": 3133 }, { "epoch": 4.275847353653463, "grad_norm": 0.19923881354704373, "learning_rate": 1.0438655783742919e-05, "loss": 0.2382, "num_tokens": 2389988167.0, "step": 3134 }, { "epoch": 4.277212934612143, "grad_norm": 0.22460552365854378, "learning_rate": 1.0433694673468564e-05, "loss": 0.2385, "num_tokens": 2390723404.0, "step": 3135 }, { "epoch": 4.278578515570824, "grad_norm": 0.2227902728338538, "learning_rate": 1.0428733735949444e-05, "loss": 0.2366, "num_tokens": 2391423418.0, "step": 3136 }, { "epoch": 4.279944096529504, "grad_norm": 0.2231116027875236, "learning_rate": 1.0423772972698918e-05, "loss": 0.2409, "num_tokens": 2392204783.0, "step": 3137 }, { "epoch": 4.281309677488185, "grad_norm": 0.20626602628903765, "learning_rate": 1.0418812385230305e-05, "loss": 0.2343, "num_tokens": 2392995363.0, "step": 3138 }, { "epoch": 4.282675258446865, "grad_norm": 0.20258110015794845, "learning_rate": 1.0413851975056868e-05, "loss": 0.2305, "num_tokens": 2393823741.0, "step": 3139 }, { "epoch": 4.284040839405545, "grad_norm": 0.22402991385136467, "learning_rate": 1.0408891743691805e-05, "loss": 0.2372, "num_tokens": 2394588195.0, "step": 3140 }, { "epoch": 4.285406420364226, "grad_norm": 0.2084262540734205, "learning_rate": 1.0403931692648272e-05, "loss": 0.2436, "num_tokens": 2395293177.0, "step": 3141 }, { "epoch": 4.286772001322906, "grad_norm": 0.2152700258121204, "learning_rate": 1.0398971823439365e-05, "loss": 0.2505, "num_tokens": 2396163039.0, "step": 3142 }, { "epoch": 4.288137582281587, "grad_norm": 0.20905300767420062, "learning_rate": 1.0394012137578134e-05, "loss": 0.2306, "num_tokens": 2396958148.0, "step": 3143 }, { "epoch": 4.289503163240267, "grad_norm": 0.23328709792889446, "learning_rate": 1.0389052636577548e-05, "loss": 0.2404, "num_tokens": 2397696994.0, "step": 3144 }, { "epoch": 4.290868744198948, "grad_norm": 0.21807869380071418, "learning_rate": 1.0384093321950547e-05, "loss": 0.2337, "num_tokens": 2398439653.0, "step": 3145 }, { "epoch": 4.2922343251576285, "grad_norm": 0.2003066558491781, "learning_rate": 1.0379134195210003e-05, "loss": 0.2403, "num_tokens": 2399220416.0, "step": 3146 }, { "epoch": 4.2935999061163095, "grad_norm": 0.21146479468074358, "learning_rate": 1.0374175257868728e-05, "loss": 0.242, "num_tokens": 2399974725.0, "step": 3147 }, { "epoch": 4.29496548707499, "grad_norm": 0.2089664142422123, "learning_rate": 1.0369216511439482e-05, "loss": 0.2399, "num_tokens": 2400766156.0, "step": 3148 }, { "epoch": 4.29633106803367, "grad_norm": 0.21396785238387414, "learning_rate": 1.0364257957434958e-05, "loss": 0.2413, "num_tokens": 2401474912.0, "step": 3149 }, { "epoch": 4.297696648992351, "grad_norm": 0.22655708308340436, "learning_rate": 1.03592995973678e-05, "loss": 0.2348, "num_tokens": 2402232037.0, "step": 3150 }, { "epoch": 4.299062229951031, "grad_norm": 0.19453260614855458, "learning_rate": 1.0354341432750587e-05, "loss": 0.2314, "num_tokens": 2403054864.0, "step": 3151 }, { "epoch": 4.300427810909712, "grad_norm": 0.21400587071073163, "learning_rate": 1.0349383465095845e-05, "loss": 0.2362, "num_tokens": 2403761055.0, "step": 3152 }, { "epoch": 4.301793391868392, "grad_norm": 0.20024932788097385, "learning_rate": 1.0344425695916029e-05, "loss": 0.2376, "num_tokens": 2404526195.0, "step": 3153 }, { "epoch": 4.303158972827073, "grad_norm": 0.2067160122582665, "learning_rate": 1.0339468126723546e-05, "loss": 0.2405, "num_tokens": 2405328612.0, "step": 3154 }, { "epoch": 4.304524553785753, "grad_norm": 0.19658196290769328, "learning_rate": 1.0334510759030732e-05, "loss": 0.234, "num_tokens": 2406075822.0, "step": 3155 }, { "epoch": 4.305890134744434, "grad_norm": 0.19628473536842633, "learning_rate": 1.0329553594349861e-05, "loss": 0.2375, "num_tokens": 2406934728.0, "step": 3156 }, { "epoch": 4.307255715703114, "grad_norm": 0.20097333657444924, "learning_rate": 1.032459663419316e-05, "loss": 0.2319, "num_tokens": 2407688977.0, "step": 3157 }, { "epoch": 4.308621296661794, "grad_norm": 0.19747013620311202, "learning_rate": 1.0319639880072772e-05, "loss": 0.2301, "num_tokens": 2408497714.0, "step": 3158 }, { "epoch": 4.309986877620475, "grad_norm": 0.20800911509942782, "learning_rate": 1.0314683333500795e-05, "loss": 0.2383, "num_tokens": 2409221552.0, "step": 3159 }, { "epoch": 4.311352458579155, "grad_norm": 0.20078328818355712, "learning_rate": 1.0309726995989251e-05, "loss": 0.2292, "num_tokens": 2410012151.0, "step": 3160 }, { "epoch": 4.312718039537836, "grad_norm": 0.20614611838720331, "learning_rate": 1.0304770869050111e-05, "loss": 0.2424, "num_tokens": 2410855500.0, "step": 3161 }, { "epoch": 4.3140836204965165, "grad_norm": 0.21321262504507643, "learning_rate": 1.0299814954195272e-05, "loss": 0.233, "num_tokens": 2411571588.0, "step": 3162 }, { "epoch": 4.3154492014551975, "grad_norm": 0.20271265101863933, "learning_rate": 1.0294859252936564e-05, "loss": 0.2445, "num_tokens": 2412345733.0, "step": 3163 }, { "epoch": 4.316814782413878, "grad_norm": 0.20768513136762629, "learning_rate": 1.0289903766785765e-05, "loss": 0.2344, "num_tokens": 2413037575.0, "step": 3164 }, { "epoch": 4.318180363372559, "grad_norm": 0.20496357955517733, "learning_rate": 1.0284948497254573e-05, "loss": 0.2439, "num_tokens": 2413823654.0, "step": 3165 }, { "epoch": 4.319545944331239, "grad_norm": 0.21226865854034346, "learning_rate": 1.0279993445854629e-05, "loss": 0.238, "num_tokens": 2414512563.0, "step": 3166 }, { "epoch": 4.320911525289919, "grad_norm": 0.21760319472802733, "learning_rate": 1.02750386140975e-05, "loss": 0.2399, "num_tokens": 2415234866.0, "step": 3167 }, { "epoch": 4.3222771062486, "grad_norm": 0.20667083464791902, "learning_rate": 1.02700840034947e-05, "loss": 0.2419, "num_tokens": 2415958555.0, "step": 3168 }, { "epoch": 4.32364268720728, "grad_norm": 0.20095148506544197, "learning_rate": 1.0265129615557654e-05, "loss": 0.2406, "num_tokens": 2416748290.0, "step": 3169 }, { "epoch": 4.325008268165961, "grad_norm": 0.2071294201450318, "learning_rate": 1.0260175451797745e-05, "loss": 0.2443, "num_tokens": 2417495153.0, "step": 3170 }, { "epoch": 4.326373849124641, "grad_norm": 0.20614911034029376, "learning_rate": 1.0255221513726266e-05, "loss": 0.241, "num_tokens": 2418280297.0, "step": 3171 }, { "epoch": 4.327739430083322, "grad_norm": 0.2648824453267282, "learning_rate": 1.0250267802854443e-05, "loss": 0.2236, "num_tokens": 2418994602.0, "step": 3172 }, { "epoch": 4.329105011042002, "grad_norm": 0.2094247959596038, "learning_rate": 1.0245314320693452e-05, "loss": 0.2299, "num_tokens": 2419688727.0, "step": 3173 }, { "epoch": 4.330470592000683, "grad_norm": 0.19783690675715898, "learning_rate": 1.0240361068754377e-05, "loss": 0.2202, "num_tokens": 2420412511.0, "step": 3174 }, { "epoch": 4.331836172959363, "grad_norm": 0.22172887457051177, "learning_rate": 1.0235408048548241e-05, "loss": 0.2404, "num_tokens": 2421153131.0, "step": 3175 }, { "epoch": 4.333201753918043, "grad_norm": 0.20572281948309226, "learning_rate": 1.0230455261585999e-05, "loss": 0.2341, "num_tokens": 2421864895.0, "step": 3176 }, { "epoch": 4.334567334876724, "grad_norm": 0.20894601567127416, "learning_rate": 1.0225502709378529e-05, "loss": 0.2384, "num_tokens": 2422672677.0, "step": 3177 }, { "epoch": 4.3359329158354045, "grad_norm": 0.21486751628070896, "learning_rate": 1.0220550393436646e-05, "loss": 0.2327, "num_tokens": 2423380998.0, "step": 3178 }, { "epoch": 4.3372984967940855, "grad_norm": 0.19765620045895044, "learning_rate": 1.0215598315271076e-05, "loss": 0.2325, "num_tokens": 2424129863.0, "step": 3179 }, { "epoch": 4.338664077752766, "grad_norm": 0.2059990760145127, "learning_rate": 1.0210646476392498e-05, "loss": 0.2319, "num_tokens": 2424915607.0, "step": 3180 }, { "epoch": 4.340029658711447, "grad_norm": 0.19399451821854877, "learning_rate": 1.020569487831149e-05, "loss": 0.2436, "num_tokens": 2425783830.0, "step": 3181 }, { "epoch": 4.341395239670127, "grad_norm": 0.20282382613045738, "learning_rate": 1.0200743522538579e-05, "loss": 0.2342, "num_tokens": 2426582468.0, "step": 3182 }, { "epoch": 4.342760820628808, "grad_norm": 0.20726209468098505, "learning_rate": 1.0195792410584206e-05, "loss": 0.2411, "num_tokens": 2427393030.0, "step": 3183 }, { "epoch": 4.344126401587488, "grad_norm": 0.18939424772470798, "learning_rate": 1.0190841543958743e-05, "loss": 0.2382, "num_tokens": 2428214398.0, "step": 3184 }, { "epoch": 4.345491982546168, "grad_norm": 0.20025748370346355, "learning_rate": 1.018589092417248e-05, "loss": 0.2575, "num_tokens": 2429001351.0, "step": 3185 }, { "epoch": 4.346857563504849, "grad_norm": 0.21525428778025574, "learning_rate": 1.0180940552735643e-05, "loss": 0.2262, "num_tokens": 2429752578.0, "step": 3186 }, { "epoch": 4.348223144463529, "grad_norm": 0.1982954868386459, "learning_rate": 1.0175990431158374e-05, "loss": 0.2397, "num_tokens": 2430512562.0, "step": 3187 }, { "epoch": 4.34958872542221, "grad_norm": 0.21795621173091057, "learning_rate": 1.0171040560950733e-05, "loss": 0.2416, "num_tokens": 2431256168.0, "step": 3188 }, { "epoch": 4.35095430638089, "grad_norm": 0.19452179481071497, "learning_rate": 1.0166090943622722e-05, "loss": 0.2356, "num_tokens": 2432042331.0, "step": 3189 }, { "epoch": 4.352319887339571, "grad_norm": 0.20489150457548222, "learning_rate": 1.016114158068425e-05, "loss": 0.2316, "num_tokens": 2432800248.0, "step": 3190 }, { "epoch": 4.353685468298251, "grad_norm": 0.2059221878183246, "learning_rate": 1.0156192473645155e-05, "loss": 0.2297, "num_tokens": 2433530159.0, "step": 3191 }, { "epoch": 4.355051049256932, "grad_norm": 0.190457295859458, "learning_rate": 1.0151243624015193e-05, "loss": 0.2373, "num_tokens": 2434315894.0, "step": 3192 }, { "epoch": 4.356416630215612, "grad_norm": 0.21550763986197916, "learning_rate": 1.0146295033304046e-05, "loss": 0.2405, "num_tokens": 2435099043.0, "step": 3193 }, { "epoch": 4.3577822111742925, "grad_norm": 0.19677757105908222, "learning_rate": 1.0141346703021315e-05, "loss": 0.2397, "num_tokens": 2435958479.0, "step": 3194 }, { "epoch": 4.3591477921329735, "grad_norm": 0.19685748179310872, "learning_rate": 1.0136398634676517e-05, "loss": 0.2384, "num_tokens": 2436683495.0, "step": 3195 }, { "epoch": 4.360513373091654, "grad_norm": 0.2166413069222805, "learning_rate": 1.0131450829779097e-05, "loss": 0.2447, "num_tokens": 2437404356.0, "step": 3196 }, { "epoch": 4.361878954050335, "grad_norm": 0.198582164669502, "learning_rate": 1.0126503289838412e-05, "loss": 0.2251, "num_tokens": 2438129504.0, "step": 3197 }, { "epoch": 4.363244535009015, "grad_norm": 0.2142279846458367, "learning_rate": 1.012155601636375e-05, "loss": 0.2378, "num_tokens": 2438825462.0, "step": 3198 }, { "epoch": 4.364610115967696, "grad_norm": 0.22398051421244974, "learning_rate": 1.01166090108643e-05, "loss": 0.2401, "num_tokens": 2439576051.0, "step": 3199 }, { "epoch": 4.365975696926376, "grad_norm": 0.2034821742452059, "learning_rate": 1.011166227484919e-05, "loss": 0.2468, "num_tokens": 2440383564.0, "step": 3200 }, { "epoch": 4.367341277885057, "grad_norm": 0.19885246061455455, "learning_rate": 1.0106715809827447e-05, "loss": 0.232, "num_tokens": 2441081604.0, "step": 3201 }, { "epoch": 4.368706858843737, "grad_norm": 0.2188221875323839, "learning_rate": 1.0101769617308026e-05, "loss": 0.2336, "num_tokens": 2441844908.0, "step": 3202 }, { "epoch": 4.370072439802417, "grad_norm": 0.2092353099898902, "learning_rate": 1.0096823698799795e-05, "loss": 0.2364, "num_tokens": 2442576566.0, "step": 3203 }, { "epoch": 4.371438020761098, "grad_norm": 0.195727479191234, "learning_rate": 1.0091878055811542e-05, "loss": 0.2308, "num_tokens": 2443362562.0, "step": 3204 }, { "epoch": 4.372803601719778, "grad_norm": 0.2134476596014767, "learning_rate": 1.0086932689851965e-05, "loss": 0.2427, "num_tokens": 2444184204.0, "step": 3205 }, { "epoch": 4.374169182678459, "grad_norm": 0.22100931039205451, "learning_rate": 1.0081987602429682e-05, "loss": 0.2434, "num_tokens": 2444981261.0, "step": 3206 }, { "epoch": 4.375534763637139, "grad_norm": 0.2029730722178828, "learning_rate": 1.0077042795053227e-05, "loss": 0.2352, "num_tokens": 2445796301.0, "step": 3207 }, { "epoch": 4.37690034459582, "grad_norm": 0.2020227558159363, "learning_rate": 1.0072098269231043e-05, "loss": 0.2307, "num_tokens": 2446613119.0, "step": 3208 }, { "epoch": 4.3782659255545004, "grad_norm": 0.20150488223791843, "learning_rate": 1.0067154026471497e-05, "loss": 0.2431, "num_tokens": 2447398254.0, "step": 3209 }, { "epoch": 4.3796315065131814, "grad_norm": 0.21099602413069868, "learning_rate": 1.0062210068282859e-05, "loss": 0.2321, "num_tokens": 2448110862.0, "step": 3210 }, { "epoch": 4.380997087471862, "grad_norm": 0.20908240447364793, "learning_rate": 1.0057266396173315e-05, "loss": 0.2242, "num_tokens": 2448794908.0, "step": 3211 }, { "epoch": 4.382362668430542, "grad_norm": 0.18801575799228207, "learning_rate": 1.0052323011650969e-05, "loss": 0.2333, "num_tokens": 2449577799.0, "step": 3212 }, { "epoch": 4.383728249389223, "grad_norm": 0.22489598773748104, "learning_rate": 1.004737991622383e-05, "loss": 0.2458, "num_tokens": 2450407875.0, "step": 3213 }, { "epoch": 4.385093830347903, "grad_norm": 0.21934138708502926, "learning_rate": 1.0042437111399825e-05, "loss": 0.2449, "num_tokens": 2451142631.0, "step": 3214 }, { "epoch": 4.386459411306584, "grad_norm": 0.20893512927754052, "learning_rate": 1.0037494598686785e-05, "loss": 0.2396, "num_tokens": 2451907223.0, "step": 3215 }, { "epoch": 4.387824992265264, "grad_norm": 0.21434253921789745, "learning_rate": 1.0032552379592467e-05, "loss": 0.2353, "num_tokens": 2452625317.0, "step": 3216 }, { "epoch": 4.389190573223945, "grad_norm": 0.2119410518015286, "learning_rate": 1.0027610455624517e-05, "loss": 0.2314, "num_tokens": 2453378767.0, "step": 3217 }, { "epoch": 4.390556154182625, "grad_norm": 0.21634134716854447, "learning_rate": 1.0022668828290508e-05, "loss": 0.2351, "num_tokens": 2454101567.0, "step": 3218 }, { "epoch": 4.391921735141306, "grad_norm": 0.2135340501304843, "learning_rate": 1.0017727499097916e-05, "loss": 0.2417, "num_tokens": 2454837984.0, "step": 3219 }, { "epoch": 4.393287316099986, "grad_norm": 0.18534037279818108, "learning_rate": 1.001278646955412e-05, "loss": 0.2402, "num_tokens": 2455643241.0, "step": 3220 }, { "epoch": 4.394652897058666, "grad_norm": 0.20852008140909967, "learning_rate": 1.0007845741166427e-05, "loss": 0.2478, "num_tokens": 2456412463.0, "step": 3221 }, { "epoch": 4.396018478017347, "grad_norm": 0.2234845081015311, "learning_rate": 1.0002905315442024e-05, "loss": 0.2424, "num_tokens": 2457147773.0, "step": 3222 }, { "epoch": 4.397384058976027, "grad_norm": 0.19511750536196829, "learning_rate": 9.997965193888038e-06, "loss": 0.2278, "num_tokens": 2457841609.0, "step": 3223 }, { "epoch": 4.398749639934708, "grad_norm": 0.21038812018173747, "learning_rate": 9.993025378011467e-06, "loss": 0.2359, "num_tokens": 2458671666.0, "step": 3224 }, { "epoch": 4.4001152208933885, "grad_norm": 0.19728437399231036, "learning_rate": 9.988085869319252e-06, "loss": 0.2365, "num_tokens": 2459428955.0, "step": 3225 }, { "epoch": 4.4014808018520695, "grad_norm": 0.21251665627832095, "learning_rate": 9.983146669318215e-06, "loss": 0.2359, "num_tokens": 2460115392.0, "step": 3226 }, { "epoch": 4.40284638281075, "grad_norm": 0.229044435398285, "learning_rate": 9.97820777951509e-06, "loss": 0.2427, "num_tokens": 2460839201.0, "step": 3227 }, { "epoch": 4.404211963769431, "grad_norm": 0.20574494216169656, "learning_rate": 9.973269201416524e-06, "loss": 0.2353, "num_tokens": 2461582259.0, "step": 3228 }, { "epoch": 4.405577544728111, "grad_norm": 0.21740023731402494, "learning_rate": 9.96833093652906e-06, "loss": 0.247, "num_tokens": 2462362976.0, "step": 3229 }, { "epoch": 4.406943125686791, "grad_norm": 0.19064979701411286, "learning_rate": 9.96339298635915e-06, "loss": 0.2318, "num_tokens": 2463196713.0, "step": 3230 }, { "epoch": 4.408308706645472, "grad_norm": 0.2182592900958331, "learning_rate": 9.958455352413145e-06, "loss": 0.2469, "num_tokens": 2463916038.0, "step": 3231 }, { "epoch": 4.409674287604152, "grad_norm": 0.2103058438889468, "learning_rate": 9.953518036197312e-06, "loss": 0.2389, "num_tokens": 2464656861.0, "step": 3232 }, { "epoch": 4.411039868562833, "grad_norm": 0.2023554176311186, "learning_rate": 9.948581039217806e-06, "loss": 0.2332, "num_tokens": 2465406953.0, "step": 3233 }, { "epoch": 4.412405449521513, "grad_norm": 0.21048809218798498, "learning_rate": 9.943644362980687e-06, "loss": 0.2236, "num_tokens": 2466173926.0, "step": 3234 }, { "epoch": 4.413771030480194, "grad_norm": 0.2019177209699721, "learning_rate": 9.938708008991928e-06, "loss": 0.2362, "num_tokens": 2466924657.0, "step": 3235 }, { "epoch": 4.415136611438874, "grad_norm": 0.21595055939574123, "learning_rate": 9.933771978757396e-06, "loss": 0.2395, "num_tokens": 2467719516.0, "step": 3236 }, { "epoch": 4.416502192397555, "grad_norm": 0.2121565972395305, "learning_rate": 9.92883627378286e-06, "loss": 0.2375, "num_tokens": 2468461885.0, "step": 3237 }, { "epoch": 4.417867773356235, "grad_norm": 0.21919083427897634, "learning_rate": 9.923900895573986e-06, "loss": 0.2382, "num_tokens": 2469170003.0, "step": 3238 }, { "epoch": 4.419233354314915, "grad_norm": 0.20824971279247714, "learning_rate": 9.918965845636348e-06, "loss": 0.2296, "num_tokens": 2469924858.0, "step": 3239 }, { "epoch": 4.420598935273596, "grad_norm": 0.2183487509589547, "learning_rate": 9.914031125475415e-06, "loss": 0.2322, "num_tokens": 2470662621.0, "step": 3240 }, { "epoch": 4.4219645162322765, "grad_norm": 0.19647849103113701, "learning_rate": 9.909096736596557e-06, "loss": 0.2369, "num_tokens": 2471440349.0, "step": 3241 }, { "epoch": 4.4233300971909575, "grad_norm": 0.21332368129201937, "learning_rate": 9.904162680505044e-06, "loss": 0.2299, "num_tokens": 2472177764.0, "step": 3242 }, { "epoch": 4.424695678149638, "grad_norm": 0.2111524598451786, "learning_rate": 9.89922895870603e-06, "loss": 0.2365, "num_tokens": 2472907886.0, "step": 3243 }, { "epoch": 4.426061259108319, "grad_norm": 0.19820067040318498, "learning_rate": 9.894295572704603e-06, "loss": 0.2386, "num_tokens": 2473720043.0, "step": 3244 }, { "epoch": 4.427426840066999, "grad_norm": 0.20124821926463773, "learning_rate": 9.889362524005706e-06, "loss": 0.2279, "num_tokens": 2474470385.0, "step": 3245 }, { "epoch": 4.42879242102568, "grad_norm": 0.21317536260262898, "learning_rate": 9.884429814114207e-06, "loss": 0.2475, "num_tokens": 2475282151.0, "step": 3246 }, { "epoch": 4.43015800198436, "grad_norm": 0.20750393612863824, "learning_rate": 9.87949744453486e-06, "loss": 0.2482, "num_tokens": 2476063741.0, "step": 3247 }, { "epoch": 4.43152358294304, "grad_norm": 0.20594362347063608, "learning_rate": 9.874565416772319e-06, "loss": 0.235, "num_tokens": 2476809266.0, "step": 3248 }, { "epoch": 4.432889163901721, "grad_norm": 0.2168834010189078, "learning_rate": 9.86963373233113e-06, "loss": 0.2427, "num_tokens": 2477520149.0, "step": 3249 }, { "epoch": 4.434254744860401, "grad_norm": 0.20372742886844014, "learning_rate": 9.864702392715734e-06, "loss": 0.2438, "num_tokens": 2478320412.0, "step": 3250 }, { "epoch": 4.435620325819082, "grad_norm": 0.20486951899459122, "learning_rate": 9.85977139943048e-06, "loss": 0.243, "num_tokens": 2479062242.0, "step": 3251 }, { "epoch": 4.436985906777762, "grad_norm": 0.21228526721126598, "learning_rate": 9.854840753979587e-06, "loss": 0.2388, "num_tokens": 2479839646.0, "step": 3252 }, { "epoch": 4.438351487736443, "grad_norm": 0.19913430550378447, "learning_rate": 9.84991045786719e-06, "loss": 0.2407, "num_tokens": 2480638959.0, "step": 3253 }, { "epoch": 4.439717068695123, "grad_norm": 0.2017997618471524, "learning_rate": 9.844980512597302e-06, "loss": 0.2368, "num_tokens": 2481365238.0, "step": 3254 }, { "epoch": 4.441082649653804, "grad_norm": 0.20434163650274012, "learning_rate": 9.840050919673845e-06, "loss": 0.2405, "num_tokens": 2482107740.0, "step": 3255 }, { "epoch": 4.442448230612484, "grad_norm": 0.19822177518863007, "learning_rate": 9.835121680600618e-06, "loss": 0.239, "num_tokens": 2482836297.0, "step": 3256 }, { "epoch": 4.4438138115711645, "grad_norm": 0.21469634362688822, "learning_rate": 9.83019279688132e-06, "loss": 0.2443, "num_tokens": 2483649352.0, "step": 3257 }, { "epoch": 4.4451793925298455, "grad_norm": 0.21070001557931048, "learning_rate": 9.825264270019538e-06, "loss": 0.2332, "num_tokens": 2484338254.0, "step": 3258 }, { "epoch": 4.446544973488526, "grad_norm": 0.19699565545353745, "learning_rate": 9.820336101518752e-06, "loss": 0.2369, "num_tokens": 2485088732.0, "step": 3259 }, { "epoch": 4.447910554447207, "grad_norm": 0.1993014990435232, "learning_rate": 9.81540829288234e-06, "loss": 0.218, "num_tokens": 2485799066.0, "step": 3260 }, { "epoch": 4.449276135405887, "grad_norm": 0.17928026202144806, "learning_rate": 9.810480845613553e-06, "loss": 0.2357, "num_tokens": 2486661791.0, "step": 3261 }, { "epoch": 4.450641716364568, "grad_norm": 0.21149656833809066, "learning_rate": 9.805553761215548e-06, "loss": 0.2356, "num_tokens": 2487353802.0, "step": 3262 }, { "epoch": 4.452007297323248, "grad_norm": 0.21727423197286788, "learning_rate": 9.800627041191362e-06, "loss": 0.234, "num_tokens": 2488128649.0, "step": 3263 }, { "epoch": 4.453372878281929, "grad_norm": 0.19898821088816412, "learning_rate": 9.795700687043928e-06, "loss": 0.2384, "num_tokens": 2488933739.0, "step": 3264 }, { "epoch": 4.454738459240609, "grad_norm": 0.20650435184661378, "learning_rate": 9.79077470027606e-06, "loss": 0.252, "num_tokens": 2489743130.0, "step": 3265 }, { "epoch": 4.456104040199289, "grad_norm": 0.199590586910346, "learning_rate": 9.785849082390462e-06, "loss": 0.2401, "num_tokens": 2490509967.0, "step": 3266 }, { "epoch": 4.45746962115797, "grad_norm": 0.1930771290506454, "learning_rate": 9.78092383488973e-06, "loss": 0.2313, "num_tokens": 2491287752.0, "step": 3267 }, { "epoch": 4.45883520211665, "grad_norm": 0.21050522483695722, "learning_rate": 9.77599895927634e-06, "loss": 0.2527, "num_tokens": 2492043005.0, "step": 3268 }, { "epoch": 4.460200783075331, "grad_norm": 0.2129229154911381, "learning_rate": 9.771074457052665e-06, "loss": 0.2294, "num_tokens": 2492739112.0, "step": 3269 }, { "epoch": 4.461566364034011, "grad_norm": 0.2022771643785076, "learning_rate": 9.766150329720947e-06, "loss": 0.2432, "num_tokens": 2493533341.0, "step": 3270 }, { "epoch": 4.462931944992692, "grad_norm": 0.21981064931441902, "learning_rate": 9.761226578783336e-06, "loss": 0.2444, "num_tokens": 2494303289.0, "step": 3271 }, { "epoch": 4.464297525951372, "grad_norm": 0.2114809359313964, "learning_rate": 9.756303205741846e-06, "loss": 0.2435, "num_tokens": 2495021527.0, "step": 3272 }, { "epoch": 4.465663106910053, "grad_norm": 0.21435818822439512, "learning_rate": 9.751380212098386e-06, "loss": 0.2376, "num_tokens": 2495732215.0, "step": 3273 }, { "epoch": 4.4670286878687335, "grad_norm": 0.2084327439520346, "learning_rate": 9.746457599354755e-06, "loss": 0.2388, "num_tokens": 2496477046.0, "step": 3274 }, { "epoch": 4.468394268827414, "grad_norm": 0.19844860293568437, "learning_rate": 9.74153536901262e-06, "loss": 0.2311, "num_tokens": 2497271207.0, "step": 3275 }, { "epoch": 4.469759849786095, "grad_norm": 0.21147953293208258, "learning_rate": 9.736613522573548e-06, "loss": 0.2323, "num_tokens": 2498050945.0, "step": 3276 }, { "epoch": 4.471125430744775, "grad_norm": 0.19124672639813825, "learning_rate": 9.731692061538973e-06, "loss": 0.2323, "num_tokens": 2498787668.0, "step": 3277 }, { "epoch": 4.472491011703456, "grad_norm": 0.21026908842406816, "learning_rate": 9.726770987410228e-06, "loss": 0.2432, "num_tokens": 2499536440.0, "step": 3278 }, { "epoch": 4.473856592662136, "grad_norm": 0.20210406285693505, "learning_rate": 9.721850301688518e-06, "loss": 0.2297, "num_tokens": 2500317033.0, "step": 3279 }, { "epoch": 4.475222173620817, "grad_norm": 0.2235049615157691, "learning_rate": 9.716930005874923e-06, "loss": 0.2264, "num_tokens": 2501050533.0, "step": 3280 }, { "epoch": 4.476587754579497, "grad_norm": 0.209022674740525, "learning_rate": 9.712010101470422e-06, "loss": 0.2389, "num_tokens": 2501802209.0, "step": 3281 }, { "epoch": 4.477953335538178, "grad_norm": 0.1939644881159321, "learning_rate": 9.707090589975858e-06, "loss": 0.2264, "num_tokens": 2502546403.0, "step": 3282 }, { "epoch": 4.479318916496858, "grad_norm": 0.18758983732041834, "learning_rate": 9.702171472891968e-06, "loss": 0.2319, "num_tokens": 2503320996.0, "step": 3283 }, { "epoch": 4.480684497455538, "grad_norm": 0.18854466401622086, "learning_rate": 9.697252751719355e-06, "loss": 0.2378, "num_tokens": 2504110952.0, "step": 3284 }, { "epoch": 4.482050078414219, "grad_norm": 0.21339261827436723, "learning_rate": 9.69233442795851e-06, "loss": 0.2402, "num_tokens": 2504822869.0, "step": 3285 }, { "epoch": 4.483415659372899, "grad_norm": 0.20010072466409926, "learning_rate": 9.687416503109798e-06, "loss": 0.2447, "num_tokens": 2505633146.0, "step": 3286 }, { "epoch": 4.48478124033158, "grad_norm": 0.22004160438136497, "learning_rate": 9.682498978673474e-06, "loss": 0.2369, "num_tokens": 2506383091.0, "step": 3287 }, { "epoch": 4.48614682129026, "grad_norm": 0.20969941905338899, "learning_rate": 9.677581856149654e-06, "loss": 0.2402, "num_tokens": 2507197599.0, "step": 3288 }, { "epoch": 4.487512402248941, "grad_norm": 0.19788013714432823, "learning_rate": 9.672665137038337e-06, "loss": 0.2371, "num_tokens": 2507937058.0, "step": 3289 }, { "epoch": 4.4888779832076215, "grad_norm": 0.2099582888400549, "learning_rate": 9.667748822839406e-06, "loss": 0.2436, "num_tokens": 2508703781.0, "step": 3290 }, { "epoch": 4.4902435641663025, "grad_norm": 0.22515901315128767, "learning_rate": 9.662832915052616e-06, "loss": 0.2356, "num_tokens": 2509409480.0, "step": 3291 }, { "epoch": 4.491609145124983, "grad_norm": 0.2260261339761146, "learning_rate": 9.657917415177596e-06, "loss": 0.2339, "num_tokens": 2510154327.0, "step": 3292 }, { "epoch": 4.492974726083663, "grad_norm": 0.2022569419378603, "learning_rate": 9.653002324713849e-06, "loss": 0.2431, "num_tokens": 2511004262.0, "step": 3293 }, { "epoch": 4.494340307042344, "grad_norm": 0.20541387968765265, "learning_rate": 9.648087645160763e-06, "loss": 0.2401, "num_tokens": 2511782251.0, "step": 3294 }, { "epoch": 4.495705888001024, "grad_norm": 0.19875464995945022, "learning_rate": 9.64317337801759e-06, "loss": 0.2275, "num_tokens": 2512435363.0, "step": 3295 }, { "epoch": 4.497071468959705, "grad_norm": 0.21337728152647756, "learning_rate": 9.638259524783455e-06, "loss": 0.2451, "num_tokens": 2513169168.0, "step": 3296 }, { "epoch": 4.498437049918385, "grad_norm": 0.21168871412080995, "learning_rate": 9.633346086957374e-06, "loss": 0.2343, "num_tokens": 2513949184.0, "step": 3297 }, { "epoch": 4.499802630877066, "grad_norm": 0.20860766639760842, "learning_rate": 9.62843306603821e-06, "loss": 0.2405, "num_tokens": 2514760142.0, "step": 3298 }, { "epoch": 4.501168211835746, "grad_norm": 0.18979387273643028, "learning_rate": 9.623520463524725e-06, "loss": 0.2368, "num_tokens": 2515582299.0, "step": 3299 }, { "epoch": 4.502533792794427, "grad_norm": 0.203169628640453, "learning_rate": 9.618608280915534e-06, "loss": 0.2224, "num_tokens": 2516262855.0, "step": 3300 }, { "epoch": 4.503899373753107, "grad_norm": 0.19228877407098788, "learning_rate": 9.613696519709137e-06, "loss": 0.2326, "num_tokens": 2517010244.0, "step": 3301 }, { "epoch": 4.505264954711787, "grad_norm": 0.20171852543180552, "learning_rate": 9.608785181403892e-06, "loss": 0.2461, "num_tokens": 2517795363.0, "step": 3302 }, { "epoch": 4.506630535670468, "grad_norm": 0.20444488568503827, "learning_rate": 9.603874267498043e-06, "loss": 0.2378, "num_tokens": 2518603689.0, "step": 3303 }, { "epoch": 4.5079961166291485, "grad_norm": 0.2206896085005018, "learning_rate": 9.598963779489693e-06, "loss": 0.2359, "num_tokens": 2519352132.0, "step": 3304 }, { "epoch": 4.5093616975878295, "grad_norm": 0.19553770895643183, "learning_rate": 9.594053718876817e-06, "loss": 0.2482, "num_tokens": 2520187666.0, "step": 3305 }, { "epoch": 4.51072727854651, "grad_norm": 0.21334814037540908, "learning_rate": 9.589144087157268e-06, "loss": 0.2401, "num_tokens": 2520939976.0, "step": 3306 }, { "epoch": 4.512092859505191, "grad_norm": 0.21407323205075415, "learning_rate": 9.584234885828759e-06, "loss": 0.247, "num_tokens": 2521724011.0, "step": 3307 }, { "epoch": 4.513458440463871, "grad_norm": 0.21311862449104063, "learning_rate": 9.579326116388874e-06, "loss": 0.2265, "num_tokens": 2522455080.0, "step": 3308 }, { "epoch": 4.514824021422552, "grad_norm": 0.21249574053708586, "learning_rate": 9.574417780335065e-06, "loss": 0.2394, "num_tokens": 2523213036.0, "step": 3309 }, { "epoch": 4.516189602381232, "grad_norm": 0.1902922734441824, "learning_rate": 9.569509879164658e-06, "loss": 0.2386, "num_tokens": 2524100233.0, "step": 3310 }, { "epoch": 4.517555183339912, "grad_norm": 0.19500596371853096, "learning_rate": 9.564602414374837e-06, "loss": 0.2335, "num_tokens": 2524830608.0, "step": 3311 }, { "epoch": 4.518920764298593, "grad_norm": 0.23002574066241488, "learning_rate": 9.559695387462657e-06, "loss": 0.2398, "num_tokens": 2525507673.0, "step": 3312 }, { "epoch": 4.520286345257273, "grad_norm": 0.22684171617605037, "learning_rate": 9.554788799925041e-06, "loss": 0.2368, "num_tokens": 2526187291.0, "step": 3313 }, { "epoch": 4.521651926215954, "grad_norm": 0.20238387073527853, "learning_rate": 9.54988265325877e-06, "loss": 0.239, "num_tokens": 2526971354.0, "step": 3314 }, { "epoch": 4.523017507174634, "grad_norm": 0.21117097467299686, "learning_rate": 9.54497694896051e-06, "loss": 0.24, "num_tokens": 2527729679.0, "step": 3315 }, { "epoch": 4.524383088133315, "grad_norm": 0.21061796886520545, "learning_rate": 9.540071688526765e-06, "loss": 0.2355, "num_tokens": 2528525732.0, "step": 3316 }, { "epoch": 4.525748669091995, "grad_norm": 0.1998231650445214, "learning_rate": 9.535166873453928e-06, "loss": 0.2441, "num_tokens": 2529330692.0, "step": 3317 }, { "epoch": 4.527114250050676, "grad_norm": 0.20821820560054866, "learning_rate": 9.53026250523824e-06, "loss": 0.2327, "num_tokens": 2530092824.0, "step": 3318 }, { "epoch": 4.528479831009356, "grad_norm": 0.1974608459233468, "learning_rate": 9.52535858537581e-06, "loss": 0.2407, "num_tokens": 2530860365.0, "step": 3319 }, { "epoch": 4.5298454119680365, "grad_norm": 0.21264817780063427, "learning_rate": 9.52045511536262e-06, "loss": 0.2396, "num_tokens": 2531559643.0, "step": 3320 }, { "epoch": 4.5312109929267175, "grad_norm": 0.2032095804761357, "learning_rate": 9.515552096694496e-06, "loss": 0.2438, "num_tokens": 2532359462.0, "step": 3321 }, { "epoch": 4.532576573885398, "grad_norm": 0.206847131791177, "learning_rate": 9.510649530867141e-06, "loss": 0.2449, "num_tokens": 2533120591.0, "step": 3322 }, { "epoch": 4.533942154844079, "grad_norm": 0.1961256072640121, "learning_rate": 9.505747419376117e-06, "loss": 0.2397, "num_tokens": 2533901847.0, "step": 3323 }, { "epoch": 4.535307735802759, "grad_norm": 0.21022383561983476, "learning_rate": 9.500845763716846e-06, "loss": 0.2447, "num_tokens": 2534727619.0, "step": 3324 }, { "epoch": 4.53667331676144, "grad_norm": 0.18386993217022993, "learning_rate": 9.495944565384608e-06, "loss": 0.2345, "num_tokens": 2535548012.0, "step": 3325 }, { "epoch": 4.53803889772012, "grad_norm": 0.20480738204000826, "learning_rate": 9.49104382587455e-06, "loss": 0.2422, "num_tokens": 2536296194.0, "step": 3326 }, { "epoch": 4.539404478678801, "grad_norm": 0.2122268393634356, "learning_rate": 9.486143546681673e-06, "loss": 0.2332, "num_tokens": 2536967574.0, "step": 3327 }, { "epoch": 4.540770059637481, "grad_norm": 0.20121894239512383, "learning_rate": 9.48124372930084e-06, "loss": 0.2272, "num_tokens": 2537750449.0, "step": 3328 }, { "epoch": 4.542135640596161, "grad_norm": 0.23667169647472877, "learning_rate": 9.476344375226774e-06, "loss": 0.2477, "num_tokens": 2538525532.0, "step": 3329 }, { "epoch": 4.543501221554842, "grad_norm": 0.21366061234562636, "learning_rate": 9.471445485954052e-06, "loss": 0.2325, "num_tokens": 2539307832.0, "step": 3330 }, { "epoch": 4.544866802513522, "grad_norm": 0.19815201883866812, "learning_rate": 9.46654706297712e-06, "loss": 0.236, "num_tokens": 2540070224.0, "step": 3331 }, { "epoch": 4.546232383472203, "grad_norm": 0.20740861749296388, "learning_rate": 9.461649107790267e-06, "loss": 0.2455, "num_tokens": 2540806333.0, "step": 3332 }, { "epoch": 4.547597964430883, "grad_norm": 0.2170177498309866, "learning_rate": 9.456751621887655e-06, "loss": 0.2458, "num_tokens": 2541550248.0, "step": 3333 }, { "epoch": 4.548963545389564, "grad_norm": 0.19872950799517738, "learning_rate": 9.45185460676329e-06, "loss": 0.232, "num_tokens": 2542252341.0, "step": 3334 }, { "epoch": 4.550329126348244, "grad_norm": 0.20275107493927083, "learning_rate": 9.446958063911038e-06, "loss": 0.247, "num_tokens": 2543048499.0, "step": 3335 }, { "epoch": 4.551694707306925, "grad_norm": 0.1946748898295872, "learning_rate": 9.442061994824625e-06, "loss": 0.2414, "num_tokens": 2543837208.0, "step": 3336 }, { "epoch": 4.5530602882656055, "grad_norm": 0.21829189812186905, "learning_rate": 9.437166400997629e-06, "loss": 0.2373, "num_tokens": 2544468680.0, "step": 3337 }, { "epoch": 4.554425869224286, "grad_norm": 0.20345717987795855, "learning_rate": 9.432271283923484e-06, "loss": 0.2475, "num_tokens": 2545261263.0, "step": 3338 }, { "epoch": 4.555791450182967, "grad_norm": 0.2092678135699612, "learning_rate": 9.427376645095475e-06, "loss": 0.2497, "num_tokens": 2545984283.0, "step": 3339 }, { "epoch": 4.557157031141647, "grad_norm": 0.20776645458032952, "learning_rate": 9.422482486006752e-06, "loss": 0.2341, "num_tokens": 2546719893.0, "step": 3340 }, { "epoch": 4.558522612100328, "grad_norm": 0.19737905986640622, "learning_rate": 9.4175888081503e-06, "loss": 0.247, "num_tokens": 2547469000.0, "step": 3341 }, { "epoch": 4.559888193059008, "grad_norm": 0.20483525274851663, "learning_rate": 9.41269561301898e-06, "loss": 0.2399, "num_tokens": 2548285537.0, "step": 3342 }, { "epoch": 4.561253774017689, "grad_norm": 0.20461011556759764, "learning_rate": 9.407802902105485e-06, "loss": 0.2435, "num_tokens": 2549126393.0, "step": 3343 }, { "epoch": 4.562619354976369, "grad_norm": 0.19842686371041507, "learning_rate": 9.402910676902373e-06, "loss": 0.239, "num_tokens": 2549971245.0, "step": 3344 }, { "epoch": 4.56398493593505, "grad_norm": 0.20765518380211656, "learning_rate": 9.398018938902049e-06, "loss": 0.2342, "num_tokens": 2550709560.0, "step": 3345 }, { "epoch": 4.56535051689373, "grad_norm": 0.19830613044206608, "learning_rate": 9.39312768959677e-06, "loss": 0.244, "num_tokens": 2551515851.0, "step": 3346 }, { "epoch": 4.56671609785241, "grad_norm": 0.21071818175577023, "learning_rate": 9.38823693047865e-06, "loss": 0.2449, "num_tokens": 2552266174.0, "step": 3347 }, { "epoch": 4.568081678811091, "grad_norm": 0.22239948383968525, "learning_rate": 9.383346663039637e-06, "loss": 0.2406, "num_tokens": 2552988256.0, "step": 3348 }, { "epoch": 4.569447259769771, "grad_norm": 0.20886555791366224, "learning_rate": 9.378456888771549e-06, "loss": 0.2559, "num_tokens": 2553789787.0, "step": 3349 }, { "epoch": 4.570812840728452, "grad_norm": 0.19404738148780581, "learning_rate": 9.373567609166044e-06, "loss": 0.2347, "num_tokens": 2554535598.0, "step": 3350 }, { "epoch": 4.572178421687132, "grad_norm": 0.20752138244949273, "learning_rate": 9.36867882571462e-06, "loss": 0.2325, "num_tokens": 2555277493.0, "step": 3351 }, { "epoch": 4.573544002645813, "grad_norm": 0.19723591648963062, "learning_rate": 9.363790539908644e-06, "loss": 0.2482, "num_tokens": 2556037836.0, "step": 3352 }, { "epoch": 4.5749095836044935, "grad_norm": 0.20559341530574757, "learning_rate": 9.358902753239312e-06, "loss": 0.2372, "num_tokens": 2556854768.0, "step": 3353 }, { "epoch": 4.5762751645631745, "grad_norm": 0.19552347690470454, "learning_rate": 9.354015467197687e-06, "loss": 0.2329, "num_tokens": 2557586391.0, "step": 3354 }, { "epoch": 4.577640745521855, "grad_norm": 0.19827524158777876, "learning_rate": 9.349128683274655e-06, "loss": 0.2297, "num_tokens": 2558331440.0, "step": 3355 }, { "epoch": 4.579006326480535, "grad_norm": 0.1966743404356633, "learning_rate": 9.344242402960972e-06, "loss": 0.2441, "num_tokens": 2559170428.0, "step": 3356 }, { "epoch": 4.580371907439216, "grad_norm": 0.25065887768208667, "learning_rate": 9.339356627747227e-06, "loss": 0.2437, "num_tokens": 2559888014.0, "step": 3357 }, { "epoch": 4.581737488397896, "grad_norm": 0.20265510369293868, "learning_rate": 9.334471359123856e-06, "loss": 0.2281, "num_tokens": 2560626387.0, "step": 3358 }, { "epoch": 4.583103069356577, "grad_norm": 0.2017837716915504, "learning_rate": 9.329586598581148e-06, "loss": 0.2373, "num_tokens": 2561300602.0, "step": 3359 }, { "epoch": 4.584468650315257, "grad_norm": 0.20504440560350906, "learning_rate": 9.324702347609228e-06, "loss": 0.2398, "num_tokens": 2562128829.0, "step": 3360 }, { "epoch": 4.585834231273938, "grad_norm": 0.22478214511434785, "learning_rate": 9.31981860769807e-06, "loss": 0.2383, "num_tokens": 2562827276.0, "step": 3361 }, { "epoch": 4.587199812232618, "grad_norm": 0.21292453884830434, "learning_rate": 9.314935380337494e-06, "loss": 0.2453, "num_tokens": 2563572334.0, "step": 3362 }, { "epoch": 4.588565393191299, "grad_norm": 0.21732526611448022, "learning_rate": 9.31005266701716e-06, "loss": 0.2479, "num_tokens": 2564385427.0, "step": 3363 }, { "epoch": 4.589930974149979, "grad_norm": 0.20593138924171606, "learning_rate": 9.305170469226568e-06, "loss": 0.2426, "num_tokens": 2565140180.0, "step": 3364 }, { "epoch": 4.591296555108659, "grad_norm": 0.20879369659856262, "learning_rate": 9.300288788455074e-06, "loss": 0.2427, "num_tokens": 2565894902.0, "step": 3365 }, { "epoch": 4.59266213606734, "grad_norm": 0.21735351990921906, "learning_rate": 9.295407626191861e-06, "loss": 0.2441, "num_tokens": 2566706943.0, "step": 3366 }, { "epoch": 4.59402771702602, "grad_norm": 0.20813275971264644, "learning_rate": 9.290526983925958e-06, "loss": 0.2353, "num_tokens": 2567496426.0, "step": 3367 }, { "epoch": 4.595393297984701, "grad_norm": 0.2007255065621032, "learning_rate": 9.285646863146247e-06, "loss": 0.2406, "num_tokens": 2568301001.0, "step": 3368 }, { "epoch": 4.5967588789433815, "grad_norm": 0.20745305367779662, "learning_rate": 9.28076726534143e-06, "loss": 0.2436, "num_tokens": 2569107954.0, "step": 3369 }, { "epoch": 4.5981244599020625, "grad_norm": 0.2111653509822394, "learning_rate": 9.275888192000071e-06, "loss": 0.2377, "num_tokens": 2569814167.0, "step": 3370 }, { "epoch": 4.599490040860743, "grad_norm": 0.20382025044139543, "learning_rate": 9.271009644610558e-06, "loss": 0.2421, "num_tokens": 2570570103.0, "step": 3371 }, { "epoch": 4.600855621819424, "grad_norm": 0.2219712046819433, "learning_rate": 9.266131624661127e-06, "loss": 0.2499, "num_tokens": 2571375071.0, "step": 3372 }, { "epoch": 4.602221202778104, "grad_norm": 0.21584447497420187, "learning_rate": 9.26125413363985e-06, "loss": 0.2397, "num_tokens": 2572210349.0, "step": 3373 }, { "epoch": 4.603586783736784, "grad_norm": 0.19136172650251956, "learning_rate": 9.256377173034637e-06, "loss": 0.2394, "num_tokens": 2573010038.0, "step": 3374 }, { "epoch": 4.604952364695465, "grad_norm": 0.1980384169948712, "learning_rate": 9.25150074433324e-06, "loss": 0.2332, "num_tokens": 2573771652.0, "step": 3375 }, { "epoch": 4.606317945654145, "grad_norm": 0.21258289272872805, "learning_rate": 9.246624849023242e-06, "loss": 0.2437, "num_tokens": 2574514024.0, "step": 3376 }, { "epoch": 4.607683526612826, "grad_norm": 0.18987545444026027, "learning_rate": 9.241749488592076e-06, "loss": 0.2365, "num_tokens": 2575260299.0, "step": 3377 }, { "epoch": 4.609049107571506, "grad_norm": 0.20475513093298378, "learning_rate": 9.236874664526988e-06, "loss": 0.2414, "num_tokens": 2576056104.0, "step": 3378 }, { "epoch": 4.610414688530187, "grad_norm": 0.2186401168713881, "learning_rate": 9.232000378315093e-06, "loss": 0.2454, "num_tokens": 2576795730.0, "step": 3379 }, { "epoch": 4.611780269488867, "grad_norm": 0.2038955986182268, "learning_rate": 9.227126631443316e-06, "loss": 0.234, "num_tokens": 2577525944.0, "step": 3380 }, { "epoch": 4.613145850447548, "grad_norm": 0.21052793516269408, "learning_rate": 9.222253425398426e-06, "loss": 0.2311, "num_tokens": 2578180828.0, "step": 3381 }, { "epoch": 4.614511431406228, "grad_norm": 0.1918027864725675, "learning_rate": 9.217380761667032e-06, "loss": 0.216, "num_tokens": 2578856727.0, "step": 3382 }, { "epoch": 4.615877012364908, "grad_norm": 0.20523875165445205, "learning_rate": 9.212508641735565e-06, "loss": 0.2393, "num_tokens": 2579600204.0, "step": 3383 }, { "epoch": 4.617242593323589, "grad_norm": 0.1928714075772, "learning_rate": 9.207637067090303e-06, "loss": 0.2357, "num_tokens": 2580358411.0, "step": 3384 }, { "epoch": 4.6186081742822696, "grad_norm": 0.19573140085629262, "learning_rate": 9.202766039217347e-06, "loss": 0.2344, "num_tokens": 2581186137.0, "step": 3385 }, { "epoch": 4.6199737552409506, "grad_norm": 0.18818011183286545, "learning_rate": 9.197895559602647e-06, "loss": 0.2432, "num_tokens": 2581968330.0, "step": 3386 }, { "epoch": 4.621339336199631, "grad_norm": 0.20643431473450977, "learning_rate": 9.193025629731964e-06, "loss": 0.2409, "num_tokens": 2582745545.0, "step": 3387 }, { "epoch": 4.622704917158312, "grad_norm": 0.2101710158517749, "learning_rate": 9.188156251090912e-06, "loss": 0.2397, "num_tokens": 2583482951.0, "step": 3388 }, { "epoch": 4.624070498116992, "grad_norm": 0.19841672810635316, "learning_rate": 9.183287425164923e-06, "loss": 0.2361, "num_tokens": 2584221626.0, "step": 3389 }, { "epoch": 4.625436079075673, "grad_norm": 0.22442360568315275, "learning_rate": 9.178419153439265e-06, "loss": 0.2412, "num_tokens": 2585020015.0, "step": 3390 }, { "epoch": 4.626801660034353, "grad_norm": 0.19284497189387276, "learning_rate": 9.173551437399038e-06, "loss": 0.2417, "num_tokens": 2585814417.0, "step": 3391 }, { "epoch": 4.628167240993033, "grad_norm": 0.2043403700587299, "learning_rate": 9.16868427852917e-06, "loss": 0.2396, "num_tokens": 2586625349.0, "step": 3392 }, { "epoch": 4.629532821951714, "grad_norm": 0.2702723684086294, "learning_rate": 9.16381767831442e-06, "loss": 0.2465, "num_tokens": 2587410176.0, "step": 3393 }, { "epoch": 4.630898402910394, "grad_norm": 0.2008214337292333, "learning_rate": 9.158951638239377e-06, "loss": 0.2333, "num_tokens": 2588142873.0, "step": 3394 }, { "epoch": 4.632263983869075, "grad_norm": 0.2130384556909265, "learning_rate": 9.154086159788462e-06, "loss": 0.2388, "num_tokens": 2588841395.0, "step": 3395 }, { "epoch": 4.633629564827755, "grad_norm": 0.1927846463939635, "learning_rate": 9.14922124444592e-06, "loss": 0.2387, "num_tokens": 2589591040.0, "step": 3396 }, { "epoch": 4.634995145786436, "grad_norm": 0.1863155373662412, "learning_rate": 9.144356893695828e-06, "loss": 0.2278, "num_tokens": 2590353775.0, "step": 3397 }, { "epoch": 4.636360726745116, "grad_norm": 0.22174885153519772, "learning_rate": 9.139493109022084e-06, "loss": 0.2399, "num_tokens": 2591175150.0, "step": 3398 }, { "epoch": 4.637726307703797, "grad_norm": 0.19642442490557024, "learning_rate": 9.134629891908419e-06, "loss": 0.2369, "num_tokens": 2591933313.0, "step": 3399 }, { "epoch": 4.6390918886624775, "grad_norm": 0.19415599009522339, "learning_rate": 9.129767243838397e-06, "loss": 0.2467, "num_tokens": 2592706622.0, "step": 3400 }, { "epoch": 4.640457469621158, "grad_norm": 0.2097545898528867, "learning_rate": 9.12490516629539e-06, "loss": 0.2501, "num_tokens": 2593506766.0, "step": 3401 }, { "epoch": 4.641823050579839, "grad_norm": 0.22033954052570268, "learning_rate": 9.120043660762618e-06, "loss": 0.2389, "num_tokens": 2594192449.0, "step": 3402 }, { "epoch": 4.643188631538519, "grad_norm": 0.21173903316684015, "learning_rate": 9.115182728723107e-06, "loss": 0.2569, "num_tokens": 2595070814.0, "step": 3403 }, { "epoch": 4.6445542124972, "grad_norm": 0.1992122425682799, "learning_rate": 9.110322371659724e-06, "loss": 0.2402, "num_tokens": 2595865510.0, "step": 3404 }, { "epoch": 4.64591979345588, "grad_norm": 0.2183899045887962, "learning_rate": 9.105462591055153e-06, "loss": 0.2351, "num_tokens": 2596615035.0, "step": 3405 }, { "epoch": 4.647285374414561, "grad_norm": 0.20091140248998032, "learning_rate": 9.100603388391893e-06, "loss": 0.2307, "num_tokens": 2597330167.0, "step": 3406 }, { "epoch": 4.648650955373241, "grad_norm": 0.21109964597808042, "learning_rate": 9.095744765152289e-06, "loss": 0.2405, "num_tokens": 2598059193.0, "step": 3407 }, { "epoch": 4.650016536331922, "grad_norm": 0.21463726523173413, "learning_rate": 9.090886722818487e-06, "loss": 0.2502, "num_tokens": 2598818097.0, "step": 3408 }, { "epoch": 4.651382117290602, "grad_norm": 0.19615362984320264, "learning_rate": 9.086029262872474e-06, "loss": 0.2415, "num_tokens": 2599628169.0, "step": 3409 }, { "epoch": 4.652747698249282, "grad_norm": 0.2087793222332414, "learning_rate": 9.081172386796042e-06, "loss": 0.2322, "num_tokens": 2600280794.0, "step": 3410 }, { "epoch": 4.654113279207963, "grad_norm": 0.20867051152430605, "learning_rate": 9.07631609607082e-06, "loss": 0.2393, "num_tokens": 2601052424.0, "step": 3411 }, { "epoch": 4.655478860166643, "grad_norm": 0.19221732381700896, "learning_rate": 9.071460392178249e-06, "loss": 0.2307, "num_tokens": 2601859565.0, "step": 3412 }, { "epoch": 4.656844441125324, "grad_norm": 0.20093532971234912, "learning_rate": 9.066605276599587e-06, "loss": 0.2345, "num_tokens": 2602585349.0, "step": 3413 }, { "epoch": 4.658210022084004, "grad_norm": 0.21414510786184207, "learning_rate": 9.061750750815937e-06, "loss": 0.2316, "num_tokens": 2603312276.0, "step": 3414 }, { "epoch": 4.659575603042685, "grad_norm": 0.22765466836249135, "learning_rate": 9.05689681630819e-06, "loss": 0.2471, "num_tokens": 2604069856.0, "step": 3415 }, { "epoch": 4.6609411840013655, "grad_norm": 0.20959830526947132, "learning_rate": 9.052043474557075e-06, "loss": 0.2475, "num_tokens": 2604817797.0, "step": 3416 }, { "epoch": 4.6623067649600465, "grad_norm": 0.19917499229406238, "learning_rate": 9.047190727043139e-06, "loss": 0.2397, "num_tokens": 2605619999.0, "step": 3417 }, { "epoch": 4.663672345918727, "grad_norm": 0.20046073976503984, "learning_rate": 9.042338575246743e-06, "loss": 0.2325, "num_tokens": 2606353413.0, "step": 3418 }, { "epoch": 4.665037926877407, "grad_norm": 0.19794054127933924, "learning_rate": 9.037487020648069e-06, "loss": 0.2433, "num_tokens": 2607151533.0, "step": 3419 }, { "epoch": 4.666403507836088, "grad_norm": 0.1915607384046705, "learning_rate": 9.032636064727114e-06, "loss": 0.2365, "num_tokens": 2607906933.0, "step": 3420 }, { "epoch": 4.667769088794768, "grad_norm": 0.20413457413681754, "learning_rate": 9.0277857089637e-06, "loss": 0.2381, "num_tokens": 2608675546.0, "step": 3421 }, { "epoch": 4.669134669753449, "grad_norm": 0.1863915118290176, "learning_rate": 9.02293595483745e-06, "loss": 0.2392, "num_tokens": 2609520908.0, "step": 3422 }, { "epoch": 4.670500250712129, "grad_norm": 0.2032791408809255, "learning_rate": 9.018086803827825e-06, "loss": 0.2361, "num_tokens": 2610236046.0, "step": 3423 }, { "epoch": 4.67186583167081, "grad_norm": 0.20673513227509388, "learning_rate": 9.013238257414088e-06, "loss": 0.244, "num_tokens": 2610994910.0, "step": 3424 }, { "epoch": 4.67323141262949, "grad_norm": 0.19885318088084145, "learning_rate": 9.008390317075318e-06, "loss": 0.2289, "num_tokens": 2611725688.0, "step": 3425 }, { "epoch": 4.674596993588171, "grad_norm": 0.1918076829127812, "learning_rate": 9.003542984290412e-06, "loss": 0.2447, "num_tokens": 2612546821.0, "step": 3426 }, { "epoch": 4.675962574546851, "grad_norm": 0.19894733350992833, "learning_rate": 8.998696260538085e-06, "loss": 0.2467, "num_tokens": 2613366501.0, "step": 3427 }, { "epoch": 4.677328155505531, "grad_norm": 0.19085159155854678, "learning_rate": 8.993850147296858e-06, "loss": 0.2305, "num_tokens": 2614117450.0, "step": 3428 }, { "epoch": 4.678693736464212, "grad_norm": 0.2064451831799567, "learning_rate": 8.989004646045072e-06, "loss": 0.2466, "num_tokens": 2614974965.0, "step": 3429 }, { "epoch": 4.680059317422892, "grad_norm": 0.19476204598330513, "learning_rate": 8.984159758260881e-06, "loss": 0.2483, "num_tokens": 2615787674.0, "step": 3430 }, { "epoch": 4.681424898381573, "grad_norm": 0.20123243475213579, "learning_rate": 8.979315485422246e-06, "loss": 0.2329, "num_tokens": 2616555299.0, "step": 3431 }, { "epoch": 4.6827904793402535, "grad_norm": 0.19874328660448434, "learning_rate": 8.97447182900695e-06, "loss": 0.244, "num_tokens": 2617320667.0, "step": 3432 }, { "epoch": 4.6841560602989345, "grad_norm": 0.20427365755115598, "learning_rate": 8.969628790492581e-06, "loss": 0.2431, "num_tokens": 2618049419.0, "step": 3433 }, { "epoch": 4.685521641257615, "grad_norm": 0.1941240644124144, "learning_rate": 8.964786371356541e-06, "loss": 0.2447, "num_tokens": 2618887091.0, "step": 3434 }, { "epoch": 4.686887222216296, "grad_norm": 0.22380448327526603, "learning_rate": 8.95994457307604e-06, "loss": 0.2354, "num_tokens": 2619647365.0, "step": 3435 }, { "epoch": 4.688252803174976, "grad_norm": 0.1937653605399362, "learning_rate": 8.955103397128104e-06, "loss": 0.2329, "num_tokens": 2620471031.0, "step": 3436 }, { "epoch": 4.689618384133656, "grad_norm": 0.20201791530116348, "learning_rate": 8.950262844989566e-06, "loss": 0.2407, "num_tokens": 2621228066.0, "step": 3437 }, { "epoch": 4.690983965092337, "grad_norm": 0.19820667785130155, "learning_rate": 8.945422918137063e-06, "loss": 0.2455, "num_tokens": 2622012784.0, "step": 3438 }, { "epoch": 4.692349546051017, "grad_norm": 0.21931220453542963, "learning_rate": 8.940583618047054e-06, "loss": 0.2479, "num_tokens": 2622753444.0, "step": 3439 }, { "epoch": 4.693715127009698, "grad_norm": 0.21600074264636046, "learning_rate": 8.935744946195794e-06, "loss": 0.2474, "num_tokens": 2623531027.0, "step": 3440 }, { "epoch": 4.695080707968378, "grad_norm": 0.20162729571272842, "learning_rate": 8.93090690405936e-06, "loss": 0.234, "num_tokens": 2624247871.0, "step": 3441 }, { "epoch": 4.696446288927059, "grad_norm": 0.2155168583520979, "learning_rate": 8.926069493113623e-06, "loss": 0.2414, "num_tokens": 2624938296.0, "step": 3442 }, { "epoch": 4.697811869885739, "grad_norm": 0.1978173184758825, "learning_rate": 8.92123271483427e-06, "loss": 0.238, "num_tokens": 2625731718.0, "step": 3443 }, { "epoch": 4.69917745084442, "grad_norm": 0.21737327421965463, "learning_rate": 8.916396570696792e-06, "loss": 0.2487, "num_tokens": 2626510612.0, "step": 3444 }, { "epoch": 4.7005430318031, "grad_norm": 0.2251280320721177, "learning_rate": 8.911561062176489e-06, "loss": 0.2384, "num_tokens": 2627311777.0, "step": 3445 }, { "epoch": 4.70190861276178, "grad_norm": 0.20631562841274861, "learning_rate": 8.90672619074846e-06, "loss": 0.2361, "num_tokens": 2628058491.0, "step": 3446 }, { "epoch": 4.703274193720461, "grad_norm": 0.218136447843788, "learning_rate": 8.90189195788762e-06, "loss": 0.2384, "num_tokens": 2628888563.0, "step": 3447 }, { "epoch": 4.7046397746791415, "grad_norm": 0.2176447572319751, "learning_rate": 8.897058365068681e-06, "loss": 0.241, "num_tokens": 2629637704.0, "step": 3448 }, { "epoch": 4.7060053556378225, "grad_norm": 0.20738997901789635, "learning_rate": 8.892225413766162e-06, "loss": 0.2368, "num_tokens": 2630385190.0, "step": 3449 }, { "epoch": 4.707370936596503, "grad_norm": 0.20091440852386375, "learning_rate": 8.887393105454392e-06, "loss": 0.2512, "num_tokens": 2631152963.0, "step": 3450 }, { "epoch": 4.708736517555184, "grad_norm": 0.2098186171409711, "learning_rate": 8.882561441607498e-06, "loss": 0.2344, "num_tokens": 2631867937.0, "step": 3451 }, { "epoch": 4.710102098513864, "grad_norm": 0.21236019639692066, "learning_rate": 8.877730423699407e-06, "loss": 0.2347, "num_tokens": 2632602049.0, "step": 3452 }, { "epoch": 4.711467679472545, "grad_norm": 0.19826985284042017, "learning_rate": 8.872900053203858e-06, "loss": 0.2433, "num_tokens": 2633337686.0, "step": 3453 }, { "epoch": 4.712833260431225, "grad_norm": 0.20938059333507097, "learning_rate": 8.868070331594385e-06, "loss": 0.2461, "num_tokens": 2634062899.0, "step": 3454 }, { "epoch": 4.714198841389905, "grad_norm": 0.2079664207094596, "learning_rate": 8.863241260344328e-06, "loss": 0.249, "num_tokens": 2634824158.0, "step": 3455 }, { "epoch": 4.715564422348586, "grad_norm": 0.19787334732117126, "learning_rate": 8.858412840926825e-06, "loss": 0.2444, "num_tokens": 2635593791.0, "step": 3456 }, { "epoch": 4.716930003307266, "grad_norm": 0.21821675059146553, "learning_rate": 8.853585074814825e-06, "loss": 0.2393, "num_tokens": 2636338475.0, "step": 3457 }, { "epoch": 4.718295584265947, "grad_norm": 0.20631237813921496, "learning_rate": 8.848757963481058e-06, "loss": 0.2386, "num_tokens": 2637073299.0, "step": 3458 }, { "epoch": 4.719661165224627, "grad_norm": 0.21133152322323226, "learning_rate": 8.84393150839808e-06, "loss": 0.2451, "num_tokens": 2637795017.0, "step": 3459 }, { "epoch": 4.721026746183308, "grad_norm": 0.2068733099127569, "learning_rate": 8.839105711038226e-06, "loss": 0.2279, "num_tokens": 2638492681.0, "step": 3460 }, { "epoch": 4.722392327141988, "grad_norm": 0.1986085107551285, "learning_rate": 8.834280572873639e-06, "loss": 0.2362, "num_tokens": 2639232869.0, "step": 3461 }, { "epoch": 4.723757908100669, "grad_norm": 0.2132653353142382, "learning_rate": 8.829456095376261e-06, "loss": 0.2308, "num_tokens": 2639977426.0, "step": 3462 }, { "epoch": 4.725123489059349, "grad_norm": 0.2117474932924526, "learning_rate": 8.824632280017828e-06, "loss": 0.2335, "num_tokens": 2640760372.0, "step": 3463 }, { "epoch": 4.7264890700180295, "grad_norm": 0.19181322583481006, "learning_rate": 8.81980912826988e-06, "loss": 0.2455, "num_tokens": 2641560339.0, "step": 3464 }, { "epoch": 4.7278546509767105, "grad_norm": 0.1962853955445266, "learning_rate": 8.81498664160375e-06, "loss": 0.2383, "num_tokens": 2642327895.0, "step": 3465 }, { "epoch": 4.729220231935391, "grad_norm": 0.22798853304647806, "learning_rate": 8.810164821490575e-06, "loss": 0.2477, "num_tokens": 2643067545.0, "step": 3466 }, { "epoch": 4.730585812894072, "grad_norm": 0.19114849585864122, "learning_rate": 8.805343669401276e-06, "loss": 0.2356, "num_tokens": 2643870442.0, "step": 3467 }, { "epoch": 4.731951393852752, "grad_norm": 0.20823156144900565, "learning_rate": 8.80052318680658e-06, "loss": 0.2431, "num_tokens": 2644616278.0, "step": 3468 }, { "epoch": 4.733316974811433, "grad_norm": 0.20139828798289083, "learning_rate": 8.795703375177009e-06, "loss": 0.2388, "num_tokens": 2645417567.0, "step": 3469 }, { "epoch": 4.734682555770113, "grad_norm": 0.21371512111461002, "learning_rate": 8.790884235982878e-06, "loss": 0.2302, "num_tokens": 2646177617.0, "step": 3470 }, { "epoch": 4.736048136728794, "grad_norm": 0.20167306565140672, "learning_rate": 8.7860657706943e-06, "loss": 0.2425, "num_tokens": 2646940155.0, "step": 3471 }, { "epoch": 4.737413717687474, "grad_norm": 0.20455965938513435, "learning_rate": 8.781247980781176e-06, "loss": 0.245, "num_tokens": 2647764375.0, "step": 3472 }, { "epoch": 4.738779298646154, "grad_norm": 0.19280606796453045, "learning_rate": 8.776430867713207e-06, "loss": 0.2292, "num_tokens": 2648516705.0, "step": 3473 }, { "epoch": 4.740144879604835, "grad_norm": 0.20303943832322682, "learning_rate": 8.771614432959886e-06, "loss": 0.2388, "num_tokens": 2649260302.0, "step": 3474 }, { "epoch": 4.741510460563515, "grad_norm": 0.19880908136277226, "learning_rate": 8.766798677990502e-06, "loss": 0.2337, "num_tokens": 2650048652.0, "step": 3475 }, { "epoch": 4.742876041522196, "grad_norm": 0.20671883239911407, "learning_rate": 8.761983604274126e-06, "loss": 0.2425, "num_tokens": 2650786394.0, "step": 3476 }, { "epoch": 4.744241622480876, "grad_norm": 0.2043984311605638, "learning_rate": 8.75716921327963e-06, "loss": 0.2526, "num_tokens": 2651584884.0, "step": 3477 }, { "epoch": 4.745607203439557, "grad_norm": 0.20216514799825924, "learning_rate": 8.752355506475683e-06, "loss": 0.2381, "num_tokens": 2652368070.0, "step": 3478 }, { "epoch": 4.746972784398237, "grad_norm": 0.25437274362499285, "learning_rate": 8.747542485330731e-06, "loss": 0.2393, "num_tokens": 2653197918.0, "step": 3479 }, { "epoch": 4.7483383653569184, "grad_norm": 0.20064138211692928, "learning_rate": 8.742730151313021e-06, "loss": 0.243, "num_tokens": 2654018814.0, "step": 3480 }, { "epoch": 4.749703946315599, "grad_norm": 0.20461619489911198, "learning_rate": 8.737918505890588e-06, "loss": 0.2321, "num_tokens": 2654760579.0, "step": 3481 }, { "epoch": 4.751069527274279, "grad_norm": 0.21920725106126696, "learning_rate": 8.733107550531256e-06, "loss": 0.2397, "num_tokens": 2655530938.0, "step": 3482 }, { "epoch": 4.75243510823296, "grad_norm": 0.20517291122721565, "learning_rate": 8.728297286702642e-06, "loss": 0.2482, "num_tokens": 2656333173.0, "step": 3483 }, { "epoch": 4.75380068919164, "grad_norm": 0.1968468560053242, "learning_rate": 8.723487715872143e-06, "loss": 0.2437, "num_tokens": 2657124467.0, "step": 3484 }, { "epoch": 4.755166270150321, "grad_norm": 0.21128441083480096, "learning_rate": 8.71867883950696e-06, "loss": 0.2333, "num_tokens": 2657828303.0, "step": 3485 }, { "epoch": 4.756531851109001, "grad_norm": 0.20050348497069168, "learning_rate": 8.713870659074062e-06, "loss": 0.2404, "num_tokens": 2658563237.0, "step": 3486 }, { "epoch": 4.757897432067682, "grad_norm": 0.20120414035053463, "learning_rate": 8.709063176040224e-06, "loss": 0.2265, "num_tokens": 2659327453.0, "step": 3487 }, { "epoch": 4.759263013026362, "grad_norm": 0.20862976932418367, "learning_rate": 8.704256391871999e-06, "loss": 0.2286, "num_tokens": 2660005865.0, "step": 3488 }, { "epoch": 4.760628593985043, "grad_norm": 0.3231363466815293, "learning_rate": 8.699450308035729e-06, "loss": 0.2427, "num_tokens": 2660789628.0, "step": 3489 }, { "epoch": 4.761994174943723, "grad_norm": 0.21728893392592669, "learning_rate": 8.694644925997541e-06, "loss": 0.2344, "num_tokens": 2661476657.0, "step": 3490 }, { "epoch": 4.763359755902403, "grad_norm": 0.20313297060154203, "learning_rate": 8.689840247223351e-06, "loss": 0.2395, "num_tokens": 2662264009.0, "step": 3491 }, { "epoch": 4.764725336861084, "grad_norm": 0.21048672623855538, "learning_rate": 8.685036273178857e-06, "loss": 0.2488, "num_tokens": 2663131683.0, "step": 3492 }, { "epoch": 4.766090917819764, "grad_norm": 0.20000869715531253, "learning_rate": 8.680233005329542e-06, "loss": 0.2431, "num_tokens": 2664015817.0, "step": 3493 }, { "epoch": 4.767456498778445, "grad_norm": 0.20033951619164836, "learning_rate": 8.675430445140678e-06, "loss": 0.2545, "num_tokens": 2664863719.0, "step": 3494 }, { "epoch": 4.7688220797371255, "grad_norm": 0.9254328335471963, "learning_rate": 8.670628594077313e-06, "loss": 0.2428, "num_tokens": 2665668907.0, "step": 3495 }, { "epoch": 4.7701876606958065, "grad_norm": 0.22107240968437783, "learning_rate": 8.665827453604292e-06, "loss": 0.2409, "num_tokens": 2666494912.0, "step": 3496 }, { "epoch": 4.771553241654487, "grad_norm": 0.19736163664672604, "learning_rate": 8.661027025186228e-06, "loss": 0.2433, "num_tokens": 2667311062.0, "step": 3497 }, { "epoch": 4.772918822613168, "grad_norm": 0.20382838603919087, "learning_rate": 8.656227310287527e-06, "loss": 0.2298, "num_tokens": 2667997722.0, "step": 3498 }, { "epoch": 4.774284403571848, "grad_norm": 0.20056292305269374, "learning_rate": 8.651428310372374e-06, "loss": 0.2364, "num_tokens": 2668764375.0, "step": 3499 }, { "epoch": 4.775649984530528, "grad_norm": 0.21059994852897473, "learning_rate": 8.646630026904736e-06, "loss": 0.241, "num_tokens": 2669486381.0, "step": 3500 }, { "epoch": 4.777015565489209, "grad_norm": 0.21137167137458931, "learning_rate": 8.64183246134836e-06, "loss": 0.2445, "num_tokens": 2670286373.0, "step": 3501 }, { "epoch": 4.778381146447889, "grad_norm": 0.19737520379374746, "learning_rate": 8.637035615166773e-06, "loss": 0.2424, "num_tokens": 2671045700.0, "step": 3502 }, { "epoch": 4.77974672740657, "grad_norm": 0.22951256286280264, "learning_rate": 8.632239489823295e-06, "loss": 0.2344, "num_tokens": 2671735477.0, "step": 3503 }, { "epoch": 4.78111230836525, "grad_norm": 0.20014640779101314, "learning_rate": 8.627444086781006e-06, "loss": 0.24, "num_tokens": 2672527227.0, "step": 3504 }, { "epoch": 4.782477889323931, "grad_norm": 0.20229721560225042, "learning_rate": 8.62264940750278e-06, "loss": 0.249, "num_tokens": 2673358011.0, "step": 3505 }, { "epoch": 4.783843470282611, "grad_norm": 0.2007001466116834, "learning_rate": 8.617855453451266e-06, "loss": 0.2453, "num_tokens": 2674107022.0, "step": 3506 }, { "epoch": 4.785209051241292, "grad_norm": 0.21770157105788737, "learning_rate": 8.61306222608889e-06, "loss": 0.2435, "num_tokens": 2674869998.0, "step": 3507 }, { "epoch": 4.786574632199972, "grad_norm": 0.21458519033443413, "learning_rate": 8.608269726877863e-06, "loss": 0.2417, "num_tokens": 2675624791.0, "step": 3508 }, { "epoch": 4.787940213158652, "grad_norm": 0.19794176612638098, "learning_rate": 8.603477957280162e-06, "loss": 0.2407, "num_tokens": 2676405504.0, "step": 3509 }, { "epoch": 4.789305794117333, "grad_norm": 0.19773039485223795, "learning_rate": 8.598686918757552e-06, "loss": 0.241, "num_tokens": 2677178275.0, "step": 3510 }, { "epoch": 4.7906713750760135, "grad_norm": 0.2094566051703006, "learning_rate": 8.593896612771569e-06, "loss": 0.2407, "num_tokens": 2677910825.0, "step": 3511 }, { "epoch": 4.7920369560346945, "grad_norm": 0.1984124236256028, "learning_rate": 8.589107040783533e-06, "loss": 0.2364, "num_tokens": 2678684964.0, "step": 3512 }, { "epoch": 4.793402536993375, "grad_norm": 0.1944116653235451, "learning_rate": 8.584318204254532e-06, "loss": 0.2272, "num_tokens": 2679461280.0, "step": 3513 }, { "epoch": 4.794768117952056, "grad_norm": 0.20563776076336865, "learning_rate": 8.57953010464543e-06, "loss": 0.2375, "num_tokens": 2680276281.0, "step": 3514 }, { "epoch": 4.796133698910736, "grad_norm": 0.1924070099939996, "learning_rate": 8.574742743416875e-06, "loss": 0.2359, "num_tokens": 2681129655.0, "step": 3515 }, { "epoch": 4.797499279869417, "grad_norm": 0.20258910851669598, "learning_rate": 8.569956122029278e-06, "loss": 0.2459, "num_tokens": 2681932261.0, "step": 3516 }, { "epoch": 4.798864860828097, "grad_norm": 0.20474501298699052, "learning_rate": 8.565170241942832e-06, "loss": 0.2443, "num_tokens": 2682698141.0, "step": 3517 }, { "epoch": 4.800230441786777, "grad_norm": 0.20804501001678166, "learning_rate": 8.560385104617501e-06, "loss": 0.2381, "num_tokens": 2683447516.0, "step": 3518 }, { "epoch": 4.801596022745458, "grad_norm": 0.1896594868273113, "learning_rate": 8.555600711513027e-06, "loss": 0.2368, "num_tokens": 2684190675.0, "step": 3519 }, { "epoch": 4.802961603704138, "grad_norm": 0.1943357116919205, "learning_rate": 8.550817064088913e-06, "loss": 0.2436, "num_tokens": 2684943531.0, "step": 3520 }, { "epoch": 4.804327184662819, "grad_norm": 0.21083486838821808, "learning_rate": 8.546034163804451e-06, "loss": 0.2358, "num_tokens": 2685659176.0, "step": 3521 }, { "epoch": 4.805692765621499, "grad_norm": 0.19338824713139363, "learning_rate": 8.541252012118695e-06, "loss": 0.2366, "num_tokens": 2686413442.0, "step": 3522 }, { "epoch": 4.80705834658018, "grad_norm": 0.21829309274680628, "learning_rate": 8.536470610490465e-06, "loss": 0.2562, "num_tokens": 2687150164.0, "step": 3523 }, { "epoch": 4.80842392753886, "grad_norm": 0.20185850505335706, "learning_rate": 8.531689960378371e-06, "loss": 0.2419, "num_tokens": 2687866521.0, "step": 3524 }, { "epoch": 4.809789508497541, "grad_norm": 0.19894912460188235, "learning_rate": 8.526910063240773e-06, "loss": 0.2354, "num_tokens": 2688701906.0, "step": 3525 }, { "epoch": 4.811155089456221, "grad_norm": 0.20785615628428386, "learning_rate": 8.522130920535817e-06, "loss": 0.2429, "num_tokens": 2689404506.0, "step": 3526 }, { "epoch": 4.8125206704149015, "grad_norm": 0.19360111389745663, "learning_rate": 8.517352533721407e-06, "loss": 0.2335, "num_tokens": 2690207394.0, "step": 3527 }, { "epoch": 4.8138862513735825, "grad_norm": 0.20959764231373693, "learning_rate": 8.512574904255228e-06, "loss": 0.2416, "num_tokens": 2690948274.0, "step": 3528 }, { "epoch": 4.815251832332263, "grad_norm": 0.192612350058698, "learning_rate": 8.507798033594722e-06, "loss": 0.2379, "num_tokens": 2691757063.0, "step": 3529 }, { "epoch": 4.816617413290944, "grad_norm": 0.21385397308351053, "learning_rate": 8.503021923197109e-06, "loss": 0.2374, "num_tokens": 2692546647.0, "step": 3530 }, { "epoch": 4.817982994249624, "grad_norm": 0.1813102533553526, "learning_rate": 8.498246574519373e-06, "loss": 0.2281, "num_tokens": 2693296491.0, "step": 3531 }, { "epoch": 4.819348575208305, "grad_norm": 0.21140851803142244, "learning_rate": 8.493471989018261e-06, "loss": 0.2508, "num_tokens": 2694076947.0, "step": 3532 }, { "epoch": 4.820714156166985, "grad_norm": 0.2142010496657102, "learning_rate": 8.488698168150302e-06, "loss": 0.2539, "num_tokens": 2694795910.0, "step": 3533 }, { "epoch": 4.822079737125666, "grad_norm": 0.19629091281595965, "learning_rate": 8.483925113371772e-06, "loss": 0.2498, "num_tokens": 2695595776.0, "step": 3534 }, { "epoch": 4.823445318084346, "grad_norm": 0.1881607352649016, "learning_rate": 8.479152826138732e-06, "loss": 0.2381, "num_tokens": 2696356047.0, "step": 3535 }, { "epoch": 4.824810899043026, "grad_norm": 0.19693070624729248, "learning_rate": 8.474381307906994e-06, "loss": 0.2301, "num_tokens": 2697091620.0, "step": 3536 }, { "epoch": 4.826176480001707, "grad_norm": 0.8502824034458002, "learning_rate": 8.469610560132144e-06, "loss": 0.2314, "num_tokens": 2697788249.0, "step": 3537 }, { "epoch": 4.827542060960387, "grad_norm": 0.199977166336443, "learning_rate": 8.464840584269534e-06, "loss": 0.2432, "num_tokens": 2698629778.0, "step": 3538 }, { "epoch": 4.828907641919068, "grad_norm": 0.19464268395374384, "learning_rate": 8.460071381774266e-06, "loss": 0.2335, "num_tokens": 2699357028.0, "step": 3539 }, { "epoch": 4.830273222877748, "grad_norm": 0.20397808092604017, "learning_rate": 8.45530295410123e-06, "loss": 0.2529, "num_tokens": 2700151783.0, "step": 3540 }, { "epoch": 4.831638803836429, "grad_norm": 0.21083679893768728, "learning_rate": 8.450535302705058e-06, "loss": 0.2423, "num_tokens": 2700880652.0, "step": 3541 }, { "epoch": 4.833004384795109, "grad_norm": 0.20809018411875366, "learning_rate": 8.44576842904016e-06, "loss": 0.2434, "num_tokens": 2701613737.0, "step": 3542 }, { "epoch": 4.83436996575379, "grad_norm": 0.18860276815919091, "learning_rate": 8.441002334560697e-06, "loss": 0.2299, "num_tokens": 2702398696.0, "step": 3543 }, { "epoch": 4.8357355467124705, "grad_norm": 0.1953065425890423, "learning_rate": 8.436237020720606e-06, "loss": 0.2462, "num_tokens": 2703244713.0, "step": 3544 }, { "epoch": 4.837101127671151, "grad_norm": 0.20072072385286305, "learning_rate": 8.43147248897357e-06, "loss": 0.2272, "num_tokens": 2703977497.0, "step": 3545 }, { "epoch": 4.838466708629832, "grad_norm": 0.19756211446466596, "learning_rate": 8.426708740773042e-06, "loss": 0.2582, "num_tokens": 2704826854.0, "step": 3546 }, { "epoch": 4.839832289588512, "grad_norm": 0.21052234791682978, "learning_rate": 8.42194577757224e-06, "loss": 0.2455, "num_tokens": 2705615619.0, "step": 3547 }, { "epoch": 4.841197870547193, "grad_norm": 0.1964855047985355, "learning_rate": 8.417183600824132e-06, "loss": 0.245, "num_tokens": 2706392137.0, "step": 3548 }, { "epoch": 4.842563451505873, "grad_norm": 0.21128157463535954, "learning_rate": 8.41242221198146e-06, "loss": 0.2505, "num_tokens": 2707169899.0, "step": 3549 }, { "epoch": 4.843929032464554, "grad_norm": 0.21304770815320903, "learning_rate": 8.40766161249671e-06, "loss": 0.2337, "num_tokens": 2707889627.0, "step": 3550 }, { "epoch": 4.845294613423234, "grad_norm": 0.20810300247801905, "learning_rate": 8.402901803822137e-06, "loss": 0.2487, "num_tokens": 2708638035.0, "step": 3551 }, { "epoch": 4.846660194381915, "grad_norm": 0.20006911548321932, "learning_rate": 8.398142787409753e-06, "loss": 0.2366, "num_tokens": 2709405308.0, "step": 3552 }, { "epoch": 4.848025775340595, "grad_norm": 0.2045834627363637, "learning_rate": 8.39338456471133e-06, "loss": 0.2425, "num_tokens": 2710251963.0, "step": 3553 }, { "epoch": 4.849391356299275, "grad_norm": 0.20023266236681284, "learning_rate": 8.38862713717839e-06, "loss": 0.2539, "num_tokens": 2711018406.0, "step": 3554 }, { "epoch": 4.850756937257956, "grad_norm": 0.19065493841653242, "learning_rate": 8.383870506262222e-06, "loss": 0.2266, "num_tokens": 2711759197.0, "step": 3555 }, { "epoch": 4.852122518216636, "grad_norm": 0.22519587698657928, "learning_rate": 8.37911467341387e-06, "loss": 0.2552, "num_tokens": 2712655373.0, "step": 3556 }, { "epoch": 4.853488099175317, "grad_norm": 0.19900917809307814, "learning_rate": 8.374359640084126e-06, "loss": 0.2369, "num_tokens": 2713446651.0, "step": 3557 }, { "epoch": 4.854853680133997, "grad_norm": 0.20971205955338743, "learning_rate": 8.369605407723553e-06, "loss": 0.2471, "num_tokens": 2714220570.0, "step": 3558 }, { "epoch": 4.856219261092678, "grad_norm": 0.19241310772989498, "learning_rate": 8.364851977782455e-06, "loss": 0.2353, "num_tokens": 2714983794.0, "step": 3559 }, { "epoch": 4.8575848420513585, "grad_norm": 0.19908797912971923, "learning_rate": 8.360099351710904e-06, "loss": 0.2281, "num_tokens": 2715720729.0, "step": 3560 }, { "epoch": 4.8589504230100395, "grad_norm": 0.20084692782198404, "learning_rate": 8.355347530958715e-06, "loss": 0.2395, "num_tokens": 2716502020.0, "step": 3561 }, { "epoch": 4.86031600396872, "grad_norm": 0.2546755946201292, "learning_rate": 8.350596516975462e-06, "loss": 0.2354, "num_tokens": 2717196436.0, "step": 3562 }, { "epoch": 4.8616815849274, "grad_norm": 0.19918458987010348, "learning_rate": 8.345846311210477e-06, "loss": 0.2431, "num_tokens": 2717925040.0, "step": 3563 }, { "epoch": 4.863047165886081, "grad_norm": 0.2062546531429062, "learning_rate": 8.341096915112839e-06, "loss": 0.2301, "num_tokens": 2718688108.0, "step": 3564 }, { "epoch": 4.864412746844761, "grad_norm": 0.18615863063160107, "learning_rate": 8.336348330131387e-06, "loss": 0.2521, "num_tokens": 2719534484.0, "step": 3565 }, { "epoch": 4.865778327803442, "grad_norm": 0.19418140031904302, "learning_rate": 8.331600557714705e-06, "loss": 0.238, "num_tokens": 2720348891.0, "step": 3566 }, { "epoch": 4.867143908762122, "grad_norm": 0.20398233599754886, "learning_rate": 8.326853599311136e-06, "loss": 0.2281, "num_tokens": 2721079414.0, "step": 3567 }, { "epoch": 4.868509489720803, "grad_norm": 0.19131927059195475, "learning_rate": 8.32210745636877e-06, "loss": 0.2419, "num_tokens": 2721833865.0, "step": 3568 }, { "epoch": 4.869875070679483, "grad_norm": 0.223694240397884, "learning_rate": 8.317362130335447e-06, "loss": 0.2451, "num_tokens": 2722596641.0, "step": 3569 }, { "epoch": 4.871240651638164, "grad_norm": 0.20246446775712576, "learning_rate": 8.312617622658765e-06, "loss": 0.2484, "num_tokens": 2723309796.0, "step": 3570 }, { "epoch": 4.872606232596844, "grad_norm": 0.21352076980869142, "learning_rate": 8.307873934786064e-06, "loss": 0.2374, "num_tokens": 2724062240.0, "step": 3571 }, { "epoch": 4.873971813555524, "grad_norm": 0.20839495858598703, "learning_rate": 8.303131068164441e-06, "loss": 0.2391, "num_tokens": 2724784911.0, "step": 3572 }, { "epoch": 4.875337394514205, "grad_norm": 0.19283403743920724, "learning_rate": 8.298389024240734e-06, "loss": 0.2463, "num_tokens": 2725660346.0, "step": 3573 }, { "epoch": 4.8767029754728854, "grad_norm": 0.2188975380251881, "learning_rate": 8.293647804461544e-06, "loss": 0.2393, "num_tokens": 2726467827.0, "step": 3574 }, { "epoch": 4.8780685564315664, "grad_norm": 0.1930371054457472, "learning_rate": 8.288907410273204e-06, "loss": 0.2524, "num_tokens": 2727285599.0, "step": 3575 }, { "epoch": 4.879434137390247, "grad_norm": 0.20007964542876167, "learning_rate": 8.284167843121804e-06, "loss": 0.2398, "num_tokens": 2728032046.0, "step": 3576 }, { "epoch": 4.880799718348928, "grad_norm": 0.21066604275697146, "learning_rate": 8.279429104453188e-06, "loss": 0.2463, "num_tokens": 2728796451.0, "step": 3577 }, { "epoch": 4.882165299307608, "grad_norm": 0.19351901068068902, "learning_rate": 8.27469119571293e-06, "loss": 0.2314, "num_tokens": 2729582635.0, "step": 3578 }, { "epoch": 4.883530880266289, "grad_norm": 0.19368512619485292, "learning_rate": 8.269954118346366e-06, "loss": 0.2385, "num_tokens": 2730321865.0, "step": 3579 }, { "epoch": 4.884896461224969, "grad_norm": 0.19584588414226956, "learning_rate": 8.265217873798572e-06, "loss": 0.2282, "num_tokens": 2731093657.0, "step": 3580 }, { "epoch": 4.886262042183649, "grad_norm": 0.1965614017612995, "learning_rate": 8.260482463514371e-06, "loss": 0.2529, "num_tokens": 2731884885.0, "step": 3581 }, { "epoch": 4.88762762314233, "grad_norm": 0.1992329216673602, "learning_rate": 8.25574788893833e-06, "loss": 0.2369, "num_tokens": 2732693043.0, "step": 3582 }, { "epoch": 4.88899320410101, "grad_norm": 0.20053416948486844, "learning_rate": 8.251014151514771e-06, "loss": 0.239, "num_tokens": 2733435647.0, "step": 3583 }, { "epoch": 4.890358785059691, "grad_norm": 0.209623049122877, "learning_rate": 8.246281252687742e-06, "loss": 0.2509, "num_tokens": 2734171505.0, "step": 3584 }, { "epoch": 4.891724366018371, "grad_norm": 0.21257120346203087, "learning_rate": 8.241549193901049e-06, "loss": 0.2469, "num_tokens": 2734883730.0, "step": 3585 }, { "epoch": 4.893089946977052, "grad_norm": 0.21156674517408608, "learning_rate": 8.23681797659824e-06, "loss": 0.2386, "num_tokens": 2735634590.0, "step": 3586 }, { "epoch": 4.894455527935732, "grad_norm": 0.20416121945755328, "learning_rate": 8.2320876022226e-06, "loss": 0.2394, "num_tokens": 2736399928.0, "step": 3587 }, { "epoch": 4.895821108894413, "grad_norm": 0.19582114535550088, "learning_rate": 8.227358072217167e-06, "loss": 0.2439, "num_tokens": 2737162876.0, "step": 3588 }, { "epoch": 4.897186689853093, "grad_norm": 0.2062678687483563, "learning_rate": 8.222629388024711e-06, "loss": 0.2453, "num_tokens": 2737920368.0, "step": 3589 }, { "epoch": 4.8985522708117735, "grad_norm": 0.2102704331784953, "learning_rate": 8.217901551087754e-06, "loss": 0.2458, "num_tokens": 2738693557.0, "step": 3590 }, { "epoch": 4.8999178517704545, "grad_norm": 0.23030925621433862, "learning_rate": 8.213174562848549e-06, "loss": 0.2491, "num_tokens": 2739477387.0, "step": 3591 }, { "epoch": 4.901283432729135, "grad_norm": 0.20493383589286573, "learning_rate": 8.208448424749102e-06, "loss": 0.2503, "num_tokens": 2740316512.0, "step": 3592 }, { "epoch": 4.902649013687816, "grad_norm": 0.21124112920339275, "learning_rate": 8.203723138231146e-06, "loss": 0.236, "num_tokens": 2741100699.0, "step": 3593 }, { "epoch": 4.904014594646496, "grad_norm": 0.22236274232466346, "learning_rate": 8.198998704736162e-06, "loss": 0.2466, "num_tokens": 2741911977.0, "step": 3594 }, { "epoch": 4.905380175605177, "grad_norm": 0.1981689406096567, "learning_rate": 8.194275125705375e-06, "loss": 0.2301, "num_tokens": 2742631056.0, "step": 3595 }, { "epoch": 4.906745756563857, "grad_norm": 0.2076734004687157, "learning_rate": 8.18955240257974e-06, "loss": 0.2457, "num_tokens": 2743348445.0, "step": 3596 }, { "epoch": 4.908111337522538, "grad_norm": 0.2026126275998498, "learning_rate": 8.184830536799956e-06, "loss": 0.2446, "num_tokens": 2744105719.0, "step": 3597 }, { "epoch": 4.909476918481218, "grad_norm": 0.20359418709848537, "learning_rate": 8.180109529806462e-06, "loss": 0.24, "num_tokens": 2744835988.0, "step": 3598 }, { "epoch": 4.910842499439898, "grad_norm": 0.20000584641532482, "learning_rate": 8.175389383039433e-06, "loss": 0.2319, "num_tokens": 2745620653.0, "step": 3599 }, { "epoch": 4.912208080398579, "grad_norm": 0.21431804006258237, "learning_rate": 8.170670097938778e-06, "loss": 0.2407, "num_tokens": 2746332298.0, "step": 3600 }, { "epoch": 4.913573661357259, "grad_norm": 0.20882561410743025, "learning_rate": 8.165951675944147e-06, "loss": 0.2369, "num_tokens": 2747033935.0, "step": 3601 }, { "epoch": 4.91493924231594, "grad_norm": 0.19252495256970326, "learning_rate": 8.161234118494933e-06, "loss": 0.2278, "num_tokens": 2747729987.0, "step": 3602 }, { "epoch": 4.91630482327462, "grad_norm": 0.2130427304366715, "learning_rate": 8.156517427030245e-06, "loss": 0.2475, "num_tokens": 2748484715.0, "step": 3603 }, { "epoch": 4.917670404233301, "grad_norm": 0.2087640718858639, "learning_rate": 8.151801602988953e-06, "loss": 0.2513, "num_tokens": 2749251234.0, "step": 3604 }, { "epoch": 4.919035985191981, "grad_norm": 0.1975859531251446, "learning_rate": 8.147086647809648e-06, "loss": 0.2441, "num_tokens": 2749986031.0, "step": 3605 }, { "epoch": 4.920401566150662, "grad_norm": 0.21768128006095877, "learning_rate": 8.142372562930657e-06, "loss": 0.2376, "num_tokens": 2750693276.0, "step": 3606 }, { "epoch": 4.9217671471093425, "grad_norm": 0.2373770240738591, "learning_rate": 8.137659349790041e-06, "loss": 0.2491, "num_tokens": 2751419063.0, "step": 3607 }, { "epoch": 4.923132728068023, "grad_norm": 0.19890204640876294, "learning_rate": 8.132947009825602e-06, "loss": 0.2417, "num_tokens": 2752235292.0, "step": 3608 }, { "epoch": 4.924498309026704, "grad_norm": 0.1972316863217128, "learning_rate": 8.128235544474871e-06, "loss": 0.2356, "num_tokens": 2752981819.0, "step": 3609 }, { "epoch": 4.925863889985384, "grad_norm": 0.19256449218996477, "learning_rate": 8.123524955175106e-06, "loss": 0.2423, "num_tokens": 2753737463.0, "step": 3610 }, { "epoch": 4.927229470944065, "grad_norm": 0.20991514573889616, "learning_rate": 8.118815243363312e-06, "loss": 0.2386, "num_tokens": 2754539426.0, "step": 3611 }, { "epoch": 4.928595051902745, "grad_norm": 0.19130649572684622, "learning_rate": 8.114106410476207e-06, "loss": 0.237, "num_tokens": 2755325882.0, "step": 3612 }, { "epoch": 4.929960632861426, "grad_norm": 0.20109560292562279, "learning_rate": 8.109398457950266e-06, "loss": 0.2352, "num_tokens": 2756043334.0, "step": 3613 }, { "epoch": 4.931326213820106, "grad_norm": 0.20361907892763292, "learning_rate": 8.10469138722167e-06, "loss": 0.2361, "num_tokens": 2756755073.0, "step": 3614 }, { "epoch": 4.932691794778787, "grad_norm": 0.1976542993519808, "learning_rate": 8.099985199726348e-06, "loss": 0.2351, "num_tokens": 2757492095.0, "step": 3615 }, { "epoch": 4.934057375737467, "grad_norm": 0.20369470167830078, "learning_rate": 8.095279896899953e-06, "loss": 0.2394, "num_tokens": 2758317606.0, "step": 3616 }, { "epoch": 4.935422956696147, "grad_norm": 0.21028162628745248, "learning_rate": 8.090575480177867e-06, "loss": 0.2364, "num_tokens": 2759043312.0, "step": 3617 }, { "epoch": 4.936788537654828, "grad_norm": 0.18853819557674492, "learning_rate": 8.085871950995207e-06, "loss": 0.2507, "num_tokens": 2759838458.0, "step": 3618 }, { "epoch": 4.938154118613508, "grad_norm": 0.2075366540724957, "learning_rate": 8.081169310786812e-06, "loss": 0.2309, "num_tokens": 2760573341.0, "step": 3619 }, { "epoch": 4.939519699572189, "grad_norm": 0.19610952032696138, "learning_rate": 8.076467560987262e-06, "loss": 0.2379, "num_tokens": 2761411063.0, "step": 3620 }, { "epoch": 4.940885280530869, "grad_norm": 0.21686098564168244, "learning_rate": 8.071766703030846e-06, "loss": 0.2434, "num_tokens": 2762103709.0, "step": 3621 }, { "epoch": 4.94225086148955, "grad_norm": 0.19040258393560872, "learning_rate": 8.067066738351601e-06, "loss": 0.2429, "num_tokens": 2762953505.0, "step": 3622 }, { "epoch": 4.9436164424482305, "grad_norm": 0.20654010294140168, "learning_rate": 8.062367668383279e-06, "loss": 0.2288, "num_tokens": 2763703710.0, "step": 3623 }, { "epoch": 4.9449820234069115, "grad_norm": 0.20938723023885195, "learning_rate": 8.05766949455936e-06, "loss": 0.2568, "num_tokens": 2764462657.0, "step": 3624 }, { "epoch": 4.946347604365592, "grad_norm": 0.19743299926643723, "learning_rate": 8.052972218313061e-06, "loss": 0.2363, "num_tokens": 2765188444.0, "step": 3625 }, { "epoch": 4.947713185324272, "grad_norm": 0.20091260814849643, "learning_rate": 8.048275841077313e-06, "loss": 0.2438, "num_tokens": 2765965507.0, "step": 3626 }, { "epoch": 4.949078766282953, "grad_norm": 0.20382566711581449, "learning_rate": 8.043580364284778e-06, "loss": 0.252, "num_tokens": 2766842067.0, "step": 3627 }, { "epoch": 4.950444347241633, "grad_norm": 0.19639852185672865, "learning_rate": 8.038885789367838e-06, "loss": 0.232, "num_tokens": 2767582428.0, "step": 3628 }, { "epoch": 4.951809928200314, "grad_norm": 0.20249794311792543, "learning_rate": 8.034192117758613e-06, "loss": 0.2463, "num_tokens": 2768317341.0, "step": 3629 }, { "epoch": 4.953175509158994, "grad_norm": 0.21634909101175917, "learning_rate": 8.029499350888932e-06, "loss": 0.2357, "num_tokens": 2769082895.0, "step": 3630 }, { "epoch": 4.954541090117675, "grad_norm": 0.1883339979845605, "learning_rate": 8.024807490190361e-06, "loss": 0.2381, "num_tokens": 2769872620.0, "step": 3631 }, { "epoch": 4.955906671076355, "grad_norm": 0.2066586367517298, "learning_rate": 8.020116537094178e-06, "loss": 0.2493, "num_tokens": 2770615475.0, "step": 3632 }, { "epoch": 4.957272252035036, "grad_norm": 0.21854021752679548, "learning_rate": 8.015426493031391e-06, "loss": 0.2493, "num_tokens": 2771407134.0, "step": 3633 }, { "epoch": 4.958637832993716, "grad_norm": 0.18771337869133536, "learning_rate": 8.010737359432731e-06, "loss": 0.229, "num_tokens": 2772155870.0, "step": 3634 }, { "epoch": 4.960003413952396, "grad_norm": 0.19958105260276332, "learning_rate": 8.006049137728648e-06, "loss": 0.2396, "num_tokens": 2772910956.0, "step": 3635 }, { "epoch": 4.961368994911077, "grad_norm": 0.19672794892942547, "learning_rate": 8.00136182934932e-06, "loss": 0.2357, "num_tokens": 2773711165.0, "step": 3636 }, { "epoch": 4.962734575869757, "grad_norm": 0.20135473420140793, "learning_rate": 7.99667543572463e-06, "loss": 0.2352, "num_tokens": 2774518130.0, "step": 3637 }, { "epoch": 4.964100156828438, "grad_norm": 0.20136316220590142, "learning_rate": 7.991989958284205e-06, "loss": 0.2382, "num_tokens": 2775271792.0, "step": 3638 }, { "epoch": 4.9654657377871185, "grad_norm": 0.20712621670129858, "learning_rate": 7.987305398457383e-06, "loss": 0.242, "num_tokens": 2776003204.0, "step": 3639 }, { "epoch": 4.9668313187457995, "grad_norm": 0.2513608497472904, "learning_rate": 7.982621757673209e-06, "loss": 0.2258, "num_tokens": 2776781439.0, "step": 3640 }, { "epoch": 4.96819689970448, "grad_norm": 0.1957552292277791, "learning_rate": 7.977939037360469e-06, "loss": 0.2407, "num_tokens": 2777534767.0, "step": 3641 }, { "epoch": 4.969562480663161, "grad_norm": 0.19853809981331919, "learning_rate": 7.973257238947652e-06, "loss": 0.2396, "num_tokens": 2778328549.0, "step": 3642 }, { "epoch": 4.970928061621841, "grad_norm": 0.19211875610601312, "learning_rate": 7.968576363862977e-06, "loss": 0.2312, "num_tokens": 2779012715.0, "step": 3643 }, { "epoch": 4.972293642580521, "grad_norm": 0.20333173147422934, "learning_rate": 7.963896413534369e-06, "loss": 0.2324, "num_tokens": 2779781463.0, "step": 3644 }, { "epoch": 4.973659223539202, "grad_norm": 0.19915330485137925, "learning_rate": 7.959217389389487e-06, "loss": 0.232, "num_tokens": 2780532354.0, "step": 3645 }, { "epoch": 4.975024804497882, "grad_norm": 0.19389389625179473, "learning_rate": 7.95453929285569e-06, "loss": 0.2266, "num_tokens": 2781274866.0, "step": 3646 }, { "epoch": 4.976390385456563, "grad_norm": 0.19758436608221389, "learning_rate": 7.94986212536007e-06, "loss": 0.241, "num_tokens": 2782104841.0, "step": 3647 }, { "epoch": 4.977755966415243, "grad_norm": 0.19776832512293066, "learning_rate": 7.945185888329429e-06, "loss": 0.243, "num_tokens": 2782912449.0, "step": 3648 }, { "epoch": 4.979121547373924, "grad_norm": 0.20407772296522966, "learning_rate": 7.940510583190274e-06, "loss": 0.2456, "num_tokens": 2783653345.0, "step": 3649 }, { "epoch": 4.980487128332604, "grad_norm": 0.2075367628776983, "learning_rate": 7.935836211368849e-06, "loss": 0.2411, "num_tokens": 2784429840.0, "step": 3650 }, { "epoch": 4.981852709291285, "grad_norm": 0.19355230860662603, "learning_rate": 7.931162774291097e-06, "loss": 0.2427, "num_tokens": 2785198177.0, "step": 3651 }, { "epoch": 4.983218290249965, "grad_norm": 0.21356980656480173, "learning_rate": 7.926490273382683e-06, "loss": 0.2506, "num_tokens": 2786001429.0, "step": 3652 }, { "epoch": 4.984583871208645, "grad_norm": 0.19281030397362578, "learning_rate": 7.921818710068982e-06, "loss": 0.2383, "num_tokens": 2786759508.0, "step": 3653 }, { "epoch": 4.985949452167326, "grad_norm": 0.21570276716270875, "learning_rate": 7.917148085775093e-06, "loss": 0.2426, "num_tokens": 2787496913.0, "step": 3654 }, { "epoch": 4.9873150331260065, "grad_norm": 0.2313915573775155, "learning_rate": 7.912478401925811e-06, "loss": 0.2321, "num_tokens": 2788260660.0, "step": 3655 }, { "epoch": 4.9886806140846875, "grad_norm": 0.20281441677370476, "learning_rate": 7.907809659945658e-06, "loss": 0.2333, "num_tokens": 2789039060.0, "step": 3656 }, { "epoch": 4.990046195043368, "grad_norm": 0.20232074065909583, "learning_rate": 7.903141861258873e-06, "loss": 0.245, "num_tokens": 2789797427.0, "step": 3657 }, { "epoch": 4.991411776002049, "grad_norm": 0.20535304228083365, "learning_rate": 7.898475007289388e-06, "loss": 0.2288, "num_tokens": 2790494979.0, "step": 3658 }, { "epoch": 4.992777356960729, "grad_norm": 0.27039141867063016, "learning_rate": 7.893809099460865e-06, "loss": 0.2564, "num_tokens": 2791259668.0, "step": 3659 }, { "epoch": 4.99414293791941, "grad_norm": 0.2155714131010695, "learning_rate": 7.889144139196665e-06, "loss": 0.2482, "num_tokens": 2792032408.0, "step": 3660 }, { "epoch": 4.99550851887809, "grad_norm": 0.21672663142302623, "learning_rate": 7.884480127919872e-06, "loss": 0.237, "num_tokens": 2792767945.0, "step": 3661 }, { "epoch": 4.99687409983677, "grad_norm": 0.21372977274626537, "learning_rate": 7.879817067053268e-06, "loss": 0.2414, "num_tokens": 2793428592.0, "step": 3662 }, { "epoch": 4.998239680795451, "grad_norm": 0.2059980092405366, "learning_rate": 7.875154958019354e-06, "loss": 0.2406, "num_tokens": 2794167181.0, "step": 3663 }, { "epoch": 4.999605261754131, "grad_norm": 0.19841216371331458, "learning_rate": 7.870493802240335e-06, "loss": 0.2482, "num_tokens": 2794978290.0, "step": 3664 }, { "epoch": 5.0, "grad_norm": 0.19841216371331458, "learning_rate": 7.86583360113813e-06, "loss": 0.2267, "num_tokens": 2795210680.0, "step": 3665 }, { "epoch": 5.00136558095868, "grad_norm": 0.4001628279588914, "learning_rate": 7.861174356134365e-06, "loss": 0.1995, "num_tokens": 2795964972.0, "step": 3666 }, { "epoch": 5.002731161917361, "grad_norm": 0.34072026445612424, "learning_rate": 7.856516068650373e-06, "loss": 0.2018, "num_tokens": 2796643409.0, "step": 3667 }, { "epoch": 5.004096742876041, "grad_norm": 0.3275820481854563, "learning_rate": 7.851858740107196e-06, "loss": 0.219, "num_tokens": 2797453033.0, "step": 3668 }, { "epoch": 5.005462323834722, "grad_norm": 0.2324079320253321, "learning_rate": 7.847202371925581e-06, "loss": 0.199, "num_tokens": 2798174075.0, "step": 3669 }, { "epoch": 5.006827904793402, "grad_norm": 0.25975409431423935, "learning_rate": 7.84254696552599e-06, "loss": 0.2022, "num_tokens": 2799004723.0, "step": 3670 }, { "epoch": 5.008193485752083, "grad_norm": 0.34408952507927093, "learning_rate": 7.83789252232858e-06, "loss": 0.2026, "num_tokens": 2799755750.0, "step": 3671 }, { "epoch": 5.0095590667107635, "grad_norm": 0.3189497281640464, "learning_rate": 7.833239043753222e-06, "loss": 0.2066, "num_tokens": 2800564904.0, "step": 3672 }, { "epoch": 5.0109246476694445, "grad_norm": 0.3053696020576775, "learning_rate": 7.828586531219493e-06, "loss": 0.1954, "num_tokens": 2801252036.0, "step": 3673 }, { "epoch": 5.012290228628125, "grad_norm": 0.26771805028266477, "learning_rate": 7.823934986146667e-06, "loss": 0.2021, "num_tokens": 2802019845.0, "step": 3674 }, { "epoch": 5.013655809586805, "grad_norm": 0.2901828574004597, "learning_rate": 7.819284409953737e-06, "loss": 0.2013, "num_tokens": 2802841037.0, "step": 3675 }, { "epoch": 5.015021390545486, "grad_norm": 0.3024580261593043, "learning_rate": 7.814634804059384e-06, "loss": 0.1999, "num_tokens": 2803582236.0, "step": 3676 }, { "epoch": 5.016386971504166, "grad_norm": 0.24260497397171676, "learning_rate": 7.809986169882012e-06, "loss": 0.2019, "num_tokens": 2804409437.0, "step": 3677 }, { "epoch": 5.017752552462847, "grad_norm": 0.23583735591170157, "learning_rate": 7.805338508839709e-06, "loss": 0.2062, "num_tokens": 2805154820.0, "step": 3678 }, { "epoch": 5.019118133421527, "grad_norm": 0.2654460919568417, "learning_rate": 7.800691822350277e-06, "loss": 0.2043, "num_tokens": 2805941653.0, "step": 3679 }, { "epoch": 5.020483714380208, "grad_norm": 0.23139268627089235, "learning_rate": 7.796046111831221e-06, "loss": 0.2022, "num_tokens": 2806774308.0, "step": 3680 }, { "epoch": 5.021849295338888, "grad_norm": 0.22902000343778345, "learning_rate": 7.791401378699743e-06, "loss": 0.2118, "num_tokens": 2807512591.0, "step": 3681 }, { "epoch": 5.023214876297569, "grad_norm": 0.23770470185857015, "learning_rate": 7.786757624372753e-06, "loss": 0.201, "num_tokens": 2808283677.0, "step": 3682 }, { "epoch": 5.024580457256249, "grad_norm": 0.2146078958051942, "learning_rate": 7.782114850266856e-06, "loss": 0.1975, "num_tokens": 2808976730.0, "step": 3683 }, { "epoch": 5.025946038214929, "grad_norm": 0.23351771949925965, "learning_rate": 7.777473057798366e-06, "loss": 0.2015, "num_tokens": 2809735010.0, "step": 3684 }, { "epoch": 5.02731161917361, "grad_norm": 0.24001051018190273, "learning_rate": 7.772832248383291e-06, "loss": 0.2027, "num_tokens": 2810470048.0, "step": 3685 }, { "epoch": 5.02867720013229, "grad_norm": 0.22526015829022114, "learning_rate": 7.76819242343734e-06, "loss": 0.2033, "num_tokens": 2811258588.0, "step": 3686 }, { "epoch": 5.030042781090971, "grad_norm": 0.22520778826825588, "learning_rate": 7.763553584375922e-06, "loss": 0.2021, "num_tokens": 2812074882.0, "step": 3687 }, { "epoch": 5.0314083620496515, "grad_norm": 0.2348953482507149, "learning_rate": 7.758915732614145e-06, "loss": 0.1904, "num_tokens": 2812746233.0, "step": 3688 }, { "epoch": 5.0327739430083325, "grad_norm": 0.21166466512424384, "learning_rate": 7.754278869566823e-06, "loss": 0.2085, "num_tokens": 2813594249.0, "step": 3689 }, { "epoch": 5.034139523967013, "grad_norm": 0.2276856315949832, "learning_rate": 7.74964299664845e-06, "loss": 0.1896, "num_tokens": 2814329150.0, "step": 3690 }, { "epoch": 5.035505104925694, "grad_norm": 0.2286901139049727, "learning_rate": 7.745008115273245e-06, "loss": 0.1969, "num_tokens": 2815023690.0, "step": 3691 }, { "epoch": 5.036870685884374, "grad_norm": 0.21667529143283884, "learning_rate": 7.740374226855097e-06, "loss": 0.1963, "num_tokens": 2815717499.0, "step": 3692 }, { "epoch": 5.038236266843054, "grad_norm": 0.21341328472382612, "learning_rate": 7.735741332807615e-06, "loss": 0.2078, "num_tokens": 2816481407.0, "step": 3693 }, { "epoch": 5.039601847801735, "grad_norm": 0.21522423298526153, "learning_rate": 7.731109434544088e-06, "loss": 0.1915, "num_tokens": 2817263917.0, "step": 3694 }, { "epoch": 5.040967428760415, "grad_norm": 0.2196623554731143, "learning_rate": 7.726478533477506e-06, "loss": 0.2038, "num_tokens": 2818118964.0, "step": 3695 }, { "epoch": 5.042333009719096, "grad_norm": 0.2001364440613833, "learning_rate": 7.721848631020563e-06, "loss": 0.211, "num_tokens": 2818851156.0, "step": 3696 }, { "epoch": 5.043698590677776, "grad_norm": 0.22467046803907847, "learning_rate": 7.717219728585637e-06, "loss": 0.2064, "num_tokens": 2819665255.0, "step": 3697 }, { "epoch": 5.045064171636457, "grad_norm": 0.20383084349992897, "learning_rate": 7.712591827584808e-06, "loss": 0.1943, "num_tokens": 2820434186.0, "step": 3698 }, { "epoch": 5.046429752595137, "grad_norm": 0.19247511422154817, "learning_rate": 7.70796492942984e-06, "loss": 0.2064, "num_tokens": 2821179220.0, "step": 3699 }, { "epoch": 5.047795333553818, "grad_norm": 0.23128810215845758, "learning_rate": 7.703339035532215e-06, "loss": 0.1997, "num_tokens": 2821918596.0, "step": 3700 }, { "epoch": 5.049160914512498, "grad_norm": 0.21261078963470473, "learning_rate": 7.698714147303081e-06, "loss": 0.1963, "num_tokens": 2822697516.0, "step": 3701 }, { "epoch": 5.050526495471178, "grad_norm": 0.21669756619015867, "learning_rate": 7.69409026615329e-06, "loss": 0.1973, "num_tokens": 2823421267.0, "step": 3702 }, { "epoch": 5.051892076429859, "grad_norm": 0.20975109873183104, "learning_rate": 7.689467393493397e-06, "loss": 0.2073, "num_tokens": 2824164920.0, "step": 3703 }, { "epoch": 5.0532576573885395, "grad_norm": 0.2205330293566889, "learning_rate": 7.684845530733634e-06, "loss": 0.2024, "num_tokens": 2824857444.0, "step": 3704 }, { "epoch": 5.0546232383472205, "grad_norm": 0.22427808080102965, "learning_rate": 7.680224679283932e-06, "loss": 0.2024, "num_tokens": 2825571935.0, "step": 3705 }, { "epoch": 5.055988819305901, "grad_norm": 0.20347876564801795, "learning_rate": 7.675604840553912e-06, "loss": 0.2057, "num_tokens": 2826424442.0, "step": 3706 }, { "epoch": 5.057354400264582, "grad_norm": 0.2274325552370595, "learning_rate": 7.67098601595289e-06, "loss": 0.2156, "num_tokens": 2827244630.0, "step": 3707 }, { "epoch": 5.058719981223262, "grad_norm": 0.2268840569569265, "learning_rate": 7.666368206889863e-06, "loss": 0.2017, "num_tokens": 2827980597.0, "step": 3708 }, { "epoch": 5.060085562181943, "grad_norm": 0.22273853802410284, "learning_rate": 7.661751414773534e-06, "loss": 0.2026, "num_tokens": 2828694470.0, "step": 3709 }, { "epoch": 5.061451143140623, "grad_norm": 0.22282824953628386, "learning_rate": 7.65713564101228e-06, "loss": 0.1957, "num_tokens": 2829360585.0, "step": 3710 }, { "epoch": 5.062816724099303, "grad_norm": 0.22424864057079963, "learning_rate": 7.652520887014173e-06, "loss": 0.2022, "num_tokens": 2830203222.0, "step": 3711 }, { "epoch": 5.064182305057984, "grad_norm": 0.226770367953203, "learning_rate": 7.647907154186977e-06, "loss": 0.2029, "num_tokens": 2830991306.0, "step": 3712 }, { "epoch": 5.065547886016664, "grad_norm": 0.23004367929525155, "learning_rate": 7.64329444393814e-06, "loss": 0.2039, "num_tokens": 2831704948.0, "step": 3713 }, { "epoch": 5.066913466975345, "grad_norm": 0.2277017091020458, "learning_rate": 7.638682757674804e-06, "loss": 0.2135, "num_tokens": 2832444884.0, "step": 3714 }, { "epoch": 5.068279047934025, "grad_norm": 0.22953839710862786, "learning_rate": 7.634072096803791e-06, "loss": 0.2024, "num_tokens": 2833255268.0, "step": 3715 }, { "epoch": 5.069644628892706, "grad_norm": 0.19573686310396027, "learning_rate": 7.629462462731615e-06, "loss": 0.1905, "num_tokens": 2833992878.0, "step": 3716 }, { "epoch": 5.071010209851386, "grad_norm": 0.21668899057666977, "learning_rate": 7.62485385686448e-06, "loss": 0.2121, "num_tokens": 2834710899.0, "step": 3717 }, { "epoch": 5.072375790810067, "grad_norm": 0.22868741849257335, "learning_rate": 7.620246280608261e-06, "loss": 0.2003, "num_tokens": 2835427858.0, "step": 3718 }, { "epoch": 5.073741371768747, "grad_norm": 0.19613813216364076, "learning_rate": 7.615639735368545e-06, "loss": 0.1963, "num_tokens": 2836266860.0, "step": 3719 }, { "epoch": 5.0751069527274275, "grad_norm": 0.20738454564924447, "learning_rate": 7.611034222550575e-06, "loss": 0.2089, "num_tokens": 2837056039.0, "step": 3720 }, { "epoch": 5.0764725336861085, "grad_norm": 0.2247428984936338, "learning_rate": 7.606429743559305e-06, "loss": 0.194, "num_tokens": 2837827937.0, "step": 3721 }, { "epoch": 5.077838114644789, "grad_norm": 0.20454990700879552, "learning_rate": 7.601826299799353e-06, "loss": 0.2045, "num_tokens": 2838594254.0, "step": 3722 }, { "epoch": 5.07920369560347, "grad_norm": 0.21193935293288915, "learning_rate": 7.597223892675037e-06, "loss": 0.2036, "num_tokens": 2839353558.0, "step": 3723 }, { "epoch": 5.08056927656215, "grad_norm": 0.22863906505878895, "learning_rate": 7.592622523590349e-06, "loss": 0.1957, "num_tokens": 2840127778.0, "step": 3724 }, { "epoch": 5.081934857520831, "grad_norm": 0.20401087580320995, "learning_rate": 7.5880221939489695e-06, "loss": 0.2014, "num_tokens": 2840824736.0, "step": 3725 }, { "epoch": 5.083300438479511, "grad_norm": 0.2333431824315937, "learning_rate": 7.583422905154259e-06, "loss": 0.2034, "num_tokens": 2841547904.0, "step": 3726 }, { "epoch": 5.084666019438192, "grad_norm": 0.22655433158373553, "learning_rate": 7.578824658609254e-06, "loss": 0.1961, "num_tokens": 2842292439.0, "step": 3727 }, { "epoch": 5.086031600396872, "grad_norm": 0.23722138169270146, "learning_rate": 7.574227455716696e-06, "loss": 0.197, "num_tokens": 2843018473.0, "step": 3728 }, { "epoch": 5.087397181355552, "grad_norm": 0.20288394990091863, "learning_rate": 7.569631297878977e-06, "loss": 0.2009, "num_tokens": 2843820438.0, "step": 3729 }, { "epoch": 5.088762762314233, "grad_norm": 0.2105290634998853, "learning_rate": 7.565036186498196e-06, "loss": 0.2066, "num_tokens": 2844602083.0, "step": 3730 }, { "epoch": 5.090128343272913, "grad_norm": 0.20658309011248943, "learning_rate": 7.560442122976117e-06, "loss": 0.2041, "num_tokens": 2845430361.0, "step": 3731 }, { "epoch": 5.091493924231594, "grad_norm": 0.22996818891697138, "learning_rate": 7.555849108714192e-06, "loss": 0.2062, "num_tokens": 2846251497.0, "step": 3732 }, { "epoch": 5.092859505190274, "grad_norm": 0.19887106858574144, "learning_rate": 7.551257145113555e-06, "loss": 0.1968, "num_tokens": 2846985223.0, "step": 3733 }, { "epoch": 5.094225086148955, "grad_norm": 0.22911570163904468, "learning_rate": 7.546666233575004e-06, "loss": 0.2034, "num_tokens": 2847711644.0, "step": 3734 }, { "epoch": 5.095590667107635, "grad_norm": 0.19554646022596142, "learning_rate": 7.542076375499039e-06, "loss": 0.2003, "num_tokens": 2848517942.0, "step": 3735 }, { "epoch": 5.096956248066316, "grad_norm": 0.2284781528615546, "learning_rate": 7.537487572285816e-06, "loss": 0.2097, "num_tokens": 2849223412.0, "step": 3736 }, { "epoch": 5.0983218290249965, "grad_norm": 0.2427992820342156, "learning_rate": 7.532899825335191e-06, "loss": 0.201, "num_tokens": 2849990418.0, "step": 3737 }, { "epoch": 5.099687409983677, "grad_norm": 0.20976585153755753, "learning_rate": 7.528313136046679e-06, "loss": 0.2038, "num_tokens": 2850760436.0, "step": 3738 }, { "epoch": 5.101052990942358, "grad_norm": 0.21762376143196477, "learning_rate": 7.523727505819487e-06, "loss": 0.1968, "num_tokens": 2851452950.0, "step": 3739 }, { "epoch": 5.102418571901038, "grad_norm": 0.2179676664339702, "learning_rate": 7.519142936052486e-06, "loss": 0.2084, "num_tokens": 2852175881.0, "step": 3740 }, { "epoch": 5.103784152859719, "grad_norm": 0.28221146791758867, "learning_rate": 7.5145594281442326e-06, "loss": 0.1978, "num_tokens": 2852957481.0, "step": 3741 }, { "epoch": 5.105149733818399, "grad_norm": 0.22843899646330948, "learning_rate": 7.509976983492957e-06, "loss": 0.2093, "num_tokens": 2853750089.0, "step": 3742 }, { "epoch": 5.10651531477708, "grad_norm": 0.21814535463044143, "learning_rate": 7.505395603496562e-06, "loss": 0.2058, "num_tokens": 2854544525.0, "step": 3743 }, { "epoch": 5.10788089573576, "grad_norm": 0.2203516031446968, "learning_rate": 7.50081528955263e-06, "loss": 0.2008, "num_tokens": 2855364298.0, "step": 3744 }, { "epoch": 5.109246476694441, "grad_norm": 0.22528566388532117, "learning_rate": 7.496236043058414e-06, "loss": 0.2048, "num_tokens": 2856089749.0, "step": 3745 }, { "epoch": 5.110612057653121, "grad_norm": 0.21776986766123949, "learning_rate": 7.491657865410853e-06, "loss": 0.2035, "num_tokens": 2856825111.0, "step": 3746 }, { "epoch": 5.111977638611801, "grad_norm": 0.22441463456951855, "learning_rate": 7.4870807580065354e-06, "loss": 0.2027, "num_tokens": 2857597980.0, "step": 3747 }, { "epoch": 5.113343219570482, "grad_norm": 0.2152520420262227, "learning_rate": 7.4825047222417524e-06, "loss": 0.2146, "num_tokens": 2858340334.0, "step": 3748 }, { "epoch": 5.114708800529162, "grad_norm": 0.2387304125560509, "learning_rate": 7.477929759512448e-06, "loss": 0.2114, "num_tokens": 2859133115.0, "step": 3749 }, { "epoch": 5.116074381487843, "grad_norm": 0.21248524117583617, "learning_rate": 7.473355871214248e-06, "loss": 0.2125, "num_tokens": 2859931061.0, "step": 3750 }, { "epoch": 5.117439962446523, "grad_norm": 0.22661845305761805, "learning_rate": 7.4687830587424436e-06, "loss": 0.1952, "num_tokens": 2860671638.0, "step": 3751 }, { "epoch": 5.118805543405204, "grad_norm": 0.21056618631875612, "learning_rate": 7.4642113234920035e-06, "loss": 0.2055, "num_tokens": 2861466618.0, "step": 3752 }, { "epoch": 5.1201711243638846, "grad_norm": 0.20036601439947804, "learning_rate": 7.459640666857568e-06, "loss": 0.2048, "num_tokens": 2862204795.0, "step": 3753 }, { "epoch": 5.1215367053225656, "grad_norm": 0.21565698999249916, "learning_rate": 7.455071090233441e-06, "loss": 0.1992, "num_tokens": 2862941357.0, "step": 3754 }, { "epoch": 5.122902286281246, "grad_norm": 0.20057741944365076, "learning_rate": 7.450502595013611e-06, "loss": 0.2057, "num_tokens": 2863732177.0, "step": 3755 }, { "epoch": 5.124267867239926, "grad_norm": 0.23608369377011024, "learning_rate": 7.445935182591724e-06, "loss": 0.2003, "num_tokens": 2864558257.0, "step": 3756 }, { "epoch": 5.125633448198607, "grad_norm": 0.2151746674087631, "learning_rate": 7.441368854361095e-06, "loss": 0.2129, "num_tokens": 2865332519.0, "step": 3757 }, { "epoch": 5.126999029157287, "grad_norm": 0.21374828683100594, "learning_rate": 7.436803611714722e-06, "loss": 0.2021, "num_tokens": 2866081744.0, "step": 3758 }, { "epoch": 5.128364610115968, "grad_norm": 0.21892295521800512, "learning_rate": 7.432239456045254e-06, "loss": 0.2, "num_tokens": 2866771195.0, "step": 3759 }, { "epoch": 5.129730191074648, "grad_norm": 0.23259196818141742, "learning_rate": 7.427676388745027e-06, "loss": 0.2107, "num_tokens": 2867479050.0, "step": 3760 }, { "epoch": 5.131095772033329, "grad_norm": 0.2200191554884494, "learning_rate": 7.423114411206023e-06, "loss": 0.2076, "num_tokens": 2868244794.0, "step": 3761 }, { "epoch": 5.132461352992009, "grad_norm": 0.2088709206351495, "learning_rate": 7.418553524819916e-06, "loss": 0.1979, "num_tokens": 2869069644.0, "step": 3762 }, { "epoch": 5.13382693395069, "grad_norm": 0.2253836309377385, "learning_rate": 7.4139937309780245e-06, "loss": 0.2016, "num_tokens": 2869873024.0, "step": 3763 }, { "epoch": 5.13519251490937, "grad_norm": 0.206326144224142, "learning_rate": 7.40943503107135e-06, "loss": 0.209, "num_tokens": 2870678881.0, "step": 3764 }, { "epoch": 5.13655809586805, "grad_norm": 0.22355355451112124, "learning_rate": 7.4048774264905575e-06, "loss": 0.1993, "num_tokens": 2871524178.0, "step": 3765 }, { "epoch": 5.137923676826731, "grad_norm": 0.1955354341137779, "learning_rate": 7.4003209186259675e-06, "loss": 0.1998, "num_tokens": 2872279319.0, "step": 3766 }, { "epoch": 5.1392892577854115, "grad_norm": 0.20926443675354034, "learning_rate": 7.395765508867576e-06, "loss": 0.2058, "num_tokens": 2873073161.0, "step": 3767 }, { "epoch": 5.1406548387440925, "grad_norm": 0.21712716331852347, "learning_rate": 7.391211198605041e-06, "loss": 0.1911, "num_tokens": 2873833436.0, "step": 3768 }, { "epoch": 5.142020419702773, "grad_norm": 0.23070246301849928, "learning_rate": 7.38665798922769e-06, "loss": 0.2116, "num_tokens": 2874557440.0, "step": 3769 }, { "epoch": 5.143386000661454, "grad_norm": 0.2181409977924548, "learning_rate": 7.382105882124502e-06, "loss": 0.2122, "num_tokens": 2875323341.0, "step": 3770 }, { "epoch": 5.144751581620134, "grad_norm": 0.22062018394722555, "learning_rate": 7.377554878684132e-06, "loss": 0.197, "num_tokens": 2876016624.0, "step": 3771 }, { "epoch": 5.146117162578815, "grad_norm": 0.21785043859602848, "learning_rate": 7.373004980294898e-06, "loss": 0.206, "num_tokens": 2876723689.0, "step": 3772 }, { "epoch": 5.147482743537495, "grad_norm": 0.22602512627801633, "learning_rate": 7.368456188344766e-06, "loss": 0.1919, "num_tokens": 2877452052.0, "step": 3773 }, { "epoch": 5.148848324496175, "grad_norm": 0.22340938741779331, "learning_rate": 7.3639085042213884e-06, "loss": 0.2106, "num_tokens": 2878273742.0, "step": 3774 }, { "epoch": 5.150213905454856, "grad_norm": 0.196962586869315, "learning_rate": 7.359361929312054e-06, "loss": 0.2097, "num_tokens": 2879075204.0, "step": 3775 }, { "epoch": 5.151579486413536, "grad_norm": 0.22802121366441894, "learning_rate": 7.354816465003738e-06, "loss": 0.1978, "num_tokens": 2879853777.0, "step": 3776 }, { "epoch": 5.152945067372217, "grad_norm": 0.2193585328732095, "learning_rate": 7.350272112683057e-06, "loss": 0.2003, "num_tokens": 2880626968.0, "step": 3777 }, { "epoch": 5.154310648330897, "grad_norm": 0.22210305684423354, "learning_rate": 7.345728873736298e-06, "loss": 0.2046, "num_tokens": 2881373668.0, "step": 3778 }, { "epoch": 5.155676229289578, "grad_norm": 0.22008201039739372, "learning_rate": 7.341186749549407e-06, "loss": 0.2097, "num_tokens": 2882119236.0, "step": 3779 }, { "epoch": 5.157041810248258, "grad_norm": 0.21106241298947118, "learning_rate": 7.3366457415079885e-06, "loss": 0.1989, "num_tokens": 2882914376.0, "step": 3780 }, { "epoch": 5.158407391206939, "grad_norm": 0.2171274902175172, "learning_rate": 7.332105850997309e-06, "loss": 0.2078, "num_tokens": 2883704652.0, "step": 3781 }, { "epoch": 5.159772972165619, "grad_norm": 0.20068729354325243, "learning_rate": 7.327567079402287e-06, "loss": 0.202, "num_tokens": 2884510638.0, "step": 3782 }, { "epoch": 5.1611385531242995, "grad_norm": 0.20897623155718253, "learning_rate": 7.323029428107515e-06, "loss": 0.1935, "num_tokens": 2885284587.0, "step": 3783 }, { "epoch": 5.1625041340829805, "grad_norm": 0.20352511808835652, "learning_rate": 7.318492898497225e-06, "loss": 0.1994, "num_tokens": 2886097012.0, "step": 3784 }, { "epoch": 5.163869715041661, "grad_norm": 0.2240623004510108, "learning_rate": 7.313957491955322e-06, "loss": 0.2091, "num_tokens": 2886829757.0, "step": 3785 }, { "epoch": 5.165235296000342, "grad_norm": 0.22570518571551393, "learning_rate": 7.3094232098653565e-06, "loss": 0.2094, "num_tokens": 2887610830.0, "step": 3786 }, { "epoch": 5.166600876959022, "grad_norm": 0.21497929877471925, "learning_rate": 7.304890053610547e-06, "loss": 0.1992, "num_tokens": 2888281944.0, "step": 3787 }, { "epoch": 5.167966457917703, "grad_norm": 0.21377304334486458, "learning_rate": 7.30035802457376e-06, "loss": 0.2132, "num_tokens": 2889179974.0, "step": 3788 }, { "epoch": 5.169332038876383, "grad_norm": 0.22426007325273972, "learning_rate": 7.29582712413752e-06, "loss": 0.2013, "num_tokens": 2889957121.0, "step": 3789 }, { "epoch": 5.170697619835064, "grad_norm": 0.2241249978544526, "learning_rate": 7.291297353684013e-06, "loss": 0.1962, "num_tokens": 2890692868.0, "step": 3790 }, { "epoch": 5.172063200793744, "grad_norm": 0.2187048842876811, "learning_rate": 7.2867687145950695e-06, "loss": 0.2134, "num_tokens": 2891503383.0, "step": 3791 }, { "epoch": 5.173428781752424, "grad_norm": 0.22639862141911835, "learning_rate": 7.282241208252185e-06, "loss": 0.2, "num_tokens": 2892299751.0, "step": 3792 }, { "epoch": 5.174794362711105, "grad_norm": 0.22848740792986263, "learning_rate": 7.277714836036507e-06, "loss": 0.2114, "num_tokens": 2893042581.0, "step": 3793 }, { "epoch": 5.176159943669785, "grad_norm": 0.22077816366746142, "learning_rate": 7.273189599328836e-06, "loss": 0.2006, "num_tokens": 2893673349.0, "step": 3794 }, { "epoch": 5.177525524628466, "grad_norm": 0.2556511709358179, "learning_rate": 7.268665499509624e-06, "loss": 0.2054, "num_tokens": 2894460176.0, "step": 3795 }, { "epoch": 5.178891105587146, "grad_norm": 0.22054666116471505, "learning_rate": 7.2641425379589735e-06, "loss": 0.208, "num_tokens": 2895186534.0, "step": 3796 }, { "epoch": 5.180256686545827, "grad_norm": 0.22161928409977202, "learning_rate": 7.259620716056652e-06, "loss": 0.2097, "num_tokens": 2895975883.0, "step": 3797 }, { "epoch": 5.181622267504507, "grad_norm": 0.21765944734530993, "learning_rate": 7.255100035182068e-06, "loss": 0.1996, "num_tokens": 2896692568.0, "step": 3798 }, { "epoch": 5.182987848463188, "grad_norm": 0.2107155905680877, "learning_rate": 7.250580496714281e-06, "loss": 0.2026, "num_tokens": 2897458690.0, "step": 3799 }, { "epoch": 5.1843534294218685, "grad_norm": 0.22710858803140827, "learning_rate": 7.246062102032014e-06, "loss": 0.2094, "num_tokens": 2898234091.0, "step": 3800 }, { "epoch": 5.185719010380549, "grad_norm": 0.20287614195019252, "learning_rate": 7.241544852513634e-06, "loss": 0.1985, "num_tokens": 2899010777.0, "step": 3801 }, { "epoch": 5.18708459133923, "grad_norm": 0.21186611234250663, "learning_rate": 7.237028749537145e-06, "loss": 0.1963, "num_tokens": 2899760328.0, "step": 3802 }, { "epoch": 5.18845017229791, "grad_norm": 0.22557872055123027, "learning_rate": 7.2325137944802334e-06, "loss": 0.1951, "num_tokens": 2900535013.0, "step": 3803 }, { "epoch": 5.189815753256591, "grad_norm": 0.20604432958115748, "learning_rate": 7.227999988720207e-06, "loss": 0.1976, "num_tokens": 2901259567.0, "step": 3804 }, { "epoch": 5.191181334215271, "grad_norm": 0.21384251940926238, "learning_rate": 7.223487333634025e-06, "loss": 0.1973, "num_tokens": 2902039980.0, "step": 3805 }, { "epoch": 5.192546915173952, "grad_norm": 0.2768980835830079, "learning_rate": 7.218975830598318e-06, "loss": 0.2043, "num_tokens": 2902888144.0, "step": 3806 }, { "epoch": 5.193912496132632, "grad_norm": 0.19696957616131214, "learning_rate": 7.214465480989339e-06, "loss": 0.1971, "num_tokens": 2903800533.0, "step": 3807 }, { "epoch": 5.195278077091313, "grad_norm": 0.2058465700323115, "learning_rate": 7.209956286183006e-06, "loss": 0.2165, "num_tokens": 2904650321.0, "step": 3808 }, { "epoch": 5.196643658049993, "grad_norm": 0.2229418133284332, "learning_rate": 7.205448247554876e-06, "loss": 0.1992, "num_tokens": 2905378165.0, "step": 3809 }, { "epoch": 5.198009239008673, "grad_norm": 0.20828585528976826, "learning_rate": 7.200941366480159e-06, "loss": 0.2018, "num_tokens": 2906165429.0, "step": 3810 }, { "epoch": 5.199374819967354, "grad_norm": 0.21540492539970196, "learning_rate": 7.196435644333708e-06, "loss": 0.1966, "num_tokens": 2906881168.0, "step": 3811 }, { "epoch": 5.200740400926034, "grad_norm": 0.21940943135553798, "learning_rate": 7.19193108249002e-06, "loss": 0.2118, "num_tokens": 2907700720.0, "step": 3812 }, { "epoch": 5.202105981884715, "grad_norm": 0.20710881949041968, "learning_rate": 7.187427682323252e-06, "loss": 0.2004, "num_tokens": 2908502238.0, "step": 3813 }, { "epoch": 5.203471562843395, "grad_norm": 0.20536153745260324, "learning_rate": 7.18292544520718e-06, "loss": 0.2014, "num_tokens": 2909205202.0, "step": 3814 }, { "epoch": 5.204837143802076, "grad_norm": 0.22094981709510114, "learning_rate": 7.17842437251526e-06, "loss": 0.2054, "num_tokens": 2909930358.0, "step": 3815 }, { "epoch": 5.2062027247607565, "grad_norm": 0.2261930284649632, "learning_rate": 7.173924465620561e-06, "loss": 0.2059, "num_tokens": 2910651026.0, "step": 3816 }, { "epoch": 5.2075683057194375, "grad_norm": 0.20799316690038727, "learning_rate": 7.169425725895816e-06, "loss": 0.2042, "num_tokens": 2911457416.0, "step": 3817 }, { "epoch": 5.208933886678118, "grad_norm": 0.23312664792829438, "learning_rate": 7.164928154713395e-06, "loss": 0.2065, "num_tokens": 2912175709.0, "step": 3818 }, { "epoch": 5.210299467636798, "grad_norm": 0.2191041810039405, "learning_rate": 7.160431753445309e-06, "loss": 0.217, "num_tokens": 2912991295.0, "step": 3819 }, { "epoch": 5.211665048595479, "grad_norm": 0.2257661907210269, "learning_rate": 7.155936523463216e-06, "loss": 0.1978, "num_tokens": 2913724781.0, "step": 3820 }, { "epoch": 5.213030629554159, "grad_norm": 0.2782668934721485, "learning_rate": 7.151442466138418e-06, "loss": 0.2017, "num_tokens": 2914462084.0, "step": 3821 }, { "epoch": 5.21439621051284, "grad_norm": 0.22721174553469378, "learning_rate": 7.1469495828418624e-06, "loss": 0.1937, "num_tokens": 2915148584.0, "step": 3822 }, { "epoch": 5.21576179147152, "grad_norm": 0.22763006882644274, "learning_rate": 7.14245787494412e-06, "loss": 0.2166, "num_tokens": 2915892592.0, "step": 3823 }, { "epoch": 5.217127372430201, "grad_norm": 0.2290755944527407, "learning_rate": 7.137967343815431e-06, "loss": 0.1972, "num_tokens": 2916648004.0, "step": 3824 }, { "epoch": 5.218492953388881, "grad_norm": 0.23173571289003456, "learning_rate": 7.13347799082565e-06, "loss": 0.1975, "num_tokens": 2917369755.0, "step": 3825 }, { "epoch": 5.219858534347562, "grad_norm": 0.2582734958833463, "learning_rate": 7.12898981734429e-06, "loss": 0.204, "num_tokens": 2918139154.0, "step": 3826 }, { "epoch": 5.221224115306242, "grad_norm": 0.202642484460165, "learning_rate": 7.1245028247405e-06, "loss": 0.2081, "num_tokens": 2918883890.0, "step": 3827 }, { "epoch": 5.222589696264922, "grad_norm": 0.2280625850878707, "learning_rate": 7.120017014383063e-06, "loss": 0.2012, "num_tokens": 2919586829.0, "step": 3828 }, { "epoch": 5.223955277223603, "grad_norm": 0.2378131251491234, "learning_rate": 7.115532387640407e-06, "loss": 0.2181, "num_tokens": 2920327593.0, "step": 3829 }, { "epoch": 5.225320858182283, "grad_norm": 0.2349497431338002, "learning_rate": 7.111048945880597e-06, "loss": 0.2127, "num_tokens": 2921118664.0, "step": 3830 }, { "epoch": 5.226686439140964, "grad_norm": 0.2068678242736546, "learning_rate": 7.106566690471339e-06, "loss": 0.2006, "num_tokens": 2921833209.0, "step": 3831 }, { "epoch": 5.2280520200996445, "grad_norm": 0.25917666778670595, "learning_rate": 7.102085622779968e-06, "loss": 0.2019, "num_tokens": 2922624987.0, "step": 3832 }, { "epoch": 5.2294176010583255, "grad_norm": 0.23533089684929226, "learning_rate": 7.097605744173477e-06, "loss": 0.2053, "num_tokens": 2923419893.0, "step": 3833 }, { "epoch": 5.230783182017006, "grad_norm": 0.2187524200542421, "learning_rate": 7.093127056018472e-06, "loss": 0.1993, "num_tokens": 2924142121.0, "step": 3834 }, { "epoch": 5.232148762975687, "grad_norm": 0.20805288633951538, "learning_rate": 7.0886495596812085e-06, "loss": 0.2094, "num_tokens": 2924989374.0, "step": 3835 }, { "epoch": 5.233514343934367, "grad_norm": 0.2075164908327263, "learning_rate": 7.084173256527582e-06, "loss": 0.2039, "num_tokens": 2925790629.0, "step": 3836 }, { "epoch": 5.234879924893047, "grad_norm": 0.21627954290217682, "learning_rate": 7.079698147923111e-06, "loss": 0.2142, "num_tokens": 2926646421.0, "step": 3837 }, { "epoch": 5.236245505851728, "grad_norm": 0.3231215570745274, "learning_rate": 7.075224235232962e-06, "loss": 0.2122, "num_tokens": 2927405311.0, "step": 3838 }, { "epoch": 5.237611086810408, "grad_norm": 0.22151428392172745, "learning_rate": 7.070751519821932e-06, "loss": 0.2049, "num_tokens": 2928146186.0, "step": 3839 }, { "epoch": 5.238976667769089, "grad_norm": 0.22128398675141975, "learning_rate": 7.066280003054456e-06, "loss": 0.2094, "num_tokens": 2928999750.0, "step": 3840 }, { "epoch": 5.240342248727769, "grad_norm": 0.20945109053100924, "learning_rate": 7.061809686294585e-06, "loss": 0.215, "num_tokens": 2929786118.0, "step": 3841 }, { "epoch": 5.24170782968645, "grad_norm": 0.21076573069011556, "learning_rate": 7.0573405709060396e-06, "loss": 0.1947, "num_tokens": 2930540256.0, "step": 3842 }, { "epoch": 5.24307341064513, "grad_norm": 0.21235474573283647, "learning_rate": 7.052872658252141e-06, "loss": 0.2023, "num_tokens": 2931309753.0, "step": 3843 }, { "epoch": 5.244438991603811, "grad_norm": 0.20724810815082806, "learning_rate": 7.0484059496958575e-06, "loss": 0.2062, "num_tokens": 2932054402.0, "step": 3844 }, { "epoch": 5.245804572562491, "grad_norm": 0.23829628577306675, "learning_rate": 7.043940446599795e-06, "loss": 0.209, "num_tokens": 2932769018.0, "step": 3845 }, { "epoch": 5.247170153521171, "grad_norm": 0.21781084827279307, "learning_rate": 7.0394761503261765e-06, "loss": 0.2017, "num_tokens": 2933567115.0, "step": 3846 }, { "epoch": 5.248535734479852, "grad_norm": 0.20943378442156013, "learning_rate": 7.0350130622368666e-06, "loss": 0.2026, "num_tokens": 2934334057.0, "step": 3847 }, { "epoch": 5.249901315438533, "grad_norm": 0.2143704922132957, "learning_rate": 7.030551183693364e-06, "loss": 0.2041, "num_tokens": 2935128766.0, "step": 3848 }, { "epoch": 5.251266896397214, "grad_norm": 0.2214658857179839, "learning_rate": 7.026090516056798e-06, "loss": 0.2032, "num_tokens": 2935873209.0, "step": 3849 }, { "epoch": 5.252632477355894, "grad_norm": 0.20422754477118435, "learning_rate": 7.021631060687915e-06, "loss": 0.1981, "num_tokens": 2936628137.0, "step": 3850 }, { "epoch": 5.253998058314575, "grad_norm": 0.21141180023045134, "learning_rate": 7.017172818947105e-06, "loss": 0.2016, "num_tokens": 2937402047.0, "step": 3851 }, { "epoch": 5.255363639273255, "grad_norm": 0.2053930901423065, "learning_rate": 7.012715792194389e-06, "loss": 0.2104, "num_tokens": 2938163175.0, "step": 3852 }, { "epoch": 5.256729220231936, "grad_norm": 0.23061720964938742, "learning_rate": 7.008259981789409e-06, "loss": 0.2025, "num_tokens": 2938919984.0, "step": 3853 }, { "epoch": 5.258094801190616, "grad_norm": 0.2130794877185142, "learning_rate": 7.0038053890914445e-06, "loss": 0.2016, "num_tokens": 2939690312.0, "step": 3854 }, { "epoch": 5.259460382149296, "grad_norm": 0.20842503646885813, "learning_rate": 6.999352015459392e-06, "loss": 0.1911, "num_tokens": 2940500635.0, "step": 3855 }, { "epoch": 5.260825963107977, "grad_norm": 0.206622847402675, "learning_rate": 6.994899862251788e-06, "loss": 0.2054, "num_tokens": 2941266229.0, "step": 3856 }, { "epoch": 5.262191544066657, "grad_norm": 0.20370694567723946, "learning_rate": 6.9904489308267874e-06, "loss": 0.2075, "num_tokens": 2942046011.0, "step": 3857 }, { "epoch": 5.263557125025338, "grad_norm": 0.21224502972670442, "learning_rate": 6.98599922254218e-06, "loss": 0.2032, "num_tokens": 2942822616.0, "step": 3858 }, { "epoch": 5.264922705984018, "grad_norm": 0.20986512126196755, "learning_rate": 6.981550738755382e-06, "loss": 0.2113, "num_tokens": 2943726274.0, "step": 3859 }, { "epoch": 5.266288286942699, "grad_norm": 0.22037342963510634, "learning_rate": 6.9771034808234266e-06, "loss": 0.2049, "num_tokens": 2944519120.0, "step": 3860 }, { "epoch": 5.267653867901379, "grad_norm": 0.2226433663988552, "learning_rate": 6.9726574501029795e-06, "loss": 0.2112, "num_tokens": 2945287946.0, "step": 3861 }, { "epoch": 5.26901944886006, "grad_norm": 0.21981291641226347, "learning_rate": 6.968212647950337e-06, "loss": 0.2031, "num_tokens": 2946050946.0, "step": 3862 }, { "epoch": 5.2703850298187405, "grad_norm": 0.22150686823442328, "learning_rate": 6.963769075721418e-06, "loss": 0.213, "num_tokens": 2946838789.0, "step": 3863 }, { "epoch": 5.271750610777421, "grad_norm": 0.2030750945912966, "learning_rate": 6.959326734771754e-06, "loss": 0.1976, "num_tokens": 2947594649.0, "step": 3864 }, { "epoch": 5.273116191736102, "grad_norm": 0.21730482994177372, "learning_rate": 6.954885626456519e-06, "loss": 0.2086, "num_tokens": 2948385309.0, "step": 3865 }, { "epoch": 5.274481772694782, "grad_norm": 0.21559735540238867, "learning_rate": 6.950445752130501e-06, "loss": 0.2037, "num_tokens": 2949147582.0, "step": 3866 }, { "epoch": 5.275847353653463, "grad_norm": 0.22463654666467894, "learning_rate": 6.9460071131481096e-06, "loss": 0.2, "num_tokens": 2949926277.0, "step": 3867 }, { "epoch": 5.277212934612143, "grad_norm": 0.2086699971850183, "learning_rate": 6.941569710863389e-06, "loss": 0.2133, "num_tokens": 2950730013.0, "step": 3868 }, { "epoch": 5.278578515570824, "grad_norm": 0.22295150774548883, "learning_rate": 6.9371335466299905e-06, "loss": 0.2082, "num_tokens": 2951485273.0, "step": 3869 }, { "epoch": 5.279944096529504, "grad_norm": 0.22122049514469388, "learning_rate": 6.9326986218012e-06, "loss": 0.2051, "num_tokens": 2952191219.0, "step": 3870 }, { "epoch": 5.281309677488185, "grad_norm": 0.2224592409572515, "learning_rate": 6.928264937729919e-06, "loss": 0.2174, "num_tokens": 2952952636.0, "step": 3871 }, { "epoch": 5.282675258446865, "grad_norm": 0.23104874395581698, "learning_rate": 6.923832495768678e-06, "loss": 0.2103, "num_tokens": 2953693609.0, "step": 3872 }, { "epoch": 5.284040839405545, "grad_norm": 0.221508196171485, "learning_rate": 6.919401297269614e-06, "loss": 0.2137, "num_tokens": 2954524429.0, "step": 3873 }, { "epoch": 5.285406420364226, "grad_norm": 0.2006615646744375, "learning_rate": 6.9149713435845e-06, "loss": 0.1958, "num_tokens": 2955351525.0, "step": 3874 }, { "epoch": 5.286772001322906, "grad_norm": 0.20664850070294355, "learning_rate": 6.910542636064725e-06, "loss": 0.2095, "num_tokens": 2956084086.0, "step": 3875 }, { "epoch": 5.288137582281587, "grad_norm": 0.22144603501606874, "learning_rate": 6.9061151760612835e-06, "loss": 0.2071, "num_tokens": 2956835961.0, "step": 3876 }, { "epoch": 5.289503163240267, "grad_norm": 0.21793817391935666, "learning_rate": 6.90168896492482e-06, "loss": 0.2019, "num_tokens": 2957563220.0, "step": 3877 }, { "epoch": 5.290868744198948, "grad_norm": 0.21957027969317294, "learning_rate": 6.897264004005566e-06, "loss": 0.2152, "num_tokens": 2958364073.0, "step": 3878 }, { "epoch": 5.2922343251576285, "grad_norm": 0.21423368784299876, "learning_rate": 6.89284029465339e-06, "loss": 0.2007, "num_tokens": 2959060095.0, "step": 3879 }, { "epoch": 5.2935999061163095, "grad_norm": 0.2164059030161996, "learning_rate": 6.8884178382177735e-06, "loss": 0.1991, "num_tokens": 2959773835.0, "step": 3880 }, { "epoch": 5.29496548707499, "grad_norm": 0.21749932340217, "learning_rate": 6.8839966360478215e-06, "loss": 0.2066, "num_tokens": 2960500556.0, "step": 3881 }, { "epoch": 5.29633106803367, "grad_norm": 0.2134249990529955, "learning_rate": 6.879576689492244e-06, "loss": 0.1984, "num_tokens": 2961237474.0, "step": 3882 }, { "epoch": 5.297696648992351, "grad_norm": 0.2119816066679417, "learning_rate": 6.8751579998993775e-06, "loss": 0.2117, "num_tokens": 2962054518.0, "step": 3883 }, { "epoch": 5.299062229951031, "grad_norm": 0.21123354634863953, "learning_rate": 6.870740568617177e-06, "loss": 0.2079, "num_tokens": 2962898403.0, "step": 3884 }, { "epoch": 5.300427810909712, "grad_norm": 0.22247465086742244, "learning_rate": 6.8663243969931984e-06, "loss": 0.2101, "num_tokens": 2963694779.0, "step": 3885 }, { "epoch": 5.301793391868392, "grad_norm": 0.20205372907774188, "learning_rate": 6.8619094863746395e-06, "loss": 0.2032, "num_tokens": 2964479884.0, "step": 3886 }, { "epoch": 5.303158972827073, "grad_norm": 0.21908163753243928, "learning_rate": 6.857495838108288e-06, "loss": 0.198, "num_tokens": 2965260789.0, "step": 3887 }, { "epoch": 5.304524553785753, "grad_norm": 0.2287376333976934, "learning_rate": 6.853083453540557e-06, "loss": 0.2089, "num_tokens": 2966052441.0, "step": 3888 }, { "epoch": 5.305890134744434, "grad_norm": 0.21353855195920252, "learning_rate": 6.848672334017484e-06, "loss": 0.2048, "num_tokens": 2966838727.0, "step": 3889 }, { "epoch": 5.307255715703114, "grad_norm": 0.21638175902077988, "learning_rate": 6.844262480884698e-06, "loss": 0.207, "num_tokens": 2967651690.0, "step": 3890 }, { "epoch": 5.308621296661794, "grad_norm": 0.20958988449824822, "learning_rate": 6.839853895487459e-06, "loss": 0.2091, "num_tokens": 2968414422.0, "step": 3891 }, { "epoch": 5.309986877620475, "grad_norm": 0.20986597733984208, "learning_rate": 6.835446579170635e-06, "loss": 0.2044, "num_tokens": 2969132221.0, "step": 3892 }, { "epoch": 5.311352458579155, "grad_norm": 0.23463757746358527, "learning_rate": 6.831040533278712e-06, "loss": 0.2006, "num_tokens": 2969875938.0, "step": 3893 }, { "epoch": 5.312718039537836, "grad_norm": 0.21107520089488901, "learning_rate": 6.826635759155775e-06, "loss": 0.2021, "num_tokens": 2970621325.0, "step": 3894 }, { "epoch": 5.3140836204965165, "grad_norm": 0.23263860847824042, "learning_rate": 6.82223225814554e-06, "loss": 0.2084, "num_tokens": 2971298137.0, "step": 3895 }, { "epoch": 5.3154492014551975, "grad_norm": 0.20749241828523024, "learning_rate": 6.817830031591316e-06, "loss": 0.2038, "num_tokens": 2972063341.0, "step": 3896 }, { "epoch": 5.316814782413878, "grad_norm": 0.229656857850978, "learning_rate": 6.813429080836037e-06, "loss": 0.2135, "num_tokens": 2972838274.0, "step": 3897 }, { "epoch": 5.318180363372559, "grad_norm": 0.2266837868976853, "learning_rate": 6.809029407222242e-06, "loss": 0.1999, "num_tokens": 2973615333.0, "step": 3898 }, { "epoch": 5.319545944331239, "grad_norm": 0.22065870763076495, "learning_rate": 6.804631012092075e-06, "loss": 0.2159, "num_tokens": 2974414900.0, "step": 3899 }, { "epoch": 5.320911525289919, "grad_norm": 0.19775343079142108, "learning_rate": 6.8002338967873026e-06, "loss": 0.211, "num_tokens": 2975235244.0, "step": 3900 }, { "epoch": 5.3222771062486, "grad_norm": 0.23786819160271633, "learning_rate": 6.79583806264929e-06, "loss": 0.1962, "num_tokens": 2975928748.0, "step": 3901 }, { "epoch": 5.32364268720728, "grad_norm": 0.21134203931635603, "learning_rate": 6.7914435110190225e-06, "loss": 0.2116, "num_tokens": 2976715939.0, "step": 3902 }, { "epoch": 5.325008268165961, "grad_norm": 0.20160089950672844, "learning_rate": 6.787050243237076e-06, "loss": 0.2047, "num_tokens": 2977560098.0, "step": 3903 }, { "epoch": 5.326373849124641, "grad_norm": 0.22908234980062678, "learning_rate": 6.782658260643658e-06, "loss": 0.2033, "num_tokens": 2978261356.0, "step": 3904 }, { "epoch": 5.327739430083322, "grad_norm": 0.23161987750464236, "learning_rate": 6.778267564578569e-06, "loss": 0.2126, "num_tokens": 2979037412.0, "step": 3905 }, { "epoch": 5.329105011042002, "grad_norm": 0.2305438749698866, "learning_rate": 6.773878156381211e-06, "loss": 0.2144, "num_tokens": 2979814603.0, "step": 3906 }, { "epoch": 5.330470592000683, "grad_norm": 0.2081562066004125, "learning_rate": 6.7694900373906155e-06, "loss": 0.2012, "num_tokens": 2980564647.0, "step": 3907 }, { "epoch": 5.331836172959363, "grad_norm": 0.22192673974876678, "learning_rate": 6.7651032089453985e-06, "loss": 0.2054, "num_tokens": 2981310300.0, "step": 3908 }, { "epoch": 5.333201753918043, "grad_norm": 0.2176542188437182, "learning_rate": 6.760717672383795e-06, "loss": 0.209, "num_tokens": 2982033185.0, "step": 3909 }, { "epoch": 5.334567334876724, "grad_norm": 0.22525557621780348, "learning_rate": 6.7563334290436426e-06, "loss": 0.2028, "num_tokens": 2982848474.0, "step": 3910 }, { "epoch": 5.3359329158354045, "grad_norm": 0.20635915928303075, "learning_rate": 6.751950480262386e-06, "loss": 0.2155, "num_tokens": 2983653628.0, "step": 3911 }, { "epoch": 5.3372984967940855, "grad_norm": 0.2154540681791433, "learning_rate": 6.747568827377069e-06, "loss": 0.2016, "num_tokens": 2984416772.0, "step": 3912 }, { "epoch": 5.338664077752766, "grad_norm": 0.22306836992650342, "learning_rate": 6.743188471724343e-06, "loss": 0.1951, "num_tokens": 2985157701.0, "step": 3913 }, { "epoch": 5.340029658711447, "grad_norm": 0.20611683063501834, "learning_rate": 6.738809414640468e-06, "loss": 0.2141, "num_tokens": 2985898060.0, "step": 3914 }, { "epoch": 5.341395239670127, "grad_norm": 0.2187750564732513, "learning_rate": 6.734431657461307e-06, "loss": 0.2027, "num_tokens": 2986605338.0, "step": 3915 }, { "epoch": 5.342760820628808, "grad_norm": 0.20961506531679483, "learning_rate": 6.730055201522321e-06, "loss": 0.1981, "num_tokens": 2987388621.0, "step": 3916 }, { "epoch": 5.344126401587488, "grad_norm": 0.20592194707910752, "learning_rate": 6.725680048158576e-06, "loss": 0.2087, "num_tokens": 2988145617.0, "step": 3917 }, { "epoch": 5.345491982546168, "grad_norm": 0.23028862312316617, "learning_rate": 6.721306198704741e-06, "loss": 0.2148, "num_tokens": 2988849071.0, "step": 3918 }, { "epoch": 5.346857563504849, "grad_norm": 0.21600451560393882, "learning_rate": 6.716933654495093e-06, "loss": 0.2118, "num_tokens": 2989698036.0, "step": 3919 }, { "epoch": 5.348223144463529, "grad_norm": 0.21066639382721805, "learning_rate": 6.712562416863506e-06, "loss": 0.2059, "num_tokens": 2990416949.0, "step": 3920 }, { "epoch": 5.34958872542221, "grad_norm": 0.22177766642982763, "learning_rate": 6.7081924871434475e-06, "loss": 0.1991, "num_tokens": 2991073570.0, "step": 3921 }, { "epoch": 5.35095430638089, "grad_norm": 0.22918639932125587, "learning_rate": 6.703823866668001e-06, "loss": 0.1996, "num_tokens": 2991738334.0, "step": 3922 }, { "epoch": 5.352319887339571, "grad_norm": 0.22809827484483122, "learning_rate": 6.699456556769839e-06, "loss": 0.2065, "num_tokens": 2992531767.0, "step": 3923 }, { "epoch": 5.353685468298251, "grad_norm": 0.22637273925794932, "learning_rate": 6.695090558781242e-06, "loss": 0.2029, "num_tokens": 2993317909.0, "step": 3924 }, { "epoch": 5.355051049256932, "grad_norm": 0.20931756894604284, "learning_rate": 6.6907258740340895e-06, "loss": 0.198, "num_tokens": 2994073804.0, "step": 3925 }, { "epoch": 5.356416630215612, "grad_norm": 0.21642887573944217, "learning_rate": 6.68636250385985e-06, "loss": 0.2052, "num_tokens": 2994838006.0, "step": 3926 }, { "epoch": 5.3577822111742925, "grad_norm": 0.20364415051136914, "learning_rate": 6.682000449589603e-06, "loss": 0.2116, "num_tokens": 2995652277.0, "step": 3927 }, { "epoch": 5.3591477921329735, "grad_norm": 0.21857818864510942, "learning_rate": 6.677639712554027e-06, "loss": 0.2099, "num_tokens": 2996398720.0, "step": 3928 }, { "epoch": 5.360513373091654, "grad_norm": 0.2314345631279765, "learning_rate": 6.673280294083382e-06, "loss": 0.2071, "num_tokens": 2997131561.0, "step": 3929 }, { "epoch": 5.361878954050335, "grad_norm": 0.20131179303858668, "learning_rate": 6.668922195507548e-06, "loss": 0.2072, "num_tokens": 2997934342.0, "step": 3930 }, { "epoch": 5.363244535009015, "grad_norm": 0.233263609568893, "learning_rate": 6.664565418155989e-06, "loss": 0.2091, "num_tokens": 2998687850.0, "step": 3931 }, { "epoch": 5.364610115967696, "grad_norm": 0.20367509243928286, "learning_rate": 6.660209963357768e-06, "loss": 0.2033, "num_tokens": 2999417879.0, "step": 3932 }, { "epoch": 5.365975696926376, "grad_norm": 0.22285521247094325, "learning_rate": 6.655855832441549e-06, "loss": 0.2075, "num_tokens": 3000213010.0, "step": 3933 }, { "epoch": 5.367341277885057, "grad_norm": 0.20587081710308405, "learning_rate": 6.651503026735588e-06, "loss": 0.2, "num_tokens": 3000940295.0, "step": 3934 }, { "epoch": 5.368706858843737, "grad_norm": 0.2268081529679454, "learning_rate": 6.647151547567734e-06, "loss": 0.2056, "num_tokens": 3001682320.0, "step": 3935 }, { "epoch": 5.370072439802417, "grad_norm": 0.21997545141581412, "learning_rate": 6.6428013962654344e-06, "loss": 0.2154, "num_tokens": 3002515148.0, "step": 3936 }, { "epoch": 5.371438020761098, "grad_norm": 0.22183626037955562, "learning_rate": 6.638452574155741e-06, "loss": 0.2116, "num_tokens": 3003256720.0, "step": 3937 }, { "epoch": 5.372803601719778, "grad_norm": 0.20952289743097108, "learning_rate": 6.6341050825652795e-06, "loss": 0.1984, "num_tokens": 3003989555.0, "step": 3938 }, { "epoch": 5.374169182678459, "grad_norm": 0.2572710191439589, "learning_rate": 6.629758922820288e-06, "loss": 0.2059, "num_tokens": 3004778789.0, "step": 3939 }, { "epoch": 5.375534763637139, "grad_norm": 0.2227413166976771, "learning_rate": 6.6254140962465895e-06, "loss": 0.2083, "num_tokens": 3005603740.0, "step": 3940 }, { "epoch": 5.37690034459582, "grad_norm": 0.2046889864532848, "learning_rate": 6.621070604169602e-06, "loss": 0.2095, "num_tokens": 3006477235.0, "step": 3941 }, { "epoch": 5.3782659255545004, "grad_norm": 0.2074449822734452, "learning_rate": 6.616728447914338e-06, "loss": 0.2151, "num_tokens": 3007257369.0, "step": 3942 }, { "epoch": 5.3796315065131814, "grad_norm": 0.2222527527049467, "learning_rate": 6.6123876288054055e-06, "loss": 0.2055, "num_tokens": 3008091255.0, "step": 3943 }, { "epoch": 5.380997087471862, "grad_norm": 0.2066662667538461, "learning_rate": 6.608048148166992e-06, "loss": 0.2095, "num_tokens": 3008835966.0, "step": 3944 }, { "epoch": 5.382362668430542, "grad_norm": 0.2094255872460499, "learning_rate": 6.60371000732289e-06, "loss": 0.2044, "num_tokens": 3009581954.0, "step": 3945 }, { "epoch": 5.383728249389223, "grad_norm": 0.24559765003364084, "learning_rate": 6.59937320759648e-06, "loss": 0.2014, "num_tokens": 3010312747.0, "step": 3946 }, { "epoch": 5.385093830347903, "grad_norm": 0.20678264104654098, "learning_rate": 6.595037750310724e-06, "loss": 0.2072, "num_tokens": 3011082766.0, "step": 3947 }, { "epoch": 5.386459411306584, "grad_norm": 0.2543945449072729, "learning_rate": 6.590703636788194e-06, "loss": 0.1934, "num_tokens": 3011822170.0, "step": 3948 }, { "epoch": 5.387824992265264, "grad_norm": 0.22275532818215518, "learning_rate": 6.586370868351031e-06, "loss": 0.2119, "num_tokens": 3012652051.0, "step": 3949 }, { "epoch": 5.389190573223945, "grad_norm": 0.19847591928556818, "learning_rate": 6.5820394463209755e-06, "loss": 0.2024, "num_tokens": 3013437448.0, "step": 3950 }, { "epoch": 5.390556154182625, "grad_norm": 0.24530013976803167, "learning_rate": 6.577709372019365e-06, "loss": 0.2008, "num_tokens": 3014133575.0, "step": 3951 }, { "epoch": 5.391921735141306, "grad_norm": 0.21422848722822987, "learning_rate": 6.573380646767109e-06, "loss": 0.2045, "num_tokens": 3014806358.0, "step": 3952 }, { "epoch": 5.393287316099986, "grad_norm": 0.22703608219277344, "learning_rate": 6.5690532718847165e-06, "loss": 0.216, "num_tokens": 3015569583.0, "step": 3953 }, { "epoch": 5.394652897058666, "grad_norm": 0.22086839885486104, "learning_rate": 6.564727248692282e-06, "loss": 0.2048, "num_tokens": 3016278910.0, "step": 3954 }, { "epoch": 5.396018478017347, "grad_norm": 0.22056733051736155, "learning_rate": 6.560402578509493e-06, "loss": 0.2113, "num_tokens": 3017074705.0, "step": 3955 }, { "epoch": 5.397384058976027, "grad_norm": 0.21840213135137446, "learning_rate": 6.556079262655607e-06, "loss": 0.2001, "num_tokens": 3017833239.0, "step": 3956 }, { "epoch": 5.398749639934708, "grad_norm": 0.21999095382393496, "learning_rate": 6.551757302449495e-06, "loss": 0.2166, "num_tokens": 3018622790.0, "step": 3957 }, { "epoch": 5.4001152208933885, "grad_norm": 0.19957739012206901, "learning_rate": 6.54743669920959e-06, "loss": 0.2024, "num_tokens": 3019347669.0, "step": 3958 }, { "epoch": 5.4014808018520695, "grad_norm": 0.22853674952185196, "learning_rate": 6.5431174542539245e-06, "loss": 0.2057, "num_tokens": 3020126173.0, "step": 3959 }, { "epoch": 5.40284638281075, "grad_norm": 0.21010204796162435, "learning_rate": 6.538799568900116e-06, "loss": 0.2098, "num_tokens": 3020908127.0, "step": 3960 }, { "epoch": 5.404211963769431, "grad_norm": 0.21473003703560234, "learning_rate": 6.534483044465356e-06, "loss": 0.2112, "num_tokens": 3021648448.0, "step": 3961 }, { "epoch": 5.405577544728111, "grad_norm": 0.23003744192902606, "learning_rate": 6.530167882266435e-06, "loss": 0.198, "num_tokens": 3022383335.0, "step": 3962 }, { "epoch": 5.406943125686791, "grad_norm": 0.21666741927684402, "learning_rate": 6.525854083619721e-06, "loss": 0.2122, "num_tokens": 3023156801.0, "step": 3963 }, { "epoch": 5.408308706645472, "grad_norm": 0.2171317227612684, "learning_rate": 6.52154164984117e-06, "loss": 0.2041, "num_tokens": 3023959678.0, "step": 3964 }, { "epoch": 5.409674287604152, "grad_norm": 0.2104430500593837, "learning_rate": 6.5172305822463115e-06, "loss": 0.2099, "num_tokens": 3024689913.0, "step": 3965 }, { "epoch": 5.411039868562833, "grad_norm": 0.2098890088110271, "learning_rate": 6.512920882150274e-06, "loss": 0.2069, "num_tokens": 3025407564.0, "step": 3966 }, { "epoch": 5.412405449521513, "grad_norm": 0.22973634723470993, "learning_rate": 6.508612550867759e-06, "loss": 0.2067, "num_tokens": 3026177276.0, "step": 3967 }, { "epoch": 5.413771030480194, "grad_norm": 0.20839545171748086, "learning_rate": 6.504305589713041e-06, "loss": 0.204, "num_tokens": 3026923881.0, "step": 3968 }, { "epoch": 5.415136611438874, "grad_norm": 0.22778132849813956, "learning_rate": 6.500000000000003e-06, "loss": 0.2087, "num_tokens": 3027687394.0, "step": 3969 }, { "epoch": 5.416502192397555, "grad_norm": 0.21037047184898067, "learning_rate": 6.495695783042082e-06, "loss": 0.2061, "num_tokens": 3028389599.0, "step": 3970 }, { "epoch": 5.417867773356235, "grad_norm": 0.2414489609470623, "learning_rate": 6.491392940152315e-06, "loss": 0.2095, "num_tokens": 3029169920.0, "step": 3971 }, { "epoch": 5.419233354314915, "grad_norm": 0.2077668625929062, "learning_rate": 6.487091472643308e-06, "loss": 0.2017, "num_tokens": 3029941531.0, "step": 3972 }, { "epoch": 5.420598935273596, "grad_norm": 0.22017754187498098, "learning_rate": 6.482791381827261e-06, "loss": 0.2062, "num_tokens": 3030651189.0, "step": 3973 }, { "epoch": 5.4219645162322765, "grad_norm": 0.2316252835736614, "learning_rate": 6.478492669015932e-06, "loss": 0.1981, "num_tokens": 3031366648.0, "step": 3974 }, { "epoch": 5.4233300971909575, "grad_norm": 0.20762883696792797, "learning_rate": 6.474195335520687e-06, "loss": 0.2027, "num_tokens": 3032151233.0, "step": 3975 }, { "epoch": 5.424695678149638, "grad_norm": 0.24004211229214478, "learning_rate": 6.4698993826524485e-06, "loss": 0.2082, "num_tokens": 3032860335.0, "step": 3976 }, { "epoch": 5.426061259108319, "grad_norm": 0.2136522610963918, "learning_rate": 6.46560481172172e-06, "loss": 0.2063, "num_tokens": 3033667505.0, "step": 3977 }, { "epoch": 5.427426840066999, "grad_norm": 0.20298596822369086, "learning_rate": 6.461311624038602e-06, "loss": 0.2105, "num_tokens": 3034450014.0, "step": 3978 }, { "epoch": 5.42879242102568, "grad_norm": 0.20205763260334447, "learning_rate": 6.457019820912751e-06, "loss": 0.2037, "num_tokens": 3035308364.0, "step": 3979 }, { "epoch": 5.43015800198436, "grad_norm": 0.2162672521254492, "learning_rate": 6.4527294036534115e-06, "loss": 0.2094, "num_tokens": 3036121851.0, "step": 3980 }, { "epoch": 5.43152358294304, "grad_norm": 0.2250839791990096, "learning_rate": 6.448440373569407e-06, "loss": 0.2052, "num_tokens": 3036833305.0, "step": 3981 }, { "epoch": 5.432889163901721, "grad_norm": 0.217943459385609, "learning_rate": 6.444152731969135e-06, "loss": 0.2062, "num_tokens": 3037622713.0, "step": 3982 }, { "epoch": 5.434254744860401, "grad_norm": 0.1951198917997279, "learning_rate": 6.439866480160566e-06, "loss": 0.206, "num_tokens": 3038534317.0, "step": 3983 }, { "epoch": 5.435620325819082, "grad_norm": 0.2097948606629585, "learning_rate": 6.43558161945125e-06, "loss": 0.2017, "num_tokens": 3039338160.0, "step": 3984 }, { "epoch": 5.436985906777762, "grad_norm": 0.21817028082996742, "learning_rate": 6.4312981511483175e-06, "loss": 0.2069, "num_tokens": 3040113293.0, "step": 3985 }, { "epoch": 5.438351487736443, "grad_norm": 0.22264698923364068, "learning_rate": 6.427016076558459e-06, "loss": 0.2148, "num_tokens": 3040894044.0, "step": 3986 }, { "epoch": 5.439717068695123, "grad_norm": 0.20188599497247336, "learning_rate": 6.422735396987963e-06, "loss": 0.2046, "num_tokens": 3041723690.0, "step": 3987 }, { "epoch": 5.441082649653804, "grad_norm": 0.19903786129190232, "learning_rate": 6.418456113742668e-06, "loss": 0.2082, "num_tokens": 3042486863.0, "step": 3988 }, { "epoch": 5.442448230612484, "grad_norm": 0.20804889391265294, "learning_rate": 6.414178228128005e-06, "loss": 0.1998, "num_tokens": 3043273006.0, "step": 3989 }, { "epoch": 5.4438138115711645, "grad_norm": 0.2160420412197853, "learning_rate": 6.409901741448972e-06, "loss": 0.2067, "num_tokens": 3043955791.0, "step": 3990 }, { "epoch": 5.4451793925298455, "grad_norm": 0.20425636187548976, "learning_rate": 6.405626655010133e-06, "loss": 0.2066, "num_tokens": 3044834069.0, "step": 3991 }, { "epoch": 5.446544973488526, "grad_norm": 0.21246159659244993, "learning_rate": 6.401352970115638e-06, "loss": 0.2104, "num_tokens": 3045602629.0, "step": 3992 }, { "epoch": 5.447910554447207, "grad_norm": 0.21886231902836617, "learning_rate": 6.397080688069201e-06, "loss": 0.2139, "num_tokens": 3046360595.0, "step": 3993 }, { "epoch": 5.449276135405887, "grad_norm": 0.20539537523016446, "learning_rate": 6.392809810174114e-06, "loss": 0.2075, "num_tokens": 3047199492.0, "step": 3994 }, { "epoch": 5.450641716364568, "grad_norm": 0.229334453274223, "learning_rate": 6.3885403377332265e-06, "loss": 0.1953, "num_tokens": 3047803460.0, "step": 3995 }, { "epoch": 5.452007297323248, "grad_norm": 0.22410905597612238, "learning_rate": 6.384272272048983e-06, "loss": 0.2075, "num_tokens": 3048637447.0, "step": 3996 }, { "epoch": 5.453372878281929, "grad_norm": 0.22274573284051796, "learning_rate": 6.380005614423377e-06, "loss": 0.2157, "num_tokens": 3049428743.0, "step": 3997 }, { "epoch": 5.454738459240609, "grad_norm": 0.21258309147166182, "learning_rate": 6.375740366157981e-06, "loss": 0.2069, "num_tokens": 3050164842.0, "step": 3998 }, { "epoch": 5.456104040199289, "grad_norm": 0.21520306475815004, "learning_rate": 6.371476528553944e-06, "loss": 0.1981, "num_tokens": 3050907511.0, "step": 3999 }, { "epoch": 5.45746962115797, "grad_norm": 0.20122939015207728, "learning_rate": 6.367214102911972e-06, "loss": 0.2069, "num_tokens": 3051682836.0, "step": 4000 }, { "epoch": 5.45883520211665, "grad_norm": 0.22592501514985766, "learning_rate": 6.3629530905323464e-06, "loss": 0.2116, "num_tokens": 3052478045.0, "step": 4001 }, { "epoch": 5.460200783075331, "grad_norm": 0.2176108478543041, "learning_rate": 6.35869349271492e-06, "loss": 0.2066, "num_tokens": 3053210446.0, "step": 4002 }, { "epoch": 5.461566364034011, "grad_norm": 0.20674467800831395, "learning_rate": 6.354435310759111e-06, "loss": 0.2073, "num_tokens": 3054006308.0, "step": 4003 }, { "epoch": 5.462931944992692, "grad_norm": 0.2345891668369277, "learning_rate": 6.350178545963905e-06, "loss": 0.2126, "num_tokens": 3054741990.0, "step": 4004 }, { "epoch": 5.464297525951372, "grad_norm": 0.23176850966060938, "learning_rate": 6.345923199627864e-06, "loss": 0.2199, "num_tokens": 3055526044.0, "step": 4005 }, { "epoch": 5.465663106910053, "grad_norm": 0.22589339573453995, "learning_rate": 6.3416692730490996e-06, "loss": 0.2077, "num_tokens": 3056252404.0, "step": 4006 }, { "epoch": 5.4670286878687335, "grad_norm": 0.23358481396508393, "learning_rate": 6.337416767525304e-06, "loss": 0.2117, "num_tokens": 3056966395.0, "step": 4007 }, { "epoch": 5.468394268827414, "grad_norm": 0.21025613402496268, "learning_rate": 6.333165684353737e-06, "loss": 0.2035, "num_tokens": 3057664301.0, "step": 4008 }, { "epoch": 5.469759849786095, "grad_norm": 0.251703696814109, "learning_rate": 6.328916024831213e-06, "loss": 0.2175, "num_tokens": 3058438685.0, "step": 4009 }, { "epoch": 5.471125430744775, "grad_norm": 0.2262444978695399, "learning_rate": 6.324667790254122e-06, "loss": 0.2051, "num_tokens": 3059214596.0, "step": 4010 }, { "epoch": 5.472491011703456, "grad_norm": 0.21644858962503064, "learning_rate": 6.320420981918418e-06, "loss": 0.2081, "num_tokens": 3060016497.0, "step": 4011 }, { "epoch": 5.473856592662136, "grad_norm": 0.22032054477746746, "learning_rate": 6.316175601119616e-06, "loss": 0.1995, "num_tokens": 3060726131.0, "step": 4012 }, { "epoch": 5.475222173620817, "grad_norm": 0.20388210939366752, "learning_rate": 6.311931649152798e-06, "loss": 0.2111, "num_tokens": 3061575665.0, "step": 4013 }, { "epoch": 5.476587754579497, "grad_norm": 0.22081055704488686, "learning_rate": 6.307689127312618e-06, "loss": 0.2073, "num_tokens": 3062279694.0, "step": 4014 }, { "epoch": 5.477953335538178, "grad_norm": 0.20898105737558534, "learning_rate": 6.3034480368932715e-06, "loss": 0.2126, "num_tokens": 3063111464.0, "step": 4015 }, { "epoch": 5.479318916496858, "grad_norm": 0.22929244385398376, "learning_rate": 6.299208379188539e-06, "loss": 0.2107, "num_tokens": 3063836991.0, "step": 4016 }, { "epoch": 5.480684497455538, "grad_norm": 0.2143771747382398, "learning_rate": 6.2949701554917595e-06, "loss": 0.2031, "num_tokens": 3064568829.0, "step": 4017 }, { "epoch": 5.482050078414219, "grad_norm": 0.20987728909390047, "learning_rate": 6.290733367095826e-06, "loss": 0.2094, "num_tokens": 3065329798.0, "step": 4018 }, { "epoch": 5.483415659372899, "grad_norm": 0.21117604972376425, "learning_rate": 6.2864980152932e-06, "loss": 0.2105, "num_tokens": 3066050636.0, "step": 4019 }, { "epoch": 5.48478124033158, "grad_norm": 0.2327515751279894, "learning_rate": 6.282264101375906e-06, "loss": 0.2071, "num_tokens": 3066815131.0, "step": 4020 }, { "epoch": 5.48614682129026, "grad_norm": 0.24443044086385796, "learning_rate": 6.278031626635527e-06, "loss": 0.2045, "num_tokens": 3067613614.0, "step": 4021 }, { "epoch": 5.487512402248941, "grad_norm": 0.22146707077219002, "learning_rate": 6.273800592363211e-06, "loss": 0.2161, "num_tokens": 3068410579.0, "step": 4022 }, { "epoch": 5.4888779832076215, "grad_norm": 0.19455245649918942, "learning_rate": 6.269570999849655e-06, "loss": 0.2127, "num_tokens": 3069256654.0, "step": 4023 }, { "epoch": 5.4902435641663025, "grad_norm": 0.21497086412925887, "learning_rate": 6.265342850385131e-06, "loss": 0.2065, "num_tokens": 3070071738.0, "step": 4024 }, { "epoch": 5.491609145124983, "grad_norm": 0.201725131659208, "learning_rate": 6.261116145259462e-06, "loss": 0.218, "num_tokens": 3070909780.0, "step": 4025 }, { "epoch": 5.492974726083663, "grad_norm": 0.219728704790792, "learning_rate": 6.256890885762037e-06, "loss": 0.2027, "num_tokens": 3071648369.0, "step": 4026 }, { "epoch": 5.494340307042344, "grad_norm": 0.2049371855384657, "learning_rate": 6.2526670731817925e-06, "loss": 0.2039, "num_tokens": 3072395992.0, "step": 4027 }, { "epoch": 5.495705888001024, "grad_norm": 0.20897281764403916, "learning_rate": 6.248444708807235e-06, "loss": 0.2042, "num_tokens": 3073163013.0, "step": 4028 }, { "epoch": 5.497071468959705, "grad_norm": 0.2085776967673941, "learning_rate": 6.244223793926429e-06, "loss": 0.2106, "num_tokens": 3073916595.0, "step": 4029 }, { "epoch": 5.498437049918385, "grad_norm": 0.22237470330227166, "learning_rate": 6.240004329826981e-06, "loss": 0.206, "num_tokens": 3074579261.0, "step": 4030 }, { "epoch": 5.499802630877066, "grad_norm": 0.22534106077116134, "learning_rate": 6.235786317796079e-06, "loss": 0.207, "num_tokens": 3075404828.0, "step": 4031 }, { "epoch": 5.501168211835746, "grad_norm": 0.20104296681205794, "learning_rate": 6.231569759120448e-06, "loss": 0.2027, "num_tokens": 3076147419.0, "step": 4032 }, { "epoch": 5.502533792794427, "grad_norm": 0.225964785451297, "learning_rate": 6.2273546550863815e-06, "loss": 0.2086, "num_tokens": 3076899160.0, "step": 4033 }, { "epoch": 5.503899373753107, "grad_norm": 0.21971665098145374, "learning_rate": 6.2231410069797245e-06, "loss": 0.2029, "num_tokens": 3077621583.0, "step": 4034 }, { "epoch": 5.505264954711787, "grad_norm": 0.2122077600393208, "learning_rate": 6.218928816085881e-06, "loss": 0.1985, "num_tokens": 3078382603.0, "step": 4035 }, { "epoch": 5.506630535670468, "grad_norm": 0.2183234093717378, "learning_rate": 6.214718083689797e-06, "loss": 0.2071, "num_tokens": 3079120246.0, "step": 4036 }, { "epoch": 5.5079961166291485, "grad_norm": 0.25820487067874803, "learning_rate": 6.210508811076e-06, "loss": 0.2082, "num_tokens": 3079897172.0, "step": 4037 }, { "epoch": 5.5093616975878295, "grad_norm": 0.21311041492054736, "learning_rate": 6.2063009995285495e-06, "loss": 0.2045, "num_tokens": 3080690619.0, "step": 4038 }, { "epoch": 5.51072727854651, "grad_norm": 0.2084524522622275, "learning_rate": 6.2020946503310604e-06, "loss": 0.2093, "num_tokens": 3081505700.0, "step": 4039 }, { "epoch": 5.512092859505191, "grad_norm": 0.21341027101383142, "learning_rate": 6.197889764766722e-06, "loss": 0.2102, "num_tokens": 3082290722.0, "step": 4040 }, { "epoch": 5.513458440463871, "grad_norm": 0.20357063469703732, "learning_rate": 6.193686344118249e-06, "loss": 0.2128, "num_tokens": 3083052892.0, "step": 4041 }, { "epoch": 5.514824021422552, "grad_norm": 0.21215134787095122, "learning_rate": 6.189484389667929e-06, "loss": 0.2017, "num_tokens": 3083776991.0, "step": 4042 }, { "epoch": 5.516189602381232, "grad_norm": 0.2175171423517633, "learning_rate": 6.185283902697596e-06, "loss": 0.1974, "num_tokens": 3084468521.0, "step": 4043 }, { "epoch": 5.517555183339912, "grad_norm": 0.21498635040138891, "learning_rate": 6.181084884488642e-06, "loss": 0.2012, "num_tokens": 3085226689.0, "step": 4044 }, { "epoch": 5.518920764298593, "grad_norm": 0.2046053770235797, "learning_rate": 6.1768873363219935e-06, "loss": 0.2085, "num_tokens": 3086000490.0, "step": 4045 }, { "epoch": 5.520286345257273, "grad_norm": 0.20763218667975852, "learning_rate": 6.172691259478151e-06, "loss": 0.2063, "num_tokens": 3086797562.0, "step": 4046 }, { "epoch": 5.521651926215954, "grad_norm": 0.2204174048373065, "learning_rate": 6.168496655237153e-06, "loss": 0.2066, "num_tokens": 3087569172.0, "step": 4047 }, { "epoch": 5.523017507174634, "grad_norm": 0.22262146601577268, "learning_rate": 6.164303524878585e-06, "loss": 0.2049, "num_tokens": 3088338864.0, "step": 4048 }, { "epoch": 5.524383088133315, "grad_norm": 0.20773856372480864, "learning_rate": 6.160111869681602e-06, "loss": 0.1982, "num_tokens": 3089019602.0, "step": 4049 }, { "epoch": 5.525748669091995, "grad_norm": 0.21064764970965, "learning_rate": 6.155921690924887e-06, "loss": 0.2048, "num_tokens": 3089852351.0, "step": 4050 }, { "epoch": 5.527114250050676, "grad_norm": 0.21593256174257552, "learning_rate": 6.151732989886685e-06, "loss": 0.2072, "num_tokens": 3090563410.0, "step": 4051 }, { "epoch": 5.528479831009356, "grad_norm": 0.21493090892184094, "learning_rate": 6.1475457678447855e-06, "loss": 0.2143, "num_tokens": 3091345855.0, "step": 4052 }, { "epoch": 5.5298454119680365, "grad_norm": 0.22296841197131737, "learning_rate": 6.1433600260765345e-06, "loss": 0.2115, "num_tokens": 3092141335.0, "step": 4053 }, { "epoch": 5.5312109929267175, "grad_norm": 0.2190920650727816, "learning_rate": 6.139175765858814e-06, "loss": 0.2115, "num_tokens": 3092951165.0, "step": 4054 }, { "epoch": 5.532576573885398, "grad_norm": 0.21489894620353736, "learning_rate": 6.134992988468063e-06, "loss": 0.2175, "num_tokens": 3093795756.0, "step": 4055 }, { "epoch": 5.533942154844079, "grad_norm": 0.22743818771160382, "learning_rate": 6.130811695180268e-06, "loss": 0.2075, "num_tokens": 3094598915.0, "step": 4056 }, { "epoch": 5.535307735802759, "grad_norm": 0.2188088889598014, "learning_rate": 6.126631887270951e-06, "loss": 0.2038, "num_tokens": 3095311056.0, "step": 4057 }, { "epoch": 5.53667331676144, "grad_norm": 0.20591357683576897, "learning_rate": 6.122453566015205e-06, "loss": 0.2088, "num_tokens": 3096137685.0, "step": 4058 }, { "epoch": 5.53803889772012, "grad_norm": 0.20199409820903227, "learning_rate": 6.1182767326876444e-06, "loss": 0.2073, "num_tokens": 3096925322.0, "step": 4059 }, { "epoch": 5.539404478678801, "grad_norm": 0.2270911282985042, "learning_rate": 6.1141013885624434e-06, "loss": 0.211, "num_tokens": 3097668110.0, "step": 4060 }, { "epoch": 5.540770059637481, "grad_norm": 0.21858224468166615, "learning_rate": 6.10992753491332e-06, "loss": 0.2154, "num_tokens": 3098403403.0, "step": 4061 }, { "epoch": 5.542135640596161, "grad_norm": 0.2123622401770957, "learning_rate": 6.105755173013533e-06, "loss": 0.2004, "num_tokens": 3099170392.0, "step": 4062 }, { "epoch": 5.543501221554842, "grad_norm": 0.21016835185898516, "learning_rate": 6.101584304135891e-06, "loss": 0.2101, "num_tokens": 3099942311.0, "step": 4063 }, { "epoch": 5.544866802513522, "grad_norm": 0.20522968483676618, "learning_rate": 6.0974149295527454e-06, "loss": 0.2014, "num_tokens": 3100812410.0, "step": 4064 }, { "epoch": 5.546232383472203, "grad_norm": 0.1999648359479289, "learning_rate": 6.093247050535995e-06, "loss": 0.2116, "num_tokens": 3101666417.0, "step": 4065 }, { "epoch": 5.547597964430883, "grad_norm": 0.2040894145205277, "learning_rate": 6.089080668357069e-06, "loss": 0.2128, "num_tokens": 3102413714.0, "step": 4066 }, { "epoch": 5.548963545389564, "grad_norm": 0.22416274919145654, "learning_rate": 6.084915784286964e-06, "loss": 0.2013, "num_tokens": 3103176374.0, "step": 4067 }, { "epoch": 5.550329126348244, "grad_norm": 0.1951795178098006, "learning_rate": 6.080752399596197e-06, "loss": 0.1915, "num_tokens": 3103846975.0, "step": 4068 }, { "epoch": 5.551694707306925, "grad_norm": 0.23006215214645082, "learning_rate": 6.076590515554836e-06, "loss": 0.2142, "num_tokens": 3104597477.0, "step": 4069 }, { "epoch": 5.5530602882656055, "grad_norm": 0.22122603193311416, "learning_rate": 6.072430133432499e-06, "loss": 0.2113, "num_tokens": 3105344862.0, "step": 4070 }, { "epoch": 5.554425869224286, "grad_norm": 0.2196496386678765, "learning_rate": 6.068271254498329e-06, "loss": 0.2005, "num_tokens": 3106093499.0, "step": 4071 }, { "epoch": 5.555791450182967, "grad_norm": 0.21178106363685606, "learning_rate": 6.0641138800210255e-06, "loss": 0.2173, "num_tokens": 3106863822.0, "step": 4072 }, { "epoch": 5.557157031141647, "grad_norm": 0.2057309049225531, "learning_rate": 6.059958011268822e-06, "loss": 0.2068, "num_tokens": 3107622345.0, "step": 4073 }, { "epoch": 5.558522612100328, "grad_norm": 0.21712802466768952, "learning_rate": 6.055803649509498e-06, "loss": 0.2063, "num_tokens": 3108354943.0, "step": 4074 }, { "epoch": 5.559888193059008, "grad_norm": 0.20870504984157687, "learning_rate": 6.0516507960103565e-06, "loss": 0.2106, "num_tokens": 3109129219.0, "step": 4075 }, { "epoch": 5.561253774017689, "grad_norm": 0.317943053362746, "learning_rate": 6.04749945203827e-06, "loss": 0.207, "num_tokens": 3109889531.0, "step": 4076 }, { "epoch": 5.562619354976369, "grad_norm": 0.22457200584632273, "learning_rate": 6.0433496188596245e-06, "loss": 0.2098, "num_tokens": 3110625203.0, "step": 4077 }, { "epoch": 5.56398493593505, "grad_norm": 0.20949840851837406, "learning_rate": 6.039201297740355e-06, "loss": 0.21, "num_tokens": 3111468690.0, "step": 4078 }, { "epoch": 5.56535051689373, "grad_norm": 0.2134721578662111, "learning_rate": 6.035054489945939e-06, "loss": 0.203, "num_tokens": 3112182456.0, "step": 4079 }, { "epoch": 5.56671609785241, "grad_norm": 0.2143516903383674, "learning_rate": 6.030909196741382e-06, "loss": 0.2097, "num_tokens": 3112976043.0, "step": 4080 }, { "epoch": 5.568081678811091, "grad_norm": 0.2128802746430646, "learning_rate": 6.026765419391234e-06, "loss": 0.2033, "num_tokens": 3113768274.0, "step": 4081 }, { "epoch": 5.569447259769771, "grad_norm": 0.2079684664105549, "learning_rate": 6.022623159159584e-06, "loss": 0.2006, "num_tokens": 3114557570.0, "step": 4082 }, { "epoch": 5.570812840728452, "grad_norm": 0.18879824208793158, "learning_rate": 6.018482417310062e-06, "loss": 0.2037, "num_tokens": 3115328265.0, "step": 4083 }, { "epoch": 5.572178421687132, "grad_norm": 0.22009167294925178, "learning_rate": 6.014343195105818e-06, "loss": 0.2072, "num_tokens": 3116078488.0, "step": 4084 }, { "epoch": 5.573544002645813, "grad_norm": 0.20024304589917835, "learning_rate": 6.010205493809555e-06, "loss": 0.2077, "num_tokens": 3116872104.0, "step": 4085 }, { "epoch": 5.5749095836044935, "grad_norm": 0.22655369007522966, "learning_rate": 6.006069314683507e-06, "loss": 0.2132, "num_tokens": 3117603523.0, "step": 4086 }, { "epoch": 5.5762751645631745, "grad_norm": 0.21564135808790766, "learning_rate": 6.001934658989442e-06, "loss": 0.2124, "num_tokens": 3118463469.0, "step": 4087 }, { "epoch": 5.577640745521855, "grad_norm": 0.20648270669668944, "learning_rate": 5.997801527988669e-06, "loss": 0.1977, "num_tokens": 3119223708.0, "step": 4088 }, { "epoch": 5.579006326480535, "grad_norm": 0.2106464330369209, "learning_rate": 5.993669922942018e-06, "loss": 0.2009, "num_tokens": 3119923912.0, "step": 4089 }, { "epoch": 5.580371907439216, "grad_norm": 0.21273269270820844, "learning_rate": 5.989539845109868e-06, "loss": 0.1992, "num_tokens": 3120739563.0, "step": 4090 }, { "epoch": 5.581737488397896, "grad_norm": 0.21097021160117405, "learning_rate": 5.985411295752128e-06, "loss": 0.2031, "num_tokens": 3121473005.0, "step": 4091 }, { "epoch": 5.583103069356577, "grad_norm": 0.23030349271592465, "learning_rate": 5.981284276128238e-06, "loss": 0.2032, "num_tokens": 3122266349.0, "step": 4092 }, { "epoch": 5.584468650315257, "grad_norm": 0.2021921358725317, "learning_rate": 5.977158787497175e-06, "loss": 0.198, "num_tokens": 3123071981.0, "step": 4093 }, { "epoch": 5.585834231273938, "grad_norm": 0.20716562478186848, "learning_rate": 5.973034831117444e-06, "loss": 0.2029, "num_tokens": 3123918070.0, "step": 4094 }, { "epoch": 5.587199812232618, "grad_norm": 0.21887392955435184, "learning_rate": 5.968912408247085e-06, "loss": 0.2085, "num_tokens": 3124719503.0, "step": 4095 }, { "epoch": 5.588565393191299, "grad_norm": 0.21923978832350263, "learning_rate": 5.9647915201436735e-06, "loss": 0.2121, "num_tokens": 3125485855.0, "step": 4096 }, { "epoch": 5.589930974149979, "grad_norm": 0.19992748321655135, "learning_rate": 5.960672168064316e-06, "loss": 0.2002, "num_tokens": 3126223479.0, "step": 4097 }, { "epoch": 5.591296555108659, "grad_norm": 0.20946537043658145, "learning_rate": 5.956554353265642e-06, "loss": 0.2081, "num_tokens": 3126977866.0, "step": 4098 }, { "epoch": 5.59266213606734, "grad_norm": 0.21736264417558654, "learning_rate": 5.952438077003822e-06, "loss": 0.2135, "num_tokens": 3127721005.0, "step": 4099 }, { "epoch": 5.59402771702602, "grad_norm": 0.22115569313970443, "learning_rate": 5.948323340534554e-06, "loss": 0.2088, "num_tokens": 3128438761.0, "step": 4100 }, { "epoch": 5.595393297984701, "grad_norm": 0.22903359722893876, "learning_rate": 5.944210145113063e-06, "loss": 0.2013, "num_tokens": 3129258469.0, "step": 4101 }, { "epoch": 5.5967588789433815, "grad_norm": 0.20861779509467682, "learning_rate": 5.940098491994113e-06, "loss": 0.2072, "num_tokens": 3130016861.0, "step": 4102 }, { "epoch": 5.5981244599020625, "grad_norm": 0.23941779732263477, "learning_rate": 5.935988382431984e-06, "loss": 0.2091, "num_tokens": 3130761304.0, "step": 4103 }, { "epoch": 5.599490040860743, "grad_norm": 0.19040778178950618, "learning_rate": 5.931879817680497e-06, "loss": 0.2172, "num_tokens": 3131668800.0, "step": 4104 }, { "epoch": 5.600855621819424, "grad_norm": 0.2114857516777645, "learning_rate": 5.9277727989929945e-06, "loss": 0.1997, "num_tokens": 3132386950.0, "step": 4105 }, { "epoch": 5.602221202778104, "grad_norm": 0.2058526650825957, "learning_rate": 5.923667327622355e-06, "loss": 0.1993, "num_tokens": 3133173587.0, "step": 4106 }, { "epoch": 5.603586783736784, "grad_norm": 0.21651482145756312, "learning_rate": 5.919563404820976e-06, "loss": 0.2094, "num_tokens": 3133967098.0, "step": 4107 }, { "epoch": 5.604952364695465, "grad_norm": 0.270240867258495, "learning_rate": 5.915461031840784e-06, "loss": 0.2092, "num_tokens": 3134726030.0, "step": 4108 }, { "epoch": 5.606317945654145, "grad_norm": 0.21973160496800873, "learning_rate": 5.911360209933244e-06, "loss": 0.1999, "num_tokens": 3135477071.0, "step": 4109 }, { "epoch": 5.607683526612826, "grad_norm": 0.2326094519295077, "learning_rate": 5.907260940349324e-06, "loss": 0.2113, "num_tokens": 3136310599.0, "step": 4110 }, { "epoch": 5.609049107571506, "grad_norm": 0.20494262349505174, "learning_rate": 5.903163224339553e-06, "loss": 0.2042, "num_tokens": 3137059520.0, "step": 4111 }, { "epoch": 5.610414688530187, "grad_norm": 0.21660359980031887, "learning_rate": 5.899067063153953e-06, "loss": 0.208, "num_tokens": 3137818275.0, "step": 4112 }, { "epoch": 5.611780269488867, "grad_norm": 0.20562294403696807, "learning_rate": 5.894972458042088e-06, "loss": 0.2075, "num_tokens": 3138555024.0, "step": 4113 }, { "epoch": 5.613145850447548, "grad_norm": 0.19581591305992788, "learning_rate": 5.890879410253049e-06, "loss": 0.2094, "num_tokens": 3139425746.0, "step": 4114 }, { "epoch": 5.614511431406228, "grad_norm": 0.2110429235340085, "learning_rate": 5.8867879210354465e-06, "loss": 0.2067, "num_tokens": 3140206696.0, "step": 4115 }, { "epoch": 5.615877012364908, "grad_norm": 0.2192327126848145, "learning_rate": 5.88269799163741e-06, "loss": 0.2059, "num_tokens": 3140937706.0, "step": 4116 }, { "epoch": 5.617242593323589, "grad_norm": 0.21976734388039076, "learning_rate": 5.878609623306606e-06, "loss": 0.2001, "num_tokens": 3141740337.0, "step": 4117 }, { "epoch": 5.6186081742822696, "grad_norm": 0.20077167780228503, "learning_rate": 5.87452281729022e-06, "loss": 0.2055, "num_tokens": 3142455034.0, "step": 4118 }, { "epoch": 5.6199737552409506, "grad_norm": 0.2253784301744551, "learning_rate": 5.870437574834953e-06, "loss": 0.2114, "num_tokens": 3143258328.0, "step": 4119 }, { "epoch": 5.621339336199631, "grad_norm": 0.2226388452353918, "learning_rate": 5.866353897187043e-06, "loss": 0.2101, "num_tokens": 3143988079.0, "step": 4120 }, { "epoch": 5.622704917158312, "grad_norm": 0.22258174195057312, "learning_rate": 5.862271785592238e-06, "loss": 0.2112, "num_tokens": 3144745912.0, "step": 4121 }, { "epoch": 5.624070498116992, "grad_norm": 0.22498194746596176, "learning_rate": 5.858191241295814e-06, "loss": 0.2099, "num_tokens": 3145570708.0, "step": 4122 }, { "epoch": 5.625436079075673, "grad_norm": 0.1908574736654714, "learning_rate": 5.854112265542575e-06, "loss": 0.21, "num_tokens": 3146417194.0, "step": 4123 }, { "epoch": 5.626801660034353, "grad_norm": 0.20034749822455747, "learning_rate": 5.850034859576827e-06, "loss": 0.202, "num_tokens": 3147240342.0, "step": 4124 }, { "epoch": 5.628167240993033, "grad_norm": 0.21920728416392513, "learning_rate": 5.84595902464242e-06, "loss": 0.2155, "num_tokens": 3148007324.0, "step": 4125 }, { "epoch": 5.629532821951714, "grad_norm": 0.21035478817512437, "learning_rate": 5.841884761982712e-06, "loss": 0.2152, "num_tokens": 3148783698.0, "step": 4126 }, { "epoch": 5.630898402910394, "grad_norm": 0.21925088626475825, "learning_rate": 5.837812072840589e-06, "loss": 0.2115, "num_tokens": 3149564510.0, "step": 4127 }, { "epoch": 5.632263983869075, "grad_norm": 0.23236973787996654, "learning_rate": 5.833740958458438e-06, "loss": 0.2161, "num_tokens": 3150325159.0, "step": 4128 }, { "epoch": 5.633629564827755, "grad_norm": 0.21280160402838733, "learning_rate": 5.829671420078197e-06, "loss": 0.2118, "num_tokens": 3151096884.0, "step": 4129 }, { "epoch": 5.634995145786436, "grad_norm": 0.22114138329853644, "learning_rate": 5.8256034589412935e-06, "loss": 0.2025, "num_tokens": 3151809893.0, "step": 4130 }, { "epoch": 5.636360726745116, "grad_norm": 0.22964948640905392, "learning_rate": 5.821537076288691e-06, "loss": 0.1935, "num_tokens": 3152617124.0, "step": 4131 }, { "epoch": 5.637726307703797, "grad_norm": 0.20902028762433972, "learning_rate": 5.817472273360869e-06, "loss": 0.2088, "num_tokens": 3153397838.0, "step": 4132 }, { "epoch": 5.6390918886624775, "grad_norm": 0.1970712489301563, "learning_rate": 5.813409051397818e-06, "loss": 0.2047, "num_tokens": 3154158712.0, "step": 4133 }, { "epoch": 5.640457469621158, "grad_norm": 0.22073528222486, "learning_rate": 5.809347411639052e-06, "loss": 0.2129, "num_tokens": 3154899646.0, "step": 4134 }, { "epoch": 5.641823050579839, "grad_norm": 0.22631562477108055, "learning_rate": 5.805287355323606e-06, "loss": 0.2036, "num_tokens": 3155668206.0, "step": 4135 }, { "epoch": 5.643188631538519, "grad_norm": 0.22287095708652782, "learning_rate": 5.801228883690025e-06, "loss": 0.2036, "num_tokens": 3156406095.0, "step": 4136 }, { "epoch": 5.6445542124972, "grad_norm": 0.21522923227665347, "learning_rate": 5.797171997976364e-06, "loss": 0.21, "num_tokens": 3157169685.0, "step": 4137 }, { "epoch": 5.64591979345588, "grad_norm": 0.21668991876207291, "learning_rate": 5.793116699420218e-06, "loss": 0.2061, "num_tokens": 3157876834.0, "step": 4138 }, { "epoch": 5.647285374414561, "grad_norm": 0.22472686343258116, "learning_rate": 5.789062989258677e-06, "loss": 0.2107, "num_tokens": 3158662827.0, "step": 4139 }, { "epoch": 5.648650955373241, "grad_norm": 0.20152314247855102, "learning_rate": 5.785010868728345e-06, "loss": 0.2068, "num_tokens": 3159472617.0, "step": 4140 }, { "epoch": 5.650016536331922, "grad_norm": 0.21363527709941427, "learning_rate": 5.780960339065361e-06, "loss": 0.2087, "num_tokens": 3160301663.0, "step": 4141 }, { "epoch": 5.651382117290602, "grad_norm": 0.1980537890775585, "learning_rate": 5.7769114015053575e-06, "loss": 0.2113, "num_tokens": 3161069948.0, "step": 4142 }, { "epoch": 5.652747698249282, "grad_norm": 0.2290609837328324, "learning_rate": 5.772864057283492e-06, "loss": 0.2113, "num_tokens": 3161789416.0, "step": 4143 }, { "epoch": 5.654113279207963, "grad_norm": 0.21234301297175123, "learning_rate": 5.768818307634436e-06, "loss": 0.2035, "num_tokens": 3162567169.0, "step": 4144 }, { "epoch": 5.655478860166643, "grad_norm": 0.22367918593385025, "learning_rate": 5.764774153792374e-06, "loss": 0.2074, "num_tokens": 3163366386.0, "step": 4145 }, { "epoch": 5.656844441125324, "grad_norm": 0.22538655218840165, "learning_rate": 5.760731596990992e-06, "loss": 0.2126, "num_tokens": 3164158679.0, "step": 4146 }, { "epoch": 5.658210022084004, "grad_norm": 0.21872261798512013, "learning_rate": 5.756690638463512e-06, "loss": 0.2052, "num_tokens": 3164915297.0, "step": 4147 }, { "epoch": 5.659575603042685, "grad_norm": 0.2201534585509166, "learning_rate": 5.752651279442647e-06, "loss": 0.2059, "num_tokens": 3165729861.0, "step": 4148 }, { "epoch": 5.6609411840013655, "grad_norm": 0.21724150892351443, "learning_rate": 5.7486135211606344e-06, "loss": 0.204, "num_tokens": 3166450051.0, "step": 4149 }, { "epoch": 5.6623067649600465, "grad_norm": 0.22277488222355468, "learning_rate": 5.744577364849222e-06, "loss": 0.2087, "num_tokens": 3167189343.0, "step": 4150 }, { "epoch": 5.663672345918727, "grad_norm": 0.20412197006633614, "learning_rate": 5.740542811739658e-06, "loss": 0.2009, "num_tokens": 3168016654.0, "step": 4151 }, { "epoch": 5.665037926877407, "grad_norm": 0.2114148128753542, "learning_rate": 5.736509863062713e-06, "loss": 0.1996, "num_tokens": 3168769333.0, "step": 4152 }, { "epoch": 5.666403507836088, "grad_norm": 0.20578731270148976, "learning_rate": 5.7324785200486686e-06, "loss": 0.2078, "num_tokens": 3169567851.0, "step": 4153 }, { "epoch": 5.667769088794768, "grad_norm": 0.2242712683466558, "learning_rate": 5.7284487839273136e-06, "loss": 0.2085, "num_tokens": 3170285210.0, "step": 4154 }, { "epoch": 5.669134669753449, "grad_norm": 0.21246609675287478, "learning_rate": 5.7244206559279405e-06, "loss": 0.2094, "num_tokens": 3171021253.0, "step": 4155 }, { "epoch": 5.670500250712129, "grad_norm": 0.2222846227687881, "learning_rate": 5.72039413727936e-06, "loss": 0.2124, "num_tokens": 3171781660.0, "step": 4156 }, { "epoch": 5.67186583167081, "grad_norm": 0.22235257434960415, "learning_rate": 5.716369229209885e-06, "loss": 0.2081, "num_tokens": 3172620979.0, "step": 4157 }, { "epoch": 5.67323141262949, "grad_norm": 0.2166059863737499, "learning_rate": 5.712345932947345e-06, "loss": 0.2033, "num_tokens": 3173336689.0, "step": 4158 }, { "epoch": 5.674596993588171, "grad_norm": 0.21595549040077594, "learning_rate": 5.708324249719077e-06, "loss": 0.2076, "num_tokens": 3174085565.0, "step": 4159 }, { "epoch": 5.675962574546851, "grad_norm": 0.22280020610484227, "learning_rate": 5.704304180751912e-06, "loss": 0.2108, "num_tokens": 3174782401.0, "step": 4160 }, { "epoch": 5.677328155505531, "grad_norm": 0.22113022810331445, "learning_rate": 5.700285727272206e-06, "loss": 0.2091, "num_tokens": 3175571919.0, "step": 4161 }, { "epoch": 5.678693736464212, "grad_norm": 0.20066665226316344, "learning_rate": 5.696268890505814e-06, "loss": 0.2046, "num_tokens": 3176314937.0, "step": 4162 }, { "epoch": 5.680059317422892, "grad_norm": 0.2120071095809977, "learning_rate": 5.692253671678097e-06, "loss": 0.2118, "num_tokens": 3177094054.0, "step": 4163 }, { "epoch": 5.681424898381573, "grad_norm": 0.26009342116537454, "learning_rate": 5.688240072013927e-06, "loss": 0.2076, "num_tokens": 3177757331.0, "step": 4164 }, { "epoch": 5.6827904793402535, "grad_norm": 0.21127825977018833, "learning_rate": 5.6842280927376755e-06, "loss": 0.2009, "num_tokens": 3178489502.0, "step": 4165 }, { "epoch": 5.6841560602989345, "grad_norm": 0.2251269368365144, "learning_rate": 5.680217735073228e-06, "loss": 0.2045, "num_tokens": 3179243334.0, "step": 4166 }, { "epoch": 5.685521641257615, "grad_norm": 0.20957623872930387, "learning_rate": 5.6762090002439705e-06, "loss": 0.2051, "num_tokens": 3180018285.0, "step": 4167 }, { "epoch": 5.686887222216296, "grad_norm": 0.21696187391291002, "learning_rate": 5.672201889472795e-06, "loss": 0.2124, "num_tokens": 3180757264.0, "step": 4168 }, { "epoch": 5.688252803174976, "grad_norm": 0.21203275741145236, "learning_rate": 5.668196403982092e-06, "loss": 0.2115, "num_tokens": 3181566912.0, "step": 4169 }, { "epoch": 5.689618384133656, "grad_norm": 0.2089208089227541, "learning_rate": 5.664192544993766e-06, "loss": 0.2023, "num_tokens": 3182335422.0, "step": 4170 }, { "epoch": 5.690983965092337, "grad_norm": 0.22108277116048275, "learning_rate": 5.660190313729224e-06, "loss": 0.2016, "num_tokens": 3183039877.0, "step": 4171 }, { "epoch": 5.692349546051017, "grad_norm": 0.21849179643887834, "learning_rate": 5.6561897114093666e-06, "loss": 0.2073, "num_tokens": 3183803075.0, "step": 4172 }, { "epoch": 5.693715127009698, "grad_norm": 0.21827303922312224, "learning_rate": 5.652190739254608e-06, "loss": 0.2099, "num_tokens": 3184577701.0, "step": 4173 }, { "epoch": 5.695080707968378, "grad_norm": 0.21364991220070445, "learning_rate": 5.648193398484859e-06, "loss": 0.2025, "num_tokens": 3185303235.0, "step": 4174 }, { "epoch": 5.696446288927059, "grad_norm": 0.2169093851388293, "learning_rate": 5.64419769031954e-06, "loss": 0.2054, "num_tokens": 3186082597.0, "step": 4175 }, { "epoch": 5.697811869885739, "grad_norm": 0.21867667813899136, "learning_rate": 5.640203615977562e-06, "loss": 0.2086, "num_tokens": 3186984541.0, "step": 4176 }, { "epoch": 5.69917745084442, "grad_norm": 0.22637487156415106, "learning_rate": 5.636211176677353e-06, "loss": 0.2099, "num_tokens": 3187802906.0, "step": 4177 }, { "epoch": 5.7005430318031, "grad_norm": 0.20940862193057339, "learning_rate": 5.632220373636825e-06, "loss": 0.2119, "num_tokens": 3188534065.0, "step": 4178 }, { "epoch": 5.70190861276178, "grad_norm": 0.21991412790333378, "learning_rate": 5.628231208073402e-06, "loss": 0.2082, "num_tokens": 3189333439.0, "step": 4179 }, { "epoch": 5.703274193720461, "grad_norm": 0.2198151247205235, "learning_rate": 5.624243681204007e-06, "loss": 0.2095, "num_tokens": 3190031977.0, "step": 4180 }, { "epoch": 5.7046397746791415, "grad_norm": 0.209179104590826, "learning_rate": 5.620257794245056e-06, "loss": 0.2002, "num_tokens": 3190777536.0, "step": 4181 }, { "epoch": 5.7060053556378225, "grad_norm": 0.22203526909844284, "learning_rate": 5.616273548412483e-06, "loss": 0.2083, "num_tokens": 3191564273.0, "step": 4182 }, { "epoch": 5.707370936596503, "grad_norm": 0.22328209761728995, "learning_rate": 5.612290944921697e-06, "loss": 0.2105, "num_tokens": 3192287475.0, "step": 4183 }, { "epoch": 5.708736517555184, "grad_norm": 0.2318235439122243, "learning_rate": 5.60830998498762e-06, "loss": 0.2041, "num_tokens": 3193030960.0, "step": 4184 }, { "epoch": 5.710102098513864, "grad_norm": 0.20793962082929957, "learning_rate": 5.604330669824674e-06, "loss": 0.2007, "num_tokens": 3193740604.0, "step": 4185 }, { "epoch": 5.711467679472545, "grad_norm": 0.2251826900040625, "learning_rate": 5.600353000646777e-06, "loss": 0.2091, "num_tokens": 3194527109.0, "step": 4186 }, { "epoch": 5.712833260431225, "grad_norm": 0.3221795357174082, "learning_rate": 5.596376978667337e-06, "loss": 0.2012, "num_tokens": 3195227991.0, "step": 4187 }, { "epoch": 5.714198841389905, "grad_norm": 0.23184050821634009, "learning_rate": 5.59240260509927e-06, "loss": 0.2041, "num_tokens": 3195999454.0, "step": 4188 }, { "epoch": 5.715564422348586, "grad_norm": 0.2220343822679678, "learning_rate": 5.588429881154988e-06, "loss": 0.2051, "num_tokens": 3196727210.0, "step": 4189 }, { "epoch": 5.716930003307266, "grad_norm": 0.2229491003925695, "learning_rate": 5.584458808046389e-06, "loss": 0.2136, "num_tokens": 3197448777.0, "step": 4190 }, { "epoch": 5.718295584265947, "grad_norm": 0.22221143464155388, "learning_rate": 5.580489386984886e-06, "loss": 0.2108, "num_tokens": 3198213864.0, "step": 4191 }, { "epoch": 5.719661165224627, "grad_norm": 0.22029028439239534, "learning_rate": 5.576521619181369e-06, "loss": 0.2176, "num_tokens": 3198997536.0, "step": 4192 }, { "epoch": 5.721026746183308, "grad_norm": 0.2111173960816002, "learning_rate": 5.5725555058462355e-06, "loss": 0.2034, "num_tokens": 3199745049.0, "step": 4193 }, { "epoch": 5.722392327141988, "grad_norm": 0.2118995712731698, "learning_rate": 5.5685910481893755e-06, "loss": 0.2098, "num_tokens": 3200486957.0, "step": 4194 }, { "epoch": 5.723757908100669, "grad_norm": 0.21479371609848036, "learning_rate": 5.564628247420172e-06, "loss": 0.1996, "num_tokens": 3201174487.0, "step": 4195 }, { "epoch": 5.725123489059349, "grad_norm": 0.22105228961256235, "learning_rate": 5.560667104747502e-06, "loss": 0.2046, "num_tokens": 3201903970.0, "step": 4196 }, { "epoch": 5.7264890700180295, "grad_norm": 0.20705616933556797, "learning_rate": 5.556707621379742e-06, "loss": 0.2083, "num_tokens": 3202700110.0, "step": 4197 }, { "epoch": 5.7278546509767105, "grad_norm": 0.221517188532402, "learning_rate": 5.552749798524761e-06, "loss": 0.2071, "num_tokens": 3203532829.0, "step": 4198 }, { "epoch": 5.729220231935391, "grad_norm": 0.2057744763005639, "learning_rate": 5.548793637389911e-06, "loss": 0.2098, "num_tokens": 3204296785.0, "step": 4199 }, { "epoch": 5.730585812894072, "grad_norm": 0.22731729165470155, "learning_rate": 5.544839139182054e-06, "loss": 0.2076, "num_tokens": 3205011171.0, "step": 4200 }, { "epoch": 5.731951393852752, "grad_norm": 0.22728791776817003, "learning_rate": 5.540886305107535e-06, "loss": 0.2005, "num_tokens": 3205748357.0, "step": 4201 }, { "epoch": 5.733316974811433, "grad_norm": 0.21835099834136748, "learning_rate": 5.536935136372184e-06, "loss": 0.2045, "num_tokens": 3206451879.0, "step": 4202 }, { "epoch": 5.734682555770113, "grad_norm": 0.21171533568998757, "learning_rate": 5.5329856341813425e-06, "loss": 0.1946, "num_tokens": 3207249454.0, "step": 4203 }, { "epoch": 5.736048136728794, "grad_norm": 0.20700682995359845, "learning_rate": 5.529037799739825e-06, "loss": 0.203, "num_tokens": 3208014832.0, "step": 4204 }, { "epoch": 5.737413717687474, "grad_norm": 0.21724911177983613, "learning_rate": 5.525091634251946e-06, "loss": 0.2174, "num_tokens": 3208780386.0, "step": 4205 }, { "epoch": 5.738779298646154, "grad_norm": 0.23799172623053194, "learning_rate": 5.521147138921514e-06, "loss": 0.2127, "num_tokens": 3209521371.0, "step": 4206 }, { "epoch": 5.740144879604835, "grad_norm": 0.21193376313856313, "learning_rate": 5.517204314951824e-06, "loss": 0.2092, "num_tokens": 3210274446.0, "step": 4207 }, { "epoch": 5.741510460563515, "grad_norm": 0.20563248358199748, "learning_rate": 5.51326316354565e-06, "loss": 0.2003, "num_tokens": 3211058488.0, "step": 4208 }, { "epoch": 5.742876041522196, "grad_norm": 0.21617990396044937, "learning_rate": 5.509323685905281e-06, "loss": 0.2061, "num_tokens": 3211836131.0, "step": 4209 }, { "epoch": 5.744241622480876, "grad_norm": 0.2086093815405659, "learning_rate": 5.505385883232476e-06, "loss": 0.2043, "num_tokens": 3212598231.0, "step": 4210 }, { "epoch": 5.745607203439557, "grad_norm": 0.23242601937821505, "learning_rate": 5.501449756728479e-06, "loss": 0.2087, "num_tokens": 3213297670.0, "step": 4211 }, { "epoch": 5.746972784398237, "grad_norm": 0.2095644538763236, "learning_rate": 5.497515307594045e-06, "loss": 0.2067, "num_tokens": 3214129169.0, "step": 4212 }, { "epoch": 5.7483383653569184, "grad_norm": 0.2085374176410146, "learning_rate": 5.493582537029396e-06, "loss": 0.218, "num_tokens": 3214912290.0, "step": 4213 }, { "epoch": 5.749703946315599, "grad_norm": 0.22304254700386042, "learning_rate": 5.4896514462342525e-06, "loss": 0.209, "num_tokens": 3215690671.0, "step": 4214 }, { "epoch": 5.751069527274279, "grad_norm": 0.21195146778446572, "learning_rate": 5.485722036407819e-06, "loss": 0.2114, "num_tokens": 3216452994.0, "step": 4215 }, { "epoch": 5.75243510823296, "grad_norm": 0.216456612300976, "learning_rate": 5.481794308748792e-06, "loss": 0.2115, "num_tokens": 3217209036.0, "step": 4216 }, { "epoch": 5.75380068919164, "grad_norm": 0.22097617692338534, "learning_rate": 5.477868264455346e-06, "loss": 0.2056, "num_tokens": 3217964047.0, "step": 4217 }, { "epoch": 5.755166270150321, "grad_norm": 0.20455618307805526, "learning_rate": 5.473943904725149e-06, "loss": 0.2098, "num_tokens": 3218753988.0, "step": 4218 }, { "epoch": 5.756531851109001, "grad_norm": 0.20380263317187766, "learning_rate": 5.470021230755358e-06, "loss": 0.2179, "num_tokens": 3219525105.0, "step": 4219 }, { "epoch": 5.757897432067682, "grad_norm": 0.23241617806064577, "learning_rate": 5.466100243742598e-06, "loss": 0.2086, "num_tokens": 3220272817.0, "step": 4220 }, { "epoch": 5.759263013026362, "grad_norm": 0.19719624748270617, "learning_rate": 5.4621809448830084e-06, "loss": 0.2131, "num_tokens": 3221050873.0, "step": 4221 }, { "epoch": 5.760628593985043, "grad_norm": 0.21722937780254506, "learning_rate": 5.458263335372186e-06, "loss": 0.2085, "num_tokens": 3221844959.0, "step": 4222 }, { "epoch": 5.761994174943723, "grad_norm": 0.21838877853012525, "learning_rate": 5.454347416405229e-06, "loss": 0.2023, "num_tokens": 3222581258.0, "step": 4223 }, { "epoch": 5.763359755902403, "grad_norm": 0.21131473470635287, "learning_rate": 5.450433189176712e-06, "loss": 0.2058, "num_tokens": 3223325447.0, "step": 4224 }, { "epoch": 5.764725336861084, "grad_norm": 0.21723025573530222, "learning_rate": 5.446520654880702e-06, "loss": 0.2102, "num_tokens": 3224034726.0, "step": 4225 }, { "epoch": 5.766090917819764, "grad_norm": 0.21843710634020033, "learning_rate": 5.442609814710736e-06, "loss": 0.2076, "num_tokens": 3224788015.0, "step": 4226 }, { "epoch": 5.767456498778445, "grad_norm": 0.2047665164258604, "learning_rate": 5.438700669859844e-06, "loss": 0.2065, "num_tokens": 3225622323.0, "step": 4227 }, { "epoch": 5.7688220797371255, "grad_norm": 0.20442896012783388, "learning_rate": 5.434793221520542e-06, "loss": 0.2063, "num_tokens": 3226453591.0, "step": 4228 }, { "epoch": 5.7701876606958065, "grad_norm": 0.21252747574411618, "learning_rate": 5.4308874708848145e-06, "loss": 0.2081, "num_tokens": 3227226684.0, "step": 4229 }, { "epoch": 5.771553241654487, "grad_norm": 0.23583610102433405, "learning_rate": 5.426983419144148e-06, "loss": 0.2077, "num_tokens": 3228114046.0, "step": 4230 }, { "epoch": 5.772918822613168, "grad_norm": 0.19979481668525254, "learning_rate": 5.42308106748949e-06, "loss": 0.194, "num_tokens": 3228849217.0, "step": 4231 }, { "epoch": 5.774284403571848, "grad_norm": 0.21076584079899705, "learning_rate": 5.419180417111282e-06, "loss": 0.2, "num_tokens": 3229527686.0, "step": 4232 }, { "epoch": 5.775649984530528, "grad_norm": 0.21924347734212937, "learning_rate": 5.415281469199447e-06, "loss": 0.203, "num_tokens": 3230322181.0, "step": 4233 }, { "epoch": 5.777015565489209, "grad_norm": 0.2135158821265052, "learning_rate": 5.41138422494338e-06, "loss": 0.208, "num_tokens": 3231087725.0, "step": 4234 }, { "epoch": 5.778381146447889, "grad_norm": 0.226938044962691, "learning_rate": 5.407488685531963e-06, "loss": 0.2071, "num_tokens": 3231798444.0, "step": 4235 }, { "epoch": 5.77974672740657, "grad_norm": 0.19839179528987155, "learning_rate": 5.4035948521535575e-06, "loss": 0.2202, "num_tokens": 3232640034.0, "step": 4236 }, { "epoch": 5.78111230836525, "grad_norm": 0.21473698981998282, "learning_rate": 5.399702725996001e-06, "loss": 0.2105, "num_tokens": 3233467000.0, "step": 4237 }, { "epoch": 5.782477889323931, "grad_norm": 0.22757290814232478, "learning_rate": 5.395812308246617e-06, "loss": 0.2148, "num_tokens": 3234204552.0, "step": 4238 }, { "epoch": 5.783843470282611, "grad_norm": 0.2119407282937037, "learning_rate": 5.391923600092204e-06, "loss": 0.2001, "num_tokens": 3234914773.0, "step": 4239 }, { "epoch": 5.785209051241292, "grad_norm": 0.21023216573991196, "learning_rate": 5.388036602719031e-06, "loss": 0.2018, "num_tokens": 3235690965.0, "step": 4240 }, { "epoch": 5.786574632199972, "grad_norm": 0.21780525051768712, "learning_rate": 5.384151317312858e-06, "loss": 0.2072, "num_tokens": 3236457810.0, "step": 4241 }, { "epoch": 5.787940213158652, "grad_norm": 0.20861535845922327, "learning_rate": 5.380267745058918e-06, "loss": 0.2034, "num_tokens": 3237110740.0, "step": 4242 }, { "epoch": 5.789305794117333, "grad_norm": 0.21666499170770342, "learning_rate": 5.376385887141917e-06, "loss": 0.2117, "num_tokens": 3237857545.0, "step": 4243 }, { "epoch": 5.7906713750760135, "grad_norm": 0.23865192407785457, "learning_rate": 5.372505744746044e-06, "loss": 0.1983, "num_tokens": 3238588908.0, "step": 4244 }, { "epoch": 5.7920369560346945, "grad_norm": 0.20190108714219912, "learning_rate": 5.368627319054962e-06, "loss": 0.2055, "num_tokens": 3239333447.0, "step": 4245 }, { "epoch": 5.793402536993375, "grad_norm": 0.20648191953908449, "learning_rate": 5.36475061125181e-06, "loss": 0.1995, "num_tokens": 3240089534.0, "step": 4246 }, { "epoch": 5.794768117952056, "grad_norm": 0.21183723035387533, "learning_rate": 5.360875622519206e-06, "loss": 0.2098, "num_tokens": 3240867072.0, "step": 4247 }, { "epoch": 5.796133698910736, "grad_norm": 0.21704285914067947, "learning_rate": 5.357002354039243e-06, "loss": 0.204, "num_tokens": 3241549888.0, "step": 4248 }, { "epoch": 5.797499279869417, "grad_norm": 0.21899926988477267, "learning_rate": 5.3531308069934805e-06, "loss": 0.2085, "num_tokens": 3242338907.0, "step": 4249 }, { "epoch": 5.798864860828097, "grad_norm": 0.21654837110254418, "learning_rate": 5.349260982562963e-06, "loss": 0.1983, "num_tokens": 3243071888.0, "step": 4250 }, { "epoch": 5.800230441786777, "grad_norm": 0.22214505598588327, "learning_rate": 5.3453928819282105e-06, "loss": 0.1998, "num_tokens": 3243797760.0, "step": 4251 }, { "epoch": 5.801596022745458, "grad_norm": 0.21917820400216237, "learning_rate": 5.341526506269208e-06, "loss": 0.1978, "num_tokens": 3244515569.0, "step": 4252 }, { "epoch": 5.802961603704138, "grad_norm": 0.24734326250469496, "learning_rate": 5.337661856765419e-06, "loss": 0.2022, "num_tokens": 3245213256.0, "step": 4253 }, { "epoch": 5.804327184662819, "grad_norm": 0.22286648804488418, "learning_rate": 5.333798934595784e-06, "loss": 0.2154, "num_tokens": 3245930182.0, "step": 4254 }, { "epoch": 5.805692765621499, "grad_norm": 0.20967386384571665, "learning_rate": 5.329937740938711e-06, "loss": 0.2053, "num_tokens": 3246765786.0, "step": 4255 }, { "epoch": 5.80705834658018, "grad_norm": 0.2096688573858637, "learning_rate": 5.3260782769720865e-06, "loss": 0.2014, "num_tokens": 3247518844.0, "step": 4256 }, { "epoch": 5.80842392753886, "grad_norm": 0.22356079913806276, "learning_rate": 5.32222054387326e-06, "loss": 0.2116, "num_tokens": 3248212248.0, "step": 4257 }, { "epoch": 5.809789508497541, "grad_norm": 0.23357629409070074, "learning_rate": 5.318364542819062e-06, "loss": 0.2052, "num_tokens": 3248924615.0, "step": 4258 }, { "epoch": 5.811155089456221, "grad_norm": 0.22396863417081314, "learning_rate": 5.314510274985792e-06, "loss": 0.2037, "num_tokens": 3249707347.0, "step": 4259 }, { "epoch": 5.8125206704149015, "grad_norm": 0.2163136701415485, "learning_rate": 5.310657741549221e-06, "loss": 0.2041, "num_tokens": 3250419921.0, "step": 4260 }, { "epoch": 5.8138862513735825, "grad_norm": 0.22716725346773228, "learning_rate": 5.306806943684586e-06, "loss": 0.2089, "num_tokens": 3251170917.0, "step": 4261 }, { "epoch": 5.815251832332263, "grad_norm": 0.2141548579426943, "learning_rate": 5.302957882566602e-06, "loss": 0.2122, "num_tokens": 3252012057.0, "step": 4262 }, { "epoch": 5.816617413290944, "grad_norm": 0.2418110747203828, "learning_rate": 5.299110559369449e-06, "loss": 0.2138, "num_tokens": 3252781977.0, "step": 4263 }, { "epoch": 5.817982994249624, "grad_norm": 0.19964885661196688, "learning_rate": 5.29526497526678e-06, "loss": 0.2191, "num_tokens": 3253555334.0, "step": 4264 }, { "epoch": 5.819348575208305, "grad_norm": 0.21499519344453938, "learning_rate": 5.291421131431721e-06, "loss": 0.205, "num_tokens": 3254276471.0, "step": 4265 }, { "epoch": 5.820714156166985, "grad_norm": 0.2190296438373196, "learning_rate": 5.287579029036854e-06, "loss": 0.2084, "num_tokens": 3255028291.0, "step": 4266 }, { "epoch": 5.822079737125666, "grad_norm": 0.20955966638409798, "learning_rate": 5.283738669254239e-06, "loss": 0.2083, "num_tokens": 3255776714.0, "step": 4267 }, { "epoch": 5.823445318084346, "grad_norm": 0.20802750826421257, "learning_rate": 5.279900053255407e-06, "loss": 0.2144, "num_tokens": 3256567695.0, "step": 4268 }, { "epoch": 5.824810899043026, "grad_norm": 0.22619374530979092, "learning_rate": 5.276063182211355e-06, "loss": 0.2097, "num_tokens": 3257350178.0, "step": 4269 }, { "epoch": 5.826176480001707, "grad_norm": 0.2133765418911571, "learning_rate": 5.272228057292537e-06, "loss": 0.211, "num_tokens": 3258108844.0, "step": 4270 }, { "epoch": 5.827542060960387, "grad_norm": 0.21388621500752694, "learning_rate": 5.268394679668898e-06, "loss": 0.2034, "num_tokens": 3258862161.0, "step": 4271 }, { "epoch": 5.828907641919068, "grad_norm": 0.21503194365858638, "learning_rate": 5.264563050509826e-06, "loss": 0.2132, "num_tokens": 3259649546.0, "step": 4272 }, { "epoch": 5.830273222877748, "grad_norm": 0.22044295599093336, "learning_rate": 5.26073317098418e-06, "loss": 0.1994, "num_tokens": 3260378620.0, "step": 4273 }, { "epoch": 5.831638803836429, "grad_norm": 0.21109563817559718, "learning_rate": 5.256905042260303e-06, "loss": 0.2082, "num_tokens": 3261203666.0, "step": 4274 }, { "epoch": 5.833004384795109, "grad_norm": 0.21574876191322387, "learning_rate": 5.2530786655059825e-06, "loss": 0.211, "num_tokens": 3261949136.0, "step": 4275 }, { "epoch": 5.83436996575379, "grad_norm": 0.21317963019101357, "learning_rate": 5.249254041888485e-06, "loss": 0.2049, "num_tokens": 3262700366.0, "step": 4276 }, { "epoch": 5.8357355467124705, "grad_norm": 0.22040899052544635, "learning_rate": 5.245431172574533e-06, "loss": 0.2006, "num_tokens": 3263438965.0, "step": 4277 }, { "epoch": 5.837101127671151, "grad_norm": 0.22883335640467845, "learning_rate": 5.241610058730327e-06, "loss": 0.2064, "num_tokens": 3264120668.0, "step": 4278 }, { "epoch": 5.838466708629832, "grad_norm": 0.2136898359493668, "learning_rate": 5.237790701521512e-06, "loss": 0.209, "num_tokens": 3264886474.0, "step": 4279 }, { "epoch": 5.839832289588512, "grad_norm": 0.21613962742259807, "learning_rate": 5.233973102113215e-06, "loss": 0.2126, "num_tokens": 3265675330.0, "step": 4280 }, { "epoch": 5.841197870547193, "grad_norm": 0.20326860189631368, "learning_rate": 5.230157261670023e-06, "loss": 0.2025, "num_tokens": 3266519191.0, "step": 4281 }, { "epoch": 5.842563451505873, "grad_norm": 0.2126056708207295, "learning_rate": 5.226343181355972e-06, "loss": 0.2143, "num_tokens": 3267309371.0, "step": 4282 }, { "epoch": 5.843929032464554, "grad_norm": 0.21967408916261885, "learning_rate": 5.222530862334587e-06, "loss": 0.2097, "num_tokens": 3268077241.0, "step": 4283 }, { "epoch": 5.845294613423234, "grad_norm": 0.21069315680177833, "learning_rate": 5.2187203057688314e-06, "loss": 0.1934, "num_tokens": 3268749832.0, "step": 4284 }, { "epoch": 5.846660194381915, "grad_norm": 0.2211461826018855, "learning_rate": 5.214911512821146e-06, "loss": 0.2049, "num_tokens": 3269447963.0, "step": 4285 }, { "epoch": 5.848025775340595, "grad_norm": 0.21130044451007018, "learning_rate": 5.211104484653427e-06, "loss": 0.2144, "num_tokens": 3270187708.0, "step": 4286 }, { "epoch": 5.849391356299275, "grad_norm": 0.2341511752425816, "learning_rate": 5.207299222427037e-06, "loss": 0.2031, "num_tokens": 3270874520.0, "step": 4287 }, { "epoch": 5.850756937257956, "grad_norm": 0.208967743256596, "learning_rate": 5.203495727302789e-06, "loss": 0.1945, "num_tokens": 3271553203.0, "step": 4288 }, { "epoch": 5.852122518216636, "grad_norm": 0.21664586199560606, "learning_rate": 5.199694000440971e-06, "loss": 0.2037, "num_tokens": 3272304379.0, "step": 4289 }, { "epoch": 5.853488099175317, "grad_norm": 0.21460257494661653, "learning_rate": 5.1958940430013275e-06, "loss": 0.2011, "num_tokens": 3273074280.0, "step": 4290 }, { "epoch": 5.854853680133997, "grad_norm": 0.22888846940778182, "learning_rate": 5.192095856143049e-06, "loss": 0.2009, "num_tokens": 3273777879.0, "step": 4291 }, { "epoch": 5.856219261092678, "grad_norm": 0.20150240069647052, "learning_rate": 5.188299441024812e-06, "loss": 0.2116, "num_tokens": 3274547114.0, "step": 4292 }, { "epoch": 5.8575848420513585, "grad_norm": 0.23367448929096357, "learning_rate": 5.18450479880473e-06, "loss": 0.2078, "num_tokens": 3275295259.0, "step": 4293 }, { "epoch": 5.8589504230100395, "grad_norm": 0.20692345283129254, "learning_rate": 5.180711930640387e-06, "loss": 0.2008, "num_tokens": 3276069554.0, "step": 4294 }, { "epoch": 5.86031600396872, "grad_norm": 0.21162434163948513, "learning_rate": 5.176920837688824e-06, "loss": 0.2033, "num_tokens": 3276838246.0, "step": 4295 }, { "epoch": 5.8616815849274, "grad_norm": 0.21938280387218573, "learning_rate": 5.173131521106534e-06, "loss": 0.2073, "num_tokens": 3277561695.0, "step": 4296 }, { "epoch": 5.863047165886081, "grad_norm": 0.20825052217912057, "learning_rate": 5.169343982049476e-06, "loss": 0.2181, "num_tokens": 3278396040.0, "step": 4297 }, { "epoch": 5.864412746844761, "grad_norm": 0.22920460866176232, "learning_rate": 5.165558221673066e-06, "loss": 0.2194, "num_tokens": 3279124842.0, "step": 4298 }, { "epoch": 5.865778327803442, "grad_norm": 0.21908621964082123, "learning_rate": 5.161774241132178e-06, "loss": 0.2046, "num_tokens": 3279822553.0, "step": 4299 }, { "epoch": 5.867143908762122, "grad_norm": 0.23915925181085307, "learning_rate": 5.1579920415811305e-06, "loss": 0.2064, "num_tokens": 3280533997.0, "step": 4300 }, { "epoch": 5.868509489720803, "grad_norm": 0.2218596843839502, "learning_rate": 5.154211624173721e-06, "loss": 0.2059, "num_tokens": 3281247310.0, "step": 4301 }, { "epoch": 5.869875070679483, "grad_norm": 0.20714650474792562, "learning_rate": 5.150432990063181e-06, "loss": 0.2005, "num_tokens": 3282052033.0, "step": 4302 }, { "epoch": 5.871240651638164, "grad_norm": 0.21281327476629777, "learning_rate": 5.146656140402214e-06, "loss": 0.2063, "num_tokens": 3282753732.0, "step": 4303 }, { "epoch": 5.872606232596844, "grad_norm": 0.2192777345980949, "learning_rate": 5.142881076342976e-06, "loss": 0.1986, "num_tokens": 3283501176.0, "step": 4304 }, { "epoch": 5.873971813555524, "grad_norm": 0.20388940086389573, "learning_rate": 5.139107799037067e-06, "loss": 0.2068, "num_tokens": 3284238988.0, "step": 4305 }, { "epoch": 5.875337394514205, "grad_norm": 0.20915427658768287, "learning_rate": 5.135336309635554e-06, "loss": 0.2017, "num_tokens": 3285067399.0, "step": 4306 }, { "epoch": 5.8767029754728854, "grad_norm": 0.19964095568486448, "learning_rate": 5.131566609288956e-06, "loss": 0.202, "num_tokens": 3285873888.0, "step": 4307 }, { "epoch": 5.8780685564315664, "grad_norm": 0.20402011796763142, "learning_rate": 5.12779869914725e-06, "loss": 0.205, "num_tokens": 3286664345.0, "step": 4308 }, { "epoch": 5.879434137390247, "grad_norm": 0.20699272489509823, "learning_rate": 5.12403258035985e-06, "loss": 0.208, "num_tokens": 3287406288.0, "step": 4309 }, { "epoch": 5.880799718348928, "grad_norm": 0.23332732758633432, "learning_rate": 5.120268254075651e-06, "loss": 0.2104, "num_tokens": 3288196384.0, "step": 4310 }, { "epoch": 5.882165299307608, "grad_norm": 0.211704477149572, "learning_rate": 5.116505721442973e-06, "loss": 0.2059, "num_tokens": 3288946042.0, "step": 4311 }, { "epoch": 5.883530880266289, "grad_norm": 0.20688819191061014, "learning_rate": 5.1127449836096075e-06, "loss": 0.2097, "num_tokens": 3289677088.0, "step": 4312 }, { "epoch": 5.884896461224969, "grad_norm": 0.21389439942296823, "learning_rate": 5.1089860417227954e-06, "loss": 0.1984, "num_tokens": 3290449035.0, "step": 4313 }, { "epoch": 5.886262042183649, "grad_norm": 0.2067227950024797, "learning_rate": 5.105228896929222e-06, "loss": 0.2113, "num_tokens": 3291197208.0, "step": 4314 }, { "epoch": 5.88762762314233, "grad_norm": 0.2082058742033818, "learning_rate": 5.101473550375029e-06, "loss": 0.2051, "num_tokens": 3291904843.0, "step": 4315 }, { "epoch": 5.88899320410101, "grad_norm": 0.21896873578723888, "learning_rate": 5.097720003205813e-06, "loss": 0.211, "num_tokens": 3292651610.0, "step": 4316 }, { "epoch": 5.890358785059691, "grad_norm": 0.2095118476267361, "learning_rate": 5.093968256566623e-06, "loss": 0.2041, "num_tokens": 3293390492.0, "step": 4317 }, { "epoch": 5.891724366018371, "grad_norm": 0.21219252260559157, "learning_rate": 5.090218311601944e-06, "loss": 0.2063, "num_tokens": 3294147522.0, "step": 4318 }, { "epoch": 5.893089946977052, "grad_norm": 0.2165786605823634, "learning_rate": 5.086470169455728e-06, "loss": 0.2005, "num_tokens": 3294816452.0, "step": 4319 }, { "epoch": 5.894455527935732, "grad_norm": 0.21132510784164318, "learning_rate": 5.08272383127137e-06, "loss": 0.2132, "num_tokens": 3295650715.0, "step": 4320 }, { "epoch": 5.895821108894413, "grad_norm": 0.23472334919192886, "learning_rate": 5.078979298191717e-06, "loss": 0.212, "num_tokens": 3296426697.0, "step": 4321 }, { "epoch": 5.897186689853093, "grad_norm": 0.21325005362807478, "learning_rate": 5.075236571359063e-06, "loss": 0.2083, "num_tokens": 3297169758.0, "step": 4322 }, { "epoch": 5.8985522708117735, "grad_norm": 0.21817819755737483, "learning_rate": 5.0714956519151506e-06, "loss": 0.204, "num_tokens": 3297918546.0, "step": 4323 }, { "epoch": 5.8999178517704545, "grad_norm": 0.20837556125867127, "learning_rate": 5.067756541001173e-06, "loss": 0.2012, "num_tokens": 3298741246.0, "step": 4324 }, { "epoch": 5.901283432729135, "grad_norm": 0.21742721344862487, "learning_rate": 5.06401923975777e-06, "loss": 0.1952, "num_tokens": 3299477437.0, "step": 4325 }, { "epoch": 5.902649013687816, "grad_norm": 0.21705678009930437, "learning_rate": 5.060283749325033e-06, "loss": 0.2103, "num_tokens": 3300209536.0, "step": 4326 }, { "epoch": 5.904014594646496, "grad_norm": 0.2167365092664306, "learning_rate": 5.0565500708424974e-06, "loss": 0.1974, "num_tokens": 3300988187.0, "step": 4327 }, { "epoch": 5.905380175605177, "grad_norm": 0.2145120820499253, "learning_rate": 5.052818205449146e-06, "loss": 0.2066, "num_tokens": 3301695135.0, "step": 4328 }, { "epoch": 5.906745756563857, "grad_norm": 0.21966608179895106, "learning_rate": 5.049088154283408e-06, "loss": 0.2043, "num_tokens": 3302452563.0, "step": 4329 }, { "epoch": 5.908111337522538, "grad_norm": 0.23560542723043307, "learning_rate": 5.045359918483162e-06, "loss": 0.213, "num_tokens": 3303217423.0, "step": 4330 }, { "epoch": 5.909476918481218, "grad_norm": 0.21777119466828512, "learning_rate": 5.041633499185734e-06, "loss": 0.2109, "num_tokens": 3303971620.0, "step": 4331 }, { "epoch": 5.910842499439898, "grad_norm": 0.21447875595395602, "learning_rate": 5.037908897527888e-06, "loss": 0.2046, "num_tokens": 3304761851.0, "step": 4332 }, { "epoch": 5.912208080398579, "grad_norm": 0.216226126931504, "learning_rate": 5.034186114645842e-06, "loss": 0.2074, "num_tokens": 3305456057.0, "step": 4333 }, { "epoch": 5.913573661357259, "grad_norm": 0.2119073996197028, "learning_rate": 5.030465151675257e-06, "loss": 0.2034, "num_tokens": 3306251305.0, "step": 4334 }, { "epoch": 5.91493924231594, "grad_norm": 0.2050809888935085, "learning_rate": 5.0267460097512285e-06, "loss": 0.204, "num_tokens": 3306986725.0, "step": 4335 }, { "epoch": 5.91630482327462, "grad_norm": 0.21379655028086933, "learning_rate": 5.02302869000832e-06, "loss": 0.2055, "num_tokens": 3307713896.0, "step": 4336 }, { "epoch": 5.917670404233301, "grad_norm": 0.2736571320246307, "learning_rate": 5.019313193580515e-06, "loss": 0.2102, "num_tokens": 3308501007.0, "step": 4337 }, { "epoch": 5.919035985191981, "grad_norm": 0.2068032644804702, "learning_rate": 5.015599521601253e-06, "loss": 0.2091, "num_tokens": 3309187467.0, "step": 4338 }, { "epoch": 5.920401566150662, "grad_norm": 0.2454308551382579, "learning_rate": 5.0118876752034155e-06, "loss": 0.2183, "num_tokens": 3309956356.0, "step": 4339 }, { "epoch": 5.9217671471093425, "grad_norm": 0.21683351929514097, "learning_rate": 5.008177655519327e-06, "loss": 0.209, "num_tokens": 3310763853.0, "step": 4340 }, { "epoch": 5.923132728068023, "grad_norm": 0.208237596357415, "learning_rate": 5.004469463680748e-06, "loss": 0.2068, "num_tokens": 3311519765.0, "step": 4341 }, { "epoch": 5.924498309026704, "grad_norm": 0.21057392268394162, "learning_rate": 5.000763100818893e-06, "loss": 0.2071, "num_tokens": 3312340164.0, "step": 4342 }, { "epoch": 5.925863889985384, "grad_norm": 0.22311089282393046, "learning_rate": 4.997058568064413e-06, "loss": 0.2102, "num_tokens": 3313110884.0, "step": 4343 }, { "epoch": 5.927229470944065, "grad_norm": 0.22005315544875517, "learning_rate": 4.993355866547393e-06, "loss": 0.2114, "num_tokens": 3313869665.0, "step": 4344 }, { "epoch": 5.928595051902745, "grad_norm": 0.21405059745728336, "learning_rate": 4.989654997397379e-06, "loss": 0.2065, "num_tokens": 3314639717.0, "step": 4345 }, { "epoch": 5.929960632861426, "grad_norm": 0.2096237706265529, "learning_rate": 4.985955961743336e-06, "loss": 0.198, "num_tokens": 3315383057.0, "step": 4346 }, { "epoch": 5.931326213820106, "grad_norm": 0.2343633381826909, "learning_rate": 4.982258760713683e-06, "loss": 0.2107, "num_tokens": 3316161996.0, "step": 4347 }, { "epoch": 5.932691794778787, "grad_norm": 0.22298258886413252, "learning_rate": 4.978563395436276e-06, "loss": 0.2082, "num_tokens": 3316908831.0, "step": 4348 }, { "epoch": 5.934057375737467, "grad_norm": 0.21266652114805817, "learning_rate": 4.9748698670384134e-06, "loss": 0.2045, "num_tokens": 3317664998.0, "step": 4349 }, { "epoch": 5.935422956696147, "grad_norm": 0.21011183094230917, "learning_rate": 4.971178176646825e-06, "loss": 0.2077, "num_tokens": 3318394301.0, "step": 4350 }, { "epoch": 5.936788537654828, "grad_norm": 0.20515052217632412, "learning_rate": 4.96748832538769e-06, "loss": 0.2069, "num_tokens": 3319170978.0, "step": 4351 }, { "epoch": 5.938154118613508, "grad_norm": 0.19689597779735668, "learning_rate": 4.963800314386623e-06, "loss": 0.2095, "num_tokens": 3319939756.0, "step": 4352 }, { "epoch": 5.939519699572189, "grad_norm": 0.20421433926975466, "learning_rate": 4.960114144768669e-06, "loss": 0.2104, "num_tokens": 3320724501.0, "step": 4353 }, { "epoch": 5.940885280530869, "grad_norm": 0.2158002980247675, "learning_rate": 4.95642981765833e-06, "loss": 0.2042, "num_tokens": 3321457597.0, "step": 4354 }, { "epoch": 5.94225086148955, "grad_norm": 0.22515287782829732, "learning_rate": 4.952747334179524e-06, "loss": 0.2094, "num_tokens": 3322235945.0, "step": 4355 }, { "epoch": 5.9436164424482305, "grad_norm": 0.20152186948059192, "learning_rate": 4.949066695455625e-06, "loss": 0.2068, "num_tokens": 3323024156.0, "step": 4356 }, { "epoch": 5.9449820234069115, "grad_norm": 0.2265016756462833, "learning_rate": 4.945387902609432e-06, "loss": 0.2177, "num_tokens": 3323830175.0, "step": 4357 }, { "epoch": 5.946347604365592, "grad_norm": 0.24756695049688368, "learning_rate": 4.941710956763192e-06, "loss": 0.2102, "num_tokens": 3324575269.0, "step": 4358 }, { "epoch": 5.947713185324272, "grad_norm": 0.2172319835909987, "learning_rate": 4.938035859038571e-06, "loss": 0.2198, "num_tokens": 3325306604.0, "step": 4359 }, { "epoch": 5.949078766282953, "grad_norm": 0.21984033945111656, "learning_rate": 4.93436261055669e-06, "loss": 0.2069, "num_tokens": 3326101615.0, "step": 4360 }, { "epoch": 5.950444347241633, "grad_norm": 0.21582582364257336, "learning_rate": 4.930691212438098e-06, "loss": 0.2057, "num_tokens": 3326854259.0, "step": 4361 }, { "epoch": 5.951809928200314, "grad_norm": 0.20869154878723692, "learning_rate": 4.927021665802772e-06, "loss": 0.21, "num_tokens": 3327667602.0, "step": 4362 }, { "epoch": 5.953175509158994, "grad_norm": 0.20894481123627578, "learning_rate": 4.923353971770144e-06, "loss": 0.203, "num_tokens": 3328415257.0, "step": 4363 }, { "epoch": 5.954541090117675, "grad_norm": 0.20850455373242263, "learning_rate": 4.919688131459059e-06, "loss": 0.199, "num_tokens": 3329117520.0, "step": 4364 }, { "epoch": 5.955906671076355, "grad_norm": 0.2136825546116263, "learning_rate": 4.916024145987808e-06, "loss": 0.2051, "num_tokens": 3329912439.0, "step": 4365 }, { "epoch": 5.957272252035036, "grad_norm": 0.21587236345558525, "learning_rate": 4.91236201647412e-06, "loss": 0.2104, "num_tokens": 3330658866.0, "step": 4366 }, { "epoch": 5.958637832993716, "grad_norm": 0.21723331080978672, "learning_rate": 4.908701744035144e-06, "loss": 0.2074, "num_tokens": 3331435127.0, "step": 4367 }, { "epoch": 5.960003413952396, "grad_norm": 0.21404275802035458, "learning_rate": 4.905043329787474e-06, "loss": 0.2071, "num_tokens": 3332223902.0, "step": 4368 }, { "epoch": 5.961368994911077, "grad_norm": 0.21991077929034253, "learning_rate": 4.901386774847134e-06, "loss": 0.2058, "num_tokens": 3332997906.0, "step": 4369 }, { "epoch": 5.962734575869757, "grad_norm": 0.20658458169205982, "learning_rate": 4.897732080329583e-06, "loss": 0.2113, "num_tokens": 3333831393.0, "step": 4370 }, { "epoch": 5.964100156828438, "grad_norm": 0.20871064801211173, "learning_rate": 4.894079247349703e-06, "loss": 0.2087, "num_tokens": 3334587339.0, "step": 4371 }, { "epoch": 5.9654657377871185, "grad_norm": 0.2033008717439575, "learning_rate": 4.8904282770218244e-06, "loss": 0.2044, "num_tokens": 3335336624.0, "step": 4372 }, { "epoch": 5.9668313187457995, "grad_norm": 0.21107413142514553, "learning_rate": 4.886779170459695e-06, "loss": 0.2071, "num_tokens": 3336144643.0, "step": 4373 }, { "epoch": 5.96819689970448, "grad_norm": 0.20511771734750694, "learning_rate": 4.883131928776495e-06, "loss": 0.2113, "num_tokens": 3336921502.0, "step": 4374 }, { "epoch": 5.969562480663161, "grad_norm": 0.21942976988756668, "learning_rate": 4.879486553084849e-06, "loss": 0.2053, "num_tokens": 3337672822.0, "step": 4375 }, { "epoch": 5.970928061621841, "grad_norm": 0.20087426256510466, "learning_rate": 4.875843044496797e-06, "loss": 0.2152, "num_tokens": 3338517051.0, "step": 4376 }, { "epoch": 5.972293642580521, "grad_norm": 0.21281511914740617, "learning_rate": 4.872201404123818e-06, "loss": 0.1988, "num_tokens": 3339200990.0, "step": 4377 }, { "epoch": 5.973659223539202, "grad_norm": 0.22724318734770663, "learning_rate": 4.868561633076817e-06, "loss": 0.1984, "num_tokens": 3339932316.0, "step": 4378 }, { "epoch": 5.975024804497882, "grad_norm": 0.21459370475345238, "learning_rate": 4.864923732466135e-06, "loss": 0.2129, "num_tokens": 3340700745.0, "step": 4379 }, { "epoch": 5.976390385456563, "grad_norm": 0.21449347135227145, "learning_rate": 4.861287703401528e-06, "loss": 0.2116, "num_tokens": 3341448333.0, "step": 4380 }, { "epoch": 5.977755966415243, "grad_norm": 0.2179243628190327, "learning_rate": 4.857653546992205e-06, "loss": 0.2016, "num_tokens": 3342188770.0, "step": 4381 }, { "epoch": 5.979121547373924, "grad_norm": 0.22408681844375217, "learning_rate": 4.854021264346781e-06, "loss": 0.1956, "num_tokens": 3342827064.0, "step": 4382 }, { "epoch": 5.980487128332604, "grad_norm": 0.22089757909248758, "learning_rate": 4.850390856573304e-06, "loss": 0.2034, "num_tokens": 3343515813.0, "step": 4383 }, { "epoch": 5.981852709291285, "grad_norm": 0.21937237180422786, "learning_rate": 4.846762324779266e-06, "loss": 0.2074, "num_tokens": 3344257150.0, "step": 4384 }, { "epoch": 5.983218290249965, "grad_norm": 0.20936902348140668, "learning_rate": 4.843135670071566e-06, "loss": 0.2016, "num_tokens": 3345005544.0, "step": 4385 }, { "epoch": 5.984583871208645, "grad_norm": 0.21476437537676563, "learning_rate": 4.839510893556542e-06, "loss": 0.2042, "num_tokens": 3345769869.0, "step": 4386 }, { "epoch": 5.985949452167326, "grad_norm": 0.19352730341941765, "learning_rate": 4.835887996339955e-06, "loss": 0.2128, "num_tokens": 3346596643.0, "step": 4387 }, { "epoch": 5.9873150331260065, "grad_norm": 0.2097472418758118, "learning_rate": 4.832266979526999e-06, "loss": 0.2087, "num_tokens": 3347315899.0, "step": 4388 }, { "epoch": 5.9886806140846875, "grad_norm": 0.23191262269922386, "learning_rate": 4.828647844222284e-06, "loss": 0.2054, "num_tokens": 3348024161.0, "step": 4389 }, { "epoch": 5.990046195043368, "grad_norm": 0.22236814387083373, "learning_rate": 4.825030591529851e-06, "loss": 0.2075, "num_tokens": 3348700695.0, "step": 4390 }, { "epoch": 5.991411776002049, "grad_norm": 0.20109500113738168, "learning_rate": 4.821415222553171e-06, "loss": 0.2086, "num_tokens": 3349506009.0, "step": 4391 }, { "epoch": 5.992777356960729, "grad_norm": 0.2586930525477737, "learning_rate": 4.817801738395134e-06, "loss": 0.2037, "num_tokens": 3350256018.0, "step": 4392 }, { "epoch": 5.99414293791941, "grad_norm": 0.216341773149177, "learning_rate": 4.814190140158062e-06, "loss": 0.2052, "num_tokens": 3351011012.0, "step": 4393 }, { "epoch": 5.99550851887809, "grad_norm": 0.2138425056568521, "learning_rate": 4.8105804289436895e-06, "loss": 0.2052, "num_tokens": 3351795937.0, "step": 4394 }, { "epoch": 5.99687409983677, "grad_norm": 0.2155563522910789, "learning_rate": 4.806972605853189e-06, "loss": 0.2111, "num_tokens": 3352557398.0, "step": 4395 }, { "epoch": 5.998239680795451, "grad_norm": 0.20535220067156243, "learning_rate": 4.803366671987148e-06, "loss": 0.2037, "num_tokens": 3353309510.0, "step": 4396 }, { "epoch": 5.999605261754131, "grad_norm": 0.20591093891082932, "learning_rate": 4.799762628445585e-06, "loss": 0.2179, "num_tokens": 3354066255.0, "step": 4397 }, { "epoch": 6.0, "grad_norm": 0.20591093891082932, "learning_rate": 4.7961604763279315e-06, "loss": 0.1946, "num_tokens": 3354252816.0, "step": 4398 }, { "epoch": 6.00136558095868, "grad_norm": 0.4585177996520023, "learning_rate": 4.792560216733051e-06, "loss": 0.181, "num_tokens": 3355009547.0, "step": 4399 }, { "epoch": 6.002731161917361, "grad_norm": 0.3912397225765619, "learning_rate": 4.7889618507592275e-06, "loss": 0.1798, "num_tokens": 3355696719.0, "step": 4400 }, { "epoch": 6.004096742876041, "grad_norm": 0.33710216367674195, "learning_rate": 4.785365379504166e-06, "loss": 0.1741, "num_tokens": 3356437241.0, "step": 4401 }, { "epoch": 6.005462323834722, "grad_norm": 0.2911734032369926, "learning_rate": 4.781770804064998e-06, "loss": 0.1819, "num_tokens": 3357218728.0, "step": 4402 }, { "epoch": 6.006827904793402, "grad_norm": 0.23838591071348517, "learning_rate": 4.7781781255382645e-06, "loss": 0.1764, "num_tokens": 3357980336.0, "step": 4403 }, { "epoch": 6.008193485752083, "grad_norm": 0.2621093596072259, "learning_rate": 4.774587345019942e-06, "loss": 0.1829, "num_tokens": 3358761707.0, "step": 4404 }, { "epoch": 6.0095590667107635, "grad_norm": 0.3444977322073399, "learning_rate": 4.770998463605423e-06, "loss": 0.1804, "num_tokens": 3359497582.0, "step": 4405 }, { "epoch": 6.0109246476694445, "grad_norm": 0.28407986906967087, "learning_rate": 4.767411482389515e-06, "loss": 0.1761, "num_tokens": 3360266768.0, "step": 4406 }, { "epoch": 6.012290228628125, "grad_norm": 0.2965888928676144, "learning_rate": 4.763826402466453e-06, "loss": 0.1831, "num_tokens": 3361038867.0, "step": 4407 }, { "epoch": 6.013655809586805, "grad_norm": 0.24350841758275243, "learning_rate": 4.760243224929891e-06, "loss": 0.1767, "num_tokens": 3361786004.0, "step": 4408 }, { "epoch": 6.015021390545486, "grad_norm": 0.3118467831195779, "learning_rate": 4.7566619508729e-06, "loss": 0.1764, "num_tokens": 3362559674.0, "step": 4409 }, { "epoch": 6.016386971504166, "grad_norm": 0.2546083036589664, "learning_rate": 4.75308258138797e-06, "loss": 0.1682, "num_tokens": 3363236421.0, "step": 4410 }, { "epoch": 6.017752552462847, "grad_norm": 0.25020424151247606, "learning_rate": 4.749505117567019e-06, "loss": 0.1801, "num_tokens": 3363974049.0, "step": 4411 }, { "epoch": 6.019118133421527, "grad_norm": 0.238830001859413, "learning_rate": 4.745929560501366e-06, "loss": 0.1729, "num_tokens": 3364789946.0, "step": 4412 }, { "epoch": 6.020483714380208, "grad_norm": 0.2264753968447385, "learning_rate": 4.742355911281763e-06, "loss": 0.1694, "num_tokens": 3365568023.0, "step": 4413 }, { "epoch": 6.021849295338888, "grad_norm": 0.21654944571911136, "learning_rate": 4.73878417099838e-06, "loss": 0.1702, "num_tokens": 3366260484.0, "step": 4414 }, { "epoch": 6.023214876297569, "grad_norm": 0.2584160311866936, "learning_rate": 4.7352143407407895e-06, "loss": 0.1751, "num_tokens": 3366963404.0, "step": 4415 }, { "epoch": 6.024580457256249, "grad_norm": 0.25247587751989753, "learning_rate": 4.7316464215980056e-06, "loss": 0.1764, "num_tokens": 3367782536.0, "step": 4416 }, { "epoch": 6.025946038214929, "grad_norm": 0.23335092427991314, "learning_rate": 4.728080414658436e-06, "loss": 0.1746, "num_tokens": 3368492002.0, "step": 4417 }, { "epoch": 6.02731161917361, "grad_norm": 0.227242114088925, "learning_rate": 4.724516321009917e-06, "loss": 0.1799, "num_tokens": 3369247279.0, "step": 4418 }, { "epoch": 6.02867720013229, "grad_norm": 0.22150753316181543, "learning_rate": 4.720954141739702e-06, "loss": 0.171, "num_tokens": 3370025726.0, "step": 4419 }, { "epoch": 6.030042781090971, "grad_norm": 0.23001812186809834, "learning_rate": 4.717393877934458e-06, "loss": 0.1742, "num_tokens": 3370697764.0, "step": 4420 }, { "epoch": 6.0314083620496515, "grad_norm": 0.24462877876179195, "learning_rate": 4.713835530680263e-06, "loss": 0.1789, "num_tokens": 3371422974.0, "step": 4421 }, { "epoch": 6.0327739430083325, "grad_norm": 0.22176696762083187, "learning_rate": 4.710279101062616e-06, "loss": 0.1695, "num_tokens": 3372206398.0, "step": 4422 }, { "epoch": 6.034139523967013, "grad_norm": 0.21552229185027025, "learning_rate": 4.706724590166434e-06, "loss": 0.1669, "num_tokens": 3372940430.0, "step": 4423 }, { "epoch": 6.035505104925694, "grad_norm": 0.2415655040465163, "learning_rate": 4.7031719990760355e-06, "loss": 0.1785, "num_tokens": 3373715318.0, "step": 4424 }, { "epoch": 6.036870685884374, "grad_norm": 0.2081411910962891, "learning_rate": 4.699621328875173e-06, "loss": 0.1797, "num_tokens": 3374438548.0, "step": 4425 }, { "epoch": 6.038236266843054, "grad_norm": 0.23066394910645086, "learning_rate": 4.696072580646994e-06, "loss": 0.1771, "num_tokens": 3375183388.0, "step": 4426 }, { "epoch": 6.039601847801735, "grad_norm": 0.2259500594816629, "learning_rate": 4.692525755474071e-06, "loss": 0.1742, "num_tokens": 3375949950.0, "step": 4427 }, { "epoch": 6.040967428760415, "grad_norm": 0.23536400447232905, "learning_rate": 4.688980854438388e-06, "loss": 0.1826, "num_tokens": 3376741255.0, "step": 4428 }, { "epoch": 6.042333009719096, "grad_norm": 0.21588219657404012, "learning_rate": 4.685437878621339e-06, "loss": 0.1758, "num_tokens": 3377621258.0, "step": 4429 }, { "epoch": 6.043698590677776, "grad_norm": 0.2148373107385538, "learning_rate": 4.681896829103732e-06, "loss": 0.1759, "num_tokens": 3378354161.0, "step": 4430 }, { "epoch": 6.045064171636457, "grad_norm": 0.23782115443865062, "learning_rate": 4.6783577069657875e-06, "loss": 0.1876, "num_tokens": 3379080139.0, "step": 4431 }, { "epoch": 6.046429752595137, "grad_norm": 0.22351969689844262, "learning_rate": 4.674820513287142e-06, "loss": 0.1661, "num_tokens": 3379819878.0, "step": 4432 }, { "epoch": 6.047795333553818, "grad_norm": 0.2233047141075785, "learning_rate": 4.671285249146834e-06, "loss": 0.1734, "num_tokens": 3380548057.0, "step": 4433 }, { "epoch": 6.049160914512498, "grad_norm": 0.2189354258042658, "learning_rate": 4.667751915623328e-06, "loss": 0.1827, "num_tokens": 3381369615.0, "step": 4434 }, { "epoch": 6.050526495471178, "grad_norm": 0.21842866551626267, "learning_rate": 4.664220513794483e-06, "loss": 0.1698, "num_tokens": 3382148636.0, "step": 4435 }, { "epoch": 6.051892076429859, "grad_norm": 0.21784368416807026, "learning_rate": 4.6606910447375804e-06, "loss": 0.1843, "num_tokens": 3382901160.0, "step": 4436 }, { "epoch": 6.0532576573885395, "grad_norm": 0.22789367116511622, "learning_rate": 4.657163509529311e-06, "loss": 0.181, "num_tokens": 3383738696.0, "step": 4437 }, { "epoch": 6.0546232383472205, "grad_norm": 0.23006143939050336, "learning_rate": 4.653637909245767e-06, "loss": 0.1716, "num_tokens": 3384400405.0, "step": 4438 }, { "epoch": 6.055988819305901, "grad_norm": 0.2214385613151796, "learning_rate": 4.650114244962458e-06, "loss": 0.1797, "num_tokens": 3385235183.0, "step": 4439 }, { "epoch": 6.057354400264582, "grad_norm": 0.22803755873452153, "learning_rate": 4.6465925177543036e-06, "loss": 0.1793, "num_tokens": 3386014740.0, "step": 4440 }, { "epoch": 6.058719981223262, "grad_norm": 0.21688395070305036, "learning_rate": 4.643072728695632e-06, "loss": 0.1818, "num_tokens": 3386848644.0, "step": 4441 }, { "epoch": 6.060085562181943, "grad_norm": 0.2162227270852833, "learning_rate": 4.639554878860171e-06, "loss": 0.1711, "num_tokens": 3387609591.0, "step": 4442 }, { "epoch": 6.061451143140623, "grad_norm": 0.21940339360493047, "learning_rate": 4.636038969321073e-06, "loss": 0.1773, "num_tokens": 3388412567.0, "step": 4443 }, { "epoch": 6.062816724099303, "grad_norm": 0.2341942673425418, "learning_rate": 4.632525001150887e-06, "loss": 0.1782, "num_tokens": 3389198038.0, "step": 4444 }, { "epoch": 6.064182305057984, "grad_norm": 0.2174948647541384, "learning_rate": 4.629012975421566e-06, "loss": 0.1773, "num_tokens": 3390069029.0, "step": 4445 }, { "epoch": 6.065547886016664, "grad_norm": 0.21484919381704048, "learning_rate": 4.625502893204487e-06, "loss": 0.177, "num_tokens": 3390821019.0, "step": 4446 }, { "epoch": 6.066913466975345, "grad_norm": 0.21567029198169105, "learning_rate": 4.621994755570416e-06, "loss": 0.176, "num_tokens": 3391566032.0, "step": 4447 }, { "epoch": 6.068279047934025, "grad_norm": 0.22083606918292492, "learning_rate": 4.618488563589538e-06, "loss": 0.1732, "num_tokens": 3392316762.0, "step": 4448 }, { "epoch": 6.069644628892706, "grad_norm": 0.2138134991843932, "learning_rate": 4.614984318331439e-06, "loss": 0.1749, "num_tokens": 3393033351.0, "step": 4449 }, { "epoch": 6.071010209851386, "grad_norm": 0.21376723582755067, "learning_rate": 4.611482020865116e-06, "loss": 0.1765, "num_tokens": 3393730096.0, "step": 4450 }, { "epoch": 6.072375790810067, "grad_norm": 0.2513444585246254, "learning_rate": 4.6079816722589636e-06, "loss": 0.1796, "num_tokens": 3394541780.0, "step": 4451 }, { "epoch": 6.073741371768747, "grad_norm": 0.2184302765928734, "learning_rate": 4.604483273580786e-06, "loss": 0.1795, "num_tokens": 3395359254.0, "step": 4452 }, { "epoch": 6.0751069527274275, "grad_norm": 0.22094451280438968, "learning_rate": 4.6009868258977995e-06, "loss": 0.1737, "num_tokens": 3396095784.0, "step": 4453 }, { "epoch": 6.0764725336861085, "grad_norm": 0.21994515782183258, "learning_rate": 4.597492330276608e-06, "loss": 0.1734, "num_tokens": 3396822887.0, "step": 4454 }, { "epoch": 6.077838114644789, "grad_norm": 0.22331371034204653, "learning_rate": 4.593999787783245e-06, "loss": 0.183, "num_tokens": 3397611851.0, "step": 4455 }, { "epoch": 6.07920369560347, "grad_norm": 0.2293000144112066, "learning_rate": 4.5905091994831225e-06, "loss": 0.1751, "num_tokens": 3398318025.0, "step": 4456 }, { "epoch": 6.08056927656215, "grad_norm": 0.22725068438461382, "learning_rate": 4.587020566441071e-06, "loss": 0.1646, "num_tokens": 3399049300.0, "step": 4457 }, { "epoch": 6.081934857520831, "grad_norm": 0.21259847673923732, "learning_rate": 4.583533889721323e-06, "loss": 0.1827, "num_tokens": 3399921574.0, "step": 4458 }, { "epoch": 6.083300438479511, "grad_norm": 0.24750316055113192, "learning_rate": 4.580049170387515e-06, "loss": 0.1786, "num_tokens": 3400719251.0, "step": 4459 }, { "epoch": 6.084666019438192, "grad_norm": 0.23655916061139723, "learning_rate": 4.576566409502678e-06, "loss": 0.1812, "num_tokens": 3401467639.0, "step": 4460 }, { "epoch": 6.086031600396872, "grad_norm": 0.2294654619347723, "learning_rate": 4.573085608129253e-06, "loss": 0.189, "num_tokens": 3402161919.0, "step": 4461 }, { "epoch": 6.087397181355552, "grad_norm": 0.22522034877832006, "learning_rate": 4.5696067673290875e-06, "loss": 0.1779, "num_tokens": 3402916033.0, "step": 4462 }, { "epoch": 6.088762762314233, "grad_norm": 0.2108770097286622, "learning_rate": 4.566129888163413e-06, "loss": 0.1785, "num_tokens": 3403637405.0, "step": 4463 }, { "epoch": 6.090128343272913, "grad_norm": 0.218034538404724, "learning_rate": 4.562654971692889e-06, "loss": 0.1736, "num_tokens": 3404425002.0, "step": 4464 }, { "epoch": 6.091493924231594, "grad_norm": 0.23074185975280587, "learning_rate": 4.559182018977552e-06, "loss": 0.1734, "num_tokens": 3405215493.0, "step": 4465 }, { "epoch": 6.092859505190274, "grad_norm": 0.2071715513395369, "learning_rate": 4.555711031076854e-06, "loss": 0.175, "num_tokens": 3406008591.0, "step": 4466 }, { "epoch": 6.094225086148955, "grad_norm": 0.22648826715597087, "learning_rate": 4.552242009049644e-06, "loss": 0.1788, "num_tokens": 3406765138.0, "step": 4467 }, { "epoch": 6.095590667107635, "grad_norm": 0.24025039898958242, "learning_rate": 4.548774953954166e-06, "loss": 0.1707, "num_tokens": 3407468920.0, "step": 4468 }, { "epoch": 6.096956248066316, "grad_norm": 0.23154871631644736, "learning_rate": 4.545309866848071e-06, "loss": 0.1791, "num_tokens": 3408226598.0, "step": 4469 }, { "epoch": 6.0983218290249965, "grad_norm": 0.2216963549401061, "learning_rate": 4.5418467487884064e-06, "loss": 0.1817, "num_tokens": 3408954614.0, "step": 4470 }, { "epoch": 6.099687409983677, "grad_norm": 0.2994055485967713, "learning_rate": 4.538385600831622e-06, "loss": 0.1782, "num_tokens": 3409643700.0, "step": 4471 }, { "epoch": 6.101052990942358, "grad_norm": 0.22041232213414622, "learning_rate": 4.5349264240335574e-06, "loss": 0.1802, "num_tokens": 3410447795.0, "step": 4472 }, { "epoch": 6.102418571901038, "grad_norm": 0.23199155100203914, "learning_rate": 4.531469219449468e-06, "loss": 0.1784, "num_tokens": 3411245692.0, "step": 4473 }, { "epoch": 6.103784152859719, "grad_norm": 0.21577669277288322, "learning_rate": 4.528013988133987e-06, "loss": 0.1784, "num_tokens": 3411963004.0, "step": 4474 }, { "epoch": 6.105149733818399, "grad_norm": 0.2417349130187108, "learning_rate": 4.524560731141162e-06, "loss": 0.1854, "num_tokens": 3412725041.0, "step": 4475 }, { "epoch": 6.10651531477708, "grad_norm": 0.21618688313939852, "learning_rate": 4.5211094495244325e-06, "loss": 0.1817, "num_tokens": 3413477648.0, "step": 4476 }, { "epoch": 6.10788089573576, "grad_norm": 0.23405788037925568, "learning_rate": 4.5176601443366314e-06, "loss": 0.1785, "num_tokens": 3414244062.0, "step": 4477 }, { "epoch": 6.109246476694441, "grad_norm": 0.2197580712372545, "learning_rate": 4.514212816629993e-06, "loss": 0.1754, "num_tokens": 3415066114.0, "step": 4478 }, { "epoch": 6.110612057653121, "grad_norm": 0.22440709069970965, "learning_rate": 4.51076746745615e-06, "loss": 0.1784, "num_tokens": 3415798601.0, "step": 4479 }, { "epoch": 6.111977638611801, "grad_norm": 0.22378451203640654, "learning_rate": 4.507324097866126e-06, "loss": 0.1818, "num_tokens": 3416591434.0, "step": 4480 }, { "epoch": 6.113343219570482, "grad_norm": 0.2421120759788064, "learning_rate": 4.503882708910347e-06, "loss": 0.1798, "num_tokens": 3417386431.0, "step": 4481 }, { "epoch": 6.114708800529162, "grad_norm": 0.21826931331134983, "learning_rate": 4.500443301638633e-06, "loss": 0.1749, "num_tokens": 3418202590.0, "step": 4482 }, { "epoch": 6.116074381487843, "grad_norm": 0.23908864840311186, "learning_rate": 4.4970058771001925e-06, "loss": 0.1852, "num_tokens": 3419044902.0, "step": 4483 }, { "epoch": 6.117439962446523, "grad_norm": 0.2208685361586619, "learning_rate": 4.493570436343638e-06, "loss": 0.1712, "num_tokens": 3419861317.0, "step": 4484 }, { "epoch": 6.118805543405204, "grad_norm": 0.21650802735400249, "learning_rate": 4.490136980416978e-06, "loss": 0.1826, "num_tokens": 3420613672.0, "step": 4485 }, { "epoch": 6.1201711243638846, "grad_norm": 0.20719033601158465, "learning_rate": 4.486705510367605e-06, "loss": 0.1778, "num_tokens": 3421490447.0, "step": 4486 }, { "epoch": 6.1215367053225656, "grad_norm": 0.21906419131915372, "learning_rate": 4.483276027242313e-06, "loss": 0.1833, "num_tokens": 3422250044.0, "step": 4487 }, { "epoch": 6.122902286281246, "grad_norm": 0.21810450746205956, "learning_rate": 4.479848532087288e-06, "loss": 0.1796, "num_tokens": 3423065073.0, "step": 4488 }, { "epoch": 6.124267867239926, "grad_norm": 0.20301923761216395, "learning_rate": 4.4764230259481145e-06, "loss": 0.1796, "num_tokens": 3423817357.0, "step": 4489 }, { "epoch": 6.125633448198607, "grad_norm": 0.22063538435052682, "learning_rate": 4.472999509869766e-06, "loss": 0.1911, "num_tokens": 3424653070.0, "step": 4490 }, { "epoch": 6.126999029157287, "grad_norm": 0.22120455236533326, "learning_rate": 4.469577984896602e-06, "loss": 0.1809, "num_tokens": 3425514557.0, "step": 4491 }, { "epoch": 6.128364610115968, "grad_norm": 0.22083477452312278, "learning_rate": 4.466158452072388e-06, "loss": 0.1753, "num_tokens": 3426267036.0, "step": 4492 }, { "epoch": 6.129730191074648, "grad_norm": 0.22188928878077294, "learning_rate": 4.462740912440274e-06, "loss": 0.1804, "num_tokens": 3427004673.0, "step": 4493 }, { "epoch": 6.131095772033329, "grad_norm": 0.23114820763976177, "learning_rate": 4.459325367042804e-06, "loss": 0.1694, "num_tokens": 3427737863.0, "step": 4494 }, { "epoch": 6.132461352992009, "grad_norm": 0.2233106292754563, "learning_rate": 4.45591181692191e-06, "loss": 0.1752, "num_tokens": 3428510172.0, "step": 4495 }, { "epoch": 6.13382693395069, "grad_norm": 0.33553311068362973, "learning_rate": 4.452500263118922e-06, "loss": 0.178, "num_tokens": 3429166656.0, "step": 4496 }, { "epoch": 6.13519251490937, "grad_norm": 0.22051775567612125, "learning_rate": 4.449090706674554e-06, "loss": 0.1801, "num_tokens": 3429905954.0, "step": 4497 }, { "epoch": 6.13655809586805, "grad_norm": 0.23927461724348775, "learning_rate": 4.445683148628915e-06, "loss": 0.1763, "num_tokens": 3430563869.0, "step": 4498 }, { "epoch": 6.137923676826731, "grad_norm": 0.23098489194021554, "learning_rate": 4.442277590021509e-06, "loss": 0.1721, "num_tokens": 3431279791.0, "step": 4499 }, { "epoch": 6.1392892577854115, "grad_norm": 0.21249157348244865, "learning_rate": 4.4388740318912145e-06, "loss": 0.1797, "num_tokens": 3432049749.0, "step": 4500 }, { "epoch": 6.1406548387440925, "grad_norm": 0.22697925713857062, "learning_rate": 4.435472475276316e-06, "loss": 0.1698, "num_tokens": 3432829417.0, "step": 4501 }, { "epoch": 6.142020419702773, "grad_norm": 0.22325960952336804, "learning_rate": 4.432072921214479e-06, "loss": 0.1753, "num_tokens": 3433598933.0, "step": 4502 }, { "epoch": 6.143386000661454, "grad_norm": 0.21779056871334146, "learning_rate": 4.428675370742763e-06, "loss": 0.1727, "num_tokens": 3434403845.0, "step": 4503 }, { "epoch": 6.144751581620134, "grad_norm": 0.21539000044813883, "learning_rate": 4.425279824897608e-06, "loss": 0.1781, "num_tokens": 3435176577.0, "step": 4504 }, { "epoch": 6.146117162578815, "grad_norm": 0.219663716664207, "learning_rate": 4.421886284714856e-06, "loss": 0.1813, "num_tokens": 3436058064.0, "step": 4505 }, { "epoch": 6.147482743537495, "grad_norm": 0.19782339122765638, "learning_rate": 4.418494751229725e-06, "loss": 0.183, "num_tokens": 3436878576.0, "step": 4506 }, { "epoch": 6.148848324496175, "grad_norm": 0.21203259430202637, "learning_rate": 4.415105225476818e-06, "loss": 0.1809, "num_tokens": 3437673093.0, "step": 4507 }, { "epoch": 6.150213905454856, "grad_norm": 0.2336275625240025, "learning_rate": 4.4117177084901465e-06, "loss": 0.1792, "num_tokens": 3438375178.0, "step": 4508 }, { "epoch": 6.151579486413536, "grad_norm": 0.2316479732396051, "learning_rate": 4.408332201303084e-06, "loss": 0.1721, "num_tokens": 3439112672.0, "step": 4509 }, { "epoch": 6.152945067372217, "grad_norm": 0.22163746027198764, "learning_rate": 4.4049487049484055e-06, "loss": 0.1797, "num_tokens": 3439899433.0, "step": 4510 }, { "epoch": 6.154310648330897, "grad_norm": 0.22028382182738873, "learning_rate": 4.401567220458271e-06, "loss": 0.1707, "num_tokens": 3440609305.0, "step": 4511 }, { "epoch": 6.155676229289578, "grad_norm": 0.23262887979016025, "learning_rate": 4.398187748864226e-06, "loss": 0.1805, "num_tokens": 3441238978.0, "step": 4512 }, { "epoch": 6.157041810248258, "grad_norm": 0.24789304673878493, "learning_rate": 4.394810291197193e-06, "loss": 0.1777, "num_tokens": 3442056775.0, "step": 4513 }, { "epoch": 6.158407391206939, "grad_norm": 0.20969774356356577, "learning_rate": 4.391434848487498e-06, "loss": 0.1717, "num_tokens": 3442815152.0, "step": 4514 }, { "epoch": 6.159772972165619, "grad_norm": 0.23276591209687553, "learning_rate": 4.3880614217648395e-06, "loss": 0.1838, "num_tokens": 3443518406.0, "step": 4515 }, { "epoch": 6.1611385531242995, "grad_norm": 0.2160518447031787, "learning_rate": 4.3846900120582965e-06, "loss": 0.1847, "num_tokens": 3444280412.0, "step": 4516 }, { "epoch": 6.1625041340829805, "grad_norm": 0.21765141216990866, "learning_rate": 4.3813206203963535e-06, "loss": 0.1742, "num_tokens": 3444984664.0, "step": 4517 }, { "epoch": 6.163869715041661, "grad_norm": 0.23683736636523095, "learning_rate": 4.377953247806853e-06, "loss": 0.1729, "num_tokens": 3445790250.0, "step": 4518 }, { "epoch": 6.165235296000342, "grad_norm": 0.22052113787648814, "learning_rate": 4.374587895317042e-06, "loss": 0.1774, "num_tokens": 3446562538.0, "step": 4519 }, { "epoch": 6.166600876959022, "grad_norm": 0.2272677270213607, "learning_rate": 4.37122456395354e-06, "loss": 0.1829, "num_tokens": 3447364960.0, "step": 4520 }, { "epoch": 6.167966457917703, "grad_norm": 0.21986032808084735, "learning_rate": 4.367863254742361e-06, "loss": 0.175, "num_tokens": 3448198825.0, "step": 4521 }, { "epoch": 6.169332038876383, "grad_norm": 0.23100630994472057, "learning_rate": 4.364503968708885e-06, "loss": 0.1789, "num_tokens": 3448973515.0, "step": 4522 }, { "epoch": 6.170697619835064, "grad_norm": 0.2324724777517825, "learning_rate": 4.36114670687789e-06, "loss": 0.1782, "num_tokens": 3449732948.0, "step": 4523 }, { "epoch": 6.172063200793744, "grad_norm": 0.22402977192388637, "learning_rate": 4.357791470273534e-06, "loss": 0.1719, "num_tokens": 3450423957.0, "step": 4524 }, { "epoch": 6.173428781752424, "grad_norm": 0.23447390052313588, "learning_rate": 4.354438259919343e-06, "loss": 0.1887, "num_tokens": 3451285379.0, "step": 4525 }, { "epoch": 6.174794362711105, "grad_norm": 0.21287784464370527, "learning_rate": 4.351087076838251e-06, "loss": 0.176, "num_tokens": 3452094627.0, "step": 4526 }, { "epoch": 6.176159943669785, "grad_norm": 0.22071043461752263, "learning_rate": 4.347737922052549e-06, "loss": 0.1736, "num_tokens": 3452861074.0, "step": 4527 }, { "epoch": 6.177525524628466, "grad_norm": 0.2277140706596248, "learning_rate": 4.344390796583922e-06, "loss": 0.1721, "num_tokens": 3453596726.0, "step": 4528 }, { "epoch": 6.178891105587146, "grad_norm": 0.20687993440965713, "learning_rate": 4.3410457014534355e-06, "loss": 0.1769, "num_tokens": 3454321168.0, "step": 4529 }, { "epoch": 6.180256686545827, "grad_norm": 0.22660369626935795, "learning_rate": 4.337702637681529e-06, "loss": 0.1679, "num_tokens": 3455015952.0, "step": 4530 }, { "epoch": 6.181622267504507, "grad_norm": 0.24304606269690304, "learning_rate": 4.334361606288027e-06, "loss": 0.1883, "num_tokens": 3455784264.0, "step": 4531 }, { "epoch": 6.182987848463188, "grad_norm": 0.23653850649487354, "learning_rate": 4.3310226082921344e-06, "loss": 0.1856, "num_tokens": 3456527875.0, "step": 4532 }, { "epoch": 6.1843534294218685, "grad_norm": 0.25063315056713936, "learning_rate": 4.3276856447124395e-06, "loss": 0.1776, "num_tokens": 3457290527.0, "step": 4533 }, { "epoch": 6.185719010380549, "grad_norm": 0.23021076779045613, "learning_rate": 4.3243507165668954e-06, "loss": 0.1849, "num_tokens": 3458066186.0, "step": 4534 }, { "epoch": 6.18708459133923, "grad_norm": 0.20518928027347919, "learning_rate": 4.321017824872854e-06, "loss": 0.1765, "num_tokens": 3458870754.0, "step": 4535 }, { "epoch": 6.18845017229791, "grad_norm": 0.22452478666694203, "learning_rate": 4.31768697064703e-06, "loss": 0.1798, "num_tokens": 3459651791.0, "step": 4536 }, { "epoch": 6.189815753256591, "grad_norm": 0.22135071290567507, "learning_rate": 4.314358154905525e-06, "loss": 0.175, "num_tokens": 3460355593.0, "step": 4537 }, { "epoch": 6.191181334215271, "grad_norm": 0.2302598794395475, "learning_rate": 4.311031378663819e-06, "loss": 0.1789, "num_tokens": 3461035221.0, "step": 4538 }, { "epoch": 6.192546915173952, "grad_norm": 0.2451951568308318, "learning_rate": 4.307706642936761e-06, "loss": 0.1858, "num_tokens": 3461808868.0, "step": 4539 }, { "epoch": 6.193912496132632, "grad_norm": 0.21250871549966083, "learning_rate": 4.3043839487385895e-06, "loss": 0.1738, "num_tokens": 3462628996.0, "step": 4540 }, { "epoch": 6.195278077091313, "grad_norm": 0.22757375689874892, "learning_rate": 4.301063297082912e-06, "loss": 0.1808, "num_tokens": 3463377448.0, "step": 4541 }, { "epoch": 6.196643658049993, "grad_norm": 0.22411977585476253, "learning_rate": 4.297744688982719e-06, "loss": 0.1847, "num_tokens": 3464185628.0, "step": 4542 }, { "epoch": 6.198009239008673, "grad_norm": 0.21718502314699714, "learning_rate": 4.294428125450365e-06, "loss": 0.1754, "num_tokens": 3464979233.0, "step": 4543 }, { "epoch": 6.199374819967354, "grad_norm": 0.24288196154103228, "learning_rate": 4.2911136074976024e-06, "loss": 0.18, "num_tokens": 3465772271.0, "step": 4544 }, { "epoch": 6.200740400926034, "grad_norm": 0.21077125200331845, "learning_rate": 4.2878011361355386e-06, "loss": 0.1811, "num_tokens": 3466479562.0, "step": 4545 }, { "epoch": 6.202105981884715, "grad_norm": 0.22794401023079633, "learning_rate": 4.2844907123746685e-06, "loss": 0.1779, "num_tokens": 3467190071.0, "step": 4546 }, { "epoch": 6.203471562843395, "grad_norm": 0.22703259780312576, "learning_rate": 4.28118233722486e-06, "loss": 0.1783, "num_tokens": 3468019980.0, "step": 4547 }, { "epoch": 6.204837143802076, "grad_norm": 0.22351404386950546, "learning_rate": 4.27787601169535e-06, "loss": 0.1804, "num_tokens": 3468716464.0, "step": 4548 }, { "epoch": 6.2062027247607565, "grad_norm": 0.22731524453914415, "learning_rate": 4.274571736794758e-06, "loss": 0.1716, "num_tokens": 3469433911.0, "step": 4549 }, { "epoch": 6.2075683057194375, "grad_norm": 0.2198255676591793, "learning_rate": 4.271269513531075e-06, "loss": 0.1812, "num_tokens": 3470219400.0, "step": 4550 }, { "epoch": 6.208933886678118, "grad_norm": 0.22030132556363524, "learning_rate": 4.267969342911672e-06, "loss": 0.1796, "num_tokens": 3470966642.0, "step": 4551 }, { "epoch": 6.210299467636798, "grad_norm": 0.23123225135684844, "learning_rate": 4.2646712259432755e-06, "loss": 0.1825, "num_tokens": 3471709148.0, "step": 4552 }, { "epoch": 6.211665048595479, "grad_norm": 0.220776375978549, "learning_rate": 4.261375163632012e-06, "loss": 0.1839, "num_tokens": 3472527777.0, "step": 4553 }, { "epoch": 6.213030629554159, "grad_norm": 0.22186720333694754, "learning_rate": 4.2580811569833584e-06, "loss": 0.1816, "num_tokens": 3473302167.0, "step": 4554 }, { "epoch": 6.21439621051284, "grad_norm": 0.22048593978822026, "learning_rate": 4.254789207002177e-06, "loss": 0.1843, "num_tokens": 3474108664.0, "step": 4555 }, { "epoch": 6.21576179147152, "grad_norm": 0.21574615102864872, "learning_rate": 4.2514993146927e-06, "loss": 0.1758, "num_tokens": 3474804538.0, "step": 4556 }, { "epoch": 6.217127372430201, "grad_norm": 0.22117376048687712, "learning_rate": 4.248211481058526e-06, "loss": 0.1784, "num_tokens": 3475548271.0, "step": 4557 }, { "epoch": 6.218492953388881, "grad_norm": 0.23166457922103698, "learning_rate": 4.244925707102635e-06, "loss": 0.1777, "num_tokens": 3476291397.0, "step": 4558 }, { "epoch": 6.219858534347562, "grad_norm": 0.21762628034392542, "learning_rate": 4.241641993827373e-06, "loss": 0.1825, "num_tokens": 3477124355.0, "step": 4559 }, { "epoch": 6.221224115306242, "grad_norm": 0.213444512991053, "learning_rate": 4.238360342234461e-06, "loss": 0.1799, "num_tokens": 3477906054.0, "step": 4560 }, { "epoch": 6.222589696264922, "grad_norm": 0.22204746703167785, "learning_rate": 4.235080753324985e-06, "loss": 0.1777, "num_tokens": 3478660140.0, "step": 4561 }, { "epoch": 6.223955277223603, "grad_norm": 0.2744635911610287, "learning_rate": 4.231803228099408e-06, "loss": 0.1725, "num_tokens": 3479478041.0, "step": 4562 }, { "epoch": 6.225320858182283, "grad_norm": 0.2229204134558285, "learning_rate": 4.22852776755756e-06, "loss": 0.1792, "num_tokens": 3480180854.0, "step": 4563 }, { "epoch": 6.226686439140964, "grad_norm": 0.2214870107436558, "learning_rate": 4.225254372698641e-06, "loss": 0.1762, "num_tokens": 3480959968.0, "step": 4564 }, { "epoch": 6.2280520200996445, "grad_norm": 0.22565969202025818, "learning_rate": 4.221983044521229e-06, "loss": 0.1755, "num_tokens": 3481780576.0, "step": 4565 }, { "epoch": 6.2294176010583255, "grad_norm": 0.21302069759579312, "learning_rate": 4.218713784023255e-06, "loss": 0.1763, "num_tokens": 3482538169.0, "step": 4566 }, { "epoch": 6.230783182017006, "grad_norm": 0.21108712225024537, "learning_rate": 4.215446592202033e-06, "loss": 0.1797, "num_tokens": 3483309776.0, "step": 4567 }, { "epoch": 6.232148762975687, "grad_norm": 0.21318830661838534, "learning_rate": 4.2121814700542414e-06, "loss": 0.1731, "num_tokens": 3484077354.0, "step": 4568 }, { "epoch": 6.233514343934367, "grad_norm": 0.21814291554272444, "learning_rate": 4.208918418575928e-06, "loss": 0.1736, "num_tokens": 3484863806.0, "step": 4569 }, { "epoch": 6.234879924893047, "grad_norm": 0.2142705634924007, "learning_rate": 4.205657438762511e-06, "loss": 0.181, "num_tokens": 3485718376.0, "step": 4570 }, { "epoch": 6.236245505851728, "grad_norm": 0.21529194721794206, "learning_rate": 4.20239853160877e-06, "loss": 0.1695, "num_tokens": 3486430360.0, "step": 4571 }, { "epoch": 6.237611086810408, "grad_norm": 0.22063129748436203, "learning_rate": 4.199141698108856e-06, "loss": 0.1827, "num_tokens": 3487188606.0, "step": 4572 }, { "epoch": 6.238976667769089, "grad_norm": 0.2307116979269138, "learning_rate": 4.195886939256291e-06, "loss": 0.1801, "num_tokens": 3487953543.0, "step": 4573 }, { "epoch": 6.240342248727769, "grad_norm": 0.22287422631113657, "learning_rate": 4.192634256043963e-06, "loss": 0.1792, "num_tokens": 3488739397.0, "step": 4574 }, { "epoch": 6.24170782968645, "grad_norm": 0.20953073984959866, "learning_rate": 4.1893836494641185e-06, "loss": 0.1857, "num_tokens": 3489591658.0, "step": 4575 }, { "epoch": 6.24307341064513, "grad_norm": 0.2155306505891302, "learning_rate": 4.186135120508379e-06, "loss": 0.1759, "num_tokens": 3490270154.0, "step": 4576 }, { "epoch": 6.244438991603811, "grad_norm": 0.22675852117196066, "learning_rate": 4.182888670167733e-06, "loss": 0.1786, "num_tokens": 3490938599.0, "step": 4577 }, { "epoch": 6.245804572562491, "grad_norm": 0.22114424595309687, "learning_rate": 4.179644299432526e-06, "loss": 0.1825, "num_tokens": 3491691608.0, "step": 4578 }, { "epoch": 6.247170153521171, "grad_norm": 0.22130197307457095, "learning_rate": 4.176402009292483e-06, "loss": 0.1842, "num_tokens": 3492428160.0, "step": 4579 }, { "epoch": 6.248535734479852, "grad_norm": 0.24113597507233553, "learning_rate": 4.17316180073668e-06, "loss": 0.1871, "num_tokens": 3493268337.0, "step": 4580 }, { "epoch": 6.249901315438533, "grad_norm": 0.2342872229189589, "learning_rate": 4.169923674753566e-06, "loss": 0.1847, "num_tokens": 3494004643.0, "step": 4581 }, { "epoch": 6.251266896397214, "grad_norm": 0.20985481049045268, "learning_rate": 4.166687632330954e-06, "loss": 0.1793, "num_tokens": 3494777242.0, "step": 4582 }, { "epoch": 6.252632477355894, "grad_norm": 0.2182906627022529, "learning_rate": 4.163453674456022e-06, "loss": 0.1852, "num_tokens": 3495611764.0, "step": 4583 }, { "epoch": 6.253998058314575, "grad_norm": 0.21525479474901657, "learning_rate": 4.160221802115305e-06, "loss": 0.185, "num_tokens": 3496354529.0, "step": 4584 }, { "epoch": 6.255363639273255, "grad_norm": 0.2360289916713184, "learning_rate": 4.156992016294712e-06, "loss": 0.182, "num_tokens": 3497144767.0, "step": 4585 }, { "epoch": 6.256729220231936, "grad_norm": 0.22642382925621618, "learning_rate": 4.153764317979511e-06, "loss": 0.175, "num_tokens": 3497844724.0, "step": 4586 }, { "epoch": 6.258094801190616, "grad_norm": 0.22209425149287665, "learning_rate": 4.150538708154326e-06, "loss": 0.182, "num_tokens": 3498572429.0, "step": 4587 }, { "epoch": 6.259460382149296, "grad_norm": 0.2323186731317651, "learning_rate": 4.147315187803161e-06, "loss": 0.172, "num_tokens": 3499266427.0, "step": 4588 }, { "epoch": 6.260825963107977, "grad_norm": 0.23086929032987255, "learning_rate": 4.144093757909365e-06, "loss": 0.184, "num_tokens": 3500080062.0, "step": 4589 }, { "epoch": 6.262191544066657, "grad_norm": 0.2210544528967968, "learning_rate": 4.1408744194556615e-06, "loss": 0.1802, "num_tokens": 3500895054.0, "step": 4590 }, { "epoch": 6.263557125025338, "grad_norm": 0.21735116796503404, "learning_rate": 4.137657173424128e-06, "loss": 0.1823, "num_tokens": 3501620088.0, "step": 4591 }, { "epoch": 6.264922705984018, "grad_norm": 0.23619968376155812, "learning_rate": 4.1344420207962086e-06, "loss": 0.1752, "num_tokens": 3502307483.0, "step": 4592 }, { "epoch": 6.266288286942699, "grad_norm": 0.2321990526738248, "learning_rate": 4.131228962552707e-06, "loss": 0.1655, "num_tokens": 3502992365.0, "step": 4593 }, { "epoch": 6.267653867901379, "grad_norm": 0.23434853542778, "learning_rate": 4.128017999673786e-06, "loss": 0.1748, "num_tokens": 3503751298.0, "step": 4594 }, { "epoch": 6.26901944886006, "grad_norm": 0.22439757606691704, "learning_rate": 4.124809133138977e-06, "loss": 0.1773, "num_tokens": 3504505387.0, "step": 4595 }, { "epoch": 6.2703850298187405, "grad_norm": 0.22132451061902028, "learning_rate": 4.121602363927155e-06, "loss": 0.1771, "num_tokens": 3505223012.0, "step": 4596 }, { "epoch": 6.271750610777421, "grad_norm": 0.22446146734086175, "learning_rate": 4.118397693016582e-06, "loss": 0.1772, "num_tokens": 3505873858.0, "step": 4597 }, { "epoch": 6.273116191736102, "grad_norm": 0.23785912841374188, "learning_rate": 4.115195121384851e-06, "loss": 0.1721, "num_tokens": 3506574211.0, "step": 4598 }, { "epoch": 6.274481772694782, "grad_norm": 0.22584758060774557, "learning_rate": 4.111994650008933e-06, "loss": 0.1833, "num_tokens": 3507348540.0, "step": 4599 }, { "epoch": 6.275847353653463, "grad_norm": 0.2069471263560601, "learning_rate": 4.108796279865156e-06, "loss": 0.1792, "num_tokens": 3508064816.0, "step": 4600 }, { "epoch": 6.277212934612143, "grad_norm": 0.22849604017584552, "learning_rate": 4.1056000119292e-06, "loss": 0.1813, "num_tokens": 3508834231.0, "step": 4601 }, { "epoch": 6.278578515570824, "grad_norm": 0.220626860639284, "learning_rate": 4.102405847176108e-06, "loss": 0.1739, "num_tokens": 3509620070.0, "step": 4602 }, { "epoch": 6.279944096529504, "grad_norm": 0.20824367311636907, "learning_rate": 4.099213786580282e-06, "loss": 0.1773, "num_tokens": 3510319187.0, "step": 4603 }, { "epoch": 6.281309677488185, "grad_norm": 0.22869293644876434, "learning_rate": 4.096023831115485e-06, "loss": 0.1859, "num_tokens": 3511110675.0, "step": 4604 }, { "epoch": 6.282675258446865, "grad_norm": 0.22852341386589692, "learning_rate": 4.0928359817548275e-06, "loss": 0.1803, "num_tokens": 3511831234.0, "step": 4605 }, { "epoch": 6.284040839405545, "grad_norm": 0.24648044263110894, "learning_rate": 4.089650239470792e-06, "loss": 0.1825, "num_tokens": 3512589321.0, "step": 4606 }, { "epoch": 6.285406420364226, "grad_norm": 0.2156025648603416, "learning_rate": 4.0864666052352054e-06, "loss": 0.1804, "num_tokens": 3513447670.0, "step": 4607 }, { "epoch": 6.286772001322906, "grad_norm": 0.21862622097717746, "learning_rate": 4.083285080019257e-06, "loss": 0.1845, "num_tokens": 3514202403.0, "step": 4608 }, { "epoch": 6.288137582281587, "grad_norm": 0.2304846549022789, "learning_rate": 4.080105664793496e-06, "loss": 0.1859, "num_tokens": 3515010614.0, "step": 4609 }, { "epoch": 6.289503163240267, "grad_norm": 0.21189551335791587, "learning_rate": 4.076928360527819e-06, "loss": 0.1765, "num_tokens": 3515788396.0, "step": 4610 }, { "epoch": 6.290868744198948, "grad_norm": 0.2268812022758147, "learning_rate": 4.0737531681914864e-06, "loss": 0.1785, "num_tokens": 3516520038.0, "step": 4611 }, { "epoch": 6.2922343251576285, "grad_norm": 0.2246026052922428, "learning_rate": 4.070580088753111e-06, "loss": 0.1786, "num_tokens": 3517320143.0, "step": 4612 }, { "epoch": 6.2935999061163095, "grad_norm": 0.2171828381558734, "learning_rate": 4.067409123180666e-06, "loss": 0.1764, "num_tokens": 3518097785.0, "step": 4613 }, { "epoch": 6.29496548707499, "grad_norm": 0.2294986502104526, "learning_rate": 4.064240272441467e-06, "loss": 0.1757, "num_tokens": 3518852718.0, "step": 4614 }, { "epoch": 6.29633106803367, "grad_norm": 0.21110089024395373, "learning_rate": 4.061073537502205e-06, "loss": 0.1798, "num_tokens": 3519572072.0, "step": 4615 }, { "epoch": 6.297696648992351, "grad_norm": 0.2397500487031368, "learning_rate": 4.057908919328907e-06, "loss": 0.1914, "num_tokens": 3520359111.0, "step": 4616 }, { "epoch": 6.299062229951031, "grad_norm": 0.21829353717483338, "learning_rate": 4.054746418886956e-06, "loss": 0.1856, "num_tokens": 3521137754.0, "step": 4617 }, { "epoch": 6.300427810909712, "grad_norm": 0.2321374240712012, "learning_rate": 4.051586037141103e-06, "loss": 0.185, "num_tokens": 3521896847.0, "step": 4618 }, { "epoch": 6.301793391868392, "grad_norm": 0.222466741972722, "learning_rate": 4.048427775055439e-06, "loss": 0.1788, "num_tokens": 3522641389.0, "step": 4619 }, { "epoch": 6.303158972827073, "grad_norm": 0.2299971645339046, "learning_rate": 4.045271633593414e-06, "loss": 0.1825, "num_tokens": 3523373801.0, "step": 4620 }, { "epoch": 6.304524553785753, "grad_norm": 0.2236939817377869, "learning_rate": 4.04211761371783e-06, "loss": 0.1754, "num_tokens": 3524114552.0, "step": 4621 }, { "epoch": 6.305890134744434, "grad_norm": 0.2191320558252902, "learning_rate": 4.0389657163908425e-06, "loss": 0.1841, "num_tokens": 3525025106.0, "step": 4622 }, { "epoch": 6.307255715703114, "grad_norm": 0.24162996500865816, "learning_rate": 4.035815942573957e-06, "loss": 0.174, "num_tokens": 3525793061.0, "step": 4623 }, { "epoch": 6.308621296661794, "grad_norm": 0.2002310090519619, "learning_rate": 4.0326682932280336e-06, "loss": 0.1817, "num_tokens": 3526553122.0, "step": 4624 }, { "epoch": 6.309986877620475, "grad_norm": 0.22024577044935845, "learning_rate": 4.029522769313285e-06, "loss": 0.1801, "num_tokens": 3527355848.0, "step": 4625 }, { "epoch": 6.311352458579155, "grad_norm": 0.22675363436552176, "learning_rate": 4.026379371789274e-06, "loss": 0.188, "num_tokens": 3528120215.0, "step": 4626 }, { "epoch": 6.312718039537836, "grad_norm": 0.22968293993848335, "learning_rate": 4.023238101614917e-06, "loss": 0.1784, "num_tokens": 3528894356.0, "step": 4627 }, { "epoch": 6.3140836204965165, "grad_norm": 0.22299539668455945, "learning_rate": 4.0200989597484756e-06, "loss": 0.1824, "num_tokens": 3529765730.0, "step": 4628 }, { "epoch": 6.3154492014551975, "grad_norm": 0.2191179847060652, "learning_rate": 4.016961947147566e-06, "loss": 0.1824, "num_tokens": 3530548872.0, "step": 4629 }, { "epoch": 6.316814782413878, "grad_norm": 0.23206025959905505, "learning_rate": 4.013827064769158e-06, "loss": 0.1809, "num_tokens": 3531326054.0, "step": 4630 }, { "epoch": 6.318180363372559, "grad_norm": 0.20914965197334484, "learning_rate": 4.010694313569568e-06, "loss": 0.1796, "num_tokens": 3532057861.0, "step": 4631 }, { "epoch": 6.319545944331239, "grad_norm": 0.221436283712645, "learning_rate": 4.007563694504462e-06, "loss": 0.1858, "num_tokens": 3532875188.0, "step": 4632 }, { "epoch": 6.320911525289919, "grad_norm": 0.22038072546242202, "learning_rate": 4.004435208528854e-06, "loss": 0.1885, "num_tokens": 3533637412.0, "step": 4633 }, { "epoch": 6.3222771062486, "grad_norm": 0.22486531339561402, "learning_rate": 4.0013088565971126e-06, "loss": 0.1759, "num_tokens": 3534342808.0, "step": 4634 }, { "epoch": 6.32364268720728, "grad_norm": 0.23928908114153905, "learning_rate": 3.998184639662951e-06, "loss": 0.1743, "num_tokens": 3535110054.0, "step": 4635 }, { "epoch": 6.325008268165961, "grad_norm": 0.21123091861163099, "learning_rate": 3.9950625586794365e-06, "loss": 0.1785, "num_tokens": 3535836612.0, "step": 4636 }, { "epoch": 6.326373849124641, "grad_norm": 0.22745381323725103, "learning_rate": 3.991942614598974e-06, "loss": 0.1754, "num_tokens": 3536573923.0, "step": 4637 }, { "epoch": 6.327739430083322, "grad_norm": 0.2247523369746374, "learning_rate": 3.988824808373326e-06, "loss": 0.1839, "num_tokens": 3537321773.0, "step": 4638 }, { "epoch": 6.329105011042002, "grad_norm": 0.24512911610804214, "learning_rate": 3.9857091409536044e-06, "loss": 0.1713, "num_tokens": 3538040214.0, "step": 4639 }, { "epoch": 6.330470592000683, "grad_norm": 0.22850361471914524, "learning_rate": 3.982595613290258e-06, "loss": 0.1778, "num_tokens": 3538740712.0, "step": 4640 }, { "epoch": 6.331836172959363, "grad_norm": 0.2181433212971442, "learning_rate": 3.979484226333092e-06, "loss": 0.1838, "num_tokens": 3539472508.0, "step": 4641 }, { "epoch": 6.333201753918043, "grad_norm": 0.23936787051984446, "learning_rate": 3.976374981031256e-06, "loss": 0.1799, "num_tokens": 3540207921.0, "step": 4642 }, { "epoch": 6.334567334876724, "grad_norm": 0.22896106121475626, "learning_rate": 3.973267878333246e-06, "loss": 0.1914, "num_tokens": 3541059715.0, "step": 4643 }, { "epoch": 6.3359329158354045, "grad_norm": 0.21436586729513096, "learning_rate": 3.970162919186904e-06, "loss": 0.1779, "num_tokens": 3541872348.0, "step": 4644 }, { "epoch": 6.3372984967940855, "grad_norm": 0.2365491781992192, "learning_rate": 3.967060104539422e-06, "loss": 0.1814, "num_tokens": 3542563785.0, "step": 4645 }, { "epoch": 6.338664077752766, "grad_norm": 0.22180108253027245, "learning_rate": 3.963959435337328e-06, "loss": 0.1796, "num_tokens": 3543395323.0, "step": 4646 }, { "epoch": 6.340029658711447, "grad_norm": 0.2086760766404233, "learning_rate": 3.9608609125265065e-06, "loss": 0.1771, "num_tokens": 3544132690.0, "step": 4647 }, { "epoch": 6.341395239670127, "grad_norm": 0.2305601756663565, "learning_rate": 3.957764537052182e-06, "loss": 0.1886, "num_tokens": 3544918936.0, "step": 4648 }, { "epoch": 6.342760820628808, "grad_norm": 0.2379074699616871, "learning_rate": 3.954670309858924e-06, "loss": 0.1778, "num_tokens": 3545656346.0, "step": 4649 }, { "epoch": 6.344126401587488, "grad_norm": 0.22680761925206733, "learning_rate": 3.951578231890645e-06, "loss": 0.1838, "num_tokens": 3546321398.0, "step": 4650 }, { "epoch": 6.345491982546168, "grad_norm": 0.23507353210246723, "learning_rate": 3.948488304090607e-06, "loss": 0.1886, "num_tokens": 3547078530.0, "step": 4651 }, { "epoch": 6.346857563504849, "grad_norm": 0.21148751720868564, "learning_rate": 3.945400527401413e-06, "loss": 0.1831, "num_tokens": 3547874258.0, "step": 4652 }, { "epoch": 6.348223144463529, "grad_norm": 0.20918958158014256, "learning_rate": 3.942314902765008e-06, "loss": 0.1715, "num_tokens": 3548627865.0, "step": 4653 }, { "epoch": 6.34958872542221, "grad_norm": 0.23489495661402937, "learning_rate": 3.939231431122687e-06, "loss": 0.1872, "num_tokens": 3549423721.0, "step": 4654 }, { "epoch": 6.35095430638089, "grad_norm": 0.23431304774191444, "learning_rate": 3.9361501134150795e-06, "loss": 0.1827, "num_tokens": 3550157737.0, "step": 4655 }, { "epoch": 6.352319887339571, "grad_norm": 0.2222385081460156, "learning_rate": 3.933070950582162e-06, "loss": 0.1768, "num_tokens": 3550943083.0, "step": 4656 }, { "epoch": 6.353685468298251, "grad_norm": 0.21862820456026383, "learning_rate": 3.929993943563256e-06, "loss": 0.1784, "num_tokens": 3551699719.0, "step": 4657 }, { "epoch": 6.355051049256932, "grad_norm": 0.21998676786638408, "learning_rate": 3.926919093297017e-06, "loss": 0.1751, "num_tokens": 3552438492.0, "step": 4658 }, { "epoch": 6.356416630215612, "grad_norm": 0.2369966964247266, "learning_rate": 3.9238464007214605e-06, "loss": 0.1753, "num_tokens": 3553167155.0, "step": 4659 }, { "epoch": 6.3577822111742925, "grad_norm": 0.21604083630285426, "learning_rate": 3.9207758667739214e-06, "loss": 0.1821, "num_tokens": 3553891467.0, "step": 4660 }, { "epoch": 6.3591477921329735, "grad_norm": 0.2357963959873284, "learning_rate": 3.917707492391091e-06, "loss": 0.1807, "num_tokens": 3554615623.0, "step": 4661 }, { "epoch": 6.360513373091654, "grad_norm": 0.22546090470128136, "learning_rate": 3.914641278508997e-06, "loss": 0.1847, "num_tokens": 3555347041.0, "step": 4662 }, { "epoch": 6.361878954050335, "grad_norm": 0.22650929042883072, "learning_rate": 3.9115772260630075e-06, "loss": 0.1754, "num_tokens": 3556119211.0, "step": 4663 }, { "epoch": 6.363244535009015, "grad_norm": 0.222244021830333, "learning_rate": 3.908515335987833e-06, "loss": 0.1834, "num_tokens": 3556841686.0, "step": 4664 }, { "epoch": 6.364610115967696, "grad_norm": 0.22426117139032098, "learning_rate": 3.905455609217522e-06, "loss": 0.1897, "num_tokens": 3557625885.0, "step": 4665 }, { "epoch": 6.365975696926376, "grad_norm": 0.23175864754082223, "learning_rate": 3.902398046685469e-06, "loss": 0.1856, "num_tokens": 3558487340.0, "step": 4666 }, { "epoch": 6.367341277885057, "grad_norm": 0.21772293639261567, "learning_rate": 3.899342649324396e-06, "loss": 0.174, "num_tokens": 3559233752.0, "step": 4667 }, { "epoch": 6.368706858843737, "grad_norm": 0.21983357625405994, "learning_rate": 3.8962894180663835e-06, "loss": 0.1717, "num_tokens": 3560019530.0, "step": 4668 }, { "epoch": 6.370072439802417, "grad_norm": 0.2167223008211721, "learning_rate": 3.893238353842831e-06, "loss": 0.183, "num_tokens": 3560818457.0, "step": 4669 }, { "epoch": 6.371438020761098, "grad_norm": 0.22894878226125323, "learning_rate": 3.890189457584488e-06, "loss": 0.1737, "num_tokens": 3561567263.0, "step": 4670 }, { "epoch": 6.372803601719778, "grad_norm": 0.24209522913506676, "learning_rate": 3.887142730221445e-06, "loss": 0.1857, "num_tokens": 3562364537.0, "step": 4671 }, { "epoch": 6.374169182678459, "grad_norm": 0.21949835848089644, "learning_rate": 3.8840981726831206e-06, "loss": 0.1885, "num_tokens": 3563196530.0, "step": 4672 }, { "epoch": 6.375534763637139, "grad_norm": 0.2267978540315638, "learning_rate": 3.881055785898281e-06, "loss": 0.178, "num_tokens": 3563980698.0, "step": 4673 }, { "epoch": 6.37690034459582, "grad_norm": 0.2169501316218299, "learning_rate": 3.8780155707950255e-06, "loss": 0.1803, "num_tokens": 3564723579.0, "step": 4674 }, { "epoch": 6.3782659255545004, "grad_norm": 0.23829579362694697, "learning_rate": 3.874977528300794e-06, "loss": 0.1784, "num_tokens": 3565479726.0, "step": 4675 }, { "epoch": 6.3796315065131814, "grad_norm": 0.2041272829998039, "learning_rate": 3.871941659342355e-06, "loss": 0.1877, "num_tokens": 3566287084.0, "step": 4676 }, { "epoch": 6.380997087471862, "grad_norm": 0.22647911802883477, "learning_rate": 3.868907964845833e-06, "loss": 0.1847, "num_tokens": 3567103567.0, "step": 4677 }, { "epoch": 6.382362668430542, "grad_norm": 0.21582789442232744, "learning_rate": 3.865876445736667e-06, "loss": 0.1833, "num_tokens": 3567937581.0, "step": 4678 }, { "epoch": 6.383728249389223, "grad_norm": 0.2376580691182321, "learning_rate": 3.862847102939642e-06, "loss": 0.1795, "num_tokens": 3568629369.0, "step": 4679 }, { "epoch": 6.385093830347903, "grad_norm": 0.22200162539019003, "learning_rate": 3.859819937378885e-06, "loss": 0.1748, "num_tokens": 3569371182.0, "step": 4680 }, { "epoch": 6.386459411306584, "grad_norm": 0.22285516925533702, "learning_rate": 3.856794949977849e-06, "loss": 0.1809, "num_tokens": 3570143252.0, "step": 4681 }, { "epoch": 6.387824992265264, "grad_norm": 0.2151166707277959, "learning_rate": 3.853772141659328e-06, "loss": 0.1807, "num_tokens": 3570932834.0, "step": 4682 }, { "epoch": 6.389190573223945, "grad_norm": 0.2205795948056368, "learning_rate": 3.850751513345449e-06, "loss": 0.1719, "num_tokens": 3571671921.0, "step": 4683 }, { "epoch": 6.390556154182625, "grad_norm": 0.224445272868465, "learning_rate": 3.847733065957679e-06, "loss": 0.1748, "num_tokens": 3572378021.0, "step": 4684 }, { "epoch": 6.391921735141306, "grad_norm": 0.22407646267725653, "learning_rate": 3.844716800416806e-06, "loss": 0.1837, "num_tokens": 3573148087.0, "step": 4685 }, { "epoch": 6.393287316099986, "grad_norm": 0.23330017100166128, "learning_rate": 3.841702717642975e-06, "loss": 0.176, "num_tokens": 3573886052.0, "step": 4686 }, { "epoch": 6.394652897058666, "grad_norm": 0.23130023290693846, "learning_rate": 3.838690818555642e-06, "loss": 0.1839, "num_tokens": 3574623596.0, "step": 4687 }, { "epoch": 6.396018478017347, "grad_norm": 0.23736709335480422, "learning_rate": 3.8356811040736085e-06, "loss": 0.1862, "num_tokens": 3575381442.0, "step": 4688 }, { "epoch": 6.397384058976027, "grad_norm": 0.22543047740927208, "learning_rate": 3.8326735751150145e-06, "loss": 0.1791, "num_tokens": 3576216422.0, "step": 4689 }, { "epoch": 6.398749639934708, "grad_norm": 0.22514718013219293, "learning_rate": 3.829668232597319e-06, "loss": 0.1775, "num_tokens": 3576924796.0, "step": 4690 }, { "epoch": 6.4001152208933885, "grad_norm": 0.2160098441528086, "learning_rate": 3.826665077437325e-06, "loss": 0.1809, "num_tokens": 3577681690.0, "step": 4691 }, { "epoch": 6.4014808018520695, "grad_norm": 0.23029601763712723, "learning_rate": 3.823664110551166e-06, "loss": 0.1868, "num_tokens": 3578391121.0, "step": 4692 }, { "epoch": 6.40284638281075, "grad_norm": 0.2386341321048807, "learning_rate": 3.820665332854308e-06, "loss": 0.1746, "num_tokens": 3579136786.0, "step": 4693 }, { "epoch": 6.404211963769431, "grad_norm": 0.23239388035017836, "learning_rate": 3.817668745261544e-06, "loss": 0.1826, "num_tokens": 3579886292.0, "step": 4694 }, { "epoch": 6.405577544728111, "grad_norm": 0.2238964726566911, "learning_rate": 3.814674348687005e-06, "loss": 0.184, "num_tokens": 3580696816.0, "step": 4695 }, { "epoch": 6.406943125686791, "grad_norm": 0.23050576416839857, "learning_rate": 3.811682144044155e-06, "loss": 0.1779, "num_tokens": 3581384894.0, "step": 4696 }, { "epoch": 6.408308706645472, "grad_norm": 0.23314450347895815, "learning_rate": 3.808692132245779e-06, "loss": 0.1836, "num_tokens": 3582141568.0, "step": 4697 }, { "epoch": 6.409674287604152, "grad_norm": 0.22965775427780133, "learning_rate": 3.8057043142040083e-06, "loss": 0.1863, "num_tokens": 3582908066.0, "step": 4698 }, { "epoch": 6.411039868562833, "grad_norm": 0.2200206833706499, "learning_rate": 3.8027186908302916e-06, "loss": 0.1758, "num_tokens": 3583619650.0, "step": 4699 }, { "epoch": 6.412405449521513, "grad_norm": 0.2157339799837985, "learning_rate": 3.7997352630354135e-06, "loss": 0.1845, "num_tokens": 3584395909.0, "step": 4700 }, { "epoch": 6.413771030480194, "grad_norm": 0.2291752961617363, "learning_rate": 3.7967540317294927e-06, "loss": 0.1838, "num_tokens": 3585119871.0, "step": 4701 }, { "epoch": 6.415136611438874, "grad_norm": 0.22632143635683327, "learning_rate": 3.7937749978219686e-06, "loss": 0.18, "num_tokens": 3585897030.0, "step": 4702 }, { "epoch": 6.416502192397555, "grad_norm": 0.21145996806642645, "learning_rate": 3.790798162221616e-06, "loss": 0.1792, "num_tokens": 3586670896.0, "step": 4703 }, { "epoch": 6.417867773356235, "grad_norm": 0.21515669171450472, "learning_rate": 3.787823525836542e-06, "loss": 0.18, "num_tokens": 3587366213.0, "step": 4704 }, { "epoch": 6.419233354314915, "grad_norm": 0.23557383906712354, "learning_rate": 3.7848510895741785e-06, "loss": 0.1812, "num_tokens": 3588061765.0, "step": 4705 }, { "epoch": 6.420598935273596, "grad_norm": 0.21863454394723125, "learning_rate": 3.7818808543412822e-06, "loss": 0.1843, "num_tokens": 3588956382.0, "step": 4706 }, { "epoch": 6.4219645162322765, "grad_norm": 0.20835101755498803, "learning_rate": 3.778912821043952e-06, "loss": 0.1773, "num_tokens": 3589758235.0, "step": 4707 }, { "epoch": 6.4233300971909575, "grad_norm": 0.2057063836827315, "learning_rate": 3.775946990587598e-06, "loss": 0.1793, "num_tokens": 3590468603.0, "step": 4708 }, { "epoch": 6.424695678149638, "grad_norm": 0.24437615874414237, "learning_rate": 3.772983363876971e-06, "loss": 0.181, "num_tokens": 3591286349.0, "step": 4709 }, { "epoch": 6.426061259108319, "grad_norm": 0.2046744450519949, "learning_rate": 3.7700219418161465e-06, "loss": 0.1797, "num_tokens": 3592059649.0, "step": 4710 }, { "epoch": 6.427426840066999, "grad_norm": 0.22770072203107586, "learning_rate": 3.7670627253085213e-06, "loss": 0.1826, "num_tokens": 3592828068.0, "step": 4711 }, { "epoch": 6.42879242102568, "grad_norm": 0.23003097529615302, "learning_rate": 3.7641057152568257e-06, "loss": 0.1821, "num_tokens": 3593538463.0, "step": 4712 }, { "epoch": 6.43015800198436, "grad_norm": 0.22621569277207304, "learning_rate": 3.7611509125631176e-06, "loss": 0.1778, "num_tokens": 3594261662.0, "step": 4713 }, { "epoch": 6.43152358294304, "grad_norm": 0.27065122093530547, "learning_rate": 3.7581983181287773e-06, "loss": 0.1803, "num_tokens": 3594982771.0, "step": 4714 }, { "epoch": 6.432889163901721, "grad_norm": 0.22887813412352165, "learning_rate": 3.755247932854515e-06, "loss": 0.1826, "num_tokens": 3595740416.0, "step": 4715 }, { "epoch": 6.434254744860401, "grad_norm": 0.22332439976619603, "learning_rate": 3.752299757640367e-06, "loss": 0.1758, "num_tokens": 3596513326.0, "step": 4716 }, { "epoch": 6.435620325819082, "grad_norm": 0.22625461250911724, "learning_rate": 3.7493537933856893e-06, "loss": 0.1748, "num_tokens": 3597320666.0, "step": 4717 }, { "epoch": 6.436985906777762, "grad_norm": 0.22496017436071017, "learning_rate": 3.7464100409891717e-06, "loss": 0.1845, "num_tokens": 3598039515.0, "step": 4718 }, { "epoch": 6.438351487736443, "grad_norm": 0.23505265823779417, "learning_rate": 3.7434685013488253e-06, "loss": 0.1686, "num_tokens": 3598730197.0, "step": 4719 }, { "epoch": 6.439717068695123, "grad_norm": 0.21157946999343086, "learning_rate": 3.7405291753619834e-06, "loss": 0.1803, "num_tokens": 3599487307.0, "step": 4720 }, { "epoch": 6.441082649653804, "grad_norm": 0.22476592023972347, "learning_rate": 3.7375920639253095e-06, "loss": 0.1804, "num_tokens": 3600270378.0, "step": 4721 }, { "epoch": 6.442448230612484, "grad_norm": 0.21813437565139515, "learning_rate": 3.7346571679347887e-06, "loss": 0.1768, "num_tokens": 3601058466.0, "step": 4722 }, { "epoch": 6.4438138115711645, "grad_norm": 0.21879363477045952, "learning_rate": 3.7317244882857297e-06, "loss": 0.1783, "num_tokens": 3601839270.0, "step": 4723 }, { "epoch": 6.4451793925298455, "grad_norm": 0.22349629387091144, "learning_rate": 3.7287940258727684e-06, "loss": 0.1757, "num_tokens": 3602527181.0, "step": 4724 }, { "epoch": 6.446544973488526, "grad_norm": 0.23320098874774634, "learning_rate": 3.725865781589862e-06, "loss": 0.1874, "num_tokens": 3603318558.0, "step": 4725 }, { "epoch": 6.447910554447207, "grad_norm": 0.22515763662257524, "learning_rate": 3.722939756330287e-06, "loss": 0.1805, "num_tokens": 3604040605.0, "step": 4726 }, { "epoch": 6.449276135405887, "grad_norm": 0.23292153569376944, "learning_rate": 3.7200159509866507e-06, "loss": 0.1877, "num_tokens": 3604834282.0, "step": 4727 }, { "epoch": 6.450641716364568, "grad_norm": 0.22260454453137682, "learning_rate": 3.717094366450881e-06, "loss": 0.178, "num_tokens": 3605558682.0, "step": 4728 }, { "epoch": 6.452007297323248, "grad_norm": 0.2282820266555694, "learning_rate": 3.714175003614221e-06, "loss": 0.181, "num_tokens": 3606373347.0, "step": 4729 }, { "epoch": 6.453372878281929, "grad_norm": 0.229244625192717, "learning_rate": 3.7112578633672456e-06, "loss": 0.1867, "num_tokens": 3607172486.0, "step": 4730 }, { "epoch": 6.454738459240609, "grad_norm": 0.22302732212756482, "learning_rate": 3.708342946599847e-06, "loss": 0.1804, "num_tokens": 3607940230.0, "step": 4731 }, { "epoch": 6.456104040199289, "grad_norm": 0.2144303700158582, "learning_rate": 3.7054302542012423e-06, "loss": 0.1787, "num_tokens": 3608714880.0, "step": 4732 }, { "epoch": 6.45746962115797, "grad_norm": 0.2234023605258637, "learning_rate": 3.702519787059966e-06, "loss": 0.1865, "num_tokens": 3609574849.0, "step": 4733 }, { "epoch": 6.45883520211665, "grad_norm": 0.2063112142664421, "learning_rate": 3.699611546063875e-06, "loss": 0.1835, "num_tokens": 3610230051.0, "step": 4734 }, { "epoch": 6.460200783075331, "grad_norm": 0.2399048575424592, "learning_rate": 3.6967055321001475e-06, "loss": 0.1857, "num_tokens": 3610990679.0, "step": 4735 }, { "epoch": 6.461566364034011, "grad_norm": 0.2318328853161293, "learning_rate": 3.6938017460552844e-06, "loss": 0.1878, "num_tokens": 3611700606.0, "step": 4736 }, { "epoch": 6.462931944992692, "grad_norm": 0.2217707669562879, "learning_rate": 3.6909001888151052e-06, "loss": 0.1779, "num_tokens": 3612447433.0, "step": 4737 }, { "epoch": 6.464297525951372, "grad_norm": 0.2252814826519996, "learning_rate": 3.6880008612647466e-06, "loss": 0.1752, "num_tokens": 3613188646.0, "step": 4738 }, { "epoch": 6.465663106910053, "grad_norm": 0.2175655692944809, "learning_rate": 3.685103764288669e-06, "loss": 0.1766, "num_tokens": 3613943320.0, "step": 4739 }, { "epoch": 6.4670286878687335, "grad_norm": 0.22168190810086927, "learning_rate": 3.682208898770654e-06, "loss": 0.1793, "num_tokens": 3614720354.0, "step": 4740 }, { "epoch": 6.468394268827414, "grad_norm": 0.2141865334257175, "learning_rate": 3.6793162655937926e-06, "loss": 0.18, "num_tokens": 3615558618.0, "step": 4741 }, { "epoch": 6.469759849786095, "grad_norm": 0.21007726052422543, "learning_rate": 3.676425865640512e-06, "loss": 0.1714, "num_tokens": 3616309017.0, "step": 4742 }, { "epoch": 6.471125430744775, "grad_norm": 0.21408981407228747, "learning_rate": 3.6735376997925405e-06, "loss": 0.1792, "num_tokens": 3617083777.0, "step": 4743 }, { "epoch": 6.472491011703456, "grad_norm": 0.21169944794336004, "learning_rate": 3.670651768930934e-06, "loss": 0.1829, "num_tokens": 3617860559.0, "step": 4744 }, { "epoch": 6.473856592662136, "grad_norm": 0.2313803249364492, "learning_rate": 3.6677680739360654e-06, "loss": 0.1768, "num_tokens": 3618579668.0, "step": 4745 }, { "epoch": 6.475222173620817, "grad_norm": 0.2193264049504872, "learning_rate": 3.664886615687629e-06, "loss": 0.1768, "num_tokens": 3619314303.0, "step": 4746 }, { "epoch": 6.476587754579497, "grad_norm": 0.22900706638391982, "learning_rate": 3.662007395064624e-06, "loss": 0.1747, "num_tokens": 3620108500.0, "step": 4747 }, { "epoch": 6.477953335538178, "grad_norm": 0.2179348536458725, "learning_rate": 3.6591304129453862e-06, "loss": 0.1801, "num_tokens": 3620834478.0, "step": 4748 }, { "epoch": 6.479318916496858, "grad_norm": 0.2140647025247064, "learning_rate": 3.656255670207554e-06, "loss": 0.1827, "num_tokens": 3621583313.0, "step": 4749 }, { "epoch": 6.480684497455538, "grad_norm": 0.22938863277652466, "learning_rate": 3.653383167728081e-06, "loss": 0.1742, "num_tokens": 3622376276.0, "step": 4750 }, { "epoch": 6.482050078414219, "grad_norm": 0.21493211754686253, "learning_rate": 3.6505129063832532e-06, "loss": 0.179, "num_tokens": 3623135906.0, "step": 4751 }, { "epoch": 6.483415659372899, "grad_norm": 0.2120222351936783, "learning_rate": 3.647644887048656e-06, "loss": 0.1822, "num_tokens": 3623972103.0, "step": 4752 }, { "epoch": 6.48478124033158, "grad_norm": 0.2234264337450395, "learning_rate": 3.6447791105992005e-06, "loss": 0.1844, "num_tokens": 3624740520.0, "step": 4753 }, { "epoch": 6.48614682129026, "grad_norm": 0.2099620188238082, "learning_rate": 3.6419155779091098e-06, "loss": 0.1772, "num_tokens": 3625508282.0, "step": 4754 }, { "epoch": 6.487512402248941, "grad_norm": 0.21825852464548642, "learning_rate": 3.639054289851929e-06, "loss": 0.1785, "num_tokens": 3626263570.0, "step": 4755 }, { "epoch": 6.4888779832076215, "grad_norm": 0.22483304412252836, "learning_rate": 3.636195247300505e-06, "loss": 0.1871, "num_tokens": 3627041910.0, "step": 4756 }, { "epoch": 6.4902435641663025, "grad_norm": 0.2279487423006822, "learning_rate": 3.6333384511270124e-06, "loss": 0.1722, "num_tokens": 3627798121.0, "step": 4757 }, { "epoch": 6.491609145124983, "grad_norm": 0.2156216861389156, "learning_rate": 3.630483902202938e-06, "loss": 0.182, "num_tokens": 3628617831.0, "step": 4758 }, { "epoch": 6.492974726083663, "grad_norm": 0.22699299600406742, "learning_rate": 3.627631601399073e-06, "loss": 0.1753, "num_tokens": 3629336062.0, "step": 4759 }, { "epoch": 6.494340307042344, "grad_norm": 0.22423566268388165, "learning_rate": 3.624781549585542e-06, "loss": 0.1851, "num_tokens": 3630136894.0, "step": 4760 }, { "epoch": 6.495705888001024, "grad_norm": 0.23548155388779976, "learning_rate": 3.6219337476317635e-06, "loss": 0.1717, "num_tokens": 3630831243.0, "step": 4761 }, { "epoch": 6.497071468959705, "grad_norm": 0.2306729069560012, "learning_rate": 3.619088196406483e-06, "loss": 0.1701, "num_tokens": 3631583309.0, "step": 4762 }, { "epoch": 6.498437049918385, "grad_norm": 0.22086649154944943, "learning_rate": 3.616244896777752e-06, "loss": 0.186, "num_tokens": 3632375695.0, "step": 4763 }, { "epoch": 6.499802630877066, "grad_norm": 0.22228069400480052, "learning_rate": 3.613403849612942e-06, "loss": 0.1814, "num_tokens": 3633109951.0, "step": 4764 }, { "epoch": 6.501168211835746, "grad_norm": 0.22585595825914953, "learning_rate": 3.6105650557787297e-06, "loss": 0.1774, "num_tokens": 3633861680.0, "step": 4765 }, { "epoch": 6.502533792794427, "grad_norm": 0.23165844650568887, "learning_rate": 3.6077285161411096e-06, "loss": 0.1871, "num_tokens": 3634606790.0, "step": 4766 }, { "epoch": 6.503899373753107, "grad_norm": 0.21983394716272808, "learning_rate": 3.60489423156539e-06, "loss": 0.1805, "num_tokens": 3635356600.0, "step": 4767 }, { "epoch": 6.505264954711787, "grad_norm": 0.22320113939688455, "learning_rate": 3.60206220291618e-06, "loss": 0.177, "num_tokens": 3636035498.0, "step": 4768 }, { "epoch": 6.506630535670468, "grad_norm": 0.22327898809139804, "learning_rate": 3.599232431057419e-06, "loss": 0.1837, "num_tokens": 3636799787.0, "step": 4769 }, { "epoch": 6.5079961166291485, "grad_norm": 0.2282535912065186, "learning_rate": 3.5964049168523418e-06, "loss": 0.1822, "num_tokens": 3637565458.0, "step": 4770 }, { "epoch": 6.5093616975878295, "grad_norm": 0.21515331770538096, "learning_rate": 3.593579661163502e-06, "loss": 0.1884, "num_tokens": 3638317227.0, "step": 4771 }, { "epoch": 6.51072727854651, "grad_norm": 0.23533333065453246, "learning_rate": 3.5907566648527647e-06, "loss": 0.1819, "num_tokens": 3639077023.0, "step": 4772 }, { "epoch": 6.512092859505191, "grad_norm": 0.22806343141940338, "learning_rate": 3.5879359287812997e-06, "loss": 0.1782, "num_tokens": 3639882942.0, "step": 4773 }, { "epoch": 6.513458440463871, "grad_norm": 0.20767436332857178, "learning_rate": 3.5851174538095923e-06, "loss": 0.1829, "num_tokens": 3640609825.0, "step": 4774 }, { "epoch": 6.514824021422552, "grad_norm": 0.22926492261394626, "learning_rate": 3.5823012407974407e-06, "loss": 0.1807, "num_tokens": 3641366935.0, "step": 4775 }, { "epoch": 6.516189602381232, "grad_norm": 0.2258133987110664, "learning_rate": 3.579487290603949e-06, "loss": 0.175, "num_tokens": 3642090363.0, "step": 4776 }, { "epoch": 6.517555183339912, "grad_norm": 0.2265875394434879, "learning_rate": 3.576675604087524e-06, "loss": 0.1808, "num_tokens": 3642852870.0, "step": 4777 }, { "epoch": 6.518920764298593, "grad_norm": 0.22829008343580473, "learning_rate": 3.5738661821059004e-06, "loss": 0.1849, "num_tokens": 3643649150.0, "step": 4778 }, { "epoch": 6.520286345257273, "grad_norm": 0.2210068233585175, "learning_rate": 3.5710590255161047e-06, "loss": 0.1829, "num_tokens": 3644368437.0, "step": 4779 }, { "epoch": 6.521651926215954, "grad_norm": 0.22723059250228148, "learning_rate": 3.5682541351744794e-06, "loss": 0.1844, "num_tokens": 3645072397.0, "step": 4780 }, { "epoch": 6.523017507174634, "grad_norm": 0.22255589593864541, "learning_rate": 3.5654515119366795e-06, "loss": 0.1805, "num_tokens": 3645817812.0, "step": 4781 }, { "epoch": 6.524383088133315, "grad_norm": 0.21064919171390636, "learning_rate": 3.5626511566576568e-06, "loss": 0.1917, "num_tokens": 3646607288.0, "step": 4782 }, { "epoch": 6.525748669091995, "grad_norm": 0.23858110980550917, "learning_rate": 3.559853070191682e-06, "loss": 0.1843, "num_tokens": 3647410401.0, "step": 4783 }, { "epoch": 6.527114250050676, "grad_norm": 0.23223087139199772, "learning_rate": 3.557057253392331e-06, "loss": 0.1862, "num_tokens": 3648146124.0, "step": 4784 }, { "epoch": 6.528479831009356, "grad_norm": 0.22471123202484494, "learning_rate": 3.554263707112487e-06, "loss": 0.1871, "num_tokens": 3648964007.0, "step": 4785 }, { "epoch": 6.5298454119680365, "grad_norm": 0.23695961626303033, "learning_rate": 3.551472432204335e-06, "loss": 0.1782, "num_tokens": 3649729878.0, "step": 4786 }, { "epoch": 6.5312109929267175, "grad_norm": 0.22391162910446552, "learning_rate": 3.5486834295193785e-06, "loss": 0.1773, "num_tokens": 3650463165.0, "step": 4787 }, { "epoch": 6.532576573885398, "grad_norm": 0.2232085517123663, "learning_rate": 3.545896699908417e-06, "loss": 0.1795, "num_tokens": 3651201748.0, "step": 4788 }, { "epoch": 6.533942154844079, "grad_norm": 0.23691144618455381, "learning_rate": 3.543112244221564e-06, "loss": 0.1888, "num_tokens": 3652092189.0, "step": 4789 }, { "epoch": 6.535307735802759, "grad_norm": 0.22490600982905473, "learning_rate": 3.540330063308235e-06, "loss": 0.1831, "num_tokens": 3652873036.0, "step": 4790 }, { "epoch": 6.53667331676144, "grad_norm": 0.22435757219598473, "learning_rate": 3.5375501580171516e-06, "loss": 0.1839, "num_tokens": 3653637831.0, "step": 4791 }, { "epoch": 6.53803889772012, "grad_norm": 0.2155973791504615, "learning_rate": 3.534772529196344e-06, "loss": 0.1833, "num_tokens": 3654425575.0, "step": 4792 }, { "epoch": 6.539404478678801, "grad_norm": 0.23530158774510448, "learning_rate": 3.5319971776931466e-06, "loss": 0.18, "num_tokens": 3655244904.0, "step": 4793 }, { "epoch": 6.540770059637481, "grad_norm": 0.20648889007628898, "learning_rate": 3.5292241043542e-06, "loss": 0.1796, "num_tokens": 3656016792.0, "step": 4794 }, { "epoch": 6.542135640596161, "grad_norm": 0.22104453701631827, "learning_rate": 3.526453310025445e-06, "loss": 0.186, "num_tokens": 3656772482.0, "step": 4795 }, { "epoch": 6.543501221554842, "grad_norm": 0.22926923527423118, "learning_rate": 3.5236847955521336e-06, "loss": 0.1871, "num_tokens": 3657523431.0, "step": 4796 }, { "epoch": 6.544866802513522, "grad_norm": 0.22803608821047552, "learning_rate": 3.52091856177882e-06, "loss": 0.1835, "num_tokens": 3658304464.0, "step": 4797 }, { "epoch": 6.546232383472203, "grad_norm": 0.2887884097685323, "learning_rate": 3.5181546095493615e-06, "loss": 0.187, "num_tokens": 3659140542.0, "step": 4798 }, { "epoch": 6.547597964430883, "grad_norm": 0.21443720294582588, "learning_rate": 3.515392939706922e-06, "loss": 0.1885, "num_tokens": 3659934477.0, "step": 4799 }, { "epoch": 6.548963545389564, "grad_norm": 0.21146978928843968, "learning_rate": 3.5126335530939636e-06, "loss": 0.1837, "num_tokens": 3660760644.0, "step": 4800 }, { "epoch": 6.550329126348244, "grad_norm": 0.21727373971762395, "learning_rate": 3.5098764505522582e-06, "loss": 0.1808, "num_tokens": 3661503341.0, "step": 4801 }, { "epoch": 6.551694707306925, "grad_norm": 0.2356568973728017, "learning_rate": 3.5071216329228784e-06, "loss": 0.1774, "num_tokens": 3662262489.0, "step": 4802 }, { "epoch": 6.5530602882656055, "grad_norm": 0.22515861563110076, "learning_rate": 3.5043691010461996e-06, "loss": 0.18, "num_tokens": 3662991156.0, "step": 4803 }, { "epoch": 6.554425869224286, "grad_norm": 0.20769287043248816, "learning_rate": 3.5016188557619014e-06, "loss": 0.1807, "num_tokens": 3663740009.0, "step": 4804 }, { "epoch": 6.555791450182967, "grad_norm": 0.23436581245797553, "learning_rate": 3.4988708979089607e-06, "loss": 0.183, "num_tokens": 3664588188.0, "step": 4805 }, { "epoch": 6.557157031141647, "grad_norm": 0.21522648493837862, "learning_rate": 3.496125228325663e-06, "loss": 0.1808, "num_tokens": 3665370956.0, "step": 4806 }, { "epoch": 6.558522612100328, "grad_norm": 0.23170779984980064, "learning_rate": 3.493381847849593e-06, "loss": 0.1793, "num_tokens": 3666065625.0, "step": 4807 }, { "epoch": 6.559888193059008, "grad_norm": 0.23184877641013862, "learning_rate": 3.4906407573176395e-06, "loss": 0.1855, "num_tokens": 3666795284.0, "step": 4808 }, { "epoch": 6.561253774017689, "grad_norm": 0.21595031908544562, "learning_rate": 3.487901957565986e-06, "loss": 0.1774, "num_tokens": 3667517081.0, "step": 4809 }, { "epoch": 6.562619354976369, "grad_norm": 0.22509699032226332, "learning_rate": 3.485165449430124e-06, "loss": 0.1861, "num_tokens": 3668306469.0, "step": 4810 }, { "epoch": 6.56398493593505, "grad_norm": 0.21343485412770635, "learning_rate": 3.482431233744846e-06, "loss": 0.182, "num_tokens": 3669123553.0, "step": 4811 }, { "epoch": 6.56535051689373, "grad_norm": 0.21767751706702304, "learning_rate": 3.4796993113442355e-06, "loss": 0.1829, "num_tokens": 3669894804.0, "step": 4812 }, { "epoch": 6.56671609785241, "grad_norm": 0.21331809850934497, "learning_rate": 3.476969683061694e-06, "loss": 0.1819, "num_tokens": 3670699111.0, "step": 4813 }, { "epoch": 6.568081678811091, "grad_norm": 0.21121591440758444, "learning_rate": 3.474242349729904e-06, "loss": 0.1787, "num_tokens": 3671498463.0, "step": 4814 }, { "epoch": 6.569447259769771, "grad_norm": 0.2279823790914049, "learning_rate": 3.47151731218086e-06, "loss": 0.1898, "num_tokens": 3672329311.0, "step": 4815 }, { "epoch": 6.570812840728452, "grad_norm": 0.2061356450958764, "learning_rate": 3.4687945712458543e-06, "loss": 0.1899, "num_tokens": 3673106078.0, "step": 4816 }, { "epoch": 6.572178421687132, "grad_norm": 0.22411591340123, "learning_rate": 3.4660741277554776e-06, "loss": 0.1931, "num_tokens": 3673862511.0, "step": 4817 }, { "epoch": 6.573544002645813, "grad_norm": 0.22078619941856764, "learning_rate": 3.4633559825396167e-06, "loss": 0.1795, "num_tokens": 3674651619.0, "step": 4818 }, { "epoch": 6.5749095836044935, "grad_norm": 0.2848154979272138, "learning_rate": 3.46064013642746e-06, "loss": 0.1741, "num_tokens": 3675421560.0, "step": 4819 }, { "epoch": 6.5762751645631745, "grad_norm": 0.21447635245095573, "learning_rate": 3.457926590247499e-06, "loss": 0.1816, "num_tokens": 3676168927.0, "step": 4820 }, { "epoch": 6.577640745521855, "grad_norm": 0.23951090040235246, "learning_rate": 3.455215344827511e-06, "loss": 0.1756, "num_tokens": 3676945516.0, "step": 4821 }, { "epoch": 6.579006326480535, "grad_norm": 0.2069201234309381, "learning_rate": 3.452506400994588e-06, "loss": 0.1739, "num_tokens": 3677747801.0, "step": 4822 }, { "epoch": 6.580371907439216, "grad_norm": 0.21764044633346558, "learning_rate": 3.449799759575107e-06, "loss": 0.181, "num_tokens": 3678562736.0, "step": 4823 }, { "epoch": 6.581737488397896, "grad_norm": 0.21737917694850611, "learning_rate": 3.4470954213947472e-06, "loss": 0.1895, "num_tokens": 3679339737.0, "step": 4824 }, { "epoch": 6.583103069356577, "grad_norm": 0.22831936347462248, "learning_rate": 3.444393387278486e-06, "loss": 0.1856, "num_tokens": 3680147407.0, "step": 4825 }, { "epoch": 6.584468650315257, "grad_norm": 0.20694477243610473, "learning_rate": 3.441693658050599e-06, "loss": 0.1803, "num_tokens": 3680920441.0, "step": 4826 }, { "epoch": 6.585834231273938, "grad_norm": 0.2223391131817523, "learning_rate": 3.438996234534654e-06, "loss": 0.171, "num_tokens": 3681649657.0, "step": 4827 }, { "epoch": 6.587199812232618, "grad_norm": 0.21882424363083458, "learning_rate": 3.4363011175535167e-06, "loss": 0.1857, "num_tokens": 3682381101.0, "step": 4828 }, { "epoch": 6.588565393191299, "grad_norm": 0.2470869020974388, "learning_rate": 3.433608307929354e-06, "loss": 0.1854, "num_tokens": 3683183073.0, "step": 4829 }, { "epoch": 6.589930974149979, "grad_norm": 0.21654690296937112, "learning_rate": 3.430917806483621e-06, "loss": 0.1775, "num_tokens": 3683941267.0, "step": 4830 }, { "epoch": 6.591296555108659, "grad_norm": 0.21443759710205532, "learning_rate": 3.4282296140370807e-06, "loss": 0.1806, "num_tokens": 3684741206.0, "step": 4831 }, { "epoch": 6.59266213606734, "grad_norm": 0.21034103038296753, "learning_rate": 3.425543731409777e-06, "loss": 0.1758, "num_tokens": 3685479790.0, "step": 4832 }, { "epoch": 6.59402771702602, "grad_norm": 0.22911965372523307, "learning_rate": 3.4228601594210587e-06, "loss": 0.1832, "num_tokens": 3686224594.0, "step": 4833 }, { "epoch": 6.595393297984701, "grad_norm": 0.22110409450488314, "learning_rate": 3.420178898889571e-06, "loss": 0.1751, "num_tokens": 3686971812.0, "step": 4834 }, { "epoch": 6.5967588789433815, "grad_norm": 0.2477610221994168, "learning_rate": 3.4174999506332434e-06, "loss": 0.1851, "num_tokens": 3687677318.0, "step": 4835 }, { "epoch": 6.5981244599020625, "grad_norm": 0.21513196922143643, "learning_rate": 3.4148233154693116e-06, "loss": 0.1805, "num_tokens": 3688487786.0, "step": 4836 }, { "epoch": 6.599490040860743, "grad_norm": 0.21222539739223864, "learning_rate": 3.4121489942143006e-06, "loss": 0.1871, "num_tokens": 3689217720.0, "step": 4837 }, { "epoch": 6.600855621819424, "grad_norm": 0.23404660271559805, "learning_rate": 3.4094769876840305e-06, "loss": 0.1809, "num_tokens": 3689963805.0, "step": 4838 }, { "epoch": 6.602221202778104, "grad_norm": 0.22709276811637877, "learning_rate": 3.4068072966936106e-06, "loss": 0.1776, "num_tokens": 3690700761.0, "step": 4839 }, { "epoch": 6.603586783736784, "grad_norm": 0.22998078455947865, "learning_rate": 3.4041399220574554e-06, "loss": 0.1791, "num_tokens": 3691429771.0, "step": 4840 }, { "epoch": 6.604952364695465, "grad_norm": 0.22975225226587087, "learning_rate": 3.401474864589258e-06, "loss": 0.1828, "num_tokens": 3692150449.0, "step": 4841 }, { "epoch": 6.606317945654145, "grad_norm": 0.23009457839580627, "learning_rate": 3.398812125102017e-06, "loss": 0.1876, "num_tokens": 3692970592.0, "step": 4842 }, { "epoch": 6.607683526612826, "grad_norm": 0.2250966665782744, "learning_rate": 3.3961517044080182e-06, "loss": 0.1761, "num_tokens": 3693687446.0, "step": 4843 }, { "epoch": 6.609049107571506, "grad_norm": 0.22360865049426387, "learning_rate": 3.393493603318838e-06, "loss": 0.1757, "num_tokens": 3694429415.0, "step": 4844 }, { "epoch": 6.610414688530187, "grad_norm": 0.24429494406894797, "learning_rate": 3.3908378226453497e-06, "loss": 0.1803, "num_tokens": 3695137197.0, "step": 4845 }, { "epoch": 6.611780269488867, "grad_norm": 0.22566137214324625, "learning_rate": 3.3881843631977163e-06, "loss": 0.1725, "num_tokens": 3695880173.0, "step": 4846 }, { "epoch": 6.613145850447548, "grad_norm": 0.22954624379362149, "learning_rate": 3.3855332257853966e-06, "loss": 0.1813, "num_tokens": 3696658450.0, "step": 4847 }, { "epoch": 6.614511431406228, "grad_norm": 0.2278589311559936, "learning_rate": 3.382884411217131e-06, "loss": 0.1708, "num_tokens": 3697385255.0, "step": 4848 }, { "epoch": 6.615877012364908, "grad_norm": 0.21851124685806794, "learning_rate": 3.3802379203009662e-06, "loss": 0.1785, "num_tokens": 3698167435.0, "step": 4849 }, { "epoch": 6.617242593323589, "grad_norm": 0.2094659226249039, "learning_rate": 3.377593753844228e-06, "loss": 0.1804, "num_tokens": 3698967758.0, "step": 4850 }, { "epoch": 6.6186081742822696, "grad_norm": 0.23889354929098042, "learning_rate": 3.3749519126535337e-06, "loss": 0.1885, "num_tokens": 3699754695.0, "step": 4851 }, { "epoch": 6.6199737552409506, "grad_norm": 0.21781996486086538, "learning_rate": 3.3723123975348016e-06, "loss": 0.1876, "num_tokens": 3700560364.0, "step": 4852 }, { "epoch": 6.621339336199631, "grad_norm": 0.21839122768274152, "learning_rate": 3.3696752092932283e-06, "loss": 0.1892, "num_tokens": 3701296715.0, "step": 4853 }, { "epoch": 6.622704917158312, "grad_norm": 0.22697885916400812, "learning_rate": 3.367040348733307e-06, "loss": 0.1932, "num_tokens": 3702122819.0, "step": 4854 }, { "epoch": 6.624070498116992, "grad_norm": 0.22600928434059214, "learning_rate": 3.36440781665882e-06, "loss": 0.1741, "num_tokens": 3702869826.0, "step": 4855 }, { "epoch": 6.625436079075673, "grad_norm": 0.21862268586887315, "learning_rate": 3.361777613872841e-06, "loss": 0.1824, "num_tokens": 3703667911.0, "step": 4856 }, { "epoch": 6.626801660034353, "grad_norm": 0.21888720102719694, "learning_rate": 3.359149741177724e-06, "loss": 0.1715, "num_tokens": 3704374127.0, "step": 4857 }, { "epoch": 6.628167240993033, "grad_norm": 0.2405388945107942, "learning_rate": 3.356524199375129e-06, "loss": 0.1819, "num_tokens": 3705185823.0, "step": 4858 }, { "epoch": 6.629532821951714, "grad_norm": 0.2148834742173097, "learning_rate": 3.3539009892659873e-06, "loss": 0.187, "num_tokens": 3705958395.0, "step": 4859 }, { "epoch": 6.630898402910394, "grad_norm": 0.2299156825844622, "learning_rate": 3.351280111650529e-06, "loss": 0.183, "num_tokens": 3706688539.0, "step": 4860 }, { "epoch": 6.632263983869075, "grad_norm": 0.22129554969191337, "learning_rate": 3.348661567328273e-06, "loss": 0.1811, "num_tokens": 3707515907.0, "step": 4861 }, { "epoch": 6.633629564827755, "grad_norm": 0.2201366827231735, "learning_rate": 3.34604535709802e-06, "loss": 0.1783, "num_tokens": 3708351392.0, "step": 4862 }, { "epoch": 6.634995145786436, "grad_norm": 0.2104719653928465, "learning_rate": 3.343431481757864e-06, "loss": 0.1751, "num_tokens": 3709123282.0, "step": 4863 }, { "epoch": 6.636360726745116, "grad_norm": 0.22288152649292414, "learning_rate": 3.3408199421051845e-06, "loss": 0.1773, "num_tokens": 3709912489.0, "step": 4864 }, { "epoch": 6.637726307703797, "grad_norm": 0.21881221239134527, "learning_rate": 3.3382107389366527e-06, "loss": 0.1789, "num_tokens": 3710640352.0, "step": 4865 }, { "epoch": 6.6390918886624775, "grad_norm": 0.22941990949603128, "learning_rate": 3.3356038730482176e-06, "loss": 0.1776, "num_tokens": 3711468154.0, "step": 4866 }, { "epoch": 6.640457469621158, "grad_norm": 0.2093290750897343, "learning_rate": 3.332999345235124e-06, "loss": 0.1944, "num_tokens": 3712280015.0, "step": 4867 }, { "epoch": 6.641823050579839, "grad_norm": 0.24270798115826317, "learning_rate": 3.3303971562919013e-06, "loss": 0.1802, "num_tokens": 3713061381.0, "step": 4868 }, { "epoch": 6.643188631538519, "grad_norm": 0.2080679269332129, "learning_rate": 3.327797307012363e-06, "loss": 0.1861, "num_tokens": 3713804399.0, "step": 4869 }, { "epoch": 6.6445542124972, "grad_norm": 0.2251924331140791, "learning_rate": 3.3251997981896145e-06, "loss": 0.1918, "num_tokens": 3714572796.0, "step": 4870 }, { "epoch": 6.64591979345588, "grad_norm": 0.23024541709220717, "learning_rate": 3.3226046306160376e-06, "loss": 0.1821, "num_tokens": 3715305150.0, "step": 4871 }, { "epoch": 6.647285374414561, "grad_norm": 0.22236457696877102, "learning_rate": 3.3200118050833086e-06, "loss": 0.1839, "num_tokens": 3716122203.0, "step": 4872 }, { "epoch": 6.648650955373241, "grad_norm": 0.28999662633463075, "learning_rate": 3.3174213223823878e-06, "loss": 0.1727, "num_tokens": 3716873064.0, "step": 4873 }, { "epoch": 6.650016536331922, "grad_norm": 0.2219927818922372, "learning_rate": 3.3148331833035154e-06, "loss": 0.1705, "num_tokens": 3717562519.0, "step": 4874 }, { "epoch": 6.651382117290602, "grad_norm": 0.21511912392471874, "learning_rate": 3.3122473886362215e-06, "loss": 0.1776, "num_tokens": 3718371766.0, "step": 4875 }, { "epoch": 6.652747698249282, "grad_norm": 0.21992165096206584, "learning_rate": 3.309663939169323e-06, "loss": 0.1743, "num_tokens": 3719163033.0, "step": 4876 }, { "epoch": 6.654113279207963, "grad_norm": 0.22184704531161742, "learning_rate": 3.3070828356909156e-06, "loss": 0.1834, "num_tokens": 3719898484.0, "step": 4877 }, { "epoch": 6.655478860166643, "grad_norm": 0.22663813455177678, "learning_rate": 3.304504078988382e-06, "loss": 0.1843, "num_tokens": 3720597694.0, "step": 4878 }, { "epoch": 6.656844441125324, "grad_norm": 0.23465924342988512, "learning_rate": 3.3019276698483937e-06, "loss": 0.1748, "num_tokens": 3721256361.0, "step": 4879 }, { "epoch": 6.658210022084004, "grad_norm": 0.23632405772929593, "learning_rate": 3.2993536090568955e-06, "loss": 0.1803, "num_tokens": 3722039498.0, "step": 4880 }, { "epoch": 6.659575603042685, "grad_norm": 0.21761004777620957, "learning_rate": 3.2967818973991243e-06, "loss": 0.1786, "num_tokens": 3722817309.0, "step": 4881 }, { "epoch": 6.6609411840013655, "grad_norm": 0.2164026366514478, "learning_rate": 3.294212535659601e-06, "loss": 0.1755, "num_tokens": 3723557858.0, "step": 4882 }, { "epoch": 6.6623067649600465, "grad_norm": 0.22203646231572757, "learning_rate": 3.291645524622122e-06, "loss": 0.1814, "num_tokens": 3724285837.0, "step": 4883 }, { "epoch": 6.663672345918727, "grad_norm": 0.21987425810173392, "learning_rate": 3.289080865069772e-06, "loss": 0.1844, "num_tokens": 3725080782.0, "step": 4884 }, { "epoch": 6.665037926877407, "grad_norm": 0.21433224589759894, "learning_rate": 3.286518557784919e-06, "loss": 0.1767, "num_tokens": 3725826676.0, "step": 4885 }, { "epoch": 6.666403507836088, "grad_norm": 0.23151045301715995, "learning_rate": 3.283958603549211e-06, "loss": 0.1842, "num_tokens": 3726582591.0, "step": 4886 }, { "epoch": 6.667769088794768, "grad_norm": 0.21355575721727604, "learning_rate": 3.28140100314358e-06, "loss": 0.175, "num_tokens": 3727341210.0, "step": 4887 }, { "epoch": 6.669134669753449, "grad_norm": 0.457746435218773, "learning_rate": 3.278845757348241e-06, "loss": 0.1779, "num_tokens": 3728081889.0, "step": 4888 }, { "epoch": 6.670500250712129, "grad_norm": 0.21743916021630652, "learning_rate": 3.2762928669426846e-06, "loss": 0.1736, "num_tokens": 3728818306.0, "step": 4889 }, { "epoch": 6.67186583167081, "grad_norm": 0.21666552159982022, "learning_rate": 3.2737423327056893e-06, "loss": 0.1848, "num_tokens": 3729539453.0, "step": 4890 }, { "epoch": 6.67323141262949, "grad_norm": 0.2343700675532772, "learning_rate": 3.271194155415315e-06, "loss": 0.181, "num_tokens": 3730260660.0, "step": 4891 }, { "epoch": 6.674596993588171, "grad_norm": 0.22901107366790074, "learning_rate": 3.268648335848893e-06, "loss": 0.1837, "num_tokens": 3731053538.0, "step": 4892 }, { "epoch": 6.675962574546851, "grad_norm": 0.21706263409297086, "learning_rate": 3.2661048747830515e-06, "loss": 0.1734, "num_tokens": 3731823775.0, "step": 4893 }, { "epoch": 6.677328155505531, "grad_norm": 0.2169084414396973, "learning_rate": 3.2635637729936843e-06, "loss": 0.1731, "num_tokens": 3732471064.0, "step": 4894 }, { "epoch": 6.678693736464212, "grad_norm": 0.23074015924237617, "learning_rate": 3.2610250312559734e-06, "loss": 0.1798, "num_tokens": 3733266382.0, "step": 4895 }, { "epoch": 6.680059317422892, "grad_norm": 0.2122072872242158, "learning_rate": 3.258488650344379e-06, "loss": 0.1795, "num_tokens": 3734064279.0, "step": 4896 }, { "epoch": 6.681424898381573, "grad_norm": 0.22214411198649975, "learning_rate": 3.2559546310326435e-06, "loss": 0.1821, "num_tokens": 3734864126.0, "step": 4897 }, { "epoch": 6.6827904793402535, "grad_norm": 0.2348672180969931, "learning_rate": 3.2534229740937807e-06, "loss": 0.1828, "num_tokens": 3735651652.0, "step": 4898 }, { "epoch": 6.6841560602989345, "grad_norm": 0.20007910206312118, "learning_rate": 3.2508936803000936e-06, "loss": 0.18, "num_tokens": 3736421919.0, "step": 4899 }, { "epoch": 6.685521641257615, "grad_norm": 0.22028860878491907, "learning_rate": 3.248366750423162e-06, "loss": 0.1845, "num_tokens": 3737214326.0, "step": 4900 }, { "epoch": 6.686887222216296, "grad_norm": 0.24917825939736216, "learning_rate": 3.2458421852338344e-06, "loss": 0.1843, "num_tokens": 3738018102.0, "step": 4901 }, { "epoch": 6.688252803174976, "grad_norm": 0.22776364947627575, "learning_rate": 3.2433199855022574e-06, "loss": 0.1775, "num_tokens": 3738812017.0, "step": 4902 }, { "epoch": 6.689618384133656, "grad_norm": 0.22544626188054653, "learning_rate": 3.240800151997837e-06, "loss": 0.1748, "num_tokens": 3739540111.0, "step": 4903 }, { "epoch": 6.690983965092337, "grad_norm": 0.2232790580724294, "learning_rate": 3.238282685489268e-06, "loss": 0.1791, "num_tokens": 3740274556.0, "step": 4904 }, { "epoch": 6.692349546051017, "grad_norm": 0.2190671293041399, "learning_rate": 3.2357675867445233e-06, "loss": 0.1784, "num_tokens": 3741075465.0, "step": 4905 }, { "epoch": 6.693715127009698, "grad_norm": 0.22229548335275695, "learning_rate": 3.2332548565308438e-06, "loss": 0.1852, "num_tokens": 3741915610.0, "step": 4906 }, { "epoch": 6.695080707968378, "grad_norm": 0.227960190661624, "learning_rate": 3.230744495614759e-06, "loss": 0.1836, "num_tokens": 3742706368.0, "step": 4907 }, { "epoch": 6.696446288927059, "grad_norm": 0.22855204808196178, "learning_rate": 3.228236504762071e-06, "loss": 0.1846, "num_tokens": 3743389555.0, "step": 4908 }, { "epoch": 6.697811869885739, "grad_norm": 0.24568375496079206, "learning_rate": 3.225730884737862e-06, "loss": 0.1811, "num_tokens": 3744121560.0, "step": 4909 }, { "epoch": 6.69917745084442, "grad_norm": 0.22649848434791564, "learning_rate": 3.2232276363064796e-06, "loss": 0.176, "num_tokens": 3744903609.0, "step": 4910 }, { "epoch": 6.7005430318031, "grad_norm": 0.20954705797349746, "learning_rate": 3.2207267602315665e-06, "loss": 0.1836, "num_tokens": 3745673216.0, "step": 4911 }, { "epoch": 6.70190861276178, "grad_norm": 0.2267430939490457, "learning_rate": 3.2182282572760263e-06, "loss": 0.1794, "num_tokens": 3746439228.0, "step": 4912 }, { "epoch": 6.703274193720461, "grad_norm": 0.22025364390416519, "learning_rate": 3.215732128202042e-06, "loss": 0.1786, "num_tokens": 3747191897.0, "step": 4913 }, { "epoch": 6.7046397746791415, "grad_norm": 0.21664441611669805, "learning_rate": 3.2132383737710814e-06, "loss": 0.1799, "num_tokens": 3747957549.0, "step": 4914 }, { "epoch": 6.7060053556378225, "grad_norm": 0.23346116295938132, "learning_rate": 3.2107469947438745e-06, "loss": 0.1748, "num_tokens": 3748693883.0, "step": 4915 }, { "epoch": 6.707370936596503, "grad_norm": 0.2244498489269335, "learning_rate": 3.2082579918804357e-06, "loss": 0.1825, "num_tokens": 3749499329.0, "step": 4916 }, { "epoch": 6.708736517555184, "grad_norm": 0.21934399220757458, "learning_rate": 3.205771365940052e-06, "loss": 0.1863, "num_tokens": 3750307157.0, "step": 4917 }, { "epoch": 6.710102098513864, "grad_norm": 0.21174279938375234, "learning_rate": 3.203287117681288e-06, "loss": 0.1834, "num_tokens": 3751108463.0, "step": 4918 }, { "epoch": 6.711467679472545, "grad_norm": 0.21530145398478084, "learning_rate": 3.200805247861972e-06, "loss": 0.1764, "num_tokens": 3751852071.0, "step": 4919 }, { "epoch": 6.712833260431225, "grad_norm": 0.21039483344873874, "learning_rate": 3.1983257572392255e-06, "loss": 0.1788, "num_tokens": 3752661377.0, "step": 4920 }, { "epoch": 6.714198841389905, "grad_norm": 0.21971352421790485, "learning_rate": 3.195848646569428e-06, "loss": 0.18, "num_tokens": 3753459166.0, "step": 4921 }, { "epoch": 6.715564422348586, "grad_norm": 0.21281636608764773, "learning_rate": 3.1933739166082366e-06, "loss": 0.1851, "num_tokens": 3754313391.0, "step": 4922 }, { "epoch": 6.716930003307266, "grad_norm": 0.23176230163458247, "learning_rate": 3.19090156811059e-06, "loss": 0.1837, "num_tokens": 3755051643.0, "step": 4923 }, { "epoch": 6.718295584265947, "grad_norm": 0.23133142719492866, "learning_rate": 3.1884316018306895e-06, "loss": 0.1812, "num_tokens": 3755840840.0, "step": 4924 }, { "epoch": 6.719661165224627, "grad_norm": 0.2113225979647157, "learning_rate": 3.1859640185220177e-06, "loss": 0.1767, "num_tokens": 3756641170.0, "step": 4925 }, { "epoch": 6.721026746183308, "grad_norm": 0.20982620758816892, "learning_rate": 3.183498818937326e-06, "loss": 0.1782, "num_tokens": 3757405074.0, "step": 4926 }, { "epoch": 6.722392327141988, "grad_norm": 0.23403761838057588, "learning_rate": 3.181036003828643e-06, "loss": 0.1795, "num_tokens": 3758130099.0, "step": 4927 }, { "epoch": 6.723757908100669, "grad_norm": 0.21563686063944335, "learning_rate": 3.1785755739472625e-06, "loss": 0.1801, "num_tokens": 3758891801.0, "step": 4928 }, { "epoch": 6.725123489059349, "grad_norm": 0.2213222465597084, "learning_rate": 3.176117530043758e-06, "loss": 0.1831, "num_tokens": 3759670298.0, "step": 4929 }, { "epoch": 6.7264890700180295, "grad_norm": 0.21874447220926785, "learning_rate": 3.173661872867973e-06, "loss": 0.1773, "num_tokens": 3760409195.0, "step": 4930 }, { "epoch": 6.7278546509767105, "grad_norm": 0.21817156089574227, "learning_rate": 3.1712086031690163e-06, "loss": 0.1873, "num_tokens": 3761175529.0, "step": 4931 }, { "epoch": 6.729220231935391, "grad_norm": 0.22791579036172707, "learning_rate": 3.1687577216952835e-06, "loss": 0.1838, "num_tokens": 3761901582.0, "step": 4932 }, { "epoch": 6.730585812894072, "grad_norm": 0.22281414660799048, "learning_rate": 3.166309229194426e-06, "loss": 0.1819, "num_tokens": 3762676076.0, "step": 4933 }, { "epoch": 6.731951393852752, "grad_norm": 0.21982239791278763, "learning_rate": 3.1638631264133734e-06, "loss": 0.1787, "num_tokens": 3763498810.0, "step": 4934 }, { "epoch": 6.733316974811433, "grad_norm": 0.21683202795304135, "learning_rate": 3.161419414098327e-06, "loss": 0.1785, "num_tokens": 3764265053.0, "step": 4935 }, { "epoch": 6.734682555770113, "grad_norm": 0.21939550899078603, "learning_rate": 3.1589780929947583e-06, "loss": 0.1889, "num_tokens": 3765071239.0, "step": 4936 }, { "epoch": 6.736048136728794, "grad_norm": 0.22203717973198125, "learning_rate": 3.156539163847407e-06, "loss": 0.19, "num_tokens": 3765904500.0, "step": 4937 }, { "epoch": 6.737413717687474, "grad_norm": 0.2187174355768729, "learning_rate": 3.1541026274002844e-06, "loss": 0.1808, "num_tokens": 3766668435.0, "step": 4938 }, { "epoch": 6.738779298646154, "grad_norm": 0.22443028277913843, "learning_rate": 3.151668484396677e-06, "loss": 0.1692, "num_tokens": 3767442022.0, "step": 4939 }, { "epoch": 6.740144879604835, "grad_norm": 0.2199391990095458, "learning_rate": 3.1492367355791264e-06, "loss": 0.1834, "num_tokens": 3768215505.0, "step": 4940 }, { "epoch": 6.741510460563515, "grad_norm": 0.2237636313421162, "learning_rate": 3.146807381689465e-06, "loss": 0.1846, "num_tokens": 3768892235.0, "step": 4941 }, { "epoch": 6.742876041522196, "grad_norm": 0.22178800174679641, "learning_rate": 3.1443804234687774e-06, "loss": 0.1809, "num_tokens": 3769776220.0, "step": 4942 }, { "epoch": 6.744241622480876, "grad_norm": 0.1989079486803811, "learning_rate": 3.1419558616574242e-06, "loss": 0.1788, "num_tokens": 3770488861.0, "step": 4943 }, { "epoch": 6.745607203439557, "grad_norm": 0.24462345780391534, "learning_rate": 3.139533696995036e-06, "loss": 0.1712, "num_tokens": 3771198472.0, "step": 4944 }, { "epoch": 6.746972784398237, "grad_norm": 0.22162654198835027, "learning_rate": 3.1371139302205067e-06, "loss": 0.179, "num_tokens": 3771961797.0, "step": 4945 }, { "epoch": 6.7483383653569184, "grad_norm": 0.23222107348032206, "learning_rate": 3.134696562072006e-06, "loss": 0.1849, "num_tokens": 3772739107.0, "step": 4946 }, { "epoch": 6.749703946315599, "grad_norm": 0.21718864463616844, "learning_rate": 3.1322815932869664e-06, "loss": 0.18, "num_tokens": 3773502271.0, "step": 4947 }, { "epoch": 6.751069527274279, "grad_norm": 0.2243174822803025, "learning_rate": 3.1298690246020902e-06, "loss": 0.1836, "num_tokens": 3774242645.0, "step": 4948 }, { "epoch": 6.75243510823296, "grad_norm": 0.23053248855352537, "learning_rate": 3.1274588567533494e-06, "loss": 0.1915, "num_tokens": 3775086458.0, "step": 4949 }, { "epoch": 6.75380068919164, "grad_norm": 0.2169633570069198, "learning_rate": 3.125051090475981e-06, "loss": 0.1775, "num_tokens": 3775843821.0, "step": 4950 }, { "epoch": 6.755166270150321, "grad_norm": 0.21776206216142294, "learning_rate": 3.1226457265044886e-06, "loss": 0.1803, "num_tokens": 3776655378.0, "step": 4951 }, { "epoch": 6.756531851109001, "grad_norm": 0.21075031440922903, "learning_rate": 3.120242765572646e-06, "loss": 0.1787, "num_tokens": 3777461879.0, "step": 4952 }, { "epoch": 6.757897432067682, "grad_norm": 0.2155785538483473, "learning_rate": 3.1178422084134946e-06, "loss": 0.1831, "num_tokens": 3778223092.0, "step": 4953 }, { "epoch": 6.759263013026362, "grad_norm": 0.22179712040466867, "learning_rate": 3.1154440557593363e-06, "loss": 0.1861, "num_tokens": 3778985790.0, "step": 4954 }, { "epoch": 6.760628593985043, "grad_norm": 0.21236701666463942, "learning_rate": 3.1130483083417443e-06, "loss": 0.176, "num_tokens": 3779709374.0, "step": 4955 }, { "epoch": 6.761994174943723, "grad_norm": 0.2200353388768226, "learning_rate": 3.1106549668915594e-06, "loss": 0.1851, "num_tokens": 3780483118.0, "step": 4956 }, { "epoch": 6.763359755902403, "grad_norm": 0.21974595108029168, "learning_rate": 3.108264032138886e-06, "loss": 0.1767, "num_tokens": 3781283858.0, "step": 4957 }, { "epoch": 6.764725336861084, "grad_norm": 0.2270829474645771, "learning_rate": 3.1058755048130935e-06, "loss": 0.183, "num_tokens": 3782077737.0, "step": 4958 }, { "epoch": 6.766090917819764, "grad_norm": 0.2162055716243948, "learning_rate": 3.1034893856428213e-06, "loss": 0.1737, "num_tokens": 3782772764.0, "step": 4959 }, { "epoch": 6.767456498778445, "grad_norm": 0.22114579383999353, "learning_rate": 3.101105675355967e-06, "loss": 0.1775, "num_tokens": 3783554966.0, "step": 4960 }, { "epoch": 6.7688220797371255, "grad_norm": 0.20968240633467986, "learning_rate": 3.0987243746796997e-06, "loss": 0.1826, "num_tokens": 3784310842.0, "step": 4961 }, { "epoch": 6.7701876606958065, "grad_norm": 0.22926899084843713, "learning_rate": 3.096345484340451e-06, "loss": 0.1781, "num_tokens": 3785048854.0, "step": 4962 }, { "epoch": 6.771553241654487, "grad_norm": 0.2286487878994961, "learning_rate": 3.0939690050639155e-06, "loss": 0.1828, "num_tokens": 3785824720.0, "step": 4963 }, { "epoch": 6.772918822613168, "grad_norm": 0.21039169942129382, "learning_rate": 3.0915949375750565e-06, "loss": 0.1829, "num_tokens": 3786591500.0, "step": 4964 }, { "epoch": 6.774284403571848, "grad_norm": 0.22042908699422537, "learning_rate": 3.089223282598097e-06, "loss": 0.1889, "num_tokens": 3787397898.0, "step": 4965 }, { "epoch": 6.775649984530528, "grad_norm": 0.2226793496481801, "learning_rate": 3.086854040856528e-06, "loss": 0.1794, "num_tokens": 3788116540.0, "step": 4966 }, { "epoch": 6.777015565489209, "grad_norm": 0.2361497052677204, "learning_rate": 3.0844872130731023e-06, "loss": 0.1782, "num_tokens": 3788856192.0, "step": 4967 }, { "epoch": 6.778381146447889, "grad_norm": 0.21476224475294475, "learning_rate": 3.082122799969836e-06, "loss": 0.1833, "num_tokens": 3789642109.0, "step": 4968 }, { "epoch": 6.77974672740657, "grad_norm": 0.2174007059624105, "learning_rate": 3.0797608022680085e-06, "loss": 0.1843, "num_tokens": 3790393148.0, "step": 4969 }, { "epoch": 6.78111230836525, "grad_norm": 0.22851487059765943, "learning_rate": 3.077401220688165e-06, "loss": 0.1775, "num_tokens": 3791179744.0, "step": 4970 }, { "epoch": 6.782477889323931, "grad_norm": 0.21604775845608645, "learning_rate": 3.0750440559501105e-06, "loss": 0.1849, "num_tokens": 3791989393.0, "step": 4971 }, { "epoch": 6.783843470282611, "grad_norm": 0.24046680700351344, "learning_rate": 3.0726893087729138e-06, "loss": 0.1681, "num_tokens": 3792755321.0, "step": 4972 }, { "epoch": 6.785209051241292, "grad_norm": 0.21178439571384558, "learning_rate": 3.070336979874905e-06, "loss": 0.1858, "num_tokens": 3793559913.0, "step": 4973 }, { "epoch": 6.786574632199972, "grad_norm": 0.2235378305521276, "learning_rate": 3.06798706997368e-06, "loss": 0.1771, "num_tokens": 3794307912.0, "step": 4974 }, { "epoch": 6.787940213158652, "grad_norm": 0.2297757994312317, "learning_rate": 3.0656395797860928e-06, "loss": 0.1726, "num_tokens": 3795024514.0, "step": 4975 }, { "epoch": 6.789305794117333, "grad_norm": 0.22563998949438702, "learning_rate": 3.063294510028265e-06, "loss": 0.1763, "num_tokens": 3795813635.0, "step": 4976 }, { "epoch": 6.7906713750760135, "grad_norm": 0.22144764880471945, "learning_rate": 3.0609518614155694e-06, "loss": 0.1801, "num_tokens": 3796530108.0, "step": 4977 }, { "epoch": 6.7920369560346945, "grad_norm": 0.22321842473062675, "learning_rate": 3.0586116346626503e-06, "loss": 0.1692, "num_tokens": 3797198179.0, "step": 4978 }, { "epoch": 6.793402536993375, "grad_norm": 0.23164729921087102, "learning_rate": 3.0562738304834106e-06, "loss": 0.1742, "num_tokens": 3797902352.0, "step": 4979 }, { "epoch": 6.794768117952056, "grad_norm": 0.224752654644207, "learning_rate": 3.0539384495910123e-06, "loss": 0.1862, "num_tokens": 3798656909.0, "step": 4980 }, { "epoch": 6.796133698910736, "grad_norm": 0.22511060133799465, "learning_rate": 3.0516054926978744e-06, "loss": 0.1785, "num_tokens": 3799396961.0, "step": 4981 }, { "epoch": 6.797499279869417, "grad_norm": 0.22783253984652851, "learning_rate": 3.049274960515689e-06, "loss": 0.188, "num_tokens": 3800264390.0, "step": 4982 }, { "epoch": 6.798864860828097, "grad_norm": 0.21786044036412722, "learning_rate": 3.046946853755397e-06, "loss": 0.1777, "num_tokens": 3801063181.0, "step": 4983 }, { "epoch": 6.800230441786777, "grad_norm": 0.2199127163170723, "learning_rate": 3.0446211731271977e-06, "loss": 0.1746, "num_tokens": 3801811720.0, "step": 4984 }, { "epoch": 6.801596022745458, "grad_norm": 0.2140209848323263, "learning_rate": 3.0422979193405643e-06, "loss": 0.1877, "num_tokens": 3802586615.0, "step": 4985 }, { "epoch": 6.802961603704138, "grad_norm": 0.23015295801529426, "learning_rate": 3.039977093104216e-06, "loss": 0.1811, "num_tokens": 3803334416.0, "step": 4986 }, { "epoch": 6.804327184662819, "grad_norm": 0.2140254974280941, "learning_rate": 3.0376586951261367e-06, "loss": 0.1893, "num_tokens": 3804120562.0, "step": 4987 }, { "epoch": 6.805692765621499, "grad_norm": 0.23344861733081992, "learning_rate": 3.0353427261135703e-06, "loss": 0.1838, "num_tokens": 3804907449.0, "step": 4988 }, { "epoch": 6.80705834658018, "grad_norm": 0.22991190181386892, "learning_rate": 3.03302918677302e-06, "loss": 0.176, "num_tokens": 3805605039.0, "step": 4989 }, { "epoch": 6.80842392753886, "grad_norm": 0.22399608139878693, "learning_rate": 3.030718077810242e-06, "loss": 0.1851, "num_tokens": 3806454566.0, "step": 4990 }, { "epoch": 6.809789508497541, "grad_norm": 0.22262154806888382, "learning_rate": 3.028409399930258e-06, "loss": 0.1792, "num_tokens": 3807283743.0, "step": 4991 }, { "epoch": 6.811155089456221, "grad_norm": 0.2129750839184565, "learning_rate": 3.026103153837348e-06, "loss": 0.1888, "num_tokens": 3808003476.0, "step": 4992 }, { "epoch": 6.8125206704149015, "grad_norm": 0.23108406978154383, "learning_rate": 3.023799340235043e-06, "loss": 0.1759, "num_tokens": 3808761311.0, "step": 4993 }, { "epoch": 6.8138862513735825, "grad_norm": 0.2209551317069388, "learning_rate": 3.021497959826142e-06, "loss": 0.1857, "num_tokens": 3809601840.0, "step": 4994 }, { "epoch": 6.815251832332263, "grad_norm": 0.211713712406043, "learning_rate": 3.019199013312693e-06, "loss": 0.1712, "num_tokens": 3810296487.0, "step": 4995 }, { "epoch": 6.816617413290944, "grad_norm": 0.23456398117019026, "learning_rate": 3.016902501396005e-06, "loss": 0.1819, "num_tokens": 3811048864.0, "step": 4996 }, { "epoch": 6.817982994249624, "grad_norm": 0.23285948591489183, "learning_rate": 3.014608424776646e-06, "loss": 0.1746, "num_tokens": 3811750342.0, "step": 4997 }, { "epoch": 6.819348575208305, "grad_norm": 0.22861259603637232, "learning_rate": 3.0123167841544398e-06, "loss": 0.1842, "num_tokens": 3812495918.0, "step": 4998 }, { "epoch": 6.820714156166985, "grad_norm": 0.23569376396036495, "learning_rate": 3.0100275802284643e-06, "loss": 0.1833, "num_tokens": 3813294193.0, "step": 4999 }, { "epoch": 6.822079737125666, "grad_norm": 0.21652172589337032, "learning_rate": 3.0077408136970574e-06, "loss": 0.1826, "num_tokens": 3814097527.0, "step": 5000 }, { "epoch": 6.823445318084346, "grad_norm": 0.21978716988253072, "learning_rate": 3.0054564852578155e-06, "loss": 0.1823, "num_tokens": 3814895511.0, "step": 5001 }, { "epoch": 6.824810899043026, "grad_norm": 0.23187877112705985, "learning_rate": 3.0031745956075807e-06, "loss": 0.1886, "num_tokens": 3815668973.0, "step": 5002 }, { "epoch": 6.826176480001707, "grad_norm": 0.2637105121727647, "learning_rate": 3.000895145442469e-06, "loss": 0.1873, "num_tokens": 3816435258.0, "step": 5003 }, { "epoch": 6.827542060960387, "grad_norm": 0.247809750415901, "learning_rate": 2.998618135457833e-06, "loss": 0.1856, "num_tokens": 3817205954.0, "step": 5004 }, { "epoch": 6.828907641919068, "grad_norm": 0.21028389035975867, "learning_rate": 2.9963435663482956e-06, "loss": 0.1836, "num_tokens": 3817991659.0, "step": 5005 }, { "epoch": 6.830273222877748, "grad_norm": 0.2206729542065838, "learning_rate": 2.994071438807727e-06, "loss": 0.1846, "num_tokens": 3818833465.0, "step": 5006 }, { "epoch": 6.831638803836429, "grad_norm": 0.2217622087322051, "learning_rate": 2.9918017535292548e-06, "loss": 0.1863, "num_tokens": 3819547114.0, "step": 5007 }, { "epoch": 6.833004384795109, "grad_norm": 0.22381997029858292, "learning_rate": 2.9895345112052613e-06, "loss": 0.171, "num_tokens": 3820328263.0, "step": 5008 }, { "epoch": 6.83436996575379, "grad_norm": 0.21878891162465394, "learning_rate": 2.9872697125273847e-06, "loss": 0.1775, "num_tokens": 3821154686.0, "step": 5009 }, { "epoch": 6.8357355467124705, "grad_norm": 0.2337429310284699, "learning_rate": 2.9850073581865184e-06, "loss": 0.1882, "num_tokens": 3821893728.0, "step": 5010 }, { "epoch": 6.837101127671151, "grad_norm": 0.2240471414109968, "learning_rate": 2.9827474488728044e-06, "loss": 0.1801, "num_tokens": 3822652488.0, "step": 5011 }, { "epoch": 6.838466708629832, "grad_norm": 0.21453685021457702, "learning_rate": 2.980489985275649e-06, "loss": 0.1819, "num_tokens": 3823479741.0, "step": 5012 }, { "epoch": 6.839832289588512, "grad_norm": 0.22016860134144678, "learning_rate": 2.9782349680837024e-06, "loss": 0.1765, "num_tokens": 3824186577.0, "step": 5013 }, { "epoch": 6.841197870547193, "grad_norm": 0.2322667347510334, "learning_rate": 2.975982397984874e-06, "loss": 0.1801, "num_tokens": 3824975533.0, "step": 5014 }, { "epoch": 6.842563451505873, "grad_norm": 0.2089813505495261, "learning_rate": 2.973732275666327e-06, "loss": 0.1892, "num_tokens": 3825818876.0, "step": 5015 }, { "epoch": 6.843929032464554, "grad_norm": 0.2248210381635068, "learning_rate": 2.971484601814473e-06, "loss": 0.1808, "num_tokens": 3826577474.0, "step": 5016 }, { "epoch": 6.845294613423234, "grad_norm": 0.21379923023655595, "learning_rate": 2.969239377114982e-06, "loss": 0.1838, "num_tokens": 3827338494.0, "step": 5017 }, { "epoch": 6.846660194381915, "grad_norm": 0.22644137501887937, "learning_rate": 2.966996602252773e-06, "loss": 0.1779, "num_tokens": 3828093501.0, "step": 5018 }, { "epoch": 6.848025775340595, "grad_norm": 0.22247120247893012, "learning_rate": 2.9647562779120233e-06, "loss": 0.1688, "num_tokens": 3828802521.0, "step": 5019 }, { "epoch": 6.849391356299275, "grad_norm": 0.22164292679484485, "learning_rate": 2.9625184047761522e-06, "loss": 0.1852, "num_tokens": 3829577322.0, "step": 5020 }, { "epoch": 6.850756937257956, "grad_norm": 0.22003024353340714, "learning_rate": 2.9602829835278464e-06, "loss": 0.1844, "num_tokens": 3830378979.0, "step": 5021 }, { "epoch": 6.852122518216636, "grad_norm": 0.210584562209536, "learning_rate": 2.9580500148490297e-06, "loss": 0.1814, "num_tokens": 3831153186.0, "step": 5022 }, { "epoch": 6.853488099175317, "grad_norm": 0.22598251301136388, "learning_rate": 2.955819499420887e-06, "loss": 0.1823, "num_tokens": 3831932008.0, "step": 5023 }, { "epoch": 6.854853680133997, "grad_norm": 0.2210176135938933, "learning_rate": 2.9535914379238518e-06, "loss": 0.1831, "num_tokens": 3832698183.0, "step": 5024 }, { "epoch": 6.856219261092678, "grad_norm": 0.23763352093764192, "learning_rate": 2.9513658310376075e-06, "loss": 0.1733, "num_tokens": 3833439499.0, "step": 5025 }, { "epoch": 6.8575848420513585, "grad_norm": 0.3322242820485917, "learning_rate": 2.9491426794410918e-06, "loss": 0.1811, "num_tokens": 3834150219.0, "step": 5026 }, { "epoch": 6.8589504230100395, "grad_norm": 0.23215579782512066, "learning_rate": 2.94692198381249e-06, "loss": 0.1743, "num_tokens": 3834860406.0, "step": 5027 }, { "epoch": 6.86031600396872, "grad_norm": 0.2334142971782333, "learning_rate": 2.944703744829245e-06, "loss": 0.1845, "num_tokens": 3835644412.0, "step": 5028 }, { "epoch": 6.8616815849274, "grad_norm": 0.2205079991863466, "learning_rate": 2.9424879631680397e-06, "loss": 0.1817, "num_tokens": 3836365438.0, "step": 5029 }, { "epoch": 6.863047165886081, "grad_norm": 0.22528064964760064, "learning_rate": 2.9402746395048164e-06, "loss": 0.1817, "num_tokens": 3837117155.0, "step": 5030 }, { "epoch": 6.864412746844761, "grad_norm": 0.22848848707834538, "learning_rate": 2.9380637745147622e-06, "loss": 0.1737, "num_tokens": 3837823810.0, "step": 5031 }, { "epoch": 6.865778327803442, "grad_norm": 0.24648195688209285, "learning_rate": 2.935855368872319e-06, "loss": 0.185, "num_tokens": 3838534500.0, "step": 5032 }, { "epoch": 6.867143908762122, "grad_norm": 0.21608876711878142, "learning_rate": 2.9336494232511757e-06, "loss": 0.1819, "num_tokens": 3839369779.0, "step": 5033 }, { "epoch": 6.868509489720803, "grad_norm": 0.2238790039316482, "learning_rate": 2.931445938324268e-06, "loss": 0.1888, "num_tokens": 3840143420.0, "step": 5034 }, { "epoch": 6.869875070679483, "grad_norm": 0.2223878390643004, "learning_rate": 2.9292449147637848e-06, "loss": 0.1812, "num_tokens": 3840932082.0, "step": 5035 }, { "epoch": 6.871240651638164, "grad_norm": 0.21252344215450844, "learning_rate": 2.9270463532411634e-06, "loss": 0.1841, "num_tokens": 3841694212.0, "step": 5036 }, { "epoch": 6.872606232596844, "grad_norm": 0.2207917860331854, "learning_rate": 2.9248502544270896e-06, "loss": 0.1844, "num_tokens": 3842486151.0, "step": 5037 }, { "epoch": 6.873971813555524, "grad_norm": 0.21987312271577825, "learning_rate": 2.9226566189914995e-06, "loss": 0.1851, "num_tokens": 3843261714.0, "step": 5038 }, { "epoch": 6.875337394514205, "grad_norm": 0.24163495787795777, "learning_rate": 2.9204654476035744e-06, "loss": 0.1831, "num_tokens": 3844006127.0, "step": 5039 }, { "epoch": 6.8767029754728854, "grad_norm": 0.21402363101809663, "learning_rate": 2.918276740931746e-06, "loss": 0.1882, "num_tokens": 3844796036.0, "step": 5040 }, { "epoch": 6.8780685564315664, "grad_norm": 0.22799698163600932, "learning_rate": 2.9160904996436932e-06, "loss": 0.187, "num_tokens": 3845533214.0, "step": 5041 }, { "epoch": 6.879434137390247, "grad_norm": 0.20904555530852242, "learning_rate": 2.9139067244063467e-06, "loss": 0.181, "num_tokens": 3846285358.0, "step": 5042 }, { "epoch": 6.880799718348928, "grad_norm": 0.22308078533520775, "learning_rate": 2.9117254158858787e-06, "loss": 0.1769, "num_tokens": 3847041612.0, "step": 5043 }, { "epoch": 6.882165299307608, "grad_norm": 0.23307612801622327, "learning_rate": 2.9095465747477133e-06, "loss": 0.1799, "num_tokens": 3847812082.0, "step": 5044 }, { "epoch": 6.883530880266289, "grad_norm": 0.22332650811332444, "learning_rate": 2.907370201656521e-06, "loss": 0.1801, "num_tokens": 3848620062.0, "step": 5045 }, { "epoch": 6.884896461224969, "grad_norm": 0.21949665970037205, "learning_rate": 2.905196297276215e-06, "loss": 0.183, "num_tokens": 3849348078.0, "step": 5046 }, { "epoch": 6.886262042183649, "grad_norm": 0.235619881015995, "learning_rate": 2.9030248622699657e-06, "loss": 0.1781, "num_tokens": 3850105462.0, "step": 5047 }, { "epoch": 6.88762762314233, "grad_norm": 0.23421292425732562, "learning_rate": 2.9008558973001788e-06, "loss": 0.1881, "num_tokens": 3850848302.0, "step": 5048 }, { "epoch": 6.88899320410101, "grad_norm": 0.23556033867283066, "learning_rate": 2.8986894030285146e-06, "loss": 0.1715, "num_tokens": 3851549222.0, "step": 5049 }, { "epoch": 6.890358785059691, "grad_norm": 0.23169821967740284, "learning_rate": 2.8965253801158744e-06, "loss": 0.1837, "num_tokens": 3852279486.0, "step": 5050 }, { "epoch": 6.891724366018371, "grad_norm": 0.22379965198154578, "learning_rate": 2.8943638292224115e-06, "loss": 0.1786, "num_tokens": 3853023235.0, "step": 5051 }, { "epoch": 6.893089946977052, "grad_norm": 0.22883185278047585, "learning_rate": 2.8922047510075174e-06, "loss": 0.1822, "num_tokens": 3853804095.0, "step": 5052 }, { "epoch": 6.894455527935732, "grad_norm": 0.233874088290576, "learning_rate": 2.8900481461298347e-06, "loss": 0.1773, "num_tokens": 3854634477.0, "step": 5053 }, { "epoch": 6.895821108894413, "grad_norm": 0.2065778106233143, "learning_rate": 2.8878940152472514e-06, "loss": 0.1855, "num_tokens": 3855461523.0, "step": 5054 }, { "epoch": 6.897186689853093, "grad_norm": 0.21396776329575898, "learning_rate": 2.8857423590168964e-06, "loss": 0.1777, "num_tokens": 3856284734.0, "step": 5055 }, { "epoch": 6.8985522708117735, "grad_norm": 0.21409820278479227, "learning_rate": 2.8835931780951525e-06, "loss": 0.1731, "num_tokens": 3857004904.0, "step": 5056 }, { "epoch": 6.8999178517704545, "grad_norm": 0.2197813123863557, "learning_rate": 2.8814464731376356e-06, "loss": 0.1802, "num_tokens": 3857767665.0, "step": 5057 }, { "epoch": 6.901283432729135, "grad_norm": 0.23309901866232938, "learning_rate": 2.879302244799216e-06, "loss": 0.18, "num_tokens": 3858507107.0, "step": 5058 }, { "epoch": 6.902649013687816, "grad_norm": 0.22967108188677, "learning_rate": 2.877160493734003e-06, "loss": 0.1807, "num_tokens": 3859253769.0, "step": 5059 }, { "epoch": 6.904014594646496, "grad_norm": 0.2221628989719488, "learning_rate": 2.8750212205953565e-06, "loss": 0.1875, "num_tokens": 3860080498.0, "step": 5060 }, { "epoch": 6.905380175605177, "grad_norm": 0.2187223330935441, "learning_rate": 2.872884426035869e-06, "loss": 0.1718, "num_tokens": 3860793528.0, "step": 5061 }, { "epoch": 6.906745756563857, "grad_norm": 0.23117481758273262, "learning_rate": 2.8707501107073887e-06, "loss": 0.1873, "num_tokens": 3861572081.0, "step": 5062 }, { "epoch": 6.908111337522538, "grad_norm": 0.21605820723372357, "learning_rate": 2.8686182752610025e-06, "loss": 0.1789, "num_tokens": 3862373481.0, "step": 5063 }, { "epoch": 6.909476918481218, "grad_norm": 0.2257453827444246, "learning_rate": 2.8664889203470388e-06, "loss": 0.1806, "num_tokens": 3863095340.0, "step": 5064 }, { "epoch": 6.910842499439898, "grad_norm": 0.21660299515465173, "learning_rate": 2.864362046615075e-06, "loss": 0.1753, "num_tokens": 3863842990.0, "step": 5065 }, { "epoch": 6.912208080398579, "grad_norm": 0.2219463476254167, "learning_rate": 2.8622376547139254e-06, "loss": 0.1773, "num_tokens": 3864608076.0, "step": 5066 }, { "epoch": 6.913573661357259, "grad_norm": 0.21831453690105718, "learning_rate": 2.86011574529165e-06, "loss": 0.1793, "num_tokens": 3865371467.0, "step": 5067 }, { "epoch": 6.91493924231594, "grad_norm": 0.2327004508800191, "learning_rate": 2.857996318995553e-06, "loss": 0.1806, "num_tokens": 3866195731.0, "step": 5068 }, { "epoch": 6.91630482327462, "grad_norm": 0.21600267394288353, "learning_rate": 2.855879376472179e-06, "loss": 0.1827, "num_tokens": 3866932539.0, "step": 5069 }, { "epoch": 6.917670404233301, "grad_norm": 0.23135868543785096, "learning_rate": 2.8537649183673155e-06, "loss": 0.1812, "num_tokens": 3867667610.0, "step": 5070 }, { "epoch": 6.919035985191981, "grad_norm": 0.23750019712323014, "learning_rate": 2.8516529453259913e-06, "loss": 0.179, "num_tokens": 3868387990.0, "step": 5071 }, { "epoch": 6.920401566150662, "grad_norm": 0.2171056960038839, "learning_rate": 2.8495434579924808e-06, "loss": 0.1818, "num_tokens": 3869145399.0, "step": 5072 }, { "epoch": 6.9217671471093425, "grad_norm": 0.23651155901790955, "learning_rate": 2.8474364570102907e-06, "loss": 0.1837, "num_tokens": 3870018970.0, "step": 5073 }, { "epoch": 6.923132728068023, "grad_norm": 0.21003115865836064, "learning_rate": 2.845331943022184e-06, "loss": 0.1843, "num_tokens": 3870775909.0, "step": 5074 }, { "epoch": 6.924498309026704, "grad_norm": 0.22441250525690054, "learning_rate": 2.843229916670151e-06, "loss": 0.1833, "num_tokens": 3871577356.0, "step": 5075 }, { "epoch": 6.925863889985384, "grad_norm": 0.22033218520218506, "learning_rate": 2.8411303785954304e-06, "loss": 0.1828, "num_tokens": 3872360216.0, "step": 5076 }, { "epoch": 6.927229470944065, "grad_norm": 0.23618449284724385, "learning_rate": 2.839033329438503e-06, "loss": 0.1689, "num_tokens": 3873143448.0, "step": 5077 }, { "epoch": 6.928595051902745, "grad_norm": 0.20315508477265815, "learning_rate": 2.8369387698390838e-06, "loss": 0.1832, "num_tokens": 3873878392.0, "step": 5078 }, { "epoch": 6.929960632861426, "grad_norm": 0.23369438880919305, "learning_rate": 2.834846700436133e-06, "loss": 0.178, "num_tokens": 3874635322.0, "step": 5079 }, { "epoch": 6.931326213820106, "grad_norm": 0.21570767393180865, "learning_rate": 2.8327571218678517e-06, "loss": 0.1832, "num_tokens": 3875413840.0, "step": 5080 }, { "epoch": 6.932691794778787, "grad_norm": 0.22902416787061503, "learning_rate": 2.8306700347716803e-06, "loss": 0.1881, "num_tokens": 3876167627.0, "step": 5081 }, { "epoch": 6.934057375737467, "grad_norm": 0.23051647524816557, "learning_rate": 2.828585439784295e-06, "loss": 0.1802, "num_tokens": 3876937321.0, "step": 5082 }, { "epoch": 6.935422956696147, "grad_norm": 0.21701848500426432, "learning_rate": 2.8265033375416213e-06, "loss": 0.1879, "num_tokens": 3877754464.0, "step": 5083 }, { "epoch": 6.936788537654828, "grad_norm": 0.21241519731207809, "learning_rate": 2.8244237286788157e-06, "loss": 0.1736, "num_tokens": 3878573801.0, "step": 5084 }, { "epoch": 6.938154118613508, "grad_norm": 0.21308531017887677, "learning_rate": 2.8223466138302723e-06, "loss": 0.1809, "num_tokens": 3879322391.0, "step": 5085 }, { "epoch": 6.939519699572189, "grad_norm": 0.22084227910686982, "learning_rate": 2.8202719936296367e-06, "loss": 0.1789, "num_tokens": 3880078095.0, "step": 5086 }, { "epoch": 6.940885280530869, "grad_norm": 0.23288934559585775, "learning_rate": 2.81819986870978e-06, "loss": 0.1836, "num_tokens": 3880792429.0, "step": 5087 }, { "epoch": 6.94225086148955, "grad_norm": 0.22887212006856153, "learning_rate": 2.816130239702821e-06, "loss": 0.178, "num_tokens": 3881590359.0, "step": 5088 }, { "epoch": 6.9436164424482305, "grad_norm": 0.2058289350309943, "learning_rate": 2.814063107240111e-06, "loss": 0.1796, "num_tokens": 3882303463.0, "step": 5089 }, { "epoch": 6.9449820234069115, "grad_norm": 0.23772961699492431, "learning_rate": 2.811998471952247e-06, "loss": 0.1825, "num_tokens": 3883126732.0, "step": 5090 }, { "epoch": 6.946347604365592, "grad_norm": 0.2197494738323331, "learning_rate": 2.809936334469053e-06, "loss": 0.1753, "num_tokens": 3883865117.0, "step": 5091 }, { "epoch": 6.947713185324272, "grad_norm": 0.2228480783327344, "learning_rate": 2.8078766954196046e-06, "loss": 0.1848, "num_tokens": 3884616440.0, "step": 5092 }, { "epoch": 6.949078766282953, "grad_norm": 0.22908140027406343, "learning_rate": 2.805819555432206e-06, "loss": 0.1832, "num_tokens": 3885409433.0, "step": 5093 }, { "epoch": 6.950444347241633, "grad_norm": 0.21813101056012943, "learning_rate": 2.8037649151343988e-06, "loss": 0.1879, "num_tokens": 3886130430.0, "step": 5094 }, { "epoch": 6.951809928200314, "grad_norm": 0.22186585251689542, "learning_rate": 2.8017127751529704e-06, "loss": 0.1758, "num_tokens": 3886959051.0, "step": 5095 }, { "epoch": 6.953175509158994, "grad_norm": 0.21222491317254105, "learning_rate": 2.799663136113935e-06, "loss": 0.1813, "num_tokens": 3887738326.0, "step": 5096 }, { "epoch": 6.954541090117675, "grad_norm": 0.2183823079326851, "learning_rate": 2.7976159986425494e-06, "loss": 0.1887, "num_tokens": 3888520295.0, "step": 5097 }, { "epoch": 6.955906671076355, "grad_norm": 0.22187448787630398, "learning_rate": 2.7955713633633085e-06, "loss": 0.1835, "num_tokens": 3889234019.0, "step": 5098 }, { "epoch": 6.957272252035036, "grad_norm": 0.2313389338024401, "learning_rate": 2.7935292308999424e-06, "loss": 0.1879, "num_tokens": 3890025719.0, "step": 5099 }, { "epoch": 6.958637832993716, "grad_norm": 0.24158249824822886, "learning_rate": 2.7914896018754146e-06, "loss": 0.1874, "num_tokens": 3890729564.0, "step": 5100 }, { "epoch": 6.960003413952396, "grad_norm": 0.2190861235122049, "learning_rate": 2.789452476911928e-06, "loss": 0.1802, "num_tokens": 3891490208.0, "step": 5101 }, { "epoch": 6.961368994911077, "grad_norm": 0.22345579427645199, "learning_rate": 2.787417856630923e-06, "loss": 0.1842, "num_tokens": 3892264007.0, "step": 5102 }, { "epoch": 6.962734575869757, "grad_norm": 0.2856814162618208, "learning_rate": 2.785385741653073e-06, "loss": 0.1843, "num_tokens": 3893051470.0, "step": 5103 }, { "epoch": 6.964100156828438, "grad_norm": 0.2273026566033108, "learning_rate": 2.7833561325982894e-06, "loss": 0.1835, "num_tokens": 3893775920.0, "step": 5104 }, { "epoch": 6.9654657377871185, "grad_norm": 0.22650029831917734, "learning_rate": 2.7813290300857164e-06, "loss": 0.1859, "num_tokens": 3894556649.0, "step": 5105 }, { "epoch": 6.9668313187457995, "grad_norm": 0.2378196381467257, "learning_rate": 2.779304434733736e-06, "loss": 0.1736, "num_tokens": 3895252351.0, "step": 5106 }, { "epoch": 6.96819689970448, "grad_norm": 0.21797302296221077, "learning_rate": 2.777282347159964e-06, "loss": 0.1817, "num_tokens": 3895975972.0, "step": 5107 }, { "epoch": 6.969562480663161, "grad_norm": 0.23187303730194023, "learning_rate": 2.7752627679812543e-06, "loss": 0.1842, "num_tokens": 3896684622.0, "step": 5108 }, { "epoch": 6.970928061621841, "grad_norm": 0.23632318406478975, "learning_rate": 2.773245697813689e-06, "loss": 0.1739, "num_tokens": 3897492108.0, "step": 5109 }, { "epoch": 6.972293642580521, "grad_norm": 0.20450017326389747, "learning_rate": 2.771231137272591e-06, "loss": 0.1798, "num_tokens": 3898236031.0, "step": 5110 }, { "epoch": 6.973659223539202, "grad_norm": 0.2201234673607879, "learning_rate": 2.769219086972516e-06, "loss": 0.1851, "num_tokens": 3898994040.0, "step": 5111 }, { "epoch": 6.975024804497882, "grad_norm": 0.21738290873865976, "learning_rate": 2.767209547527252e-06, "loss": 0.1782, "num_tokens": 3899722854.0, "step": 5112 }, { "epoch": 6.976390385456563, "grad_norm": 0.21824840069850218, "learning_rate": 2.765202519549825e-06, "loss": 0.1759, "num_tokens": 3900373100.0, "step": 5113 }, { "epoch": 6.977755966415243, "grad_norm": 0.23240688053025696, "learning_rate": 2.763198003652489e-06, "loss": 0.1807, "num_tokens": 3901170777.0, "step": 5114 }, { "epoch": 6.979121547373924, "grad_norm": 0.22020834455255098, "learning_rate": 2.7611960004467364e-06, "loss": 0.1853, "num_tokens": 3901928346.0, "step": 5115 }, { "epoch": 6.980487128332604, "grad_norm": 0.23243546115561042, "learning_rate": 2.7591965105432933e-06, "loss": 0.1801, "num_tokens": 3902697700.0, "step": 5116 }, { "epoch": 6.981852709291285, "grad_norm": 0.2215790161730926, "learning_rate": 2.7571995345521145e-06, "loss": 0.1711, "num_tokens": 3903458551.0, "step": 5117 }, { "epoch": 6.983218290249965, "grad_norm": 0.22535483227470726, "learning_rate": 2.755205073082392e-06, "loss": 0.181, "num_tokens": 3904206584.0, "step": 5118 }, { "epoch": 6.984583871208645, "grad_norm": 0.21830997021070098, "learning_rate": 2.7532131267425503e-06, "loss": 0.1838, "num_tokens": 3904903156.0, "step": 5119 }, { "epoch": 6.985949452167326, "grad_norm": 0.23426569233936553, "learning_rate": 2.7512236961402466e-06, "loss": 0.1814, "num_tokens": 3905701816.0, "step": 5120 }, { "epoch": 6.9873150331260065, "grad_norm": 0.2343640586194323, "learning_rate": 2.749236781882367e-06, "loss": 0.1824, "num_tokens": 3906416443.0, "step": 5121 }, { "epoch": 6.9886806140846875, "grad_norm": 0.2271115252775791, "learning_rate": 2.747252384575038e-06, "loss": 0.1759, "num_tokens": 3907144273.0, "step": 5122 }, { "epoch": 6.990046195043368, "grad_norm": 0.21297233081570185, "learning_rate": 2.745270504823608e-06, "loss": 0.18, "num_tokens": 3907852900.0, "step": 5123 }, { "epoch": 6.991411776002049, "grad_norm": 0.23681075559167175, "learning_rate": 2.7432911432326666e-06, "loss": 0.1802, "num_tokens": 3908541402.0, "step": 5124 }, { "epoch": 6.992777356960729, "grad_norm": 0.22863374880342954, "learning_rate": 2.74131430040603e-06, "loss": 0.1866, "num_tokens": 3909305930.0, "step": 5125 }, { "epoch": 6.99414293791941, "grad_norm": 0.2233878564490833, "learning_rate": 2.7393399769467443e-06, "loss": 0.1864, "num_tokens": 3910040889.0, "step": 5126 }, { "epoch": 6.99550851887809, "grad_norm": 0.232492852899945, "learning_rate": 2.7373681734570966e-06, "loss": 0.1818, "num_tokens": 3910802752.0, "step": 5127 }, { "epoch": 6.99687409983677, "grad_norm": 0.21165456543594816, "learning_rate": 2.735398890538592e-06, "loss": 0.1887, "num_tokens": 3911606255.0, "step": 5128 }, { "epoch": 6.998239680795451, "grad_norm": 0.22581012921291183, "learning_rate": 2.733432128791978e-06, "loss": 0.1802, "num_tokens": 3912293608.0, "step": 5129 }, { "epoch": 6.999605261754131, "grad_norm": 0.222777718904541, "learning_rate": 2.7314678888172263e-06, "loss": 0.1822, "num_tokens": 3913065563.0, "step": 5130 }, { "epoch": 7.0, "grad_norm": 0.42565635941891367, "learning_rate": 2.7295061712135433e-06, "loss": 0.1696, "num_tokens": 3913294952.0, "step": 5131 }, { "epoch": 7.00136558095868, "grad_norm": 0.33466972415482615, "learning_rate": 2.7275469765793615e-06, "loss": 0.1647, "num_tokens": 3914074205.0, "step": 5132 }, { "epoch": 7.002731161917361, "grad_norm": 0.3300775348088642, "learning_rate": 2.7255903055123475e-06, "loss": 0.1666, "num_tokens": 3914793807.0, "step": 5133 }, { "epoch": 7.004096742876041, "grad_norm": 0.28756238696483577, "learning_rate": 2.7236361586093997e-06, "loss": 0.1684, "num_tokens": 3915502865.0, "step": 5134 }, { "epoch": 7.005462323834722, "grad_norm": 0.24159740173362276, "learning_rate": 2.7216845364666367e-06, "loss": 0.1674, "num_tokens": 3916305587.0, "step": 5135 }, { "epoch": 7.006827904793402, "grad_norm": 0.23114485815741323, "learning_rate": 2.719735439679421e-06, "loss": 0.1631, "num_tokens": 3917091738.0, "step": 5136 }, { "epoch": 7.008193485752083, "grad_norm": 0.2517179578706104, "learning_rate": 2.717788868842334e-06, "loss": 0.1588, "num_tokens": 3917824523.0, "step": 5137 }, { "epoch": 7.0095590667107635, "grad_norm": 0.3038822668661947, "learning_rate": 2.715844824549191e-06, "loss": 0.1593, "num_tokens": 3918580998.0, "step": 5138 }, { "epoch": 7.0109246476694445, "grad_norm": 0.2744013130254852, "learning_rate": 2.7139033073930375e-06, "loss": 0.1597, "num_tokens": 3919376389.0, "step": 5139 }, { "epoch": 7.012290228628125, "grad_norm": 0.2665617631309145, "learning_rate": 2.7119643179661432e-06, "loss": 0.1647, "num_tokens": 3920154551.0, "step": 5140 }, { "epoch": 7.013655809586805, "grad_norm": 0.2681011073450579, "learning_rate": 2.71002785686001e-06, "loss": 0.1672, "num_tokens": 3920890306.0, "step": 5141 }, { "epoch": 7.015021390545486, "grad_norm": 0.25752076328335544, "learning_rate": 2.70809392466537e-06, "loss": 0.1636, "num_tokens": 3921627925.0, "step": 5142 }, { "epoch": 7.016386971504166, "grad_norm": 0.26056749835607534, "learning_rate": 2.7061625219721845e-06, "loss": 0.1639, "num_tokens": 3922396555.0, "step": 5143 }, { "epoch": 7.017752552462847, "grad_norm": 0.2609612112444019, "learning_rate": 2.704233649369634e-06, "loss": 0.1621, "num_tokens": 3923089937.0, "step": 5144 }, { "epoch": 7.019118133421527, "grad_norm": 0.2408503095942782, "learning_rate": 2.7023073074461414e-06, "loss": 0.1592, "num_tokens": 3923873116.0, "step": 5145 }, { "epoch": 7.020483714380208, "grad_norm": 0.2302042792915584, "learning_rate": 2.7003834967893444e-06, "loss": 0.1543, "num_tokens": 3924601066.0, "step": 5146 }, { "epoch": 7.021849295338888, "grad_norm": 0.23243747902932005, "learning_rate": 2.6984622179861163e-06, "loss": 0.1543, "num_tokens": 3925279444.0, "step": 5147 }, { "epoch": 7.023214876297569, "grad_norm": 0.237987625031753, "learning_rate": 2.6965434716225597e-06, "loss": 0.1521, "num_tokens": 3925967991.0, "step": 5148 }, { "epoch": 7.024580457256249, "grad_norm": 0.24567662864722933, "learning_rate": 2.6946272582839953e-06, "loss": 0.1754, "num_tokens": 3926752285.0, "step": 5149 }, { "epoch": 7.025946038214929, "grad_norm": 0.251826536142183, "learning_rate": 2.6927135785549798e-06, "loss": 0.1582, "num_tokens": 3927524338.0, "step": 5150 }, { "epoch": 7.02731161917361, "grad_norm": 0.23429222144023443, "learning_rate": 2.690802433019293e-06, "loss": 0.1699, "num_tokens": 3928286419.0, "step": 5151 }, { "epoch": 7.02867720013229, "grad_norm": 0.23202443241134343, "learning_rate": 2.6888938222599447e-06, "loss": 0.1603, "num_tokens": 3929035407.0, "step": 5152 }, { "epoch": 7.030042781090971, "grad_norm": 0.23188349587641513, "learning_rate": 2.6869877468591647e-06, "loss": 0.1536, "num_tokens": 3929756090.0, "step": 5153 }, { "epoch": 7.0314083620496515, "grad_norm": 0.22652472954190672, "learning_rate": 2.6850842073984196e-06, "loss": 0.1637, "num_tokens": 3930505896.0, "step": 5154 }, { "epoch": 7.0327739430083325, "grad_norm": 0.2253335940555467, "learning_rate": 2.683183204458395e-06, "loss": 0.1633, "num_tokens": 3931276559.0, "step": 5155 }, { "epoch": 7.034139523967013, "grad_norm": 0.229278951988845, "learning_rate": 2.681284738619001e-06, "loss": 0.1541, "num_tokens": 3931979868.0, "step": 5156 }, { "epoch": 7.035505104925694, "grad_norm": 0.22181494317720954, "learning_rate": 2.6793888104593836e-06, "loss": 0.1668, "num_tokens": 3932781351.0, "step": 5157 }, { "epoch": 7.036870685884374, "grad_norm": 0.22778508975804843, "learning_rate": 2.677495420557904e-06, "loss": 0.1577, "num_tokens": 3933566298.0, "step": 5158 }, { "epoch": 7.038236266843054, "grad_norm": 0.22143918963203504, "learning_rate": 2.6756045694921535e-06, "loss": 0.1603, "num_tokens": 3934355893.0, "step": 5159 }, { "epoch": 7.039601847801735, "grad_norm": 0.23947558008996012, "learning_rate": 2.673716257838951e-06, "loss": 0.1621, "num_tokens": 3935154702.0, "step": 5160 }, { "epoch": 7.040967428760415, "grad_norm": 0.2194913163946518, "learning_rate": 2.6718304861743383e-06, "loss": 0.1658, "num_tokens": 3935899687.0, "step": 5161 }, { "epoch": 7.042333009719096, "grad_norm": 0.21597011108233383, "learning_rate": 2.6699472550735805e-06, "loss": 0.1585, "num_tokens": 3936679875.0, "step": 5162 }, { "epoch": 7.043698590677776, "grad_norm": 0.2206469828122371, "learning_rate": 2.6680665651111715e-06, "loss": 0.155, "num_tokens": 3937506676.0, "step": 5163 }, { "epoch": 7.045064171636457, "grad_norm": 0.21895978992943885, "learning_rate": 2.6661884168608293e-06, "loss": 0.1684, "num_tokens": 3938308883.0, "step": 5164 }, { "epoch": 7.046429752595137, "grad_norm": 0.2248559524545943, "learning_rate": 2.6643128108954914e-06, "loss": 0.1607, "num_tokens": 3939097617.0, "step": 5165 }, { "epoch": 7.047795333553818, "grad_norm": 0.2109010106552991, "learning_rate": 2.662439747787331e-06, "loss": 0.1621, "num_tokens": 3939971176.0, "step": 5166 }, { "epoch": 7.049160914512498, "grad_norm": 0.23948638223562596, "learning_rate": 2.660569228107731e-06, "loss": 0.1604, "num_tokens": 3940689815.0, "step": 5167 }, { "epoch": 7.050526495471178, "grad_norm": 0.2245749301533297, "learning_rate": 2.658701252427311e-06, "loss": 0.1556, "num_tokens": 3941426599.0, "step": 5168 }, { "epoch": 7.051892076429859, "grad_norm": 0.218855980720438, "learning_rate": 2.6568358213159085e-06, "loss": 0.1634, "num_tokens": 3942240856.0, "step": 5169 }, { "epoch": 7.0532576573885395, "grad_norm": 0.22027254735141802, "learning_rate": 2.6549729353425856e-06, "loss": 0.1593, "num_tokens": 3942975016.0, "step": 5170 }, { "epoch": 7.0546232383472205, "grad_norm": 0.23659433847062597, "learning_rate": 2.6531125950756258e-06, "loss": 0.1575, "num_tokens": 3943661683.0, "step": 5171 }, { "epoch": 7.055988819305901, "grad_norm": 0.202722343273409, "learning_rate": 2.6512548010825406e-06, "loss": 0.1527, "num_tokens": 3944516478.0, "step": 5172 }, { "epoch": 7.057354400264582, "grad_norm": 0.2110697820299748, "learning_rate": 2.649399553930064e-06, "loss": 0.1622, "num_tokens": 3945337931.0, "step": 5173 }, { "epoch": 7.058719981223262, "grad_norm": 0.2267294770874837, "learning_rate": 2.647546854184147e-06, "loss": 0.1596, "num_tokens": 3946072265.0, "step": 5174 }, { "epoch": 7.060085562181943, "grad_norm": 0.21976384560743312, "learning_rate": 2.6456967024099737e-06, "loss": 0.1549, "num_tokens": 3946816635.0, "step": 5175 }, { "epoch": 7.061451143140623, "grad_norm": 0.22581418527163802, "learning_rate": 2.643849099171941e-06, "loss": 0.1638, "num_tokens": 3947525486.0, "step": 5176 }, { "epoch": 7.062816724099303, "grad_norm": 0.2239818094998691, "learning_rate": 2.642004045033674e-06, "loss": 0.1546, "num_tokens": 3948230953.0, "step": 5177 }, { "epoch": 7.064182305057984, "grad_norm": 0.2263616912241683, "learning_rate": 2.6401615405580193e-06, "loss": 0.1713, "num_tokens": 3949094737.0, "step": 5178 }, { "epoch": 7.065547886016664, "grad_norm": 0.2261815712619795, "learning_rate": 2.6383215863070443e-06, "loss": 0.159, "num_tokens": 3949846816.0, "step": 5179 }, { "epoch": 7.066913466975345, "grad_norm": 0.21566833862480214, "learning_rate": 2.636484182842039e-06, "loss": 0.1587, "num_tokens": 3950649749.0, "step": 5180 }, { "epoch": 7.068279047934025, "grad_norm": 0.21997235710927188, "learning_rate": 2.634649330723517e-06, "loss": 0.1652, "num_tokens": 3951497077.0, "step": 5181 }, { "epoch": 7.069644628892706, "grad_norm": 0.22798623738741908, "learning_rate": 2.6328170305112134e-06, "loss": 0.1679, "num_tokens": 3952241781.0, "step": 5182 }, { "epoch": 7.071010209851386, "grad_norm": 0.2349440985953012, "learning_rate": 2.630987282764078e-06, "loss": 0.1511, "num_tokens": 3952888798.0, "step": 5183 }, { "epoch": 7.072375790810067, "grad_norm": 0.21297618351054204, "learning_rate": 2.6291600880402957e-06, "loss": 0.1658, "num_tokens": 3953662463.0, "step": 5184 }, { "epoch": 7.073741371768747, "grad_norm": 0.24043425374124985, "learning_rate": 2.6273354468972576e-06, "loss": 0.1589, "num_tokens": 3954346863.0, "step": 5185 }, { "epoch": 7.0751069527274275, "grad_norm": 0.2205573084623806, "learning_rate": 2.6255133598915864e-06, "loss": 0.1633, "num_tokens": 3955074556.0, "step": 5186 }, { "epoch": 7.0764725336861085, "grad_norm": 0.23463607360695565, "learning_rate": 2.6236938275791212e-06, "loss": 0.1606, "num_tokens": 3955813277.0, "step": 5187 }, { "epoch": 7.077838114644789, "grad_norm": 0.2164812569168557, "learning_rate": 2.621876850514922e-06, "loss": 0.1619, "num_tokens": 3956612099.0, "step": 5188 }, { "epoch": 7.07920369560347, "grad_norm": 0.2304694743916733, "learning_rate": 2.6200624292532693e-06, "loss": 0.1573, "num_tokens": 3957356102.0, "step": 5189 }, { "epoch": 7.08056927656215, "grad_norm": 0.210083015543732, "learning_rate": 2.6182505643476653e-06, "loss": 0.1547, "num_tokens": 3958132376.0, "step": 5190 }, { "epoch": 7.081934857520831, "grad_norm": 0.21265406866850053, "learning_rate": 2.616441256350831e-06, "loss": 0.1635, "num_tokens": 3959046671.0, "step": 5191 }, { "epoch": 7.083300438479511, "grad_norm": 0.22639313136582226, "learning_rate": 2.6146345058147094e-06, "loss": 0.1613, "num_tokens": 3959795343.0, "step": 5192 }, { "epoch": 7.084666019438192, "grad_norm": 0.2173381626551724, "learning_rate": 2.612830313290462e-06, "loss": 0.1649, "num_tokens": 3960626269.0, "step": 5193 }, { "epoch": 7.086031600396872, "grad_norm": 0.2251812967458691, "learning_rate": 2.6110286793284663e-06, "loss": 0.1586, "num_tokens": 3961374886.0, "step": 5194 }, { "epoch": 7.087397181355552, "grad_norm": 0.22030722938437478, "learning_rate": 2.609229604478326e-06, "loss": 0.1576, "num_tokens": 3962085468.0, "step": 5195 }, { "epoch": 7.088762762314233, "grad_norm": 0.22405748329246722, "learning_rate": 2.6074330892888606e-06, "loss": 0.1726, "num_tokens": 3962855172.0, "step": 5196 }, { "epoch": 7.090128343272913, "grad_norm": 0.22958829850118415, "learning_rate": 2.6056391343081078e-06, "loss": 0.1623, "num_tokens": 3963592741.0, "step": 5197 }, { "epoch": 7.091493924231594, "grad_norm": 0.23287554962957313, "learning_rate": 2.6038477400833255e-06, "loss": 0.1611, "num_tokens": 3964312617.0, "step": 5198 }, { "epoch": 7.092859505190274, "grad_norm": 0.2194342354513781, "learning_rate": 2.6020589071609923e-06, "loss": 0.1655, "num_tokens": 3965119639.0, "step": 5199 }, { "epoch": 7.094225086148955, "grad_norm": 0.22238544543804212, "learning_rate": 2.600272636086802e-06, "loss": 0.1583, "num_tokens": 3965902789.0, "step": 5200 }, { "epoch": 7.095590667107635, "grad_norm": 0.2318905145919968, "learning_rate": 2.5984889274056707e-06, "loss": 0.1627, "num_tokens": 3966652780.0, "step": 5201 }, { "epoch": 7.096956248066316, "grad_norm": 0.22516352471965273, "learning_rate": 2.5967077816617274e-06, "loss": 0.1623, "num_tokens": 3967399168.0, "step": 5202 }, { "epoch": 7.0983218290249965, "grad_norm": 0.23554199592559583, "learning_rate": 2.594929199398324e-06, "loss": 0.1635, "num_tokens": 3968127319.0, "step": 5203 }, { "epoch": 7.099687409983677, "grad_norm": 0.23809458286736648, "learning_rate": 2.59315318115803e-06, "loss": 0.1645, "num_tokens": 3968875999.0, "step": 5204 }, { "epoch": 7.101052990942358, "grad_norm": 0.22225232994996177, "learning_rate": 2.591379727482632e-06, "loss": 0.1621, "num_tokens": 3969653185.0, "step": 5205 }, { "epoch": 7.102418571901038, "grad_norm": 0.23196173815933868, "learning_rate": 2.5896088389131314e-06, "loss": 0.1502, "num_tokens": 3970346946.0, "step": 5206 }, { "epoch": 7.103784152859719, "grad_norm": 0.22221085435665333, "learning_rate": 2.58784051598975e-06, "loss": 0.1577, "num_tokens": 3971090741.0, "step": 5207 }, { "epoch": 7.105149733818399, "grad_norm": 0.2248256876182048, "learning_rate": 2.5860747592519293e-06, "loss": 0.1595, "num_tokens": 3971791525.0, "step": 5208 }, { "epoch": 7.10651531477708, "grad_norm": 0.22753378080095796, "learning_rate": 2.5843115692383224e-06, "loss": 0.1654, "num_tokens": 3972587498.0, "step": 5209 }, { "epoch": 7.10788089573576, "grad_norm": 0.2278142852027865, "learning_rate": 2.582550946486805e-06, "loss": 0.1681, "num_tokens": 3973395546.0, "step": 5210 }, { "epoch": 7.109246476694441, "grad_norm": 0.2250530064224165, "learning_rate": 2.580792891534463e-06, "loss": 0.1587, "num_tokens": 3974158046.0, "step": 5211 }, { "epoch": 7.110612057653121, "grad_norm": 0.21968387253246166, "learning_rate": 2.579037404917606e-06, "loss": 0.1622, "num_tokens": 3974918711.0, "step": 5212 }, { "epoch": 7.111977638611801, "grad_norm": 0.23101531042835302, "learning_rate": 2.5772844871717546e-06, "loss": 0.1582, "num_tokens": 3975619096.0, "step": 5213 }, { "epoch": 7.113343219570482, "grad_norm": 0.20624916335555435, "learning_rate": 2.5755341388316514e-06, "loss": 0.1586, "num_tokens": 3976411812.0, "step": 5214 }, { "epoch": 7.114708800529162, "grad_norm": 0.23062449719568262, "learning_rate": 2.573786360431247e-06, "loss": 0.1614, "num_tokens": 3977190015.0, "step": 5215 }, { "epoch": 7.116074381487843, "grad_norm": 0.21406332803300715, "learning_rate": 2.572041152503718e-06, "loss": 0.159, "num_tokens": 3978011404.0, "step": 5216 }, { "epoch": 7.117439962446523, "grad_norm": 0.24138511775439844, "learning_rate": 2.5702985155814504e-06, "loss": 0.1655, "num_tokens": 3978749431.0, "step": 5217 }, { "epoch": 7.118805543405204, "grad_norm": 0.24262292457555437, "learning_rate": 2.5685584501960438e-06, "loss": 0.1588, "num_tokens": 3979440228.0, "step": 5218 }, { "epoch": 7.1201711243638846, "grad_norm": 0.22766804576432892, "learning_rate": 2.5668209568783214e-06, "loss": 0.1585, "num_tokens": 3980106663.0, "step": 5219 }, { "epoch": 7.1215367053225656, "grad_norm": 0.22805898595558508, "learning_rate": 2.5650860361583147e-06, "loss": 0.1581, "num_tokens": 3980890384.0, "step": 5220 }, { "epoch": 7.122902286281246, "grad_norm": 0.238310283347779, "learning_rate": 2.563353688565272e-06, "loss": 0.1628, "num_tokens": 3981587170.0, "step": 5221 }, { "epoch": 7.124267867239926, "grad_norm": 0.23953254611320593, "learning_rate": 2.5616239146276593e-06, "loss": 0.1686, "num_tokens": 3982355916.0, "step": 5222 }, { "epoch": 7.125633448198607, "grad_norm": 0.2332702781336164, "learning_rate": 2.5598967148731567e-06, "loss": 0.1583, "num_tokens": 3983142109.0, "step": 5223 }, { "epoch": 7.126999029157287, "grad_norm": 0.2104882634467482, "learning_rate": 2.558172089828654e-06, "loss": 0.1614, "num_tokens": 3983957122.0, "step": 5224 }, { "epoch": 7.128364610115968, "grad_norm": 0.22478609093948457, "learning_rate": 2.5564500400202645e-06, "loss": 0.1536, "num_tokens": 3984734339.0, "step": 5225 }, { "epoch": 7.129730191074648, "grad_norm": 0.22089115860234507, "learning_rate": 2.5547305659733083e-06, "loss": 0.1565, "num_tokens": 3985481119.0, "step": 5226 }, { "epoch": 7.131095772033329, "grad_norm": 0.2533581682058892, "learning_rate": 2.553013668212322e-06, "loss": 0.166, "num_tokens": 3986131475.0, "step": 5227 }, { "epoch": 7.132461352992009, "grad_norm": 0.22128082885473202, "learning_rate": 2.5512993472610593e-06, "loss": 0.159, "num_tokens": 3986911871.0, "step": 5228 }, { "epoch": 7.13382693395069, "grad_norm": 0.2294642599903656, "learning_rate": 2.5495876036424823e-06, "loss": 0.1583, "num_tokens": 3987664281.0, "step": 5229 }, { "epoch": 7.13519251490937, "grad_norm": 0.2323530715266377, "learning_rate": 2.5478784378787715e-06, "loss": 0.1535, "num_tokens": 3988414397.0, "step": 5230 }, { "epoch": 7.13655809586805, "grad_norm": 0.21794813650051617, "learning_rate": 2.5461718504913173e-06, "loss": 0.1617, "num_tokens": 3989206827.0, "step": 5231 }, { "epoch": 7.137923676826731, "grad_norm": 0.22479258563496338, "learning_rate": 2.5444678420007305e-06, "loss": 0.161, "num_tokens": 3989997720.0, "step": 5232 }, { "epoch": 7.1392892577854115, "grad_norm": 0.23352209764530527, "learning_rate": 2.5427664129268253e-06, "loss": 0.1525, "num_tokens": 3990729886.0, "step": 5233 }, { "epoch": 7.1406548387440925, "grad_norm": 0.21981530163019, "learning_rate": 2.5410675637886362e-06, "loss": 0.1623, "num_tokens": 3991510829.0, "step": 5234 }, { "epoch": 7.142020419702773, "grad_norm": 0.22936975813264698, "learning_rate": 2.539371295104409e-06, "loss": 0.1533, "num_tokens": 3992281046.0, "step": 5235 }, { "epoch": 7.143386000661454, "grad_norm": 0.2681033702598507, "learning_rate": 2.537677607391598e-06, "loss": 0.1624, "num_tokens": 3993014490.0, "step": 5236 }, { "epoch": 7.144751581620134, "grad_norm": 0.22656889956042275, "learning_rate": 2.5359865011668803e-06, "loss": 0.1667, "num_tokens": 3993826218.0, "step": 5237 }, { "epoch": 7.146117162578815, "grad_norm": 0.2641051711380781, "learning_rate": 2.5342979769461353e-06, "loss": 0.1611, "num_tokens": 3994579875.0, "step": 5238 }, { "epoch": 7.147482743537495, "grad_norm": 0.2537911145543262, "learning_rate": 2.5326120352444573e-06, "loss": 0.1594, "num_tokens": 3995277028.0, "step": 5239 }, { "epoch": 7.148848324496175, "grad_norm": 0.2279765042022696, "learning_rate": 2.5309286765761586e-06, "loss": 0.164, "num_tokens": 3996081399.0, "step": 5240 }, { "epoch": 7.150213905454856, "grad_norm": 0.23600250734682454, "learning_rate": 2.529247901454755e-06, "loss": 0.1563, "num_tokens": 3996773342.0, "step": 5241 }, { "epoch": 7.151579486413536, "grad_norm": 0.22915115782623457, "learning_rate": 2.527569710392979e-06, "loss": 0.1598, "num_tokens": 3997472013.0, "step": 5242 }, { "epoch": 7.152945067372217, "grad_norm": 0.22307889358969749, "learning_rate": 2.525894103902775e-06, "loss": 0.1598, "num_tokens": 3998229257.0, "step": 5243 }, { "epoch": 7.154310648330897, "grad_norm": 0.22316350163590268, "learning_rate": 2.524221082495298e-06, "loss": 0.1625, "num_tokens": 3999018092.0, "step": 5244 }, { "epoch": 7.155676229289578, "grad_norm": 0.22618392031497345, "learning_rate": 2.5225506466809125e-06, "loss": 0.1629, "num_tokens": 3999806287.0, "step": 5245 }, { "epoch": 7.157041810248258, "grad_norm": 0.216465123784099, "learning_rate": 2.5208827969691994e-06, "loss": 0.1634, "num_tokens": 4000646441.0, "step": 5246 }, { "epoch": 7.158407391206939, "grad_norm": 0.23222283325415982, "learning_rate": 2.519217533868944e-06, "loss": 0.1648, "num_tokens": 4001419788.0, "step": 5247 }, { "epoch": 7.159772972165619, "grad_norm": 0.2260348493081412, "learning_rate": 2.5175548578881482e-06, "loss": 0.165, "num_tokens": 4002187722.0, "step": 5248 }, { "epoch": 7.1611385531242995, "grad_norm": 0.22481753552392533, "learning_rate": 2.5158947695340237e-06, "loss": 0.1576, "num_tokens": 4002967758.0, "step": 5249 }, { "epoch": 7.1625041340829805, "grad_norm": 0.24320126832934552, "learning_rate": 2.5142372693129876e-06, "loss": 0.1681, "num_tokens": 4003709428.0, "step": 5250 }, { "epoch": 7.163869715041661, "grad_norm": 0.2273117610380449, "learning_rate": 2.5125823577306736e-06, "loss": 0.1621, "num_tokens": 4004456712.0, "step": 5251 }, { "epoch": 7.165235296000342, "grad_norm": 0.2192133133790806, "learning_rate": 2.5109300352919237e-06, "loss": 0.158, "num_tokens": 4005232657.0, "step": 5252 }, { "epoch": 7.166600876959022, "grad_norm": 0.21636681745064829, "learning_rate": 2.5092803025007906e-06, "loss": 0.1585, "num_tokens": 4005948120.0, "step": 5253 }, { "epoch": 7.167966457917703, "grad_norm": 0.21403086624264714, "learning_rate": 2.507633159860533e-06, "loss": 0.1589, "num_tokens": 4006749220.0, "step": 5254 }, { "epoch": 7.169332038876383, "grad_norm": 0.22371140608785042, "learning_rate": 2.5059886078736278e-06, "loss": 0.163, "num_tokens": 4007549096.0, "step": 5255 }, { "epoch": 7.170697619835064, "grad_norm": 0.21789759591846056, "learning_rate": 2.504346647041752e-06, "loss": 0.1612, "num_tokens": 4008362989.0, "step": 5256 }, { "epoch": 7.172063200793744, "grad_norm": 0.2317596232219424, "learning_rate": 2.502707277865799e-06, "loss": 0.168, "num_tokens": 4009074208.0, "step": 5257 }, { "epoch": 7.173428781752424, "grad_norm": 0.2238834990276597, "learning_rate": 2.5010705008458715e-06, "loss": 0.165, "num_tokens": 4009863877.0, "step": 5258 }, { "epoch": 7.174794362711105, "grad_norm": 0.21399006926804387, "learning_rate": 2.499436316481275e-06, "loss": 0.1617, "num_tokens": 4010680283.0, "step": 5259 }, { "epoch": 7.176159943669785, "grad_norm": 0.2324779109832412, "learning_rate": 2.497804725270529e-06, "loss": 0.1669, "num_tokens": 4011489335.0, "step": 5260 }, { "epoch": 7.177525524628466, "grad_norm": 0.2184619909264437, "learning_rate": 2.4961757277113633e-06, "loss": 0.1694, "num_tokens": 4012313455.0, "step": 5261 }, { "epoch": 7.178891105587146, "grad_norm": 0.22027062118564952, "learning_rate": 2.494549324300715e-06, "loss": 0.1599, "num_tokens": 4013077802.0, "step": 5262 }, { "epoch": 7.180256686545827, "grad_norm": 0.22864368584077188, "learning_rate": 2.4929255155347258e-06, "loss": 0.1653, "num_tokens": 4013828045.0, "step": 5263 }, { "epoch": 7.181622267504507, "grad_norm": 0.3161269824476037, "learning_rate": 2.4913043019087546e-06, "loss": 0.1527, "num_tokens": 4014546381.0, "step": 5264 }, { "epoch": 7.182987848463188, "grad_norm": 0.23327569593137984, "learning_rate": 2.48968568391736e-06, "loss": 0.1625, "num_tokens": 4015265409.0, "step": 5265 }, { "epoch": 7.1843534294218685, "grad_norm": 0.2292400815405088, "learning_rate": 2.488069662054311e-06, "loss": 0.1571, "num_tokens": 4016022785.0, "step": 5266 }, { "epoch": 7.185719010380549, "grad_norm": 0.2265083987802442, "learning_rate": 2.48645623681259e-06, "loss": 0.1688, "num_tokens": 4016812196.0, "step": 5267 }, { "epoch": 7.18708459133923, "grad_norm": 0.24716669188490173, "learning_rate": 2.4848454086843796e-06, "loss": 0.1671, "num_tokens": 4017542592.0, "step": 5268 }, { "epoch": 7.18845017229791, "grad_norm": 0.24597092514254212, "learning_rate": 2.483237178161075e-06, "loss": 0.1634, "num_tokens": 4018279113.0, "step": 5269 }, { "epoch": 7.189815753256591, "grad_norm": 0.22723541125809757, "learning_rate": 2.4816315457332773e-06, "loss": 0.1605, "num_tokens": 4019025885.0, "step": 5270 }, { "epoch": 7.191181334215271, "grad_norm": 0.22242185057524128, "learning_rate": 2.4800285118907976e-06, "loss": 0.1588, "num_tokens": 4019803771.0, "step": 5271 }, { "epoch": 7.192546915173952, "grad_norm": 0.24690624153151042, "learning_rate": 2.4784280771226486e-06, "loss": 0.1656, "num_tokens": 4020500025.0, "step": 5272 }, { "epoch": 7.193912496132632, "grad_norm": 0.22719994551371867, "learning_rate": 2.476830241917056e-06, "loss": 0.1608, "num_tokens": 4021267421.0, "step": 5273 }, { "epoch": 7.195278077091313, "grad_norm": 0.2371264535056204, "learning_rate": 2.4752350067614485e-06, "loss": 0.1674, "num_tokens": 4022055079.0, "step": 5274 }, { "epoch": 7.196643658049993, "grad_norm": 0.22752387890536868, "learning_rate": 2.4736423721424653e-06, "loss": 0.1638, "num_tokens": 4022786569.0, "step": 5275 }, { "epoch": 7.198009239008673, "grad_norm": 0.23308728200666176, "learning_rate": 2.47205233854595e-06, "loss": 0.1721, "num_tokens": 4023563415.0, "step": 5276 }, { "epoch": 7.199374819967354, "grad_norm": 0.23296033400524366, "learning_rate": 2.4704649064569504e-06, "loss": 0.1703, "num_tokens": 4024330662.0, "step": 5277 }, { "epoch": 7.200740400926034, "grad_norm": 0.2368478284650606, "learning_rate": 2.468880076359726e-06, "loss": 0.1585, "num_tokens": 4025054989.0, "step": 5278 }, { "epoch": 7.202105981884715, "grad_norm": 0.2276077872446591, "learning_rate": 2.467297848737739e-06, "loss": 0.1614, "num_tokens": 4025859833.0, "step": 5279 }, { "epoch": 7.203471562843395, "grad_norm": 0.22553703367971492, "learning_rate": 2.4657182240736576e-06, "loss": 0.1559, "num_tokens": 4026618808.0, "step": 5280 }, { "epoch": 7.204837143802076, "grad_norm": 0.36551394694679606, "learning_rate": 2.4641412028493593e-06, "loss": 0.1591, "num_tokens": 4027365038.0, "step": 5281 }, { "epoch": 7.2062027247607565, "grad_norm": 0.22689274444197852, "learning_rate": 2.4625667855459225e-06, "loss": 0.1639, "num_tokens": 4028098243.0, "step": 5282 }, { "epoch": 7.2075683057194375, "grad_norm": 0.22890450017408542, "learning_rate": 2.4609949726436347e-06, "loss": 0.1663, "num_tokens": 4028896408.0, "step": 5283 }, { "epoch": 7.208933886678118, "grad_norm": 0.2215799617041883, "learning_rate": 2.4594257646219873e-06, "loss": 0.1637, "num_tokens": 4029683429.0, "step": 5284 }, { "epoch": 7.210299467636798, "grad_norm": 0.2390365603063388, "learning_rate": 2.457859161959679e-06, "loss": 0.1658, "num_tokens": 4030399212.0, "step": 5285 }, { "epoch": 7.211665048595479, "grad_norm": 0.22437021126708473, "learning_rate": 2.456295165134612e-06, "loss": 0.1634, "num_tokens": 4031200427.0, "step": 5286 }, { "epoch": 7.213030629554159, "grad_norm": 0.2209462206004101, "learning_rate": 2.454733774623893e-06, "loss": 0.1622, "num_tokens": 4031969134.0, "step": 5287 }, { "epoch": 7.21439621051284, "grad_norm": 0.22814355205257997, "learning_rate": 2.4531749909038364e-06, "loss": 0.1537, "num_tokens": 4032688357.0, "step": 5288 }, { "epoch": 7.21576179147152, "grad_norm": 0.2631458618420531, "learning_rate": 2.451618814449956e-06, "loss": 0.1593, "num_tokens": 4033427241.0, "step": 5289 }, { "epoch": 7.217127372430201, "grad_norm": 0.2615774986671202, "learning_rate": 2.450065245736979e-06, "loss": 0.1632, "num_tokens": 4034162179.0, "step": 5290 }, { "epoch": 7.218492953388881, "grad_norm": 0.23708831829956115, "learning_rate": 2.4485142852388277e-06, "loss": 0.1618, "num_tokens": 4034930423.0, "step": 5291 }, { "epoch": 7.219858534347562, "grad_norm": 0.22705822431658143, "learning_rate": 2.446965933428635e-06, "loss": 0.1637, "num_tokens": 4035742098.0, "step": 5292 }, { "epoch": 7.221224115306242, "grad_norm": 0.22801416477062525, "learning_rate": 2.4454201907787364e-06, "loss": 0.163, "num_tokens": 4036466032.0, "step": 5293 }, { "epoch": 7.222589696264922, "grad_norm": 0.2242775449492381, "learning_rate": 2.4438770577606704e-06, "loss": 0.1674, "num_tokens": 4037337559.0, "step": 5294 }, { "epoch": 7.223955277223603, "grad_norm": 0.22717084360874654, "learning_rate": 2.4423365348451793e-06, "loss": 0.1573, "num_tokens": 4038085019.0, "step": 5295 }, { "epoch": 7.225320858182283, "grad_norm": 0.23149831208451657, "learning_rate": 2.4407986225022108e-06, "loss": 0.1652, "num_tokens": 4038864870.0, "step": 5296 }, { "epoch": 7.226686439140964, "grad_norm": 0.22447481271388975, "learning_rate": 2.4392633212009144e-06, "loss": 0.1586, "num_tokens": 4039598186.0, "step": 5297 }, { "epoch": 7.2280520200996445, "grad_norm": 0.21824743188015736, "learning_rate": 2.437730631409644e-06, "loss": 0.1594, "num_tokens": 4040391786.0, "step": 5298 }, { "epoch": 7.2294176010583255, "grad_norm": 0.23087681602589674, "learning_rate": 2.4362005535959604e-06, "loss": 0.1608, "num_tokens": 4041200871.0, "step": 5299 }, { "epoch": 7.230783182017006, "grad_norm": 0.2223645680214769, "learning_rate": 2.4346730882266185e-06, "loss": 0.1655, "num_tokens": 4041987374.0, "step": 5300 }, { "epoch": 7.232148762975687, "grad_norm": 0.22493951499596498, "learning_rate": 2.433148235767586e-06, "loss": 0.161, "num_tokens": 4042730587.0, "step": 5301 }, { "epoch": 7.233514343934367, "grad_norm": 0.2216519798201471, "learning_rate": 2.4316259966840265e-06, "loss": 0.1614, "num_tokens": 4043475368.0, "step": 5302 }, { "epoch": 7.234879924893047, "grad_norm": 0.2211717583966399, "learning_rate": 2.4301063714403137e-06, "loss": 0.1632, "num_tokens": 4044282821.0, "step": 5303 }, { "epoch": 7.236245505851728, "grad_norm": 0.22472182602398846, "learning_rate": 2.428589360500013e-06, "loss": 0.1688, "num_tokens": 4045066716.0, "step": 5304 }, { "epoch": 7.237611086810408, "grad_norm": 0.22729689636464273, "learning_rate": 2.4270749643259047e-06, "loss": 0.1542, "num_tokens": 4045781706.0, "step": 5305 }, { "epoch": 7.238976667769089, "grad_norm": 0.2241855072098184, "learning_rate": 2.425563183379962e-06, "loss": 0.1595, "num_tokens": 4046590802.0, "step": 5306 }, { "epoch": 7.240342248727769, "grad_norm": 0.2211750585622038, "learning_rate": 2.4240540181233636e-06, "loss": 0.1602, "num_tokens": 4047347391.0, "step": 5307 }, { "epoch": 7.24170782968645, "grad_norm": 0.22621664787651147, "learning_rate": 2.422547469016494e-06, "loss": 0.163, "num_tokens": 4048077602.0, "step": 5308 }, { "epoch": 7.24307341064513, "grad_norm": 0.23131585233650626, "learning_rate": 2.4210435365189315e-06, "loss": 0.16, "num_tokens": 4048817737.0, "step": 5309 }, { "epoch": 7.244438991603811, "grad_norm": 0.2310503677601857, "learning_rate": 2.419542221089464e-06, "loss": 0.1565, "num_tokens": 4049550182.0, "step": 5310 }, { "epoch": 7.245804572562491, "grad_norm": 0.21888698829919545, "learning_rate": 2.418043523186078e-06, "loss": 0.1575, "num_tokens": 4050365159.0, "step": 5311 }, { "epoch": 7.247170153521171, "grad_norm": 0.24971539304209103, "learning_rate": 2.416547443265959e-06, "loss": 0.1693, "num_tokens": 4051172523.0, "step": 5312 }, { "epoch": 7.248535734479852, "grad_norm": 0.2330722141250014, "learning_rate": 2.4150539817854966e-06, "loss": 0.1571, "num_tokens": 4051956919.0, "step": 5313 }, { "epoch": 7.249901315438533, "grad_norm": 0.21873435772319708, "learning_rate": 2.4135631392002825e-06, "loss": 0.1626, "num_tokens": 4052825881.0, "step": 5314 }, { "epoch": 7.251266896397214, "grad_norm": 0.22880063151490565, "learning_rate": 2.412074915965108e-06, "loss": 0.155, "num_tokens": 4053491715.0, "step": 5315 }, { "epoch": 7.252632477355894, "grad_norm": 0.2267466958426787, "learning_rate": 2.410589312533964e-06, "loss": 0.1697, "num_tokens": 4054309973.0, "step": 5316 }, { "epoch": 7.253998058314575, "grad_norm": 0.23871625729860377, "learning_rate": 2.4091063293600465e-06, "loss": 0.1574, "num_tokens": 4055068646.0, "step": 5317 }, { "epoch": 7.255363639273255, "grad_norm": 0.22096356224605743, "learning_rate": 2.4076259668957475e-06, "loss": 0.161, "num_tokens": 4055864372.0, "step": 5318 }, { "epoch": 7.256729220231936, "grad_norm": 0.22585800747202195, "learning_rate": 2.4061482255926622e-06, "loss": 0.1689, "num_tokens": 4056684677.0, "step": 5319 }, { "epoch": 7.258094801190616, "grad_norm": 0.23372806976057542, "learning_rate": 2.4046731059015855e-06, "loss": 0.1613, "num_tokens": 4057429568.0, "step": 5320 }, { "epoch": 7.259460382149296, "grad_norm": 0.217833926122154, "learning_rate": 2.403200608272511e-06, "loss": 0.1642, "num_tokens": 4058243318.0, "step": 5321 }, { "epoch": 7.260825963107977, "grad_norm": 0.22676100644288907, "learning_rate": 2.4017307331546354e-06, "loss": 0.1534, "num_tokens": 4058996157.0, "step": 5322 }, { "epoch": 7.262191544066657, "grad_norm": 0.229467206158808, "learning_rate": 2.4002634809963536e-06, "loss": 0.1605, "num_tokens": 4059741185.0, "step": 5323 }, { "epoch": 7.263557125025338, "grad_norm": 0.2289795731046091, "learning_rate": 2.3987988522452625e-06, "loss": 0.1619, "num_tokens": 4060489501.0, "step": 5324 }, { "epoch": 7.264922705984018, "grad_norm": 0.23930215142569702, "learning_rate": 2.397336847348153e-06, "loss": 0.1559, "num_tokens": 4061238913.0, "step": 5325 }, { "epoch": 7.266288286942699, "grad_norm": 0.22681187569688466, "learning_rate": 2.395877466751023e-06, "loss": 0.1608, "num_tokens": 4062009300.0, "step": 5326 }, { "epoch": 7.267653867901379, "grad_norm": 0.22637379630506999, "learning_rate": 2.3944207108990656e-06, "loss": 0.1663, "num_tokens": 4062835571.0, "step": 5327 }, { "epoch": 7.26901944886006, "grad_norm": 0.2237783607689323, "learning_rate": 2.3929665802366713e-06, "loss": 0.1638, "num_tokens": 4063601960.0, "step": 5328 }, { "epoch": 7.2703850298187405, "grad_norm": 0.23168507594702362, "learning_rate": 2.3915150752074363e-06, "loss": 0.1589, "num_tokens": 4064380083.0, "step": 5329 }, { "epoch": 7.271750610777421, "grad_norm": 0.2282930200963839, "learning_rate": 2.3900661962541493e-06, "loss": 0.1654, "num_tokens": 4065134852.0, "step": 5330 }, { "epoch": 7.273116191736102, "grad_norm": 0.22996725362412523, "learning_rate": 2.3886199438188017e-06, "loss": 0.1645, "num_tokens": 4065922443.0, "step": 5331 }, { "epoch": 7.274481772694782, "grad_norm": 0.2344034293655992, "learning_rate": 2.387176318342581e-06, "loss": 0.1519, "num_tokens": 4066625748.0, "step": 5332 }, { "epoch": 7.275847353653463, "grad_norm": 0.22724219842764998, "learning_rate": 2.3857353202658782e-06, "loss": 0.1595, "num_tokens": 4067402938.0, "step": 5333 }, { "epoch": 7.277212934612143, "grad_norm": 0.22901877165278292, "learning_rate": 2.3842969500282747e-06, "loss": 0.1617, "num_tokens": 4068197958.0, "step": 5334 }, { "epoch": 7.278578515570824, "grad_norm": 0.2360259926993058, "learning_rate": 2.3828612080685576e-06, "loss": 0.1667, "num_tokens": 4068973786.0, "step": 5335 }, { "epoch": 7.279944096529504, "grad_norm": 0.215950405257391, "learning_rate": 2.3814280948247105e-06, "loss": 0.1644, "num_tokens": 4069788410.0, "step": 5336 }, { "epoch": 7.281309677488185, "grad_norm": 0.23291084622261846, "learning_rate": 2.379997610733912e-06, "loss": 0.1734, "num_tokens": 4070518045.0, "step": 5337 }, { "epoch": 7.282675258446865, "grad_norm": 0.22664838769234846, "learning_rate": 2.378569756232543e-06, "loss": 0.1642, "num_tokens": 4071321170.0, "step": 5338 }, { "epoch": 7.284040839405545, "grad_norm": 0.2265070920137822, "learning_rate": 2.3771445317561784e-06, "loss": 0.1617, "num_tokens": 4072146421.0, "step": 5339 }, { "epoch": 7.285406420364226, "grad_norm": 0.23071296582891995, "learning_rate": 2.3757219377395926e-06, "loss": 0.1677, "num_tokens": 4072943824.0, "step": 5340 }, { "epoch": 7.286772001322906, "grad_norm": 0.22637830577661366, "learning_rate": 2.374301974616758e-06, "loss": 0.1675, "num_tokens": 4073785930.0, "step": 5341 }, { "epoch": 7.288137582281587, "grad_norm": 0.2353274327510552, "learning_rate": 2.372884642820845e-06, "loss": 0.161, "num_tokens": 4074511130.0, "step": 5342 }, { "epoch": 7.289503163240267, "grad_norm": 0.21453497905303526, "learning_rate": 2.371469942784218e-06, "loss": 0.1626, "num_tokens": 4075253713.0, "step": 5343 }, { "epoch": 7.290868744198948, "grad_norm": 0.2166187023405437, "learning_rate": 2.3700578749384408e-06, "loss": 0.1588, "num_tokens": 4076045187.0, "step": 5344 }, { "epoch": 7.2922343251576285, "grad_norm": 0.23532356898432044, "learning_rate": 2.3686484397142755e-06, "loss": 0.162, "num_tokens": 4076796876.0, "step": 5345 }, { "epoch": 7.2935999061163095, "grad_norm": 0.2443584448068937, "learning_rate": 2.3672416375416794e-06, "loss": 0.1671, "num_tokens": 4077549042.0, "step": 5346 }, { "epoch": 7.29496548707499, "grad_norm": 0.2287894595409, "learning_rate": 2.3658374688498077e-06, "loss": 0.1672, "num_tokens": 4078340311.0, "step": 5347 }, { "epoch": 7.29633106803367, "grad_norm": 0.23357606167993133, "learning_rate": 2.3644359340670095e-06, "loss": 0.1667, "num_tokens": 4079079403.0, "step": 5348 }, { "epoch": 7.297696648992351, "grad_norm": 0.2212286738350255, "learning_rate": 2.363037033620832e-06, "loss": 0.1689, "num_tokens": 4079852914.0, "step": 5349 }, { "epoch": 7.299062229951031, "grad_norm": 0.23843611969667422, "learning_rate": 2.3616407679380235e-06, "loss": 0.1586, "num_tokens": 4080616859.0, "step": 5350 }, { "epoch": 7.300427810909712, "grad_norm": 0.2337826034863261, "learning_rate": 2.3602471374445183e-06, "loss": 0.1558, "num_tokens": 4081351601.0, "step": 5351 }, { "epoch": 7.301793391868392, "grad_norm": 0.22928015222849152, "learning_rate": 2.3588561425654562e-06, "loss": 0.1627, "num_tokens": 4082144677.0, "step": 5352 }, { "epoch": 7.303158972827073, "grad_norm": 0.23484094153981833, "learning_rate": 2.3574677837251695e-06, "loss": 0.164, "num_tokens": 4082902894.0, "step": 5353 }, { "epoch": 7.304524553785753, "grad_norm": 0.2307795103464297, "learning_rate": 2.356082061347186e-06, "loss": 0.1624, "num_tokens": 4083646842.0, "step": 5354 }, { "epoch": 7.305890134744434, "grad_norm": 0.2227056997139569, "learning_rate": 2.3546989758542275e-06, "loss": 0.1647, "num_tokens": 4084426267.0, "step": 5355 }, { "epoch": 7.307255715703114, "grad_norm": 0.2069790503367002, "learning_rate": 2.3533185276682183e-06, "loss": 0.1618, "num_tokens": 4085264453.0, "step": 5356 }, { "epoch": 7.308621296661794, "grad_norm": 0.2341309399571618, "learning_rate": 2.351940717210268e-06, "loss": 0.1622, "num_tokens": 4086013757.0, "step": 5357 }, { "epoch": 7.309986877620475, "grad_norm": 0.2287620006214358, "learning_rate": 2.3505655449006903e-06, "loss": 0.1602, "num_tokens": 4086749464.0, "step": 5358 }, { "epoch": 7.311352458579155, "grad_norm": 0.21458961168624907, "learning_rate": 2.34919301115899e-06, "loss": 0.1639, "num_tokens": 4087581399.0, "step": 5359 }, { "epoch": 7.312718039537836, "grad_norm": 0.2238655309822278, "learning_rate": 2.3478231164038668e-06, "loss": 0.1681, "num_tokens": 4088355418.0, "step": 5360 }, { "epoch": 7.3140836204965165, "grad_norm": 0.23226191927930767, "learning_rate": 2.3464558610532163e-06, "loss": 0.1547, "num_tokens": 4089107344.0, "step": 5361 }, { "epoch": 7.3154492014551975, "grad_norm": 0.2256402917669762, "learning_rate": 2.3450912455241297e-06, "loss": 0.1636, "num_tokens": 4089891100.0, "step": 5362 }, { "epoch": 7.316814782413878, "grad_norm": 0.23318063664241018, "learning_rate": 2.3437292702328923e-06, "loss": 0.1552, "num_tokens": 4090679412.0, "step": 5363 }, { "epoch": 7.318180363372559, "grad_norm": 0.23139424280890059, "learning_rate": 2.3423699355949827e-06, "loss": 0.169, "num_tokens": 4091462342.0, "step": 5364 }, { "epoch": 7.319545944331239, "grad_norm": 0.2302311581162912, "learning_rate": 2.3410132420250773e-06, "loss": 0.1583, "num_tokens": 4092183924.0, "step": 5365 }, { "epoch": 7.320911525289919, "grad_norm": 0.22475481159438757, "learning_rate": 2.3396591899370415e-06, "loss": 0.1564, "num_tokens": 4092989254.0, "step": 5366 }, { "epoch": 7.3222771062486, "grad_norm": 0.23079883087516664, "learning_rate": 2.3383077797439394e-06, "loss": 0.1604, "num_tokens": 4093707672.0, "step": 5367 }, { "epoch": 7.32364268720728, "grad_norm": 0.21906490225736197, "learning_rate": 2.3369590118580294e-06, "loss": 0.165, "num_tokens": 4094524170.0, "step": 5368 }, { "epoch": 7.325008268165961, "grad_norm": 0.23469548578925717, "learning_rate": 2.335612886690757e-06, "loss": 0.1655, "num_tokens": 4095282366.0, "step": 5369 }, { "epoch": 7.326373849124641, "grad_norm": 0.21933136535897177, "learning_rate": 2.3342694046527734e-06, "loss": 0.1696, "num_tokens": 4096062714.0, "step": 5370 }, { "epoch": 7.327739430083322, "grad_norm": 0.2572986314417623, "learning_rate": 2.332928566153912e-06, "loss": 0.1644, "num_tokens": 4096850799.0, "step": 5371 }, { "epoch": 7.329105011042002, "grad_norm": 0.2323338807356399, "learning_rate": 2.3315903716032066e-06, "loss": 0.1643, "num_tokens": 4097628559.0, "step": 5372 }, { "epoch": 7.330470592000683, "grad_norm": 0.22421806750740247, "learning_rate": 2.3302548214088826e-06, "loss": 0.1591, "num_tokens": 4098377219.0, "step": 5373 }, { "epoch": 7.331836172959363, "grad_norm": 0.22688265931582377, "learning_rate": 2.3289219159783568e-06, "loss": 0.1619, "num_tokens": 4099111959.0, "step": 5374 }, { "epoch": 7.333201753918043, "grad_norm": 0.23351109651096505, "learning_rate": 2.3275916557182427e-06, "loss": 0.1577, "num_tokens": 4099844682.0, "step": 5375 }, { "epoch": 7.334567334876724, "grad_norm": 0.23526227399341407, "learning_rate": 2.3262640410343433e-06, "loss": 0.1646, "num_tokens": 4100552257.0, "step": 5376 }, { "epoch": 7.3359329158354045, "grad_norm": 0.2252775853811229, "learning_rate": 2.3249390723316585e-06, "loss": 0.1669, "num_tokens": 4101346822.0, "step": 5377 }, { "epoch": 7.3372984967940855, "grad_norm": 0.2368518468347841, "learning_rate": 2.3236167500143763e-06, "loss": 0.1693, "num_tokens": 4102137010.0, "step": 5378 }, { "epoch": 7.338664077752766, "grad_norm": 0.22933367550616685, "learning_rate": 2.3222970744858835e-06, "loss": 0.1638, "num_tokens": 4102899674.0, "step": 5379 }, { "epoch": 7.340029658711447, "grad_norm": 0.22509394643907243, "learning_rate": 2.3209800461487527e-06, "loss": 0.1597, "num_tokens": 4103651815.0, "step": 5380 }, { "epoch": 7.341395239670127, "grad_norm": 0.21611243261021196, "learning_rate": 2.3196656654047533e-06, "loss": 0.1608, "num_tokens": 4104452462.0, "step": 5381 }, { "epoch": 7.342760820628808, "grad_norm": 0.22231426537152488, "learning_rate": 2.3183539326548474e-06, "loss": 0.1631, "num_tokens": 4105217468.0, "step": 5382 }, { "epoch": 7.344126401587488, "grad_norm": 0.22383810394621334, "learning_rate": 2.3170448482991852e-06, "loss": 0.1594, "num_tokens": 4106020919.0, "step": 5383 }, { "epoch": 7.345491982546168, "grad_norm": 0.24300616086360613, "learning_rate": 2.3157384127371123e-06, "loss": 0.1599, "num_tokens": 4106748395.0, "step": 5384 }, { "epoch": 7.346857563504849, "grad_norm": 0.22448727016194572, "learning_rate": 2.314434626367167e-06, "loss": 0.1649, "num_tokens": 4107578130.0, "step": 5385 }, { "epoch": 7.348223144463529, "grad_norm": 0.23279767918257874, "learning_rate": 2.3131334895870787e-06, "loss": 0.1637, "num_tokens": 4108323312.0, "step": 5386 }, { "epoch": 7.34958872542221, "grad_norm": 0.2228066973587561, "learning_rate": 2.311835002793764e-06, "loss": 0.1585, "num_tokens": 4109011541.0, "step": 5387 }, { "epoch": 7.35095430638089, "grad_norm": 0.22503864226069176, "learning_rate": 2.3105391663833397e-06, "loss": 0.1675, "num_tokens": 4109776069.0, "step": 5388 }, { "epoch": 7.352319887339571, "grad_norm": 0.2179322752831177, "learning_rate": 2.309245980751107e-06, "loss": 0.1614, "num_tokens": 4110515018.0, "step": 5389 }, { "epoch": 7.353685468298251, "grad_norm": 0.22407336722399782, "learning_rate": 2.307955446291559e-06, "loss": 0.1613, "num_tokens": 4111244827.0, "step": 5390 }, { "epoch": 7.355051049256932, "grad_norm": 0.2120790968746116, "learning_rate": 2.3066675633983863e-06, "loss": 0.1573, "num_tokens": 4112066462.0, "step": 5391 }, { "epoch": 7.356416630215612, "grad_norm": 0.21994429932186366, "learning_rate": 2.305382332464462e-06, "loss": 0.1561, "num_tokens": 4112725106.0, "step": 5392 }, { "epoch": 7.3577822111742925, "grad_norm": 0.22286752150438327, "learning_rate": 2.304099753881857e-06, "loss": 0.1624, "num_tokens": 4113472777.0, "step": 5393 }, { "epoch": 7.3591477921329735, "grad_norm": 0.2337219169651089, "learning_rate": 2.30281982804183e-06, "loss": 0.165, "num_tokens": 4114192929.0, "step": 5394 }, { "epoch": 7.360513373091654, "grad_norm": 0.22033486221257223, "learning_rate": 2.301542555334831e-06, "loss": 0.1657, "num_tokens": 4115004909.0, "step": 5395 }, { "epoch": 7.361878954050335, "grad_norm": 0.22885481365673466, "learning_rate": 2.3002679361504974e-06, "loss": 0.1566, "num_tokens": 4115809696.0, "step": 5396 }, { "epoch": 7.363244535009015, "grad_norm": 0.2410229724947732, "learning_rate": 2.298995970877666e-06, "loss": 0.1656, "num_tokens": 4116574713.0, "step": 5397 }, { "epoch": 7.364610115967696, "grad_norm": 0.25609195964022596, "learning_rate": 2.2977266599043553e-06, "loss": 0.1751, "num_tokens": 4117285435.0, "step": 5398 }, { "epoch": 7.365975696926376, "grad_norm": 0.20931166650395885, "learning_rate": 2.296460003617776e-06, "loss": 0.1573, "num_tokens": 4118112486.0, "step": 5399 }, { "epoch": 7.367341277885057, "grad_norm": 0.23441735239791875, "learning_rate": 2.295196002404332e-06, "loss": 0.1705, "num_tokens": 4118988403.0, "step": 5400 }, { "epoch": 7.368706858843737, "grad_norm": 0.21892879444077784, "learning_rate": 2.2939346566496142e-06, "loss": 0.1657, "num_tokens": 4119800262.0, "step": 5401 }, { "epoch": 7.370072439802417, "grad_norm": 0.22531128149611235, "learning_rate": 2.2926759667384056e-06, "loss": 0.1673, "num_tokens": 4120554871.0, "step": 5402 }, { "epoch": 7.371438020761098, "grad_norm": 0.23767898264999146, "learning_rate": 2.2914199330546764e-06, "loss": 0.1656, "num_tokens": 4121337220.0, "step": 5403 }, { "epoch": 7.372803601719778, "grad_norm": 0.2633161649993797, "learning_rate": 2.2901665559815914e-06, "loss": 0.1614, "num_tokens": 4122021848.0, "step": 5404 }, { "epoch": 7.374169182678459, "grad_norm": 0.22081467390563517, "learning_rate": 2.2889158359014978e-06, "loss": 0.1603, "num_tokens": 4122749179.0, "step": 5405 }, { "epoch": 7.375534763637139, "grad_norm": 0.2518378470835453, "learning_rate": 2.2876677731959374e-06, "loss": 0.1575, "num_tokens": 4123515932.0, "step": 5406 }, { "epoch": 7.37690034459582, "grad_norm": 0.22287478526550192, "learning_rate": 2.286422368245641e-06, "loss": 0.1574, "num_tokens": 4124270905.0, "step": 5407 }, { "epoch": 7.3782659255545004, "grad_norm": 0.2361535247881203, "learning_rate": 2.285179621430525e-06, "loss": 0.1658, "num_tokens": 4124975602.0, "step": 5408 }, { "epoch": 7.3796315065131814, "grad_norm": 0.22577757546734017, "learning_rate": 2.2839395331297028e-06, "loss": 0.164, "num_tokens": 4125744891.0, "step": 5409 }, { "epoch": 7.380997087471862, "grad_norm": 0.23375317952488478, "learning_rate": 2.282702103721467e-06, "loss": 0.1658, "num_tokens": 4126558346.0, "step": 5410 }, { "epoch": 7.382362668430542, "grad_norm": 0.21472939844992567, "learning_rate": 2.2814673335833055e-06, "loss": 0.1607, "num_tokens": 4127364544.0, "step": 5411 }, { "epoch": 7.383728249389223, "grad_norm": 0.23498567082365704, "learning_rate": 2.2802352230918927e-06, "loss": 0.1586, "num_tokens": 4128157771.0, "step": 5412 }, { "epoch": 7.385093830347903, "grad_norm": 0.23007797008583242, "learning_rate": 2.279005772623093e-06, "loss": 0.1633, "num_tokens": 4128873959.0, "step": 5413 }, { "epoch": 7.386459411306584, "grad_norm": 0.2892705140450129, "learning_rate": 2.2777789825519565e-06, "loss": 0.1623, "num_tokens": 4129575159.0, "step": 5414 }, { "epoch": 7.387824992265264, "grad_norm": 0.23239039064603048, "learning_rate": 2.2765548532527262e-06, "loss": 0.1697, "num_tokens": 4130343499.0, "step": 5415 }, { "epoch": 7.389190573223945, "grad_norm": 0.23033456038534036, "learning_rate": 2.275333385098831e-06, "loss": 0.1687, "num_tokens": 4131102787.0, "step": 5416 }, { "epoch": 7.390556154182625, "grad_norm": 0.21873809480546852, "learning_rate": 2.274114578462884e-06, "loss": 0.1634, "num_tokens": 4131898766.0, "step": 5417 }, { "epoch": 7.391921735141306, "grad_norm": 0.22143957526553262, "learning_rate": 2.2728984337166966e-06, "loss": 0.159, "num_tokens": 4132656536.0, "step": 5418 }, { "epoch": 7.393287316099986, "grad_norm": 0.24054884608427313, "learning_rate": 2.2716849512312556e-06, "loss": 0.1635, "num_tokens": 4133346237.0, "step": 5419 }, { "epoch": 7.394652897058666, "grad_norm": 0.22532009039323486, "learning_rate": 2.2704741313767463e-06, "loss": 0.1652, "num_tokens": 4134189582.0, "step": 5420 }, { "epoch": 7.396018478017347, "grad_norm": 0.2157562469480373, "learning_rate": 2.2692659745225364e-06, "loss": 0.1552, "num_tokens": 4134989699.0, "step": 5421 }, { "epoch": 7.397384058976027, "grad_norm": 0.2219089794554517, "learning_rate": 2.2680604810371797e-06, "loss": 0.1609, "num_tokens": 4135741480.0, "step": 5422 }, { "epoch": 7.398749639934708, "grad_norm": 0.22826756704605286, "learning_rate": 2.2668576512884237e-06, "loss": 0.1645, "num_tokens": 4136500074.0, "step": 5423 }, { "epoch": 7.4001152208933885, "grad_norm": 0.24017276696125633, "learning_rate": 2.2656574856431974e-06, "loss": 0.1571, "num_tokens": 4137173636.0, "step": 5424 }, { "epoch": 7.4014808018520695, "grad_norm": 0.23686505140072578, "learning_rate": 2.26445998446762e-06, "loss": 0.1597, "num_tokens": 4137924798.0, "step": 5425 }, { "epoch": 7.40284638281075, "grad_norm": 0.22941482175217398, "learning_rate": 2.263265148126997e-06, "loss": 0.1614, "num_tokens": 4138703041.0, "step": 5426 }, { "epoch": 7.404211963769431, "grad_norm": 0.2262492556554366, "learning_rate": 2.2620729769858224e-06, "loss": 0.1658, "num_tokens": 4139501233.0, "step": 5427 }, { "epoch": 7.405577544728111, "grad_norm": 0.23557078668470724, "learning_rate": 2.260883471407774e-06, "loss": 0.1585, "num_tokens": 4140262974.0, "step": 5428 }, { "epoch": 7.406943125686791, "grad_norm": 0.2519025906470743, "learning_rate": 2.2596966317557206e-06, "loss": 0.1574, "num_tokens": 4141023091.0, "step": 5429 }, { "epoch": 7.408308706645472, "grad_norm": 0.22490592504843296, "learning_rate": 2.2585124583917143e-06, "loss": 0.1704, "num_tokens": 4141814281.0, "step": 5430 }, { "epoch": 7.409674287604152, "grad_norm": 0.22622606230475514, "learning_rate": 2.2573309516769946e-06, "loss": 0.1628, "num_tokens": 4142606230.0, "step": 5431 }, { "epoch": 7.411039868562833, "grad_norm": 0.2772416701984129, "learning_rate": 2.256152111971989e-06, "loss": 0.1629, "num_tokens": 4143312583.0, "step": 5432 }, { "epoch": 7.412405449521513, "grad_norm": 0.21021950980041723, "learning_rate": 2.25497593963631e-06, "loss": 0.1663, "num_tokens": 4144215805.0, "step": 5433 }, { "epoch": 7.413771030480194, "grad_norm": 0.23290846818078983, "learning_rate": 2.2538024350287567e-06, "loss": 0.1679, "num_tokens": 4144951031.0, "step": 5434 }, { "epoch": 7.415136611438874, "grad_norm": 0.22475026383591534, "learning_rate": 2.252631598507316e-06, "loss": 0.1595, "num_tokens": 4145712658.0, "step": 5435 }, { "epoch": 7.416502192397555, "grad_norm": 0.25808395729030437, "learning_rate": 2.2514634304291586e-06, "loss": 0.1683, "num_tokens": 4146455701.0, "step": 5436 }, { "epoch": 7.417867773356235, "grad_norm": 0.22431585702973372, "learning_rate": 2.2502979311506407e-06, "loss": 0.1617, "num_tokens": 4147205041.0, "step": 5437 }, { "epoch": 7.419233354314915, "grad_norm": 0.2216069727919839, "learning_rate": 2.249135101027306e-06, "loss": 0.1607, "num_tokens": 4147930925.0, "step": 5438 }, { "epoch": 7.420598935273596, "grad_norm": 0.22872514256900858, "learning_rate": 2.247974940413886e-06, "loss": 0.1648, "num_tokens": 4148658370.0, "step": 5439 }, { "epoch": 7.4219645162322765, "grad_norm": 0.21007083939862498, "learning_rate": 2.246817449664292e-06, "loss": 0.1632, "num_tokens": 4149549310.0, "step": 5440 }, { "epoch": 7.4233300971909575, "grad_norm": 0.23438433982900994, "learning_rate": 2.2456626291316275e-06, "loss": 0.1624, "num_tokens": 4150300638.0, "step": 5441 }, { "epoch": 7.424695678149638, "grad_norm": 0.2284251992642905, "learning_rate": 2.2445104791681753e-06, "loss": 0.1706, "num_tokens": 4151143648.0, "step": 5442 }, { "epoch": 7.426061259108319, "grad_norm": 0.2417271987505163, "learning_rate": 2.2433610001254083e-06, "loss": 0.1606, "num_tokens": 4151838169.0, "step": 5443 }, { "epoch": 7.427426840066999, "grad_norm": 0.22361063076221996, "learning_rate": 2.2422141923539832e-06, "loss": 0.1624, "num_tokens": 4152620430.0, "step": 5444 }, { "epoch": 7.42879242102568, "grad_norm": 0.2284761918129643, "learning_rate": 2.2410700562037396e-06, "loss": 0.1607, "num_tokens": 4153337195.0, "step": 5445 }, { "epoch": 7.43015800198436, "grad_norm": 0.23745975542986758, "learning_rate": 2.239928592023705e-06, "loss": 0.1637, "num_tokens": 4154082976.0, "step": 5446 }, { "epoch": 7.43152358294304, "grad_norm": 0.2298311767451642, "learning_rate": 2.2387898001620907e-06, "loss": 0.1556, "num_tokens": 4154858235.0, "step": 5447 }, { "epoch": 7.432889163901721, "grad_norm": 0.2355552281367777, "learning_rate": 2.2376536809662934e-06, "loss": 0.1559, "num_tokens": 4155578727.0, "step": 5448 }, { "epoch": 7.434254744860401, "grad_norm": 0.23045269188141895, "learning_rate": 2.2365202347828922e-06, "loss": 0.1597, "num_tokens": 4156331529.0, "step": 5449 }, { "epoch": 7.435620325819082, "grad_norm": 0.230259647708039, "learning_rate": 2.235389461957653e-06, "loss": 0.1693, "num_tokens": 4157164345.0, "step": 5450 }, { "epoch": 7.436985906777762, "grad_norm": 0.22994529688349277, "learning_rate": 2.2342613628355266e-06, "loss": 0.1559, "num_tokens": 4157849679.0, "step": 5451 }, { "epoch": 7.438351487736443, "grad_norm": 0.23548115797510027, "learning_rate": 2.233135937760645e-06, "loss": 0.169, "num_tokens": 4158598366.0, "step": 5452 }, { "epoch": 7.439717068695123, "grad_norm": 0.22739756076198167, "learning_rate": 2.2320131870763304e-06, "loss": 0.1627, "num_tokens": 4159356430.0, "step": 5453 }, { "epoch": 7.441082649653804, "grad_norm": 0.2507301457999663, "learning_rate": 2.2308931111250824e-06, "loss": 0.1643, "num_tokens": 4160090988.0, "step": 5454 }, { "epoch": 7.442448230612484, "grad_norm": 0.2179018354305088, "learning_rate": 2.2297757102485877e-06, "loss": 0.1629, "num_tokens": 4160914099.0, "step": 5455 }, { "epoch": 7.4438138115711645, "grad_norm": 0.24255547841418856, "learning_rate": 2.228660984787719e-06, "loss": 0.1591, "num_tokens": 4161742068.0, "step": 5456 }, { "epoch": 7.4451793925298455, "grad_norm": 0.22070178889061312, "learning_rate": 2.227548935082529e-06, "loss": 0.1648, "num_tokens": 4162594640.0, "step": 5457 }, { "epoch": 7.446544973488526, "grad_norm": 0.21769301296641685, "learning_rate": 2.2264395614722546e-06, "loss": 0.1595, "num_tokens": 4163356647.0, "step": 5458 }, { "epoch": 7.447910554447207, "grad_norm": 0.21905754343969122, "learning_rate": 2.2253328642953213e-06, "loss": 0.1593, "num_tokens": 4164146467.0, "step": 5459 }, { "epoch": 7.449276135405887, "grad_norm": 0.23417037009819328, "learning_rate": 2.2242288438893333e-06, "loss": 0.1645, "num_tokens": 4164915824.0, "step": 5460 }, { "epoch": 7.450641716364568, "grad_norm": 0.2316063881888279, "learning_rate": 2.2231275005910763e-06, "loss": 0.1699, "num_tokens": 4165695668.0, "step": 5461 }, { "epoch": 7.452007297323248, "grad_norm": 0.23074039011508662, "learning_rate": 2.222028834736527e-06, "loss": 0.1586, "num_tokens": 4166426520.0, "step": 5462 }, { "epoch": 7.453372878281929, "grad_norm": 0.22419746487946282, "learning_rate": 2.220932846660838e-06, "loss": 0.1636, "num_tokens": 4167262428.0, "step": 5463 }, { "epoch": 7.454738459240609, "grad_norm": 0.23946512475988727, "learning_rate": 2.219839536698348e-06, "loss": 0.1597, "num_tokens": 4167975944.0, "step": 5464 }, { "epoch": 7.456104040199289, "grad_norm": 0.2324745923146015, "learning_rate": 2.218748905182579e-06, "loss": 0.1576, "num_tokens": 4168729649.0, "step": 5465 }, { "epoch": 7.45746962115797, "grad_norm": 0.22045528954554758, "learning_rate": 2.2176609524462354e-06, "loss": 0.1663, "num_tokens": 4169513202.0, "step": 5466 }, { "epoch": 7.45883520211665, "grad_norm": 0.2277971879354185, "learning_rate": 2.2165756788212045e-06, "loss": 0.1496, "num_tokens": 4170232010.0, "step": 5467 }, { "epoch": 7.460200783075331, "grad_norm": 0.23467645391967995, "learning_rate": 2.215493084638556e-06, "loss": 0.17, "num_tokens": 4170953220.0, "step": 5468 }, { "epoch": 7.461566364034011, "grad_norm": 0.2357110112494784, "learning_rate": 2.214413170228544e-06, "loss": 0.1687, "num_tokens": 4171707166.0, "step": 5469 }, { "epoch": 7.462931944992692, "grad_norm": 0.2257504394353422, "learning_rate": 2.2133359359206e-06, "loss": 0.1583, "num_tokens": 4172493230.0, "step": 5470 }, { "epoch": 7.464297525951372, "grad_norm": 0.2300022981337233, "learning_rate": 2.212261382043346e-06, "loss": 0.1666, "num_tokens": 4173224197.0, "step": 5471 }, { "epoch": 7.465663106910053, "grad_norm": 0.22555337031371314, "learning_rate": 2.2111895089245786e-06, "loss": 0.1633, "num_tokens": 4174001840.0, "step": 5472 }, { "epoch": 7.4670286878687335, "grad_norm": 0.23069533129575095, "learning_rate": 2.210120316891281e-06, "loss": 0.1628, "num_tokens": 4174752645.0, "step": 5473 }, { "epoch": 7.468394268827414, "grad_norm": 0.24439364481760778, "learning_rate": 2.2090538062696175e-06, "loss": 0.1586, "num_tokens": 4175423825.0, "step": 5474 }, { "epoch": 7.469759849786095, "grad_norm": 0.2271174151854004, "learning_rate": 2.2079899773849352e-06, "loss": 0.1708, "num_tokens": 4176225836.0, "step": 5475 }, { "epoch": 7.471125430744775, "grad_norm": 0.23298841467470516, "learning_rate": 2.2069288305617596e-06, "loss": 0.1701, "num_tokens": 4176997683.0, "step": 5476 }, { "epoch": 7.472491011703456, "grad_norm": 0.22525121116689123, "learning_rate": 2.2058703661238034e-06, "loss": 0.1669, "num_tokens": 4177852628.0, "step": 5477 }, { "epoch": 7.473856592662136, "grad_norm": 0.24545973347416827, "learning_rate": 2.2048145843939567e-06, "loss": 0.159, "num_tokens": 4178509634.0, "step": 5478 }, { "epoch": 7.475222173620817, "grad_norm": 0.23203636477827122, "learning_rate": 2.203761485694293e-06, "loss": 0.1596, "num_tokens": 4179195843.0, "step": 5479 }, { "epoch": 7.476587754579497, "grad_norm": 0.2284662364301498, "learning_rate": 2.202711070346068e-06, "loss": 0.1752, "num_tokens": 4180004847.0, "step": 5480 }, { "epoch": 7.477953335538178, "grad_norm": 0.2340162880730845, "learning_rate": 2.201663338669716e-06, "loss": 0.157, "num_tokens": 4180756201.0, "step": 5481 }, { "epoch": 7.479318916496858, "grad_norm": 0.248499207811264, "learning_rate": 2.2006182909848573e-06, "loss": 0.1678, "num_tokens": 4181439582.0, "step": 5482 }, { "epoch": 7.480684497455538, "grad_norm": 0.23836672123451974, "learning_rate": 2.1995759276102886e-06, "loss": 0.1594, "num_tokens": 4182130928.0, "step": 5483 }, { "epoch": 7.482050078414219, "grad_norm": 0.2269814692514156, "learning_rate": 2.19853624886399e-06, "loss": 0.1578, "num_tokens": 4182890035.0, "step": 5484 }, { "epoch": 7.483415659372899, "grad_norm": 0.21827536438608663, "learning_rate": 2.197499255063123e-06, "loss": 0.1625, "num_tokens": 4183683743.0, "step": 5485 }, { "epoch": 7.48478124033158, "grad_norm": 0.23530599084127618, "learning_rate": 2.19646494652403e-06, "loss": 0.1649, "num_tokens": 4184467586.0, "step": 5486 }, { "epoch": 7.48614682129026, "grad_norm": 0.23156137836294632, "learning_rate": 2.195433323562233e-06, "loss": 0.1675, "num_tokens": 4185241141.0, "step": 5487 }, { "epoch": 7.487512402248941, "grad_norm": 0.22903587756903418, "learning_rate": 2.194404386492434e-06, "loss": 0.1577, "num_tokens": 4186021640.0, "step": 5488 }, { "epoch": 7.4888779832076215, "grad_norm": 0.22760272992787886, "learning_rate": 2.193378135628521e-06, "loss": 0.1507, "num_tokens": 4186717049.0, "step": 5489 }, { "epoch": 7.4902435641663025, "grad_norm": 0.2202943005927957, "learning_rate": 2.192354571283555e-06, "loss": 0.1755, "num_tokens": 4187583237.0, "step": 5490 }, { "epoch": 7.491609145124983, "grad_norm": 0.2224772101245489, "learning_rate": 2.1913336937697838e-06, "loss": 0.16, "num_tokens": 4188327879.0, "step": 5491 }, { "epoch": 7.492974726083663, "grad_norm": 0.2437925467704619, "learning_rate": 2.190315503398632e-06, "loss": 0.1667, "num_tokens": 4189066775.0, "step": 5492 }, { "epoch": 7.494340307042344, "grad_norm": 0.2173876376465795, "learning_rate": 2.189300000480704e-06, "loss": 0.1624, "num_tokens": 4189906569.0, "step": 5493 }, { "epoch": 7.495705888001024, "grad_norm": 0.3046790197215832, "learning_rate": 2.188287185325786e-06, "loss": 0.1583, "num_tokens": 4190611056.0, "step": 5494 }, { "epoch": 7.497071468959705, "grad_norm": 0.24047908728019865, "learning_rate": 2.1872770582428463e-06, "loss": 0.1651, "num_tokens": 4191347172.0, "step": 5495 }, { "epoch": 7.498437049918385, "grad_norm": 0.23080118845600903, "learning_rate": 2.1862696195400302e-06, "loss": 0.1695, "num_tokens": 4192128320.0, "step": 5496 }, { "epoch": 7.499802630877066, "grad_norm": 0.24549930220224558, "learning_rate": 2.1852648695246603e-06, "loss": 0.1617, "num_tokens": 4192895087.0, "step": 5497 }, { "epoch": 7.501168211835746, "grad_norm": 0.23521297534839372, "learning_rate": 2.1842628085032474e-06, "loss": 0.1719, "num_tokens": 4193620655.0, "step": 5498 }, { "epoch": 7.502533792794427, "grad_norm": 0.21629831405809177, "learning_rate": 2.1832634367814733e-06, "loss": 0.1673, "num_tokens": 4194457929.0, "step": 5499 }, { "epoch": 7.503899373753107, "grad_norm": 0.23899283956091946, "learning_rate": 2.1822667546642053e-06, "loss": 0.1654, "num_tokens": 4195198127.0, "step": 5500 }, { "epoch": 7.505264954711787, "grad_norm": 0.21519958918771503, "learning_rate": 2.181272762455486e-06, "loss": 0.1683, "num_tokens": 4195999689.0, "step": 5501 }, { "epoch": 7.506630535670468, "grad_norm": 0.2289392335814238, "learning_rate": 2.1802814604585415e-06, "loss": 0.1629, "num_tokens": 4196746598.0, "step": 5502 }, { "epoch": 7.5079961166291485, "grad_norm": 0.24934483054704798, "learning_rate": 2.179292848975772e-06, "loss": 0.1705, "num_tokens": 4197526887.0, "step": 5503 }, { "epoch": 7.5093616975878295, "grad_norm": 0.23114527158815684, "learning_rate": 2.178306928308763e-06, "loss": 0.1587, "num_tokens": 4198238171.0, "step": 5504 }, { "epoch": 7.51072727854651, "grad_norm": 0.2203592199464207, "learning_rate": 2.1773236987582754e-06, "loss": 0.1622, "num_tokens": 4199013631.0, "step": 5505 }, { "epoch": 7.512092859505191, "grad_norm": 0.22853439677623752, "learning_rate": 2.17634316062425e-06, "loss": 0.1564, "num_tokens": 4199743217.0, "step": 5506 }, { "epoch": 7.513458440463871, "grad_norm": 0.25059932581261285, "learning_rate": 2.1753653142058046e-06, "loss": 0.1578, "num_tokens": 4200372510.0, "step": 5507 }, { "epoch": 7.514824021422552, "grad_norm": 0.2258713299844882, "learning_rate": 2.1743901598012397e-06, "loss": 0.1545, "num_tokens": 4201109315.0, "step": 5508 }, { "epoch": 7.516189602381232, "grad_norm": 0.23002230918610533, "learning_rate": 2.173417697708033e-06, "loss": 0.1617, "num_tokens": 4201822007.0, "step": 5509 }, { "epoch": 7.517555183339912, "grad_norm": 0.2183740863375253, "learning_rate": 2.172447928222841e-06, "loss": 0.1631, "num_tokens": 4202609220.0, "step": 5510 }, { "epoch": 7.518920764298593, "grad_norm": 0.2198181518294382, "learning_rate": 2.1714808516414957e-06, "loss": 0.1703, "num_tokens": 4203406508.0, "step": 5511 }, { "epoch": 7.520286345257273, "grad_norm": 0.22738582248690328, "learning_rate": 2.1705164682590128e-06, "loss": 0.1604, "num_tokens": 4204161855.0, "step": 5512 }, { "epoch": 7.521651926215954, "grad_norm": 0.2256715125099633, "learning_rate": 2.169554778369582e-06, "loss": 0.1677, "num_tokens": 4204985656.0, "step": 5513 }, { "epoch": 7.523017507174634, "grad_norm": 0.23164497230222347, "learning_rate": 2.1685957822665753e-06, "loss": 0.1647, "num_tokens": 4205738691.0, "step": 5514 }, { "epoch": 7.524383088133315, "grad_norm": 0.24011936187132654, "learning_rate": 2.1676394802425393e-06, "loss": 0.1622, "num_tokens": 4206429060.0, "step": 5515 }, { "epoch": 7.525748669091995, "grad_norm": 0.2338030440863196, "learning_rate": 2.166685872589201e-06, "loss": 0.1648, "num_tokens": 4207210208.0, "step": 5516 }, { "epoch": 7.527114250050676, "grad_norm": 0.22972510343289876, "learning_rate": 2.1657349595974646e-06, "loss": 0.1656, "num_tokens": 4207975644.0, "step": 5517 }, { "epoch": 7.528479831009356, "grad_norm": 0.2617730148378994, "learning_rate": 2.164786741557413e-06, "loss": 0.1569, "num_tokens": 4208678975.0, "step": 5518 }, { "epoch": 7.5298454119680365, "grad_norm": 0.23046668257373867, "learning_rate": 2.1638412187583056e-06, "loss": 0.1562, "num_tokens": 4209415556.0, "step": 5519 }, { "epoch": 7.5312109929267175, "grad_norm": 0.24650723150960943, "learning_rate": 2.1628983914885803e-06, "loss": 0.1649, "num_tokens": 4210201705.0, "step": 5520 }, { "epoch": 7.532576573885398, "grad_norm": 0.22596135246867338, "learning_rate": 2.1619582600358545e-06, "loss": 0.1684, "num_tokens": 4210976350.0, "step": 5521 }, { "epoch": 7.533942154844079, "grad_norm": 0.22159350095386565, "learning_rate": 2.1610208246869207e-06, "loss": 0.1664, "num_tokens": 4211788177.0, "step": 5522 }, { "epoch": 7.535307735802759, "grad_norm": 0.22589921602042196, "learning_rate": 2.1600860857277475e-06, "loss": 0.1555, "num_tokens": 4212533016.0, "step": 5523 }, { "epoch": 7.53667331676144, "grad_norm": 0.24237008658790982, "learning_rate": 2.1591540434434877e-06, "loss": 0.162, "num_tokens": 4213246739.0, "step": 5524 }, { "epoch": 7.53803889772012, "grad_norm": 0.23985502586362126, "learning_rate": 2.158224698118464e-06, "loss": 0.1622, "num_tokens": 4213917748.0, "step": 5525 }, { "epoch": 7.539404478678801, "grad_norm": 0.22047689494018607, "learning_rate": 2.1572980500361793e-06, "loss": 0.1653, "num_tokens": 4214723890.0, "step": 5526 }, { "epoch": 7.540770059637481, "grad_norm": 0.22773861956241115, "learning_rate": 2.1563740994793156e-06, "loss": 0.1632, "num_tokens": 4215514935.0, "step": 5527 }, { "epoch": 7.542135640596161, "grad_norm": 0.2322147621156714, "learning_rate": 2.1554528467297297e-06, "loss": 0.166, "num_tokens": 4216296505.0, "step": 5528 }, { "epoch": 7.543501221554842, "grad_norm": 0.2255593958272912, "learning_rate": 2.1545342920684544e-06, "loss": 0.1674, "num_tokens": 4217112885.0, "step": 5529 }, { "epoch": 7.544866802513522, "grad_norm": 0.23378911465010338, "learning_rate": 2.1536184357757023e-06, "loss": 0.1637, "num_tokens": 4217865474.0, "step": 5530 }, { "epoch": 7.546232383472203, "grad_norm": 0.2267790415453801, "learning_rate": 2.152705278130862e-06, "loss": 0.1622, "num_tokens": 4218624778.0, "step": 5531 }, { "epoch": 7.547597964430883, "grad_norm": 0.28671473966655814, "learning_rate": 2.1517948194124964e-06, "loss": 0.1582, "num_tokens": 4219373320.0, "step": 5532 }, { "epoch": 7.548963545389564, "grad_norm": 0.22445377573573266, "learning_rate": 2.1508870598983494e-06, "loss": 0.1618, "num_tokens": 4220088313.0, "step": 5533 }, { "epoch": 7.550329126348244, "grad_norm": 0.2273240052272407, "learning_rate": 2.149981999865337e-06, "loss": 0.1632, "num_tokens": 4220917675.0, "step": 5534 }, { "epoch": 7.551694707306925, "grad_norm": 0.21645296814240506, "learning_rate": 2.149079639589555e-06, "loss": 0.1566, "num_tokens": 4221707119.0, "step": 5535 }, { "epoch": 7.5530602882656055, "grad_norm": 0.23556768009320467, "learning_rate": 2.148179979346274e-06, "loss": 0.1599, "num_tokens": 4222388781.0, "step": 5536 }, { "epoch": 7.554425869224286, "grad_norm": 0.22983303940123676, "learning_rate": 2.1472830194099433e-06, "loss": 0.164, "num_tokens": 4223162294.0, "step": 5537 }, { "epoch": 7.555791450182967, "grad_norm": 0.23254554436580332, "learning_rate": 2.146388760054183e-06, "loss": 0.1624, "num_tokens": 4223915655.0, "step": 5538 }, { "epoch": 7.557157031141647, "grad_norm": 0.2624553968508459, "learning_rate": 2.145497201551795e-06, "loss": 0.1692, "num_tokens": 4224703165.0, "step": 5539 }, { "epoch": 7.558522612100328, "grad_norm": 0.22632991156559723, "learning_rate": 2.1446083441747556e-06, "loss": 0.1555, "num_tokens": 4225431541.0, "step": 5540 }, { "epoch": 7.559888193059008, "grad_norm": 0.23957595766670822, "learning_rate": 2.1437221881942144e-06, "loss": 0.161, "num_tokens": 4226139751.0, "step": 5541 }, { "epoch": 7.561253774017689, "grad_norm": 0.21881229495062984, "learning_rate": 2.1428387338805016e-06, "loss": 0.1584, "num_tokens": 4226983884.0, "step": 5542 }, { "epoch": 7.562619354976369, "grad_norm": 0.22715462051204646, "learning_rate": 2.1419579815031206e-06, "loss": 0.1594, "num_tokens": 4227715997.0, "step": 5543 }, { "epoch": 7.56398493593505, "grad_norm": 0.22813273952291502, "learning_rate": 2.14107993133075e-06, "loss": 0.1619, "num_tokens": 4228496400.0, "step": 5544 }, { "epoch": 7.56535051689373, "grad_norm": 0.2396291826799164, "learning_rate": 2.140204583631245e-06, "loss": 0.1637, "num_tokens": 4229234000.0, "step": 5545 }, { "epoch": 7.56671609785241, "grad_norm": 0.22194638813440393, "learning_rate": 2.139331938671636e-06, "loss": 0.1603, "num_tokens": 4230028081.0, "step": 5546 }, { "epoch": 7.568081678811091, "grad_norm": 0.23081771234128629, "learning_rate": 2.13846199671813e-06, "loss": 0.1606, "num_tokens": 4230778419.0, "step": 5547 }, { "epoch": 7.569447259769771, "grad_norm": 0.23340666750307787, "learning_rate": 2.1375947580361085e-06, "loss": 0.1613, "num_tokens": 4231480697.0, "step": 5548 }, { "epoch": 7.570812840728452, "grad_norm": 0.2379337012504938, "learning_rate": 2.1367302228901282e-06, "loss": 0.166, "num_tokens": 4232197174.0, "step": 5549 }, { "epoch": 7.572178421687132, "grad_norm": 0.2379019763119666, "learning_rate": 2.135868391543921e-06, "loss": 0.1625, "num_tokens": 4232922912.0, "step": 5550 }, { "epoch": 7.573544002645813, "grad_norm": 0.250811060839321, "learning_rate": 2.135009264260394e-06, "loss": 0.157, "num_tokens": 4233602241.0, "step": 5551 }, { "epoch": 7.5749095836044935, "grad_norm": 0.2359871715732713, "learning_rate": 2.134152841301632e-06, "loss": 0.1642, "num_tokens": 4234287631.0, "step": 5552 }, { "epoch": 7.5762751645631745, "grad_norm": 0.21733438065396665, "learning_rate": 2.1332991229288898e-06, "loss": 0.1613, "num_tokens": 4235094778.0, "step": 5553 }, { "epoch": 7.577640745521855, "grad_norm": 0.2347293452009135, "learning_rate": 2.132448109402601e-06, "loss": 0.1719, "num_tokens": 4235893463.0, "step": 5554 }, { "epoch": 7.579006326480535, "grad_norm": 0.23525492918761154, "learning_rate": 2.1315998009823725e-06, "loss": 0.1701, "num_tokens": 4236664179.0, "step": 5555 }, { "epoch": 7.580371907439216, "grad_norm": 0.21905559813879413, "learning_rate": 2.1307541979269884e-06, "loss": 0.1599, "num_tokens": 4237448857.0, "step": 5556 }, { "epoch": 7.581737488397896, "grad_norm": 0.22292081240207917, "learning_rate": 2.1299113004944034e-06, "loss": 0.1679, "num_tokens": 4238252299.0, "step": 5557 }, { "epoch": 7.583103069356577, "grad_norm": 0.23009664532887078, "learning_rate": 2.1290711089417497e-06, "loss": 0.1588, "num_tokens": 4238974110.0, "step": 5558 }, { "epoch": 7.584468650315257, "grad_norm": 0.2301347477449659, "learning_rate": 2.1282336235253327e-06, "loss": 0.1585, "num_tokens": 4239676460.0, "step": 5559 }, { "epoch": 7.585834231273938, "grad_norm": 0.22782764762904362, "learning_rate": 2.1273988445006344e-06, "loss": 0.1614, "num_tokens": 4240442537.0, "step": 5560 }, { "epoch": 7.587199812232618, "grad_norm": 0.2385555706511238, "learning_rate": 2.1265667721223097e-06, "loss": 0.1635, "num_tokens": 4241219742.0, "step": 5561 }, { "epoch": 7.588565393191299, "grad_norm": 0.22426871921556685, "learning_rate": 2.1257374066441857e-06, "loss": 0.1534, "num_tokens": 4241935772.0, "step": 5562 }, { "epoch": 7.589930974149979, "grad_norm": 0.22777939946231038, "learning_rate": 2.1249107483192677e-06, "loss": 0.1586, "num_tokens": 4242679511.0, "step": 5563 }, { "epoch": 7.591296555108659, "grad_norm": 0.23555130565751028, "learning_rate": 2.124086797399734e-06, "loss": 0.1638, "num_tokens": 4243388949.0, "step": 5564 }, { "epoch": 7.59266213606734, "grad_norm": 0.24221875650731395, "learning_rate": 2.1232655541369337e-06, "loss": 0.1626, "num_tokens": 4244127946.0, "step": 5565 }, { "epoch": 7.59402771702602, "grad_norm": 0.24108301122319606, "learning_rate": 2.122447018781395e-06, "loss": 0.1603, "num_tokens": 4244860431.0, "step": 5566 }, { "epoch": 7.595393297984701, "grad_norm": 0.22859831500280756, "learning_rate": 2.121631191582817e-06, "loss": 0.1587, "num_tokens": 4245602666.0, "step": 5567 }, { "epoch": 7.5967588789433815, "grad_norm": 0.23096959752641216, "learning_rate": 2.1208180727900717e-06, "loss": 0.1639, "num_tokens": 4246362040.0, "step": 5568 }, { "epoch": 7.5981244599020625, "grad_norm": 0.21843940024098793, "learning_rate": 2.1200076626512088e-06, "loss": 0.1582, "num_tokens": 4247185471.0, "step": 5569 }, { "epoch": 7.599490040860743, "grad_norm": 0.24927281037017776, "learning_rate": 2.119199961413448e-06, "loss": 0.1594, "num_tokens": 4247869239.0, "step": 5570 }, { "epoch": 7.600855621819424, "grad_norm": 0.22776022585813438, "learning_rate": 2.1183949693231843e-06, "loss": 0.1649, "num_tokens": 4248607679.0, "step": 5571 }, { "epoch": 7.602221202778104, "grad_norm": 0.22870548308964725, "learning_rate": 2.117592686625986e-06, "loss": 0.1614, "num_tokens": 4249340741.0, "step": 5572 }, { "epoch": 7.603586783736784, "grad_norm": 0.22683098331234824, "learning_rate": 2.1167931135665944e-06, "loss": 0.1633, "num_tokens": 4250090826.0, "step": 5573 }, { "epoch": 7.604952364695465, "grad_norm": 0.2231332807634199, "learning_rate": 2.1159962503889252e-06, "loss": 0.1683, "num_tokens": 4250919262.0, "step": 5574 }, { "epoch": 7.606317945654145, "grad_norm": 0.2155636440576396, "learning_rate": 2.1152020973360654e-06, "loss": 0.1666, "num_tokens": 4251735124.0, "step": 5575 }, { "epoch": 7.607683526612826, "grad_norm": 0.22977739752236445, "learning_rate": 2.1144106546502794e-06, "loss": 0.1558, "num_tokens": 4252507199.0, "step": 5576 }, { "epoch": 7.609049107571506, "grad_norm": 0.22616557731485556, "learning_rate": 2.113621922573e-06, "loss": 0.1615, "num_tokens": 4253297497.0, "step": 5577 }, { "epoch": 7.610414688530187, "grad_norm": 0.2286926385382071, "learning_rate": 2.1128359013448348e-06, "loss": 0.1673, "num_tokens": 4254082039.0, "step": 5578 }, { "epoch": 7.611780269488867, "grad_norm": 0.22396008303396614, "learning_rate": 2.112052591205567e-06, "loss": 0.1653, "num_tokens": 4254872067.0, "step": 5579 }, { "epoch": 7.613145850447548, "grad_norm": 0.2314282837683109, "learning_rate": 2.1112719923941486e-06, "loss": 0.1638, "num_tokens": 4255625883.0, "step": 5580 }, { "epoch": 7.614511431406228, "grad_norm": 0.22014724717980177, "learning_rate": 2.1104941051487077e-06, "loss": 0.1614, "num_tokens": 4256397935.0, "step": 5581 }, { "epoch": 7.615877012364908, "grad_norm": 0.23160068243099677, "learning_rate": 2.109718929706543e-06, "loss": 0.1599, "num_tokens": 4257103990.0, "step": 5582 }, { "epoch": 7.617242593323589, "grad_norm": 0.22705993999122392, "learning_rate": 2.1089464663041276e-06, "loss": 0.1624, "num_tokens": 4257898798.0, "step": 5583 }, { "epoch": 7.6186081742822696, "grad_norm": 0.21799461545738255, "learning_rate": 2.108176715177107e-06, "loss": 0.1534, "num_tokens": 4258763320.0, "step": 5584 }, { "epoch": 7.6199737552409506, "grad_norm": 0.22946450840977553, "learning_rate": 2.1074096765602986e-06, "loss": 0.1647, "num_tokens": 4259544492.0, "step": 5585 }, { "epoch": 7.621339336199631, "grad_norm": 0.23161275476133786, "learning_rate": 2.1066453506876915e-06, "loss": 0.1651, "num_tokens": 4260319941.0, "step": 5586 }, { "epoch": 7.622704917158312, "grad_norm": 0.23188470536162867, "learning_rate": 2.10588373779245e-06, "loss": 0.1628, "num_tokens": 4261094297.0, "step": 5587 }, { "epoch": 7.624070498116992, "grad_norm": 0.22924320418228888, "learning_rate": 2.1051248381069088e-06, "loss": 0.1604, "num_tokens": 4261875853.0, "step": 5588 }, { "epoch": 7.625436079075673, "grad_norm": 0.22235161872365514, "learning_rate": 2.104368651862575e-06, "loss": 0.1595, "num_tokens": 4262633431.0, "step": 5589 }, { "epoch": 7.626801660034353, "grad_norm": 0.24294410489781848, "learning_rate": 2.1036151792901284e-06, "loss": 0.1667, "num_tokens": 4263366019.0, "step": 5590 }, { "epoch": 7.628167240993033, "grad_norm": 0.22445717660867656, "learning_rate": 2.102864420619421e-06, "loss": 0.1602, "num_tokens": 4264138548.0, "step": 5591 }, { "epoch": 7.629532821951714, "grad_norm": 0.226305430420918, "learning_rate": 2.102116376079477e-06, "loss": 0.1582, "num_tokens": 4264915910.0, "step": 5592 }, { "epoch": 7.630898402910394, "grad_norm": 0.242667506040787, "learning_rate": 2.1013710458984925e-06, "loss": 0.1626, "num_tokens": 4265694151.0, "step": 5593 }, { "epoch": 7.632263983869075, "grad_norm": 0.24569131691511192, "learning_rate": 2.100628430303834e-06, "loss": 0.1592, "num_tokens": 4266397777.0, "step": 5594 }, { "epoch": 7.633629564827755, "grad_norm": 0.23534177657419672, "learning_rate": 2.0998885295220427e-06, "loss": 0.1669, "num_tokens": 4267159364.0, "step": 5595 }, { "epoch": 7.634995145786436, "grad_norm": 0.23167649934848064, "learning_rate": 2.0991513437788306e-06, "loss": 0.1687, "num_tokens": 4267961966.0, "step": 5596 }, { "epoch": 7.636360726745116, "grad_norm": 0.22933977655395796, "learning_rate": 2.0984168732990795e-06, "loss": 0.1653, "num_tokens": 4268702128.0, "step": 5597 }, { "epoch": 7.637726307703797, "grad_norm": 0.23567798360752543, "learning_rate": 2.097685118306846e-06, "loss": 0.1681, "num_tokens": 4269436421.0, "step": 5598 }, { "epoch": 7.6390918886624775, "grad_norm": 0.2201935687639933, "learning_rate": 2.0969560790253572e-06, "loss": 0.171, "num_tokens": 4270255269.0, "step": 5599 }, { "epoch": 7.640457469621158, "grad_norm": 0.23402958780088984, "learning_rate": 2.0962297556770093e-06, "loss": 0.165, "num_tokens": 4271045928.0, "step": 5600 }, { "epoch": 7.641823050579839, "grad_norm": 0.2209829480382855, "learning_rate": 2.0955061484833737e-06, "loss": 0.1606, "num_tokens": 4271813367.0, "step": 5601 }, { "epoch": 7.643188631538519, "grad_norm": 0.2123923221330911, "learning_rate": 2.094785257665193e-06, "loss": 0.1562, "num_tokens": 4272611906.0, "step": 5602 }, { "epoch": 7.6445542124972, "grad_norm": 0.20934295094152916, "learning_rate": 2.0940670834423758e-06, "loss": 0.1589, "num_tokens": 4273415167.0, "step": 5603 }, { "epoch": 7.64591979345588, "grad_norm": 0.22745727803756655, "learning_rate": 2.09335162603401e-06, "loss": 0.1639, "num_tokens": 4274183571.0, "step": 5604 }, { "epoch": 7.647285374414561, "grad_norm": 0.23438347963361575, "learning_rate": 2.0926388856583475e-06, "loss": 0.1643, "num_tokens": 4274919812.0, "step": 5605 }, { "epoch": 7.648650955373241, "grad_norm": 0.22571750691377118, "learning_rate": 2.0919288625328177e-06, "loss": 0.1652, "num_tokens": 4275708311.0, "step": 5606 }, { "epoch": 7.650016536331922, "grad_norm": 0.23671008708624533, "learning_rate": 2.091221556874015e-06, "loss": 0.157, "num_tokens": 4276429570.0, "step": 5607 }, { "epoch": 7.651382117290602, "grad_norm": 0.23139679546701525, "learning_rate": 2.0905169688977095e-06, "loss": 0.1587, "num_tokens": 4277148659.0, "step": 5608 }, { "epoch": 7.652747698249282, "grad_norm": 0.21894467829856853, "learning_rate": 2.0898150988188393e-06, "loss": 0.1753, "num_tokens": 4278018018.0, "step": 5609 }, { "epoch": 7.654113279207963, "grad_norm": 0.6557668962701796, "learning_rate": 2.0891159468515147e-06, "loss": 0.1559, "num_tokens": 4278707498.0, "step": 5610 }, { "epoch": 7.655478860166643, "grad_norm": 0.23883127184638833, "learning_rate": 2.0884195132090187e-06, "loss": 0.1596, "num_tokens": 4279407365.0, "step": 5611 }, { "epoch": 7.656844441125324, "grad_norm": 0.2402950305737766, "learning_rate": 2.0877257981037998e-06, "loss": 0.1652, "num_tokens": 4280197335.0, "step": 5612 }, { "epoch": 7.658210022084004, "grad_norm": 0.2360624182193832, "learning_rate": 2.0870348017474833e-06, "loss": 0.1624, "num_tokens": 4280878851.0, "step": 5613 }, { "epoch": 7.659575603042685, "grad_norm": 0.22804426428487826, "learning_rate": 2.086346524350861e-06, "loss": 0.1688, "num_tokens": 4281653166.0, "step": 5614 }, { "epoch": 7.6609411840013655, "grad_norm": 0.5434807770038477, "learning_rate": 2.0856609661238958e-06, "loss": 0.1586, "num_tokens": 4282414805.0, "step": 5615 }, { "epoch": 7.6623067649600465, "grad_norm": 0.2218984823156082, "learning_rate": 2.084978127275724e-06, "loss": 0.1619, "num_tokens": 4283220922.0, "step": 5616 }, { "epoch": 7.663672345918727, "grad_norm": 0.22752028443454805, "learning_rate": 2.0842980080146474e-06, "loss": 0.1552, "num_tokens": 4283942755.0, "step": 5617 }, { "epoch": 7.665037926877407, "grad_norm": 0.2209312197164226, "learning_rate": 2.0836206085481434e-06, "loss": 0.1693, "num_tokens": 4284729803.0, "step": 5618 }, { "epoch": 7.666403507836088, "grad_norm": 0.2264874711019726, "learning_rate": 2.0829459290828547e-06, "loss": 0.1595, "num_tokens": 4285479439.0, "step": 5619 }, { "epoch": 7.667769088794768, "grad_norm": 0.23524366562930718, "learning_rate": 2.0822739698245996e-06, "loss": 0.1602, "num_tokens": 4286246199.0, "step": 5620 }, { "epoch": 7.669134669753449, "grad_norm": 0.2326030733628879, "learning_rate": 2.0816047309783613e-06, "loss": 0.1625, "num_tokens": 4287012966.0, "step": 5621 }, { "epoch": 7.670500250712129, "grad_norm": 0.22843521283782578, "learning_rate": 2.080938212748298e-06, "loss": 0.1712, "num_tokens": 4287795106.0, "step": 5622 }, { "epoch": 7.67186583167081, "grad_norm": 0.23204770479621506, "learning_rate": 2.080274415337734e-06, "loss": 0.168, "num_tokens": 4288575740.0, "step": 5623 }, { "epoch": 7.67323141262949, "grad_norm": 0.21909627774246884, "learning_rate": 2.0796133389491642e-06, "loss": 0.1572, "num_tokens": 4289366514.0, "step": 5624 }, { "epoch": 7.674596993588171, "grad_norm": 0.26196489866359984, "learning_rate": 2.078954983784257e-06, "loss": 0.166, "num_tokens": 4290145448.0, "step": 5625 }, { "epoch": 7.675962574546851, "grad_norm": 0.22778695260810575, "learning_rate": 2.0782993500438467e-06, "loss": 0.1645, "num_tokens": 4290939396.0, "step": 5626 }, { "epoch": 7.677328155505531, "grad_norm": 0.21393939883106536, "learning_rate": 2.077646437927938e-06, "loss": 0.1613, "num_tokens": 4291772439.0, "step": 5627 }, { "epoch": 7.678693736464212, "grad_norm": 0.24923267836755772, "learning_rate": 2.076996247635707e-06, "loss": 0.1671, "num_tokens": 4292457538.0, "step": 5628 }, { "epoch": 7.680059317422892, "grad_norm": 0.2316628065276969, "learning_rate": 2.0763487793654985e-06, "loss": 0.1657, "num_tokens": 4293227394.0, "step": 5629 }, { "epoch": 7.681424898381573, "grad_norm": 0.2253695108915374, "learning_rate": 2.0757040333148256e-06, "loss": 0.1654, "num_tokens": 4293985898.0, "step": 5630 }, { "epoch": 7.6827904793402535, "grad_norm": 0.2369140313434243, "learning_rate": 2.0750620096803754e-06, "loss": 0.1604, "num_tokens": 4294802281.0, "step": 5631 }, { "epoch": 7.6841560602989345, "grad_norm": 0.24863466774122447, "learning_rate": 2.074422708658e-06, "loss": 0.164, "num_tokens": 4295554157.0, "step": 5632 }, { "epoch": 7.685521641257615, "grad_norm": 0.26615768876365253, "learning_rate": 2.0737861304427202e-06, "loss": 0.1625, "num_tokens": 4296332249.0, "step": 5633 }, { "epoch": 7.686887222216296, "grad_norm": 0.22956231589603956, "learning_rate": 2.0731522752287333e-06, "loss": 0.1624, "num_tokens": 4297064083.0, "step": 5634 }, { "epoch": 7.688252803174976, "grad_norm": 0.22163347733383945, "learning_rate": 2.072521143209397e-06, "loss": 0.169, "num_tokens": 4297864897.0, "step": 5635 }, { "epoch": 7.689618384133656, "grad_norm": 0.22488991388255175, "learning_rate": 2.0718927345772433e-06, "loss": 0.1679, "num_tokens": 4298641227.0, "step": 5636 }, { "epoch": 7.690983965092337, "grad_norm": 0.23150333768130102, "learning_rate": 2.071267049523973e-06, "loss": 0.1698, "num_tokens": 4299419364.0, "step": 5637 }, { "epoch": 7.692349546051017, "grad_norm": 0.23845821684385485, "learning_rate": 2.0706440882404557e-06, "loss": 0.1692, "num_tokens": 4300134093.0, "step": 5638 }, { "epoch": 7.693715127009698, "grad_norm": 0.2233182935272216, "learning_rate": 2.0700238509167298e-06, "loss": 0.1618, "num_tokens": 4300913438.0, "step": 5639 }, { "epoch": 7.695080707968378, "grad_norm": 0.23469968153697288, "learning_rate": 2.0694063377420013e-06, "loss": 0.1555, "num_tokens": 4301625451.0, "step": 5640 }, { "epoch": 7.696446288927059, "grad_norm": 0.23618231856719774, "learning_rate": 2.0687915489046497e-06, "loss": 0.1681, "num_tokens": 4302348341.0, "step": 5641 }, { "epoch": 7.697811869885739, "grad_norm": 0.22986242274178742, "learning_rate": 2.068179484592218e-06, "loss": 0.1617, "num_tokens": 4303083122.0, "step": 5642 }, { "epoch": 7.69917745084442, "grad_norm": 0.23578369046557582, "learning_rate": 2.067570144991421e-06, "loss": 0.1676, "num_tokens": 4303867621.0, "step": 5643 }, { "epoch": 7.7005430318031, "grad_norm": 0.23189934429872586, "learning_rate": 2.0669635302881423e-06, "loss": 0.1697, "num_tokens": 4304628453.0, "step": 5644 }, { "epoch": 7.70190861276178, "grad_norm": 0.2344651358793124, "learning_rate": 2.0663596406674335e-06, "loss": 0.1676, "num_tokens": 4305363500.0, "step": 5645 }, { "epoch": 7.703274193720461, "grad_norm": 0.23220448229746343, "learning_rate": 2.0657584763135156e-06, "loss": 0.1653, "num_tokens": 4306134091.0, "step": 5646 }, { "epoch": 7.7046397746791415, "grad_norm": 0.24080867512847068, "learning_rate": 2.0651600374097775e-06, "loss": 0.1676, "num_tokens": 4306888889.0, "step": 5647 }, { "epoch": 7.7060053556378225, "grad_norm": 0.22649088685280952, "learning_rate": 2.0645643241387765e-06, "loss": 0.1644, "num_tokens": 4307682161.0, "step": 5648 }, { "epoch": 7.707370936596503, "grad_norm": 0.23168309715116134, "learning_rate": 2.0639713366822396e-06, "loss": 0.1643, "num_tokens": 4308468276.0, "step": 5649 }, { "epoch": 7.708736517555184, "grad_norm": 0.2208436645644644, "learning_rate": 2.063381075221062e-06, "loss": 0.1543, "num_tokens": 4309211531.0, "step": 5650 }, { "epoch": 7.710102098513864, "grad_norm": 0.24792998449696957, "learning_rate": 2.062793539935305e-06, "loss": 0.1639, "num_tokens": 4309938678.0, "step": 5651 }, { "epoch": 7.711467679472545, "grad_norm": 0.22905534391981697, "learning_rate": 2.062208731004203e-06, "loss": 0.1736, "num_tokens": 4310728728.0, "step": 5652 }, { "epoch": 7.712833260431225, "grad_norm": 0.2293452171570073, "learning_rate": 2.061626648606153e-06, "loss": 0.1652, "num_tokens": 4311513003.0, "step": 5653 }, { "epoch": 7.714198841389905, "grad_norm": 0.2443424620264314, "learning_rate": 2.0610472929187244e-06, "loss": 0.1656, "num_tokens": 4312240353.0, "step": 5654 }, { "epoch": 7.715564422348586, "grad_norm": 0.24374680337491908, "learning_rate": 2.060470664118655e-06, "loss": 0.1632, "num_tokens": 4312948725.0, "step": 5655 }, { "epoch": 7.716930003307266, "grad_norm": 0.250226850174123, "learning_rate": 2.0598967623818466e-06, "loss": 0.1708, "num_tokens": 4313671782.0, "step": 5656 }, { "epoch": 7.718295584265947, "grad_norm": 0.25342983834768384, "learning_rate": 2.0593255878833733e-06, "loss": 0.1581, "num_tokens": 4314409234.0, "step": 5657 }, { "epoch": 7.719661165224627, "grad_norm": 0.2556771768555858, "learning_rate": 2.0587571407974756e-06, "loss": 0.163, "num_tokens": 4315219812.0, "step": 5658 }, { "epoch": 7.721026746183308, "grad_norm": 0.2270600704174803, "learning_rate": 2.0581914212975624e-06, "loss": 0.1644, "num_tokens": 4316004176.0, "step": 5659 }, { "epoch": 7.722392327141988, "grad_norm": 0.22462075497014708, "learning_rate": 2.057628429556209e-06, "loss": 0.1619, "num_tokens": 4316775897.0, "step": 5660 }, { "epoch": 7.723757908100669, "grad_norm": 0.24277685830968798, "learning_rate": 2.0570681657451614e-06, "loss": 0.1602, "num_tokens": 4317434547.0, "step": 5661 }, { "epoch": 7.725123489059349, "grad_norm": 0.25223063685405184, "learning_rate": 2.0565106300353297e-06, "loss": 0.1613, "num_tokens": 4318145943.0, "step": 5662 }, { "epoch": 7.7264890700180295, "grad_norm": 0.2559312241450896, "learning_rate": 2.055955822596797e-06, "loss": 0.1675, "num_tokens": 4318935831.0, "step": 5663 }, { "epoch": 7.7278546509767105, "grad_norm": 0.23307853996582323, "learning_rate": 2.0554037435988074e-06, "loss": 0.1653, "num_tokens": 4319712715.0, "step": 5664 }, { "epoch": 7.729220231935391, "grad_norm": 0.23633622955554284, "learning_rate": 2.0548543932097788e-06, "loss": 0.163, "num_tokens": 4320443316.0, "step": 5665 }, { "epoch": 7.730585812894072, "grad_norm": 0.2455185756758313, "learning_rate": 2.054307771597294e-06, "loss": 0.1619, "num_tokens": 4321094926.0, "step": 5666 }, { "epoch": 7.731951393852752, "grad_norm": 0.22575486184759685, "learning_rate": 2.0537638789281027e-06, "loss": 0.1607, "num_tokens": 4321899490.0, "step": 5667 }, { "epoch": 7.733316974811433, "grad_norm": 0.23578296058722556, "learning_rate": 2.0532227153681226e-06, "loss": 0.1612, "num_tokens": 4322661455.0, "step": 5668 }, { "epoch": 7.734682555770113, "grad_norm": 0.23569997217261338, "learning_rate": 2.052684281082441e-06, "loss": 0.1608, "num_tokens": 4323399130.0, "step": 5669 }, { "epoch": 7.736048136728794, "grad_norm": 0.23909288153668423, "learning_rate": 2.05214857623531e-06, "loss": 0.1665, "num_tokens": 4324181042.0, "step": 5670 }, { "epoch": 7.737413717687474, "grad_norm": 0.21650033339881938, "learning_rate": 2.0516156009901496e-06, "loss": 0.1589, "num_tokens": 4324963362.0, "step": 5671 }, { "epoch": 7.738779298646154, "grad_norm": 0.2185085629754561, "learning_rate": 2.051085355509548e-06, "loss": 0.157, "num_tokens": 4325731492.0, "step": 5672 }, { "epoch": 7.740144879604835, "grad_norm": 0.22665077790870772, "learning_rate": 2.0505578399552596e-06, "loss": 0.1609, "num_tokens": 4326579466.0, "step": 5673 }, { "epoch": 7.741510460563515, "grad_norm": 0.2420843286434037, "learning_rate": 2.0500330544882066e-06, "loss": 0.1626, "num_tokens": 4327254928.0, "step": 5674 }, { "epoch": 7.742876041522196, "grad_norm": 0.21984156823866063, "learning_rate": 2.0495109992684788e-06, "loss": 0.1582, "num_tokens": 4328031796.0, "step": 5675 }, { "epoch": 7.744241622480876, "grad_norm": 0.22008861924819703, "learning_rate": 2.048991674455333e-06, "loss": 0.1698, "num_tokens": 4328884460.0, "step": 5676 }, { "epoch": 7.745607203439557, "grad_norm": 0.2266049104319786, "learning_rate": 2.048475080207191e-06, "loss": 0.1679, "num_tokens": 4329672937.0, "step": 5677 }, { "epoch": 7.746972784398237, "grad_norm": 0.22814706169759635, "learning_rate": 2.047961216681645e-06, "loss": 0.1637, "num_tokens": 4330525777.0, "step": 5678 }, { "epoch": 7.7483383653569184, "grad_norm": 0.21482636546633013, "learning_rate": 2.047450084035452e-06, "loss": 0.1589, "num_tokens": 4331371549.0, "step": 5679 }, { "epoch": 7.749703946315599, "grad_norm": 0.23924548712841862, "learning_rate": 2.0469416824245368e-06, "loss": 0.1635, "num_tokens": 4332102751.0, "step": 5680 }, { "epoch": 7.751069527274279, "grad_norm": 0.23707449878166037, "learning_rate": 2.0464360120039893e-06, "loss": 0.1662, "num_tokens": 4332826462.0, "step": 5681 }, { "epoch": 7.75243510823296, "grad_norm": 0.2332907433357398, "learning_rate": 2.0459330729280693e-06, "loss": 0.1538, "num_tokens": 4333593586.0, "step": 5682 }, { "epoch": 7.75380068919164, "grad_norm": 0.22227697584953587, "learning_rate": 2.045432865350201e-06, "loss": 0.17, "num_tokens": 4334394964.0, "step": 5683 }, { "epoch": 7.755166270150321, "grad_norm": 0.24641471647896343, "learning_rate": 2.044935389422977e-06, "loss": 0.1605, "num_tokens": 4335121580.0, "step": 5684 }, { "epoch": 7.756531851109001, "grad_norm": 0.23110830376079727, "learning_rate": 2.0444406452981537e-06, "loss": 0.1642, "num_tokens": 4335891077.0, "step": 5685 }, { "epoch": 7.757897432067682, "grad_norm": 0.21981947332053675, "learning_rate": 2.043948633126657e-06, "loss": 0.1694, "num_tokens": 4336732907.0, "step": 5686 }, { "epoch": 7.759263013026362, "grad_norm": 0.22728343245644558, "learning_rate": 2.0434593530585798e-06, "loss": 0.1613, "num_tokens": 4337476574.0, "step": 5687 }, { "epoch": 7.760628593985043, "grad_norm": 0.2254435308476042, "learning_rate": 2.0429728052431778e-06, "loss": 0.1716, "num_tokens": 4338242953.0, "step": 5688 }, { "epoch": 7.761994174943723, "grad_norm": 0.23334919113182714, "learning_rate": 2.042488989828878e-06, "loss": 0.1597, "num_tokens": 4338940363.0, "step": 5689 }, { "epoch": 7.763359755902403, "grad_norm": 0.24782873893939325, "learning_rate": 2.0420079069632698e-06, "loss": 0.1755, "num_tokens": 4339717726.0, "step": 5690 }, { "epoch": 7.764725336861084, "grad_norm": 0.23448511595156893, "learning_rate": 2.0415295567931124e-06, "loss": 0.1577, "num_tokens": 4340436587.0, "step": 5691 }, { "epoch": 7.766090917819764, "grad_norm": 0.22926705753515136, "learning_rate": 2.041053939464328e-06, "loss": 0.1656, "num_tokens": 4341200795.0, "step": 5692 }, { "epoch": 7.767456498778445, "grad_norm": 0.2273107269925038, "learning_rate": 2.040581055122009e-06, "loss": 0.163, "num_tokens": 4341984068.0, "step": 5693 }, { "epoch": 7.7688220797371255, "grad_norm": 0.22361047194847913, "learning_rate": 2.040110903910411e-06, "loss": 0.1687, "num_tokens": 4342809273.0, "step": 5694 }, { "epoch": 7.7701876606958065, "grad_norm": 0.21466018367330225, "learning_rate": 2.0396434859729546e-06, "loss": 0.1599, "num_tokens": 4343622220.0, "step": 5695 }, { "epoch": 7.771553241654487, "grad_norm": 0.23164027787933114, "learning_rate": 2.0391788014522313e-06, "loss": 0.1632, "num_tokens": 4344358399.0, "step": 5696 }, { "epoch": 7.772918822613168, "grad_norm": 0.22661777065008248, "learning_rate": 2.0387168504899954e-06, "loss": 0.1663, "num_tokens": 4345143797.0, "step": 5697 }, { "epoch": 7.774284403571848, "grad_norm": 0.23498770018744924, "learning_rate": 2.0382576332271683e-06, "loss": 0.165, "num_tokens": 4345873593.0, "step": 5698 }, { "epoch": 7.775649984530528, "grad_norm": 0.23078714946272702, "learning_rate": 2.0378011498038373e-06, "loss": 0.1627, "num_tokens": 4346677840.0, "step": 5699 }, { "epoch": 7.777015565489209, "grad_norm": 0.21666362952645107, "learning_rate": 2.0373474003592554e-06, "loss": 0.1617, "num_tokens": 4347518016.0, "step": 5700 }, { "epoch": 7.778381146447889, "grad_norm": 0.27560741888472323, "learning_rate": 2.0368963850318423e-06, "loss": 0.1669, "num_tokens": 4348242363.0, "step": 5701 }, { "epoch": 7.77974672740657, "grad_norm": 0.2304284975952098, "learning_rate": 2.036448103959183e-06, "loss": 0.1667, "num_tokens": 4349004666.0, "step": 5702 }, { "epoch": 7.78111230836525, "grad_norm": 0.22658286695172702, "learning_rate": 2.0360025572780296e-06, "loss": 0.167, "num_tokens": 4349746566.0, "step": 5703 }, { "epoch": 7.782477889323931, "grad_norm": 0.23824088289721332, "learning_rate": 2.035559745124297e-06, "loss": 0.1594, "num_tokens": 4350522403.0, "step": 5704 }, { "epoch": 7.783843470282611, "grad_norm": 0.2354651711644883, "learning_rate": 2.035119667633071e-06, "loss": 0.1712, "num_tokens": 4351323412.0, "step": 5705 }, { "epoch": 7.785209051241292, "grad_norm": 0.2231076846835459, "learning_rate": 2.0346823249385964e-06, "loss": 0.1687, "num_tokens": 4352090319.0, "step": 5706 }, { "epoch": 7.786574632199972, "grad_norm": 0.2423707454473669, "learning_rate": 2.0342477171742903e-06, "loss": 0.1695, "num_tokens": 4352829996.0, "step": 5707 }, { "epoch": 7.787940213158652, "grad_norm": 0.23278357525332485, "learning_rate": 2.0338158444727327e-06, "loss": 0.1609, "num_tokens": 4353605066.0, "step": 5708 }, { "epoch": 7.789305794117333, "grad_norm": 0.23088120536019338, "learning_rate": 2.033386706965668e-06, "loss": 0.1519, "num_tokens": 4354325949.0, "step": 5709 }, { "epoch": 7.7906713750760135, "grad_norm": 0.22316114721375432, "learning_rate": 2.0329603047840076e-06, "loss": 0.1632, "num_tokens": 4355152884.0, "step": 5710 }, { "epoch": 7.7920369560346945, "grad_norm": 0.22193539125647546, "learning_rate": 2.032536638057829e-06, "loss": 0.1673, "num_tokens": 4355961251.0, "step": 5711 }, { "epoch": 7.793402536993375, "grad_norm": 0.2209789640136189, "learning_rate": 2.0321157069163745e-06, "loss": 0.166, "num_tokens": 4356759682.0, "step": 5712 }, { "epoch": 7.794768117952056, "grad_norm": 0.2298567191915556, "learning_rate": 2.0316975114880514e-06, "loss": 0.1609, "num_tokens": 4357551264.0, "step": 5713 }, { "epoch": 7.796133698910736, "grad_norm": 0.22451504309700104, "learning_rate": 2.031282051900434e-06, "loss": 0.1663, "num_tokens": 4358305887.0, "step": 5714 }, { "epoch": 7.797499279869417, "grad_norm": 0.22725607142980153, "learning_rate": 2.03086932828026e-06, "loss": 0.1662, "num_tokens": 4359098670.0, "step": 5715 }, { "epoch": 7.798864860828097, "grad_norm": 0.22606529697135308, "learning_rate": 2.030459340753434e-06, "loss": 0.1639, "num_tokens": 4359918950.0, "step": 5716 }, { "epoch": 7.800230441786777, "grad_norm": 0.24451090606679707, "learning_rate": 2.0300520894450256e-06, "loss": 0.1614, "num_tokens": 4360609556.0, "step": 5717 }, { "epoch": 7.801596022745458, "grad_norm": 0.2306603844569006, "learning_rate": 2.0296475744792686e-06, "loss": 0.1662, "num_tokens": 4361378677.0, "step": 5718 }, { "epoch": 7.802961603704138, "grad_norm": 0.2168357129117411, "learning_rate": 2.029245795979564e-06, "loss": 0.1593, "num_tokens": 4362154272.0, "step": 5719 }, { "epoch": 7.804327184662819, "grad_norm": 0.2367226112014442, "learning_rate": 2.028846754068477e-06, "loss": 0.1569, "num_tokens": 4362891551.0, "step": 5720 }, { "epoch": 7.805692765621499, "grad_norm": 0.22715230304945933, "learning_rate": 2.0284504488677378e-06, "loss": 0.1628, "num_tokens": 4363717693.0, "step": 5721 }, { "epoch": 7.80705834658018, "grad_norm": 0.24923957447817674, "learning_rate": 2.0280568804982413e-06, "loss": 0.1611, "num_tokens": 4364430068.0, "step": 5722 }, { "epoch": 7.80842392753886, "grad_norm": 0.2239697470869312, "learning_rate": 2.0276660490800493e-06, "loss": 0.1729, "num_tokens": 4365146133.0, "step": 5723 }, { "epoch": 7.809789508497541, "grad_norm": 0.23542700461522417, "learning_rate": 2.027277954732387e-06, "loss": 0.1534, "num_tokens": 4365900939.0, "step": 5724 }, { "epoch": 7.811155089456221, "grad_norm": 0.2216613721472706, "learning_rate": 2.0268925975736448e-06, "loss": 0.154, "num_tokens": 4366649632.0, "step": 5725 }, { "epoch": 7.8125206704149015, "grad_norm": 0.2269030139121805, "learning_rate": 2.026509977721379e-06, "loss": 0.1672, "num_tokens": 4367449728.0, "step": 5726 }, { "epoch": 7.8138862513735825, "grad_norm": 0.218361026103539, "learning_rate": 2.0261300952923098e-06, "loss": 0.1571, "num_tokens": 4368233746.0, "step": 5727 }, { "epoch": 7.815251832332263, "grad_norm": 0.23208570208713702, "learning_rate": 2.0257529504023237e-06, "loss": 0.1581, "num_tokens": 4368952106.0, "step": 5728 }, { "epoch": 7.816617413290944, "grad_norm": 0.23705078823700867, "learning_rate": 2.0253785431664707e-06, "loss": 0.1612, "num_tokens": 4369733180.0, "step": 5729 }, { "epoch": 7.817982994249624, "grad_norm": 0.2299089153466336, "learning_rate": 2.025006873698966e-06, "loss": 0.1633, "num_tokens": 4370484183.0, "step": 5730 }, { "epoch": 7.819348575208305, "grad_norm": 0.2390415799414524, "learning_rate": 2.024637942113191e-06, "loss": 0.1625, "num_tokens": 4371231738.0, "step": 5731 }, { "epoch": 7.820714156166985, "grad_norm": 0.23699975754806069, "learning_rate": 2.02427174852169e-06, "loss": 0.1625, "num_tokens": 4371947466.0, "step": 5732 }, { "epoch": 7.822079737125666, "grad_norm": 0.24114459665648982, "learning_rate": 2.023908293036172e-06, "loss": 0.167, "num_tokens": 4372706425.0, "step": 5733 }, { "epoch": 7.823445318084346, "grad_norm": 0.2318576601466569, "learning_rate": 2.023547575767512e-06, "loss": 0.1616, "num_tokens": 4373479554.0, "step": 5734 }, { "epoch": 7.824810899043026, "grad_norm": 0.2286933648806978, "learning_rate": 2.0231895968257503e-06, "loss": 0.1626, "num_tokens": 4374191308.0, "step": 5735 }, { "epoch": 7.826176480001707, "grad_norm": 0.22783816435490872, "learning_rate": 2.0228343563200904e-06, "loss": 0.1698, "num_tokens": 4374959117.0, "step": 5736 }, { "epoch": 7.827542060960387, "grad_norm": 0.22249672129837558, "learning_rate": 2.0224818543589e-06, "loss": 0.1626, "num_tokens": 4375759560.0, "step": 5737 }, { "epoch": 7.828907641919068, "grad_norm": 0.23352062420770436, "learning_rate": 2.0221320910497117e-06, "loss": 0.1645, "num_tokens": 4376504922.0, "step": 5738 }, { "epoch": 7.830273222877748, "grad_norm": 0.22390704223636193, "learning_rate": 2.0217850664992252e-06, "loss": 0.1636, "num_tokens": 4377284243.0, "step": 5739 }, { "epoch": 7.831638803836429, "grad_norm": 0.24637578668792356, "learning_rate": 2.0214407808133007e-06, "loss": 0.1599, "num_tokens": 4378062143.0, "step": 5740 }, { "epoch": 7.833004384795109, "grad_norm": 0.23071453219811044, "learning_rate": 2.0210992340969666e-06, "loss": 0.1672, "num_tokens": 4378843580.0, "step": 5741 }, { "epoch": 7.83436996575379, "grad_norm": 0.23710977812868814, "learning_rate": 2.0207604264544124e-06, "loss": 0.1698, "num_tokens": 4379566578.0, "step": 5742 }, { "epoch": 7.8357355467124705, "grad_norm": 0.22746995952955124, "learning_rate": 2.020424357988994e-06, "loss": 0.1609, "num_tokens": 4380375684.0, "step": 5743 }, { "epoch": 7.837101127671151, "grad_norm": 0.22590324113629393, "learning_rate": 2.020091028803233e-06, "loss": 0.1604, "num_tokens": 4381111621.0, "step": 5744 }, { "epoch": 7.838466708629832, "grad_norm": 0.236031069708685, "learning_rate": 2.0197604389988102e-06, "loss": 0.1638, "num_tokens": 4381847102.0, "step": 5745 }, { "epoch": 7.839832289588512, "grad_norm": 0.22523824978792492, "learning_rate": 2.019432588676578e-06, "loss": 0.1638, "num_tokens": 4382647528.0, "step": 5746 }, { "epoch": 7.841197870547193, "grad_norm": 0.2400957039473371, "learning_rate": 2.0191074779365467e-06, "loss": 0.1637, "num_tokens": 4383335820.0, "step": 5747 }, { "epoch": 7.842563451505873, "grad_norm": 0.22913069777931405, "learning_rate": 2.018785106877895e-06, "loss": 0.1638, "num_tokens": 4384130788.0, "step": 5748 }, { "epoch": 7.843929032464554, "grad_norm": 0.3513505194952437, "learning_rate": 2.0184654755989637e-06, "loss": 0.1629, "num_tokens": 4384836634.0, "step": 5749 }, { "epoch": 7.845294613423234, "grad_norm": 0.23031107382293126, "learning_rate": 2.018148584197258e-06, "loss": 0.1581, "num_tokens": 4385552495.0, "step": 5750 }, { "epoch": 7.846660194381915, "grad_norm": 0.23592917949762848, "learning_rate": 2.0178344327694498e-06, "loss": 0.1648, "num_tokens": 4386363784.0, "step": 5751 }, { "epoch": 7.848025775340595, "grad_norm": 0.21840499656084786, "learning_rate": 2.01752302141137e-06, "loss": 0.1577, "num_tokens": 4387160920.0, "step": 5752 }, { "epoch": 7.849391356299275, "grad_norm": 0.23721222503906794, "learning_rate": 2.01721435021802e-06, "loss": 0.1575, "num_tokens": 4387875521.0, "step": 5753 }, { "epoch": 7.850756937257956, "grad_norm": 0.2161465991853738, "learning_rate": 2.0169084192835594e-06, "loss": 0.1698, "num_tokens": 4388702632.0, "step": 5754 }, { "epoch": 7.852122518216636, "grad_norm": 0.22947820256753523, "learning_rate": 2.016605228701315e-06, "loss": 0.1676, "num_tokens": 4389482307.0, "step": 5755 }, { "epoch": 7.853488099175317, "grad_norm": 0.2216088296414581, "learning_rate": 2.0163047785637786e-06, "loss": 0.1625, "num_tokens": 4390253423.0, "step": 5756 }, { "epoch": 7.854853680133997, "grad_norm": 0.2291404959661472, "learning_rate": 2.0160070689626035e-06, "loss": 0.1693, "num_tokens": 4390994550.0, "step": 5757 }, { "epoch": 7.856219261092678, "grad_norm": 0.2426440145807451, "learning_rate": 2.0157120999886073e-06, "loss": 0.1669, "num_tokens": 4391709670.0, "step": 5758 }, { "epoch": 7.8575848420513585, "grad_norm": 0.2224656898226779, "learning_rate": 2.0154198717317733e-06, "loss": 0.1664, "num_tokens": 4392487443.0, "step": 5759 }, { "epoch": 7.8589504230100395, "grad_norm": 0.23013506086045252, "learning_rate": 2.0151303842812472e-06, "loss": 0.1651, "num_tokens": 4393185793.0, "step": 5760 }, { "epoch": 7.86031600396872, "grad_norm": 0.2423108466517439, "learning_rate": 2.0148436377253393e-06, "loss": 0.1676, "num_tokens": 4393888622.0, "step": 5761 }, { "epoch": 7.8616815849274, "grad_norm": 0.21993651990189325, "learning_rate": 2.014559632151523e-06, "loss": 0.1651, "num_tokens": 4394729601.0, "step": 5762 }, { "epoch": 7.863047165886081, "grad_norm": 0.23443289705509907, "learning_rate": 2.0142783676464377e-06, "loss": 0.1556, "num_tokens": 4395423553.0, "step": 5763 }, { "epoch": 7.864412746844761, "grad_norm": 0.22060208411532642, "learning_rate": 2.0139998442958834e-06, "loss": 0.1645, "num_tokens": 4396147564.0, "step": 5764 }, { "epoch": 7.865778327803442, "grad_norm": 0.2270216831338646, "learning_rate": 2.0137240621848257e-06, "loss": 0.1617, "num_tokens": 4396908754.0, "step": 5765 }, { "epoch": 7.867143908762122, "grad_norm": 0.23110200613689, "learning_rate": 2.013451021397394e-06, "loss": 0.1617, "num_tokens": 4397668319.0, "step": 5766 }, { "epoch": 7.868509489720803, "grad_norm": 0.22949546828353412, "learning_rate": 2.013180722016882e-06, "loss": 0.1664, "num_tokens": 4398412988.0, "step": 5767 }, { "epoch": 7.869875070679483, "grad_norm": 0.22913739954759696, "learning_rate": 2.0129131641257456e-06, "loss": 0.1699, "num_tokens": 4399222157.0, "step": 5768 }, { "epoch": 7.871240651638164, "grad_norm": 0.2369859196539888, "learning_rate": 2.012648347805605e-06, "loss": 0.1643, "num_tokens": 4399945292.0, "step": 5769 }, { "epoch": 7.872606232596844, "grad_norm": 0.23879210479187435, "learning_rate": 2.012386273137244e-06, "loss": 0.1613, "num_tokens": 4400650315.0, "step": 5770 }, { "epoch": 7.873971813555524, "grad_norm": 0.23240493322999725, "learning_rate": 2.012126940200611e-06, "loss": 0.1679, "num_tokens": 4401418763.0, "step": 5771 }, { "epoch": 7.875337394514205, "grad_norm": 0.23463897466912376, "learning_rate": 2.0118703490748164e-06, "loss": 0.1609, "num_tokens": 4402151992.0, "step": 5772 }, { "epoch": 7.8767029754728854, "grad_norm": 0.23305948495299283, "learning_rate": 2.011616499838136e-06, "loss": 0.1646, "num_tokens": 4402951899.0, "step": 5773 }, { "epoch": 7.8780685564315664, "grad_norm": 0.22931067591527607, "learning_rate": 2.0113653925680078e-06, "loss": 0.1612, "num_tokens": 4403722148.0, "step": 5774 }, { "epoch": 7.879434137390247, "grad_norm": 0.2310737475518446, "learning_rate": 2.0111170273410334e-06, "loss": 0.1629, "num_tokens": 4404501409.0, "step": 5775 }, { "epoch": 7.880799718348928, "grad_norm": 0.23150549262900103, "learning_rate": 2.0108714042329787e-06, "loss": 0.1666, "num_tokens": 4405223566.0, "step": 5776 }, { "epoch": 7.882165299307608, "grad_norm": 0.24904634431804173, "learning_rate": 2.0106285233187716e-06, "loss": 0.169, "num_tokens": 4406055533.0, "step": 5777 }, { "epoch": 7.883530880266289, "grad_norm": 0.22853614951874485, "learning_rate": 2.0103883846725065e-06, "loss": 0.1566, "num_tokens": 4406827390.0, "step": 5778 }, { "epoch": 7.884896461224969, "grad_norm": 0.23626205255810287, "learning_rate": 2.0101509883674377e-06, "loss": 0.1739, "num_tokens": 4407627513.0, "step": 5779 }, { "epoch": 7.886262042183649, "grad_norm": 0.2259126194016761, "learning_rate": 2.0099163344759857e-06, "loss": 0.1676, "num_tokens": 4408432536.0, "step": 5780 }, { "epoch": 7.88762762314233, "grad_norm": 0.22755757564161358, "learning_rate": 2.009684423069732e-06, "loss": 0.1648, "num_tokens": 4409200492.0, "step": 5781 }, { "epoch": 7.88899320410101, "grad_norm": 0.22455917801932146, "learning_rate": 2.0094552542194236e-06, "loss": 0.1643, "num_tokens": 4410004029.0, "step": 5782 }, { "epoch": 7.890358785059691, "grad_norm": 0.2279879572280495, "learning_rate": 2.0092288279949696e-06, "loss": 0.1602, "num_tokens": 4410748180.0, "step": 5783 }, { "epoch": 7.891724366018371, "grad_norm": 0.22776380631196652, "learning_rate": 2.009005144465443e-06, "loss": 0.1584, "num_tokens": 4411509424.0, "step": 5784 }, { "epoch": 7.893089946977052, "grad_norm": 0.22992693555650937, "learning_rate": 2.0087842036990806e-06, "loss": 0.1669, "num_tokens": 4412307429.0, "step": 5785 }, { "epoch": 7.894455527935732, "grad_norm": 0.2343023393780146, "learning_rate": 2.00856600576328e-06, "loss": 0.1647, "num_tokens": 4413097684.0, "step": 5786 }, { "epoch": 7.895821108894413, "grad_norm": 0.2206469111461284, "learning_rate": 2.008350550724606e-06, "loss": 0.163, "num_tokens": 4413939799.0, "step": 5787 }, { "epoch": 7.897186689853093, "grad_norm": 0.2273835199921538, "learning_rate": 2.0081378386487837e-06, "loss": 0.1567, "num_tokens": 4414626930.0, "step": 5788 }, { "epoch": 7.8985522708117735, "grad_norm": 0.22444434641868596, "learning_rate": 2.0079278696007023e-06, "loss": 0.1666, "num_tokens": 4415347297.0, "step": 5789 }, { "epoch": 7.8999178517704545, "grad_norm": 0.2292479245554363, "learning_rate": 2.0077206436444145e-06, "loss": 0.163, "num_tokens": 4416152337.0, "step": 5790 }, { "epoch": 7.901283432729135, "grad_norm": 0.22513725162752082, "learning_rate": 2.007516160843135e-06, "loss": 0.1668, "num_tokens": 4416914935.0, "step": 5791 }, { "epoch": 7.902649013687816, "grad_norm": 0.23789816066470512, "learning_rate": 2.007314421259245e-06, "loss": 0.1663, "num_tokens": 4417712092.0, "step": 5792 }, { "epoch": 7.904014594646496, "grad_norm": 0.23557190487652216, "learning_rate": 2.0071154249542837e-06, "loss": 0.164, "num_tokens": 4418459100.0, "step": 5793 }, { "epoch": 7.905380175605177, "grad_norm": 0.22926588464163575, "learning_rate": 2.006919171988958e-06, "loss": 0.1711, "num_tokens": 4419199699.0, "step": 5794 }, { "epoch": 7.906745756563857, "grad_norm": 0.24726729400574046, "learning_rate": 2.0067256624231356e-06, "loss": 0.1628, "num_tokens": 4419940643.0, "step": 5795 }, { "epoch": 7.908111337522538, "grad_norm": 0.24011560569304177, "learning_rate": 2.0065348963158477e-06, "loss": 0.1577, "num_tokens": 4420648349.0, "step": 5796 }, { "epoch": 7.909476918481218, "grad_norm": 0.2214240962632836, "learning_rate": 2.0063468737252896e-06, "loss": 0.1539, "num_tokens": 4421479249.0, "step": 5797 }, { "epoch": 7.910842499439898, "grad_norm": 0.2534549203367501, "learning_rate": 2.0061615947088177e-06, "loss": 0.1707, "num_tokens": 4422236236.0, "step": 5798 }, { "epoch": 7.912208080398579, "grad_norm": 0.21514106150071363, "learning_rate": 2.0059790593229532e-06, "loss": 0.1633, "num_tokens": 4423086163.0, "step": 5799 }, { "epoch": 7.913573661357259, "grad_norm": 0.22136366756049505, "learning_rate": 2.005799267623381e-06, "loss": 0.1618, "num_tokens": 4423859762.0, "step": 5800 }, { "epoch": 7.91493924231594, "grad_norm": 0.24422040870992315, "learning_rate": 2.005622219664945e-06, "loss": 0.1589, "num_tokens": 4424599649.0, "step": 5801 }, { "epoch": 7.91630482327462, "grad_norm": 0.2285332856698791, "learning_rate": 2.005447915501657e-06, "loss": 0.1599, "num_tokens": 4425355237.0, "step": 5802 }, { "epoch": 7.917670404233301, "grad_norm": 0.2274700619622788, "learning_rate": 2.00527635518669e-06, "loss": 0.1589, "num_tokens": 4426050404.0, "step": 5803 }, { "epoch": 7.919035985191981, "grad_norm": 0.22806067447867245, "learning_rate": 2.005107538772377e-06, "loss": 0.1614, "num_tokens": 4426810280.0, "step": 5804 }, { "epoch": 7.920401566150662, "grad_norm": 0.2204717274411671, "learning_rate": 2.004941466310219e-06, "loss": 0.1631, "num_tokens": 4427622329.0, "step": 5805 }, { "epoch": 7.9217671471093425, "grad_norm": 0.23261722589902298, "learning_rate": 2.004778137850877e-06, "loss": 0.1613, "num_tokens": 4428385467.0, "step": 5806 }, { "epoch": 7.923132728068023, "grad_norm": 0.22765915015892715, "learning_rate": 2.0046175534441746e-06, "loss": 0.1653, "num_tokens": 4429176875.0, "step": 5807 }, { "epoch": 7.924498309026704, "grad_norm": 0.24113808654934155, "learning_rate": 2.0044597131391005e-06, "loss": 0.1601, "num_tokens": 4429877116.0, "step": 5808 }, { "epoch": 7.925863889985384, "grad_norm": 0.23003361406376932, "learning_rate": 2.0043046169838036e-06, "loss": 0.166, "num_tokens": 4430638504.0, "step": 5809 }, { "epoch": 7.927229470944065, "grad_norm": 0.23594396696863296, "learning_rate": 2.0041522650255984e-06, "loss": 0.1751, "num_tokens": 4431409379.0, "step": 5810 }, { "epoch": 7.928595051902745, "grad_norm": 0.22624894422767877, "learning_rate": 2.0040026573109598e-06, "loss": 0.1668, "num_tokens": 4432204821.0, "step": 5811 }, { "epoch": 7.929960632861426, "grad_norm": 0.2323060424036415, "learning_rate": 2.0038557938855255e-06, "loss": 0.1745, "num_tokens": 4432994112.0, "step": 5812 }, { "epoch": 7.931326213820106, "grad_norm": 0.2369827939123705, "learning_rate": 2.0037116747941e-06, "loss": 0.1606, "num_tokens": 4433710631.0, "step": 5813 }, { "epoch": 7.932691794778787, "grad_norm": 0.22812058939469612, "learning_rate": 2.003570300080647e-06, "loss": 0.1617, "num_tokens": 4434489370.0, "step": 5814 }, { "epoch": 7.934057375737467, "grad_norm": 0.22354799730178548, "learning_rate": 2.0034316697882926e-06, "loss": 0.1681, "num_tokens": 4435291244.0, "step": 5815 }, { "epoch": 7.935422956696147, "grad_norm": 0.22645349374305923, "learning_rate": 2.0032957839593266e-06, "loss": 0.1694, "num_tokens": 4436063853.0, "step": 5816 }, { "epoch": 7.936788537654828, "grad_norm": 0.23311485342332058, "learning_rate": 2.0031626426352035e-06, "loss": 0.166, "num_tokens": 4436795008.0, "step": 5817 }, { "epoch": 7.938154118613508, "grad_norm": 0.23052284802768483, "learning_rate": 2.003032245856538e-06, "loss": 0.162, "num_tokens": 4437604560.0, "step": 5818 }, { "epoch": 7.939519699572189, "grad_norm": 0.23153608092506478, "learning_rate": 2.002904593663109e-06, "loss": 0.1617, "num_tokens": 4438322936.0, "step": 5819 }, { "epoch": 7.940885280530869, "grad_norm": 0.22916282215184833, "learning_rate": 2.0027796860938575e-06, "loss": 0.1686, "num_tokens": 4439182873.0, "step": 5820 }, { "epoch": 7.94225086148955, "grad_norm": 0.22583611489705843, "learning_rate": 2.0026575231868873e-06, "loss": 0.1627, "num_tokens": 4439938860.0, "step": 5821 }, { "epoch": 7.9436164424482305, "grad_norm": 0.22713970553575472, "learning_rate": 2.0025381049794645e-06, "loss": 0.1614, "num_tokens": 4440665271.0, "step": 5822 }, { "epoch": 7.9449820234069115, "grad_norm": 0.22368015856935375, "learning_rate": 2.0024214315080195e-06, "loss": 0.163, "num_tokens": 4441501480.0, "step": 5823 }, { "epoch": 7.946347604365592, "grad_norm": 0.22659122828025474, "learning_rate": 2.0023075028081433e-06, "loss": 0.1724, "num_tokens": 4442324090.0, "step": 5824 }, { "epoch": 7.947713185324272, "grad_norm": 0.22882080487759981, "learning_rate": 2.002196318914591e-06, "loss": 0.166, "num_tokens": 4443171301.0, "step": 5825 }, { "epoch": 7.949078766282953, "grad_norm": 0.23451597926457088, "learning_rate": 2.00208787986128e-06, "loss": 0.163, "num_tokens": 4443929898.0, "step": 5826 }, { "epoch": 7.950444347241633, "grad_norm": 0.24043210321592215, "learning_rate": 2.001982185681291e-06, "loss": 0.1629, "num_tokens": 4444648349.0, "step": 5827 }, { "epoch": 7.951809928200314, "grad_norm": 0.2529564157296662, "learning_rate": 2.001879236406866e-06, "loss": 0.1695, "num_tokens": 4445342332.0, "step": 5828 }, { "epoch": 7.953175509158994, "grad_norm": 0.2281000859942969, "learning_rate": 2.0017790320694103e-06, "loss": 0.1571, "num_tokens": 4446061984.0, "step": 5829 }, { "epoch": 7.954541090117675, "grad_norm": 0.2317148264340576, "learning_rate": 2.001681572699492e-06, "loss": 0.1693, "num_tokens": 4446847852.0, "step": 5830 }, { "epoch": 7.955906671076355, "grad_norm": 0.23600375494856554, "learning_rate": 2.001586858326842e-06, "loss": 0.172, "num_tokens": 4447628621.0, "step": 5831 }, { "epoch": 7.957272252035036, "grad_norm": 0.2438173260041368, "learning_rate": 2.0014948889803537e-06, "loss": 0.156, "num_tokens": 4448297652.0, "step": 5832 }, { "epoch": 7.958637832993716, "grad_norm": 0.23376242238024142, "learning_rate": 2.001405664688082e-06, "loss": 0.161, "num_tokens": 4449126333.0, "step": 5833 }, { "epoch": 7.960003413952396, "grad_norm": 0.22917588956974264, "learning_rate": 2.0013191854772457e-06, "loss": 0.1599, "num_tokens": 4449876491.0, "step": 5834 }, { "epoch": 7.961368994911077, "grad_norm": 0.23614418517987648, "learning_rate": 2.0012354513742276e-06, "loss": 0.1618, "num_tokens": 4450570027.0, "step": 5835 }, { "epoch": 7.962734575869757, "grad_norm": 0.23976807978458395, "learning_rate": 2.0011544624045685e-06, "loss": 0.1655, "num_tokens": 4451297840.0, "step": 5836 }, { "epoch": 7.964100156828438, "grad_norm": 0.25022630745065805, "learning_rate": 2.001076218592977e-06, "loss": 0.1688, "num_tokens": 4452001197.0, "step": 5837 }, { "epoch": 7.9654657377871185, "grad_norm": 0.23063561912550065, "learning_rate": 2.001000719963321e-06, "loss": 0.1599, "num_tokens": 4452751462.0, "step": 5838 }, { "epoch": 7.9668313187457995, "grad_norm": 0.23641585980107854, "learning_rate": 2.000927966538631e-06, "loss": 0.1643, "num_tokens": 4453564004.0, "step": 5839 }, { "epoch": 7.96819689970448, "grad_norm": 0.22947367409534156, "learning_rate": 2.000857958341102e-06, "loss": 0.1606, "num_tokens": 4454328075.0, "step": 5840 }, { "epoch": 7.969562480663161, "grad_norm": 0.24899531725306326, "learning_rate": 2.00079069539209e-06, "loss": 0.1564, "num_tokens": 4455088262.0, "step": 5841 }, { "epoch": 7.970928061621841, "grad_norm": 0.22776711538512154, "learning_rate": 2.0007261777121147e-06, "loss": 0.164, "num_tokens": 4455918584.0, "step": 5842 }, { "epoch": 7.972293642580521, "grad_norm": 0.28848476696571973, "learning_rate": 2.0006644053208566e-06, "loss": 0.1633, "num_tokens": 4456703635.0, "step": 5843 }, { "epoch": 7.973659223539202, "grad_norm": 0.28223751443149914, "learning_rate": 2.000605378237161e-06, "loss": 0.1701, "num_tokens": 4457505976.0, "step": 5844 }, { "epoch": 7.975024804497882, "grad_norm": 0.23072968678468886, "learning_rate": 2.000549096479034e-06, "loss": 0.1667, "num_tokens": 4458334172.0, "step": 5845 }, { "epoch": 7.976390385456563, "grad_norm": 0.2377879780340767, "learning_rate": 2.0004955600636437e-06, "loss": 0.1674, "num_tokens": 4459129273.0, "step": 5846 }, { "epoch": 7.977755966415243, "grad_norm": 0.2304807991146126, "learning_rate": 2.000444769007323e-06, "loss": 0.1666, "num_tokens": 4459890208.0, "step": 5847 }, { "epoch": 7.979121547373924, "grad_norm": 0.23782296374662132, "learning_rate": 2.000396723325566e-06, "loss": 0.164, "num_tokens": 4460602020.0, "step": 5848 }, { "epoch": 7.980487128332604, "grad_norm": 0.248930851723284, "learning_rate": 2.000351423033029e-06, "loss": 0.1556, "num_tokens": 4461314684.0, "step": 5849 }, { "epoch": 7.981852709291285, "grad_norm": 0.22867970105483598, "learning_rate": 2.0003088681435307e-06, "loss": 0.1642, "num_tokens": 4462016477.0, "step": 5850 }, { "epoch": 7.983218290249965, "grad_norm": 0.21992654659741154, "learning_rate": 2.0002690586700542e-06, "loss": 0.159, "num_tokens": 4462791454.0, "step": 5851 }, { "epoch": 7.984583871208645, "grad_norm": 0.22730552138059296, "learning_rate": 2.000231994624741e-06, "loss": 0.1615, "num_tokens": 4463584158.0, "step": 5852 }, { "epoch": 7.985949452167326, "grad_norm": 0.22994005126658953, "learning_rate": 2.000197676018901e-06, "loss": 0.1682, "num_tokens": 4464412790.0, "step": 5853 }, { "epoch": 7.9873150331260065, "grad_norm": 0.2353155350076908, "learning_rate": 2.000166102863001e-06, "loss": 0.1571, "num_tokens": 4465099606.0, "step": 5854 }, { "epoch": 7.9886806140846875, "grad_norm": 0.23455558669834398, "learning_rate": 2.000137275166673e-06, "loss": 0.1614, "num_tokens": 4465810649.0, "step": 5855 }, { "epoch": 7.990046195043368, "grad_norm": 0.2316126251583514, "learning_rate": 2.000111192938713e-06, "loss": 0.1584, "num_tokens": 4466574254.0, "step": 5856 }, { "epoch": 7.991411776002049, "grad_norm": 0.25012097239487935, "learning_rate": 2.0000878561870745e-06, "loss": 0.1685, "num_tokens": 4467355861.0, "step": 5857 }, { "epoch": 7.992777356960729, "grad_norm": 0.22797698087241033, "learning_rate": 2.0000672649188782e-06, "loss": 0.1671, "num_tokens": 4468156350.0, "step": 5858 }, { "epoch": 7.99414293791941, "grad_norm": 0.22354187453919552, "learning_rate": 2.0000494191404064e-06, "loss": 0.1636, "num_tokens": 4468921301.0, "step": 5859 }, { "epoch": 7.99550851887809, "grad_norm": 0.23043010466861574, "learning_rate": 2.0000343188571013e-06, "loss": 0.1703, "num_tokens": 4469761157.0, "step": 5860 }, { "epoch": 7.99687409983677, "grad_norm": 0.2402547414806548, "learning_rate": 2.0000219640735703e-06, "loss": 0.1635, "num_tokens": 4470542645.0, "step": 5861 }, { "epoch": 7.998239680795451, "grad_norm": 0.21872558371132542, "learning_rate": 2.0000123547935814e-06, "loss": 0.1575, "num_tokens": 4471318562.0, "step": 5862 }, { "epoch": 7.999605261754131, "grad_norm": 0.22836278980538602, "learning_rate": 2.000005491020068e-06, "loss": 0.167, "num_tokens": 4472100076.0, "step": 5863 }, { "epoch": 8.0, "grad_norm": 0.22836278980538602, "learning_rate": 2.0000013727551216e-06, "loss": 0.1635, "num_tokens": 4472337088.0, "step": 5864 }, { "epoch": 8.0, "step": 5864, "total_flos": 1.6577411091894632e+19, "train_loss": 0.26399336885381364, "train_runtime": 845440.9049, "train_samples_per_second": 0.887, "train_steps_per_second": 0.007 } ], "logging_steps": 1, "max_steps": 5864, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6577411091894632e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }